Mercurial > audlegacy
changeset 2559:b474ecb5bde4 trunk
[svn] revise str_to_utf8():
- a new UTF-8 validator using the libguess DFA has been implemented; str_to_utf8() now tries UTF-8 validation first.
- the default fallback conversion from ISO-8859-1 is now enabled regardless of whether chardet support is configured.
- libguess and librcd are always compiled in.
- some libguess cleanups.
author | yaz |
---|---|
date | Wed, 21 Feb 2007 04:25:12 -0800 |
parents | d4ecf0a91222 |
children | 5511818eb9d3 |
files | ChangeLog configure.ac src/audacious/build_stamp.c src/audacious/strings.c src/libguess/Makefile src/libguess/guess.c src/libguess/guess.scm src/libguess/guess_tab.c src/libguess/libguess.h src/librcd/Makefile |
diffstat | 10 files changed, 77 insertions(+), 217 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog Wed Feb 21 03:52:52 2007 -0800 +++ b/ChangeLog Wed Feb 21 04:25:12 2007 -0800 @@ -1,3 +1,11 @@ +2007-02-21 11:52:52 +0000 Yoshiki Yazawa <yaz@cc.rim.or.jp> + revision [4126] + - vfs layer doesn't provide fdopen. + + trunk/src/libid3tag/file.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + + 2007-02-21 00:17:08 +0000 George Averill <nhjm449@gmail.com> revision [4124] - Make unplayable files dialog resizable and centered on the screen. (closes #802)
--- a/configure.ac Wed Feb 21 03:52:52 2007 -0800 +++ b/configure.ac Wed Feb 21 04:25:12 2007 -0800 @@ -188,6 +188,11 @@ ;; esac +dnl libguess always compiled in +dnl ======================== +SUBDIR_GUESS="libguess librcd" +CHARDET_LIBS="../libguess/libguess.a ../librcd/librcd.a" + dnl chardet support dnl ======================== AC_ARG_ENABLE(chardet, @@ -195,8 +200,6 @@ enable_chardet=$enableval, enable_chardet=no) if test "x$enable_chardet" = xyes; then AC_DEFINE(USE_CHARDET, 1, [Define if character set detection enabled] ) - AC_CHECK_LIB(guess, guess_jp, [SUBDIR_GUESS=[''] CHARDET_LIBS=['-lguess']], [SUBDIR_GUESS=['libguess'] CHARDET_LIBS=['../libguess/libguess.a']]) - AC_CHECK_LIB(rcd, rcdGetRussianCharset, [CHARDET_LIBS=["$CHARDET_LIBS -lrcd"]], [SUBDIR_GUESS=["$SUBDIR_GUESS librcd"] CHARDET_LIBS=["$CHARDET_LIBS ../librcd/librcd.a"]]) AC_CHECK_LIB(udet_c, detectCharset, [AC_DEFINE(HAVE_UDET, 1,[Define if the system has Mozilla universal character detector library]) CHARDET_LIBS=["$CHARDET_LIBS -ludet -ludet_c"]]) fi AM_CONDITIONAL(USE_CHARDET,test "x$enable_chardet" = xyes)
--- a/src/audacious/build_stamp.c Wed Feb 21 03:52:52 2007 -0800 +++ b/src/audacious/build_stamp.c Wed Feb 21 04:25:12 2007 -0800 @@ -1,2 +1,2 @@ #include <glib.h> -const gchar *svn_stamp = "20070221-4124"; +const gchar *svn_stamp = "20070221-4126";
--- a/src/audacious/strings.c Wed Feb 21 03:52:52 2007 -0800 +++ b/src/audacious/strings.c Wed Feb 21 04:25:12 2007 -0800 @@ -34,13 +34,11 @@ #include "main.h" -#ifdef USE_CHARDET - #include "../libguess/libguess.h" - #include "../librcd/librcd.h" +#include "../libguess/libguess.h" +#include "../librcd/librcd.h" #ifdef HAVE_UDET #include <libudet_c.h> #endif -#endif /* * escape_shell_chars() @@ -203,18 +201,32 @@ * if the string is already converted into utf-8. * chardet_to_utf8() would convert a valid utf-8 string into a * different utf-8 string, if fallback encodings were supplied and - * the given string could be treated as a string in one of fallback - * encodings. To avoid this, the order of evaluation has been - * changed. (It might cause a drawback?) + * the given string could be treated as a string in one of + * fallback encodings. To avoid this, g_utf8_validate() had been + * used at the top of evaluation. + */ + + /* Note 2: g_utf8_validate() has so called encapsulated utf-8 + * problem, thus chardet_to_utf8() took the place of that. */ + + /* Note 3: As introducing madplug, the problem of conversion from + * ISO-8859-1 to UTF-8 arose. This may be coped with g_convert() + * located near the end of chardet_to_utf8(), but it requires utf8 + * validation guard where g_utf8_validate() was. New + * dfa_validate_utf8() employs libguess' DFA engine to validate + * utf-8 and can properly distinguish examples of encapsulated + * utf-8. It is considered to be safe to use as a guard. + */ + + /* already UTF-8? */ + if (dfa_validate_utf8(str, strlen(str))) + return g_strdup(str); + /* chardet encoding detector */ if ((out_str = chardet_to_utf8(str, strlen(str), NULL, NULL, NULL))) return out_str; - /* already UTF-8? 
*/ - if (g_utf8_validate(str, -1, NULL)) - return g_strdup(str); - /* assume encoding associated with locale */ if ((out_str = g_locale_to_utf8(str, -1, NULL, NULL, NULL))) return out_str; @@ -335,15 +347,9 @@ } } -#ifdef USE_CHARDET - /* many tag libraries return 2byte latin1 utf8 character as - converted 8bit iso-8859-1 character, if they are asked to return - latin1 string. - */ if(!ret){ ret = g_convert(str, len, "UTF-8", "ISO-8859-1", bytes_read, bytes_write, error); } -#endif if(ret){ if(g_utf8_validate(ret, -1, NULL))
--- a/src/libguess/Makefile Wed Feb 21 03:52:52 2007 -0800 +++ b/src/libguess/Makefile Wed Feb 21 04:25:12 2007 -0800 @@ -14,6 +14,3 @@ OBJECTS = ${SOURCES:.c=.o} include ../../mk/objective.mk - -libguess.a: $(OBJECTS) - $(AR) cq $@ $(OBJECTS)
--- a/src/libguess/guess.c Wed Feb 21 03:52:52 2007 -0800 +++ b/src/libguess/guess.c Wed Feb 21 04:25:12 2007 -0800 @@ -98,6 +98,23 @@ /* include DFA table generated by guess.scm */ #include "guess_tab.c" + +int dfa_validate_utf8(const char *buf, int buflen) +{ + int i; + guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); + + for (i = 0; i < buflen; i++) { + int c = (unsigned char) buf[i]; + + if (DFA_ALIVE(utf8)) + DFA_NEXT(utf8, c); + else + return 0; + } + return 1; +} + const char *guess_jp(const char *buf, int buflen) { int i;
--- a/src/libguess/guess.scm Wed Feb 21 03:52:52 2007 -0800 +++ b/src/libguess/guess.scm Wed Feb 21 04:25:12 2007 -0800 @@ -231,33 +231,32 @@ ;;; ;;; UCS-2LE ;;; - -(define-dfa ucs2le - (init - ((#xff) le 1.0) - (((#x00 #x7f)) ascii 1.0) - (((#x00 #xff)) multi 1.0)) - (le - ((#xfe) init 1.0)) - (ascii - ((#x00) init 1.0)) - (multi - (((#x00 #xff)) init 1.0))) +; (define-dfa ucs2le +; (init +; ((#xff) le 1.0) +; (((#x00 #x7f)) ascii 1.0) +; (((#x00 #xff)) multi 1.0)) +; (le +; ((#xfe) init 1.0)) +; (ascii +; ((#x00) init 1.0)) +; (multi +; (((#x00 #xff)) init 1.0))) ;;; ;;; UCS-2BE ;;; -(define-dfa ucs2be - (init - ((#xfe) be 1.0) - ((#x00) ascii 1.0) - (((#x00 #xff)) multi 1.0)) - (be - ((#xff) init 1.0)) - (ascii - (((#x00 #x7f)) init 1.0)) - (multi - (((#x00 #xff)) init 1.0))) +; (define-dfa ucs2be +; (init +; ((#xfe) be 1.0) +; ((#x00) ascii 1.0) +; (((#x00 #xff)) multi 1.0)) +; (be +; ((#xff) init 1.0)) +; (ascii +; (((#x00 #x7f)) init 1.0)) +; (multi +; (((#x00 #xff)) init 1.0))) ;;;
--- a/src/libguess/guess_tab.c Wed Feb 21 03:52:52 2007 -0800 +++ b/src/libguess/guess_tab.c Wed Feb 21 04:25:12 2007 -0800 @@ -259,174 +259,6 @@ { 4, 1.0 }, /* 5byte_more -> 4byte_more */ }; -static signed char guess_ucs2le_st[][256] = { - { /* state init */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - }, - { /* state le */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
-1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, -1, - }, - { /* state ascii */ - 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - }, - { /* state multi */ - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - }, -}; - -static guess_arc guess_ucs2le_ar[] = { - { 1, 1.0 }, /* init -> le */ - { 2, 1.0 }, /* init -> ascii */ - { 3, 1.0 }, /* init -> multi */ - { 0, 1.0 }, /* le -> init */ - { 0, 1.0 }, /* ascii -> init */ - { 0, 1.0 }, /* multi -> init */ -}; - -static signed char guess_ucs2be_st[][256] = { - { /* state init */ - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - }, - { /* state be */ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, - }, - { /* state ascii */ - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - }, - { /* state multi */ - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - }, -}; - -static guess_arc guess_ucs2be_ar[] = { - { 1, 1.0 }, /* init -> be */ - { 2, 1.0 }, /* init -> ascii */ - { 3, 1.0 }, /* init -> multi */ - { 0, 1.0 }, /* be -> init */ - { 0, 1.0 }, /* ascii -> init */ - { 0, 1.0 }, /* multi -> init */ -}; - static signed char guess_big5_st[][256] = { { /* state init */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
--- a/src/libguess/libguess.h Wed Feb 21 03:52:52 2007 -0800 +++ b/src/libguess/libguess.h Wed Feb 21 04:25:12 2007 -0800 @@ -43,5 +43,6 @@ const char *guess_tw(const char *buf, int buflen); const char *guess_cn(const char *buf, int buflen); const char *guess_kr(const char *buf, int buflen); +int dfa_validate_utf8(const char *buf, int buflen); #endif