changeset 2559:b474ecb5bde4 trunk

[svn] revise str_to_utf8(): - a new utf8 validator using the libguess DFA has been implemented. str_to_utf8() tries utf8 validation first. - default conversion from ISO-8859-1 is enabled regardless of chardet. - libguess and librcd are always compiled in. - some libguess cleanups.
author yaz
date Wed, 21 Feb 2007 04:25:12 -0800
parents d4ecf0a91222
children 5511818eb9d3
files ChangeLog configure.ac src/audacious/build_stamp.c src/audacious/strings.c src/libguess/Makefile src/libguess/guess.c src/libguess/guess.scm src/libguess/guess_tab.c src/libguess/libguess.h src/librcd/Makefile
diffstat 10 files changed, 77 insertions(+), 217 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Wed Feb 21 03:52:52 2007 -0800
+++ b/ChangeLog	Wed Feb 21 04:25:12 2007 -0800
@@ -1,3 +1,11 @@
+2007-02-21 11:52:52 +0000  Yoshiki Yazawa <yaz@cc.rim.or.jp>
+  revision [4126]
+  - vfs layer doesn't provide fdopen.
+  
+  trunk/src/libid3tag/file.c |    2 +-
+  1 file changed, 1 insertion(+), 1 deletion(-)
+
+
 2007-02-21 00:17:08 +0000  George Averill <nhjm449@gmail.com>
   revision [4124]
   - Make unplayable files dialog resizable and centered on the screen. (closes #802)
--- a/configure.ac	Wed Feb 21 03:52:52 2007 -0800
+++ b/configure.ac	Wed Feb 21 04:25:12 2007 -0800
@@ -188,6 +188,11 @@
 		;;
 esac
 
+dnl libguess always compiled in
+dnl ========================
+SUBDIR_GUESS="libguess librcd"
+CHARDET_LIBS="../libguess/libguess.a ../librcd/librcd.a"
+
 dnl chardet support
 dnl ========================
 AC_ARG_ENABLE(chardet,
@@ -195,8 +200,6 @@
 			enable_chardet=$enableval, enable_chardet=no)
 if test "x$enable_chardet" = xyes; then
     AC_DEFINE(USE_CHARDET, 1, [Define if character set detection enabled] )
-    AC_CHECK_LIB(guess, guess_jp, [SUBDIR_GUESS=[''] CHARDET_LIBS=['-lguess']], [SUBDIR_GUESS=['libguess'] CHARDET_LIBS=['../libguess/libguess.a']])
-    AC_CHECK_LIB(rcd, rcdGetRussianCharset, [CHARDET_LIBS=["$CHARDET_LIBS -lrcd"]], [SUBDIR_GUESS=["$SUBDIR_GUESS librcd"] CHARDET_LIBS=["$CHARDET_LIBS ../librcd/librcd.a"]])    
     AC_CHECK_LIB(udet_c, detectCharset, [AC_DEFINE(HAVE_UDET, 1,[Define if the system has Mozilla universal character detector library]) CHARDET_LIBS=["$CHARDET_LIBS -ludet -ludet_c"]])
 fi
 AM_CONDITIONAL(USE_CHARDET,test "x$enable_chardet" = xyes)
--- a/src/audacious/build_stamp.c	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/audacious/build_stamp.c	Wed Feb 21 04:25:12 2007 -0800
@@ -1,2 +1,2 @@
 #include <glib.h>
-const gchar *svn_stamp = "20070221-4124";
+const gchar *svn_stamp = "20070221-4126";
--- a/src/audacious/strings.c	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/audacious/strings.c	Wed Feb 21 04:25:12 2007 -0800
@@ -34,13 +34,11 @@
 
 #include "main.h"
 
-#ifdef USE_CHARDET
-    #include "../libguess/libguess.h"
-    #include "../librcd/librcd.h"
+#include "../libguess/libguess.h"
+#include "../librcd/librcd.h"
 #ifdef HAVE_UDET
     #include <libudet_c.h>
 #endif
-#endif
 
 /*
  * escape_shell_chars()
@@ -203,18 +201,32 @@
      * if the string is already converted into utf-8.
      * chardet_to_utf8() would convert a valid utf-8 string into a
      * different utf-8 string, if fallback encodings were supplied and
-     * the given string could be treated as a string in one of fallback
-     * encodings. To avoid this, the order of evaluation has been
-     * changed. (It might cause a drawback?)
+     * the given string could be treated as a string in one of the
+     * fallback encodings. To avoid this, g_utf8_validate() used to
+     * be called first, at the top of the evaluation order.
+     */
+
+    /* Note 2: g_utf8_validate() has the so-called encapsulated utf-8
+     * problem, so chardet_to_utf8() later took its place.
      */
+
+    /* Note 3: With the introduction of madplug, the problem of
+     * converting ISO-8859-1 to UTF-8 arose. The g_convert() call
+     * near the end of chardet_to_utf8() can cope with it, but only
+     * behind a utf8 validation guard where g_utf8_validate() was. The
+     * new dfa_validate_utf8() uses libguess' DFA engine to validate
+     * utf-8 and can properly distinguish examples of encapsulated
+     * utf-8. It is considered safe to use as a guard.
+     */
+    
+    /* already UTF-8? */
+    if (dfa_validate_utf8(str, strlen(str)))
+        return g_strdup(str);
+
     /* chardet encoding detector */
     if ((out_str = chardet_to_utf8(str, strlen(str), NULL, NULL, NULL)))
         return out_str;
 
-    /* already UTF-8? */
-    if (g_utf8_validate(str, -1, NULL))
-        return g_strdup(str);
-
     /* assume encoding associated with locale */
     if ((out_str = g_locale_to_utf8(str, -1, NULL, NULL, NULL)))
         return out_str;
@@ -335,15 +347,9 @@
 		}
 	}
 
-#ifdef USE_CHARDET
-	/* many tag libraries return 2byte latin1 utf8 character as
-	   converted 8bit iso-8859-1 character, if they are asked to return
-	   latin1 string.
-	 */
 	if(!ret){
 		ret = g_convert(str, len, "UTF-8", "ISO-8859-1", bytes_read, bytes_write, error);
 	}
-#endif
 
 	if(ret){
 		if(g_utf8_validate(ret, -1, NULL))
--- a/src/libguess/Makefile	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/libguess/Makefile	Wed Feb 21 04:25:12 2007 -0800
@@ -14,6 +14,3 @@
 OBJECTS = ${SOURCES:.c=.o}
 
 include ../../mk/objective.mk
-
-libguess.a: $(OBJECTS)
-	$(AR) cq $@ $(OBJECTS)
--- a/src/libguess/guess.c	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/libguess/guess.c	Wed Feb 21 04:25:12 2007 -0800
@@ -98,6 +98,23 @@
 /* include DFA table generated by guess.scm */
 #include "guess_tab.c"
 
+
+/* Return 1 if buf[0..buflen) is complete, well-formed UTF-8, else 0. */
+int dfa_validate_utf8(const char *buf, int buflen)
+{
+    int i;
+    guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar);
+
+    for (i = 0; i < buflen && DFA_ALIVE(utf8); i++) {
+        int c = (unsigned char) buf[i]; /* never negative, even if char is signed */
+        DFA_NEXT(utf8, c);
+    }
+    /* A NUL should only pass between characters (assumes guess_utf8_st). */
+    if (DFA_ALIVE(utf8))
+        DFA_NEXT(utf8, '\0');
+    return DFA_ALIVE(utf8) ? 1 : 0;
+}
+
 const char *guess_jp(const char *buf, int buflen)
 {
     int i;
--- a/src/libguess/guess.scm	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/libguess/guess.scm	Wed Feb 21 04:25:12 2007 -0800
@@ -231,33 +231,32 @@
 ;;;
 ;;; UCS-2LE
 ;;;
-
-(define-dfa ucs2le
-  (init
-   ((#xff) le 1.0)
-   (((#x00 #x7f)) ascii 1.0)
-   (((#x00 #xff)) multi 1.0))
-  (le
-   ((#xfe) init 1.0))
-  (ascii
-   ((#x00) init 1.0))
-  (multi
-   (((#x00 #xff)) init 1.0)))
+; (define-dfa ucs2le
+;   (init
+;    ((#xff) le 1.0)
+;    (((#x00 #x7f)) ascii 1.0)
+;    (((#x00 #xff)) multi 1.0))
+;   (le
+;    ((#xfe) init 1.0))
+;   (ascii
+;    ((#x00) init 1.0))
+;   (multi
+;    (((#x00 #xff)) init 1.0)))
 
 ;;;
 ;;; UCS-2BE
 ;;;
-(define-dfa ucs2be
-  (init
-   ((#xfe) be 1.0)
-   ((#x00) ascii 1.0)
-   (((#x00 #xff)) multi 1.0))
-  (be
-   ((#xff) init 1.0))
-  (ascii
-   (((#x00 #x7f)) init 1.0))
-  (multi
-   (((#x00 #xff)) init 1.0)))
+; (define-dfa ucs2be
+;   (init
+;    ((#xfe) be 1.0)
+;    ((#x00) ascii 1.0)
+;    (((#x00 #xff)) multi 1.0))
+;   (be
+;    ((#xff) init 1.0))
+;   (ascii
+;    (((#x00 #x7f)) init 1.0))
+;   (multi
+;    (((#x00 #xff)) init 1.0)))
 
 
 ;;;
--- a/src/libguess/guess_tab.c	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/libguess/guess_tab.c	Wed Feb 21 04:25:12 2007 -0800
@@ -259,174 +259,6 @@
  {  4, 1.0   }, /* 5byte_more -> 4byte_more */
 };
 
-static signed char guess_ucs2le_st[][256] = {
- { /* state init */
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
- },
- { /* state le */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3, -1,
- },
- { /* state ascii */
-  4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- },
- { /* state multi */
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
- },
-};
-
-static guess_arc guess_ucs2le_ar[] = {
- {  1, 1.0   }, /* init -> le */
- {  2, 1.0   }, /* init -> ascii */
- {  3, 1.0   }, /* init -> multi */
- {  0, 1.0   }, /* le -> init */
- {  0, 1.0   }, /* ascii -> init */
- {  0, 1.0   }, /* multi -> init */
-};
-
-static signed char guess_ucs2be_st[][256] = {
- { /* state init */
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
- },
- { /* state be */
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3,
- },
- { /* state ascii */
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- },
- { /* state multi */
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
- },
-};
-
-static guess_arc guess_ucs2be_ar[] = {
- {  1, 1.0   }, /* init -> be */
- {  2, 1.0   }, /* init -> ascii */
- {  3, 1.0   }, /* init -> multi */
- {  0, 1.0   }, /* be -> init */
- {  0, 1.0   }, /* ascii -> init */
- {  0, 1.0   }, /* multi -> init */
-};
-
 static signed char guess_big5_st[][256] = {
  { /* state init */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
--- a/src/libguess/libguess.h	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/libguess/libguess.h	Wed Feb 21 04:25:12 2007 -0800
@@ -43,5 +43,6 @@
 const char *guess_tw(const char *buf, int buflen);
 const char *guess_cn(const char *buf, int buflen);
 const char *guess_kr(const char *buf, int buflen);
+int dfa_validate_utf8(const char *buf, int buflen);
 
 #endif
--- a/src/librcd/Makefile	Wed Feb 21 03:52:52 2007 -0800
+++ b/src/librcd/Makefile	Wed Feb 21 04:25:12 2007 -0800
@@ -14,6 +14,3 @@
 OBJECTS = ${SOURCES:.c=.o}
 
 include ../../mk/objective.mk
-
-libguess.a: $(OBJECTS)
-	$(AR) cq $@ $(OBJECTS)