changeset 2:754a4550c64e

- added Arabic, Greek, Hebrew and Turkish DFAs - new UCS-2LE/BE DFAs - arabic_impl.c now uses the Arabic DFAs - common DFA macros have been moved to dfa.h - minor cleanups
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Wed, 11 Jun 2008 00:11:30 +0900
parents 04f2be1c8464
children 70e2c306231e
files Makefile arabic_impl.c cjk_impl.c dfa.h guess.scm guess_tab.c russian_impl.c turkish_impl.c
diffstat 8 files changed, 781 insertions(+), 143 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sat Dec 01 03:27:31 2007 +0900
+++ b/Makefile	Wed Jun 11 00:11:30 2008 +0900
@@ -55,6 +55,8 @@
 clean:
 	rm -f $(LIBS) $(OBJS) test
 
+mostlyclean: clean
+	rm -f guess_tab.c
+
 distclean: clean
 	rm -f *~ core*
-
--- a/arabic_impl.c	Sat Dec 01 03:27:31 2007 +0900
+++ b/arabic_impl.c	Wed Jun 11 00:11:30 2008 +0900
@@ -1,28 +1,58 @@
 #include "libguess.h"
+#include "dfa.h"
+#include "guess_tab.c"
 
-static const char *_guess_ar(const unsigned char *ptr, int size)
+#define ORDER_AR &utf8, &iso8859_6, &cp1256
+
+const char *guess_ar(const char *buf, int buflen)
 {
     int i;
+    guess_dfa cp1256 = DFA_INIT(guess_cp1256_st, guess_cp1256_ar);
+    guess_dfa iso8859_6 = DFA_INIT(guess_iso8859_6_st, guess_iso8859_6_ar);
+    guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar);
+    guess_dfa *top = NULL;
 
-    for (i = 0; i < size; i++)
-    {
-        if ((ptr[i] >= 0x80 && ptr[i] <= 0x9F) ||
-            ptr[i] == 0xA1 || ptr[i] == 0xA2 || ptr[i] == 0xA3 ||
-            (ptr[i] >= 0xA5 && ptr[i] <= 0xAB) ||
-            (ptr[i] >= 0xAE && ptr[i] <= 0xBA) ||
-            ptr[i] == 0xBC || ptr[i] == 0xBD ||
-            ptr[i] == 0xBE || ptr[i] == 0xC0 ||
-            (ptr[i] >= 0xDB && ptr[i] <= 0xDF) || (ptr[i] >= 0xF3))
-            return "CP1256";
+    guess_dfa *order[] = { ORDER_AR, NULL };
+
+    for (i = 0; i < buflen; i++) {
+        int c = (unsigned char) buf[i];
+
+        if (DFA_ALIVE(cp1256)) {
+            if (!DFA_ALIVE(iso8859_6) && !DFA_ALIVE(utf8))
+                return "CP1256";
+            DFA_NEXT(cp1256, c);
+        }
+        if (DFA_ALIVE(iso8859_6)) {
+            if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(utf8))
+                return "ISO-8859-6";
+            DFA_NEXT(iso8859_6, c);
+        }
+        if (DFA_ALIVE(utf8)) {
+            if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(iso8859_6))
+                return "UTF-8";
+            DFA_NEXT(utf8, c);
+        }
+
+        if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(iso8859_6) && !DFA_ALIVE(utf8)) {
+            /* we ran out of possibilities */
+            return NULL;
+        }
     }
 
-    return "ISO-8859-6";
-}
+    /* Now we have ambiguous input.  Pick the highest score.  If more than
+       one candidate ties, pick the one listed first in ORDER_AR. */
+    for (i = 0; order[i] != NULL; i++) {
+        if (order[i]->state >= 0) { //DFA_ALIVE()
+            if (top == NULL || order[i]->score > top->score)
+                top = order[i];
+        }
+    }
 
-const char *guess_ar(const char *ptr, int size)
-{
-    if (dfa_validate_utf8(ptr, size))
+    if (top == &cp1256)
+        return "CP1256";
+    if (top == &utf8)
         return "UTF-8";
-
-    return _guess_ar((const unsigned char *)ptr, size);
+    if (top == &iso8859_6)
+        return "ISO-8859-6";
+    return NULL;
 }
--- a/cjk_impl.c	Sat Dec 01 03:27:31 2007 +0900
+++ b/cjk_impl.c	Wed Jun 11 00:11:30 2008 +0900
@@ -4,14 +4,14 @@
  */
 
 /*
- * guess.c - guessing character encoding 
+ * guess.c - guessing character encoding
  *
  *   Copyright (c) 2000-2003 Shiro Kawai, All rights reserved.
- * 
+ *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
  *   are met:
- * 
+ *
  *   1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *
@@ -38,13 +38,7 @@
  */
 
 #include "libguess.h"
-
-/* take precedence if scores are same. you can customize the order as: */
-/* ORDER_** &highest, &second, ... &lowest */
-#define ORDER_JP &utf8, &sjis, &eucj
-#define ORDER_TW &utf8, &big5
-#define ORDER_CN &utf8, &gb2312, &gb18030
-#define ORDER_KR &utf8, &euck, &johab
+#include "dfa.h"
 
 /* workaround for that glib's g_convert can't convert
    properly from UCS-2BE/LE trailing after BOM. */
@@ -59,40 +53,12 @@
 const char UCS_2LE[] = "UCS-2LE";
 #endif
 
-/* data types */
-typedef struct guess_arc_rec
-{
-    unsigned int next;          /* next state */
-    double score;               /* score */
-} guess_arc;
-
-typedef struct guess_dfa_rec
-{
-    signed char (*states)[256];
-    guess_arc *arcs;
-    int state;
-    double score;
-} guess_dfa;
-
-/* macros */
-#define DFA_INIT(st, ar) \
-    { st, ar, 0, 1.0 }
-
-#define DFA_NEXT(dfa, ch)                               \
-    do {                                                \
-        int arc__;                                      \
-        if (dfa.state >= 0) {                           \
-            arc__ = dfa.states[dfa.state][ch];          \
-            if (arc__ < 0) {                            \
-                dfa.state = -1;                         \
-            } else {                                    \
-                dfa.state = dfa.arcs[arc__].next;       \
-                dfa.score *= dfa.arcs[arc__].score;     \
-            }                                           \
-        }                                               \
-    } while (0)
-
-#define DFA_ALIVE(dfa)  (dfa.state >= 0)
+/* take precedence if scores are same. you can customize the order as: */
+/* ORDER_** &highest, &second, ... &lowest */
+#define ORDER_JP &utf8, &sjis, &eucj
+#define ORDER_TW &utf8, &big5
+#define ORDER_CN &utf8, &gb2312, &gb18030
+#define ORDER_KR &utf8, &euck, &johab
 
 /* include DFA table generated by guess.scm */
 #include "guess_tab.c"
@@ -116,7 +82,7 @@
 
     if(DFA_ALIVE(utf8))
         return 1;
-    else 
+    else
         return 0;
 }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dfa.h	Wed Jun 11 00:11:30 2008 +0900
@@ -0,0 +1,39 @@
+#ifndef __DFA_H__
+#define __DFA_H__
+
+/* data types */
+typedef struct guess_arc_rec
+{
+    unsigned int next;          /* next state */
+    double score;               /* score */
+} guess_arc;
+
+typedef struct guess_dfa_rec
+{
+    signed char (*states)[256];
+    guess_arc *arcs;
+    int state;
+    double score;
+} guess_dfa;
+
+/* macros */
+#define DFA_INIT(st, ar) \
+    { st, ar, 0, 1.0 }
+
+#define DFA_NEXT(dfa, ch)                               \
+    do {                                                \
+        int arc__;                                      \
+        if (dfa.state >= 0) {                           \
+            arc__ = dfa.states[dfa.state][ch];          \
+            if (arc__ < 0) {                            \
+                dfa.state = -1;                         \
+            } else {                                    \
+                dfa.state = dfa.arcs[arc__].next;       \
+                dfa.score *= dfa.arcs[arc__].score;     \
+            }                                           \
+        }                                               \
+    } while (0)
+
+#define DFA_ALIVE(dfa)  (dfa.state >= 0)
+
+#endif
--- a/guess.scm	Sat Dec 01 03:27:31 2007 +0900
+++ b/guess.scm	Wed Jun 11 00:11:30 2008 +0900
@@ -5,24 +5,24 @@
 
 ;;;
 ;;; Auxiliary script to generate japanese code guessing table
-;;;  
+;;;
 ;;;   Copyright (c) 2000-2003 Shiro Kawai, All rights reserved.
-;;;   
+;;;
 ;;;   Redistribution and use in source and binary forms, with or without
 ;;;   modification, are permitted provided that the following conditions
 ;;;   are met:
-;;;   
+;;;
 ;;;   1. Redistributions of source code must retain the above copyright
 ;;;      notice, this list of conditions and the following disclaimer.
-;;;  
+;;;
 ;;;   2. Redistributions in binary form must reproduce the above copyright
 ;;;      notice, this list of conditions and the following disclaimer in the
 ;;;      documentation and/or other materials provided with the distribution.
-;;;  
+;;;
 ;;;   3. Neither the name of the authors nor the names of its contributors
 ;;;      may be used to endorse or promote products derived from this
 ;;;      software without specific prior written permission.
-;;;  
+;;;
 ;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 ;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 ;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -34,7 +34,7 @@
 ;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 ;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 ;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-;;;  
+;;;
 ;;;  $Id: guess.scm,v 1.3 2003/07/05 03:29:10 shirok Exp $
 ;;;
 
@@ -174,19 +174,16 @@
    (((#x00 #x7f)) init         1.0)   ; ASCII range
    ((#x8e)        jis0201_kana 0.8)   ; JISX 0201 kana
    ((#x8f)        jis0213_2    0.95)  ; JISX 0213 plane 2
-   (((#xa1 #xfe)) jis0213_1    1.0)   ; JISX 0213 plane 1
-   )
+   (((#xa1 #xfe)) jis0213_1    1.0))   ; JISX 0213 plane 1
   ;; jis x 0201 kana
   (jis0201_kana
-   (((#xa1 #xdf)) init         1.0)
-   )
+   (((#xa1 #xdf)) init         1.0))
   ;; jis x 0208 and jis x 0213 plane 1
   (jis0213_1
    (((#xa1 #xfe)) init         1.0))
   ;; jis x 0213 plane 2
   (jis0213_2
-   (((#xa1 #xfe)) init         1.0))
-  )
+   (((#xa1 #xfe)) init         1.0)))
 
 ;;;
 ;;; Shift_JIS
@@ -201,8 +198,7 @@
    (((#xf0 #xfc)) jis0213      0.95)    ;jisx0213 plane 2
    (((#xfd #xff)) init         0.8))    ;vendor extension
   (jis0213
-   (((#x40 #x7e) (#x80 #xfc)) init 1.0))
-  )
+   (((#x40 #x7e) (#x80 #xfc)) init 1.0)))
 
 ;;;
 ;;; UTF-8
@@ -225,42 +221,57 @@
   (4byte_more
    (((#x80 #xbf)) 3byte_more   1.0))
   (5byte_more
-   (((#x80 #xbf)) 4byte_more   1.0))
-  )
+   (((#x80 #xbf)) 4byte_more   1.0)))
 
 ;;;
 ;;; UCS-2LE
 ;;;
-; (define-dfa ucs2le
-;   (init
-;    ((#xff) le 1.0)
-;    (((#x00 #x7f)) ascii 1.0)
-;    (((#x00 #xff)) multi 1.0))
-;   (le
-;    ((#xfe) init 1.0))
-;   (ascii
-;    ((#x00) init 1.0))
-;   (multi
-;    (((#x00 #xff)) init 1.0)))
+(define-dfa ucs2le
+  (init
+   ((#xfe) bom-be 1.0)
+   ((#xff) bom-le 1.0)
+   (((#x00 #xfd)) byte2 1.0))
+  (bom-le
+   (((#x00 #xff)) init 1.0))
+  (bom-be
+   (((#x00 #xfe)) init 1.0))  ;; if be (0xfeff), die.
+  (byte2
+   (((#x00 #xff)) init 1.0)))
 
 ;;;
 ;;; UCS-2BE
 ;;;
-; (define-dfa ucs2be
-;   (init
-;    ((#xfe) be 1.0)
-;    ((#x00) ascii 1.0)
-;    (((#x00 #xff)) multi 1.0))
-;   (be
-;    ((#xff) init 1.0))
-;   (ascii
-;    (((#x00 #x7f)) init 1.0))
-;   (multi
-;    (((#x00 #xff)) init 1.0)))
-
+(define-dfa ucs2be
+  (init
+   ((#xfe) bom-be 1.0)
+   ((#xff) bom-le 1.0)
+   (((#x00 #xfd)) byte2 1.0))
+  (bom-le
+   (((#x00 #xfd)) init 1.0)
+   ((#xff) init 1.0)) ;; if le (0xfffe), die.
+  (bom-be
+   (((#x00 #xff)) init 1.0))
+  (byte2
+   (((#x00 #xff)) init 1.0)))
 
 ;;;
-;;; JIS (ISO2022JP)
+;;; UTF-16
+;;;
+(define-dfa utf16
+  (init
+   ((#xfe) bom-be 1.0)
+   ((#xff) bom-le 1.0))
+  (init1
+   (((#x00 #xff)) byte2 1.0))
+  (bom-be
+   ((#xff) init1 1.0))
+  (bom-le
+   ((#xfe) init1 1.0))
+  (byte2
+   (((#x00 #xff)) init1 1.0)))
+
+;;;
+;;; ISO2022JP (JIS)
 ;;;
 
 ;; NB: for now, we just check the sequence of <ESC> $ or <ESC> '('.
@@ -269,22 +280,18 @@
    ((#x1b)        esc          1.0)
    (((#x00 #x1a)  (#x1c #x1f)) init 1.0) ;C0
    (((#x20 #x7f)) init         1.0)      ;ASCII
-   (((#xa1 #xdf)) init         0.7)      ;JIS8bit kana
-   )
+   (((#xa1 #xdf)) init         0.7))     ;JIS8bit kana
   (esc
    ((#x0d #x0a)   init         0.9)      ;cancel
    ((#\( )        esc-paren    1.0)
    ((#\$ )        esc-$        1.0)
-   ((#\& )        esc-&        1.0)
-   )
+   ((#\& )        esc-&        1.0))
   (esc-paren
    ((#\B #\J #\H) init         1.0)
-   ((#\I)         jis0201kana  0.8)
-   )
+   ((#\I)         jis0201kana  0.8))
   (esc-$
    ((#\@ #\B)     kanji        1.0)
-   ((#\( )        esc-$-paren  1.0)
-   )
+   ((#\( )        esc-$-paren  1.0))
   (esc-$-paren
    ((#\D #\O #\P) kanji        1.0))
   (esc-&
@@ -296,8 +303,7 @@
    ((#x1b)        esc          1.0)
    (((#x21 #x7e)) kanji-2      1.0))
   (kanji-2
-   (((#x21 #x7e)) kanji        1.0))
-  )
+   (((#x21 #x7e)) kanji        1.0)) )
 
 ;;;
 ;;; Big5
@@ -306,12 +312,10 @@
 (define-dfa big5
   ;; first byte
   (init
-   (((#x00 #x7f)) init         1.0)     ;ascii
-   (((#xa1 #xfe)) 2byte        1.0)     ;big5-2byte
-   )
+   (((#x00 #x7f)) init         1.0)      ;ascii
+   (((#xa1 #xfe)) 2byte        1.0))     ;big5-2byte
   (2byte
-   (((#x40 #x7e) (#xa1 #xfe)) init 1.0))
-  )
+   (((#x40 #x7e) (#xa1 #xfe)) init 1.0)))
 
 ;;;
 ;;; GB2312 (EUC-CN?)
@@ -320,12 +324,10 @@
 (define-dfa gb2312
   ;; first byte
   (init
-   (((#x00 #x7f)) init         1.0)     ;ascii
-   (((#xa1 #xfe)) 2byte        1.0)     ;gb2312 2byte
-   )
+   (((#x00 #x7f)) init         1.0)      ;ascii
+   (((#xa1 #xfe)) 2byte        1.0))     ;gb2312 2byte
   (2byte
-   (((#xa1 #xfe)) init 1.0))
-  )
+   (((#xa1 #xfe)) init 1.0)))
 
 ;;;
 ;;; GB18030
@@ -336,8 +338,7 @@
   (init
    (((#x00 #x80)) init         1.0)     ;ascii
    (((#x81 #xfe)) 2byte        1.0)     ;gb18030 2byte
-   (((#x81 #xfe)) 4byte2       1.0)     ;gb18030 2byte
-   )
+   (((#x81 #xfe)) 4byte2       1.0))     ;gb18030 4byte
   (2byte
    (((#x40 #x7e) (#x80 #xfe)) init 1.0))
   (4byte2
@@ -345,8 +346,7 @@
   (4byte3
    (((#x81 #xfe)) 4byte4 1.0))
   (4byte4
-   (((#x30 #x39)) init   1.0))
-  )
+   (((#x30 #x39)) init   1.0)) )
 
 ;;;
 ;;; EUC-KR
@@ -356,12 +356,10 @@
   ;; first byte
   (init
    (((#x00 #x7f)) init      1.0)   ; ASCII range
-   (((#xa1 #xfe)) ks1001    1.0)   ; KSX 1001
-   )
+   (((#xa1 #xfe)) ks1001    1.0))   ; KSX 1001
   ;; ks x 1001
   (ks1001
-   (((#xa1 #xfe)) init      1.0))
-  )
+   (((#xa1 #xfe)) init      1.0)))
 
 ;;;
 ;;; Johab
@@ -372,12 +370,104 @@
   (init
    (((#x00 #x7f)) init         1.0)   ; ASCII range
    (((#x84 #xd3)) jamo51       1.0)   ; jamo51
-   (((#xd8 #xde) (#xe0 #xf9)) jamo42  0.95)   ; jamo42
-   )
+   (((#xd8 #xde) (#xe0 #xf9)) jamo42  0.95))   ; jamo42
   ;; second byte
   (jamo51
    (((#x41 #x7e) (#x81 #xfe)) init         1.0))
   (jamo42
-   (((#x31 #x7e) (#x91 #xfe)) init         1.0))
-  )
+   (((#x31 #x7e) (#x91 #xfe)) init         1.0)))
+
+
+
+
+
+
+;;;
+;;; arabic
+;;;
+
+(define-dfa iso8859_6
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   ((#xa0)        init         1.0)
+   ((#xa4)        init         1.0)
+   ((#xac)        init         1.0)
+   ((#xad)        init         1.0)     ;SHY xxx
+   ((#xbb)        init         1.0)
+   ((#xbf)        init         1.0)
+   (((#xc1 #xda)) init         1.0)
+   (((#xe0 #xf2)) init         1.0)))
+
+(define-dfa cp1256
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   (((#x80 #xff)) init         1.0)))   ;high bit
+
+
+;;;
+;;; greek
+;;;
+
+(define-dfa iso8859_7
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   (((#xa0 #xad)) init         1.0)
+   (((#xaf #xd1)) init         1.0)
+   (((#xd3 #xfe)) init         1.0)))
 
+(define-dfa cp1253
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   ((#x80)        init         1.0)
+   (((#x82 #x87)) init         1.0)
+   ((#x89)        init         1.0)
+   ((#x8b)        init         1.0)
+   (((#x91 #x97)) init         1.0)
+   ((#x99)        init         1.0)
+   ((#x9b)        init         1.0)
+   (((#xa0 #xa9)) init         1.0)
+   (((#xab #xd1)) init         1.0)
+   (((#xd3 #xfe)) init         1.0)))
+
+;;;
+;;; hebrew
+;;;
+
+(define-dfa iso8859_8
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   ((#xa0)        init         1.0)
+   (((#xa2 #xbe)) init         1.0)
+   (((#xdf #xfa)) init         1.0)
+   (((#xfd #xfe)) init         1.0)))
+
+(define-dfa cp1255
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   ((#x80)        init         1.0)
+   (((#x82 #x89)) init         1.0)
+   ((#x8b)        init         1.0)
+   (((#x91 #x99)) init         1.0)
+   ((#x9b)        init         1.0)
+   (((#xa0 #xc9)) init         1.0)
+   (((#xcb #xd8)) init         1.0)
+   (((#xe0 #xfa)) init         1.0)
+   (((#xfd #xfe)) init         1.0)))
+
+;;;
+;;; turkish
+;;;
+
+(define-dfa iso8859_9
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   (((#xa0 #xff)) init         1.0)))
+
+(define-dfa cp1254
+  (init
+   (((#x00 #x7f)) init         1.0)     ;ascii
+   ((#x80)        init         1.0)
+   (((#x82 #x8c)) init         1.0)
+   (((#x91 #x9c)) init         1.0)
+   (((#x9f #xff)) init         1.0)))
+
--- a/guess_tab.c	Sat Dec 01 03:27:31 2007 +0900
+++ b/guess_tab.c	Wed Jun 11 00:11:30 2008 +0900
@@ -259,6 +259,277 @@
  {  4, 1.0   }, /* 5byte_more -> 4byte_more */
 };
 
+static signed char guess_ucs2le_st[][256] = {
+ { /* state init */
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0,  1,
+ },
+ { /* state bom-le */
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+ },
+ { /* state bom-be */
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4, -1,
+ },
+ { /* state byte2 */
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+ },
+};
+
+static guess_arc guess_ucs2le_ar[] = {
+ {  2, 1.0   }, /* init -> bom-be */
+ {  1, 1.0   }, /* init -> bom-le */
+ {  3, 1.0   }, /* init -> byte2 */
+ {  0, 1.0   }, /* bom-le -> init */
+ {  0, 1.0   }, /* bom-be -> init */
+ {  0, 1.0   }, /* byte2 -> init */
+};
+
+static signed char guess_ucs2be_st[][256] = {
+ { /* state init */
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  0,  1,
+ },
+ { /* state bom-le */
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, -1,  4,
+ },
+ { /* state bom-be */
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+ },
+ { /* state byte2 */
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+ },
+};
+
+static guess_arc guess_ucs2be_ar[] = {
+ {  2, 1.0   }, /* init -> bom-be */
+ {  1, 1.0   }, /* init -> bom-le */
+ {  3, 1.0   }, /* init -> byte2 */
+ {  0, 1.0   }, /* bom-le -> init */
+ {  0, 1.0   }, /* bom-le -> init */
+ {  0, 1.0   }, /* bom-be -> init */
+ {  0, 1.0   }, /* byte2 -> init */
+};
+
+static signed char guess_utf16_st[][256] = {
+ { /* state init */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  1,
+ },
+ { /* state init1 */
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+ },
+ { /* state bom-be */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3,
+ },
+ { /* state bom-le */
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  4, -1,
+ },
+ { /* state byte2 */
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+ },
+};
+
+static guess_arc guess_utf16_ar[] = {
+ {  2, 1.0   }, /* init -> bom-be */
+ {  3, 1.0   }, /* init -> bom-le */
+ {  4, 1.0   }, /* init1 -> byte2 */
+ {  1, 1.0   }, /* bom-be -> init1 */
+ {  1, 1.0   }, /* bom-le -> init1 */
+ {  1, 1.0   }, /* byte2 -> init1 */
+};
+
 static signed char guess_big5_st[][256] = {
  { /* state init */
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -562,3 +833,243 @@
  {  0, 1.0   }, /* jamo42 -> init */
 };
 
+static signed char guess_iso8859_6_st[][256] = { /* ISO-8859-6 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_iso8859_6_ar */
+ { /* state init: 0x00-0x7F -> 0; 0xA0, 0xA4, 0xAC, 0xAD, 0xBB, 0xBF -> 1..6; 0xC1-0xDA -> 7; 0xE0-0xF2 -> 8; all other bytes -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  1, -1, -1, -1,  2, -1, -1, -1, -1, -1, -1, -1,  3,  4, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  5, -1, -1, -1,  6,
+ -1,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7, -1, -1, -1, -1, -1,
+  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ },
+};
+
+static guess_arc guess_iso8859_6_ar[] = { /* ISO-8859-6 arcs: every arc returns to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h. Byte ranges below are read off guess_iso8859_6_st. */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0xA0): init -> init */
+ {  0, 1.0   }, /* arc 2 (0xA4): init -> init */
+ {  0, 1.0   }, /* arc 3 (0xAC): init -> init */
+ {  0, 1.0   }, /* arc 4 (0xAD): init -> init */
+ {  0, 1.0   }, /* arc 5 (0xBB): init -> init */
+ {  0, 1.0   }, /* arc 6 (0xBF): init -> init */
+ {  0, 1.0   }, /* arc 7 (0xC1-0xDA): init -> init */
+ {  0, 1.0   }, /* arc 8 (0xE0-0xF2): init -> init */
+};
+
+static signed char guess_cp1256_st[][256] = { /* CP1256 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_cp1256_ar */
+ { /* state init: 0x00-0x7F -> 0; 0x80-0xFF -> 1; no -1 entries, so no byte is without a transition */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ },
+};
+
+static guess_arc guess_cp1256_ar[] = { /* CP1256 arcs: both return to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0x80-0xFF): init -> init */
+};
+
+static signed char guess_iso8859_7_st[][256] = { /* ISO-8859-7 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_iso8859_7_ar */
+ { /* state init: 0x00-0x7F -> 0; 0xA0-0xAD -> 1; 0xAF-0xD1 -> 2; 0xD3-0xFE -> 3; 0x80-0x9F, 0xAE, 0xD2, 0xFF are -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2, -1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, -1,
+ },
+};
+
+static guess_arc guess_iso8859_7_ar[] = { /* ISO-8859-7 arcs: every arc returns to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h. Byte ranges below are read off guess_iso8859_7_st. */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0xA0-0xAD): init -> init */
+ {  0, 1.0   }, /* arc 2 (0xAF-0xD1): init -> init */
+ {  0, 1.0   }, /* arc 3 (0xD3-0xFE): init -> init */
+};
+
+static signed char guess_cp1253_st[][256] = { /* CP1253 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_cp1253_ar */
+ { /* state init: 0x00-0x7F -> 0; 0x80-0x9B map sparsely to 1..7; 0xA0-0xA9 -> 8; 0xAB-0xD1 -> 9; 0xD3-0xFE -> 10; remaining bytes are -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  1, -1,  2,  2,  2,  2,  2,  2, -1,  3, -1,  4, -1, -1, -1, -1,
+ -1,  5,  5,  5,  5,  5,  5,  5, -1,  6, -1,  7, -1, -1, -1, -1,
+  8,  8,  8,  8,  8,  8,  8,  8,  8,  8, -1,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  9,  9, -1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, -1,
+ },
+};
+
+static guess_arc guess_cp1253_ar[] = { /* CP1253 arcs: every arc returns to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h. Byte ranges below are read off guess_cp1253_st. */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0x80): init -> init */
+ {  0, 1.0   }, /* arc 2 (0x82-0x87): init -> init */
+ {  0, 1.0   }, /* arc 3 (0x89): init -> init */
+ {  0, 1.0   }, /* arc 4 (0x8B): init -> init */
+ {  0, 1.0   }, /* arc 5 (0x91-0x97): init -> init */
+ {  0, 1.0   }, /* arc 6 (0x99): init -> init */
+ {  0, 1.0   }, /* arc 7 (0x9B): init -> init */
+ {  0, 1.0   }, /* arc 8 (0xA0-0xA9): init -> init */
+ {  0, 1.0   }, /* arc 9 (0xAB-0xD1): init -> init */
+ {  0, 1.0   }, /* arc 10 (0xD3-0xFE): init -> init */
+};
+
+static signed char guess_iso8859_8_st[][256] = { /* ISO-8859-8 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_iso8859_8_ar */
+ { /* state init: 0x00-0x7F -> 0; 0xA0 -> 1; 0xA2-0xBE -> 2; 0xDF-0xFA -> 3; 0xFD-0xFE -> 4; remaining bytes are -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  1, -1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, -1, -1,  4,  4, -1,
+ },
+};
+
+static guess_arc guess_iso8859_8_ar[] = { /* ISO-8859-8 arcs: every arc returns to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h. Byte ranges below are read off guess_iso8859_8_st. */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0xA0): init -> init */
+ {  0, 1.0   }, /* arc 2 (0xA2-0xBE): init -> init */
+ {  0, 1.0   }, /* arc 3 (0xDF-0xFA): init -> init */
+ {  0, 1.0   }, /* arc 4 (0xFD-0xFE): init -> init */
+};
+
+static signed char guess_cp1255_st[][256] = { /* CP1255 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_cp1255_ar */
+ { /* state init: 0x00-0x7F -> 0; 0x80-0x9B map sparsely to 1..5; 0xA0-0xC9 -> 6; 0xCB-0xD8 -> 7; 0xE0-0xFA -> 8; 0xFD-0xFE -> 9; remaining bytes are -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  1, -1,  2,  2,  2,  2,  2,  2,  2,  2, -1,  3, -1, -1, -1, -1,
+ -1,  4,  4,  4,  4,  4,  4,  4,  4,  4, -1,  5, -1, -1, -1, -1,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+  6,  6,  6,  6,  6,  6,  6,  6,  6,  6, -1,  7,  7,  7,  7,  7,
+  7,  7,  7,  7,  7,  7,  7,  7,  7, -1, -1, -1, -1, -1, -1, -1,
+  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8, -1, -1,  9,  9, -1,
+ },
+};
+
+static guess_arc guess_cp1255_ar[] = { /* CP1255 arcs: every arc returns to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h. Byte ranges below are read off guess_cp1255_st. */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0x80): init -> init */
+ {  0, 1.0   }, /* arc 2 (0x82-0x89): init -> init */
+ {  0, 1.0   }, /* arc 3 (0x8B): init -> init */
+ {  0, 1.0   }, /* arc 4 (0x91-0x99): init -> init */
+ {  0, 1.0   }, /* arc 5 (0x9B): init -> init */
+ {  0, 1.0   }, /* arc 6 (0xA0-0xC9): init -> init */
+ {  0, 1.0   }, /* arc 7 (0xCB-0xD8): init -> init */
+ {  0, 1.0   }, /* arc 8 (0xE0-0xFA): init -> init */
+ {  0, 1.0   }, /* arc 9 (0xFD-0xFE): init -> init */
+};
+
+static signed char guess_iso8859_9_st[][256] = { /* ISO-8859-9 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_iso8859_9_ar */
+ { /* state init: 0x00-0x7F -> 0; 0xA0-0xFF -> 1; 0x80-0x9F are -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ },
+};
+
+static guess_arc guess_iso8859_9_ar[] = { /* ISO-8859-9 arcs: both return to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0xA0-0xFF): init -> init */
+};
+
+static signed char guess_cp1254_st[][256] = { /* CP1254 state table: one 256-entry row per state; columns presumably indexed by input byte, values appear to index guess_cp1254_ar */
+ { /* state init: 0x00-0x7F -> 0; 0x80 -> 1; 0x82-0x8C -> 2; 0x91-0x9C -> 3; 0x9F-0xFF -> 4; 0x81, 0x8D-0x90, 0x9D-0x9E are -1 (presumably "no transition") */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  1, -1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -1, -1, -1,
+ -1,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3, -1, -1,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+ },
+};
+
+static guess_arc guess_cp1254_ar[] = { /* CP1254 arcs: every arc returns to state 0 (init); 1.0 is presumably the arc weight -- confirm in dfa.h. Byte ranges below are read off guess_cp1254_st. */
+ {  0, 1.0   }, /* arc 0 (0x00-0x7F): init -> init */
+ {  0, 1.0   }, /* arc 1 (0x80): init -> init */
+ {  0, 1.0   }, /* arc 2 (0x82-0x8C): init -> init */
+ {  0, 1.0   }, /* arc 3 (0x91-0x9C): init -> init */
+ {  0, 1.0   }, /* arc 4 (0x9F-0xFF): init -> init */
+};
+
--- a/russian_impl.c	Sat Dec 01 03:27:31 2007 +0900
+++ b/russian_impl.c	Wed Jun 11 00:11:30 2008 +0900
@@ -32,7 +32,7 @@
 static int start_symbol(char ch) {
     if ((ch=='\t')||ch=='\r'||ch=='\n'||(ch==' ')||(ch=='(')||(ch=='"')||(ch=='\'')) return 1;
     return 0;
-}    
+}
 
 typedef const struct lng_stat2 *lng_stat2_ptr;
 
@@ -48,7 +48,7 @@
     altptr=0;
     do{
       d>>=1;
-    
+
       if(!ws){
        if (wi>indexes2) wi-=d;
        else {
@@ -124,7 +124,7 @@
   double winstep,koistep,altstep,winestep,koiestep,altestep,winsstep,koisstep,altsstep;
   double winstat=0,koistat=0,altstat=0,winestat=0,koiestat=0,altestat=0,winsstat=0,koisstat=0,altsstat=0;
   long j;
-  
+
 #ifdef _AUTO_DEBUG
   fprintf(stderr,"Word: %s\n",txt);
 #endif
@@ -185,7 +185,7 @@
 	fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winstep,koistep,altstep);
 #endif
     }
-    
+
     winstat+=winstep;
     koistat+=koistep;
     altstat+=altstep;
--- a/turkish_impl.c	Sat Dec 01 03:27:31 2007 +0900
+++ b/turkish_impl.c	Wed Jun 11 00:11:30 2008 +0900
@@ -6,9 +6,9 @@
 
     for (i = 0; i < size; i++)
     {
-        if (ptr[i] == 0x80 || 
+        if (ptr[i] == 0x80 ||
             (ptr[i] >= 0x82 && ptr[i] <= 0x8C) ||
-            (ptr[i] >= 0x91 && ptr[i] <= 0x9C) || 
+            (ptr[i] >= 0x91 && ptr[i] <= 0x9C) ||
             ptr[ i ] == 0x9F)
             return "CP1254";
     }