Mercurial > audlegacy
diff src/libguess/guess.c @ 2559:b474ecb5bde4 trunk
[svn] revise str_to_utf8():
- A new UTF-8 validator using the libguess DFA has been implemented; str_to_utf8() now tries UTF-8 validation first.
- The default conversion from ISO-8859-1 is enabled regardless of chardet.
- libguess and librcd are always compiled in.
- some libguess cleanups.
author | yaz |
---|---|
date | Wed, 21 Feb 2007 04:25:12 -0800 |
parents | 3149d4b1a9a9 |
children | 37c7a3dbb212 |
line wrap: on
line diff
--- a/src/libguess/guess.c Wed Feb 21 03:52:52 2007 -0800
+++ b/src/libguess/guess.c Wed Feb 21 04:25:12 2007 -0800
@@ -98,6 +98,23 @@
 /* include DFA table generated by guess.scm */
 #include "guess_tab.c"
 
+
+int dfa_validate_utf8(const char *buf, int buflen)
+{
+    int i;
+    guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar);
+
+    for (i = 0; i < buflen; i++) {
+        int c = (unsigned char) buf[i];
+
+        if (DFA_ALIVE(utf8))
+            DFA_NEXT(utf8, c);
+        else
+            return 0;
+    }
+    return 1;
+}
+
 const char *guess_jp(const char *buf, int buflen)
 {
     int i;