Mercurial > audlegacy
comparison src/libguess/guess.c @ 2559:b474ecb5bde4 trunk
[svn] revise str_to_utf8():
- A new UTF-8 validator using the libguess DFA has been implemented; str_to_utf8() now tries UTF-8 validation first.
- default conversion from ISO-8859-1 is enabled regardless of chardet.
- libguess and librcd are always compiled in.
- some libguess cleanups.
author | yaz |
---|---|
date | Wed, 21 Feb 2007 04:25:12 -0800 |
parents | 3149d4b1a9a9 |
children | 37c7a3dbb212 |
comparison
equal
deleted
inserted
replaced
2558:d4ecf0a91222 | 2559:b474ecb5bde4 |
---|---|
96 #define DFA_ALIVE(dfa) (dfa.state >= 0) | 96 #define DFA_ALIVE(dfa) (dfa.state >= 0) |
97 | 97 |
98 /* include DFA table generated by guess.scm */ | 98 /* include DFA table generated by guess.scm */ |
99 #include "guess_tab.c" | 99 #include "guess_tab.c" |
100 | 100 |
101 | |
102 int dfa_validate_utf8(const char *buf, int buflen) | |
103 { | |
104 int i; | |
105 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); | |
106 | |
107 for (i = 0; i < buflen; i++) { | |
108 int c = (unsigned char) buf[i]; | |
109 | |
110 if (DFA_ALIVE(utf8)) | |
111 DFA_NEXT(utf8, c); | |
112 else | |
113 return 0; | |
114 } | |
115 return 1; | |
116 } | |
117 | |
101 const char *guess_jp(const char *buf, int buflen) | 118 const char *guess_jp(const char *buf, int buflen) |
102 { | 119 { |
103 int i; | 120 int i; |
104 guess_dfa eucj = DFA_INIT(guess_eucj_st, guess_eucj_ar); | 121 guess_dfa eucj = DFA_INIT(guess_eucj_st, guess_eucj_ar); |
105 guess_dfa sjis = DFA_INIT(guess_sjis_st, guess_sjis_ar); | 122 guess_dfa sjis = DFA_INIT(guess_sjis_st, guess_sjis_ar); |