Mercurial > libguess
diff hebrew_impl.c @ 3:70e2c306231e
- implemented dfa utility functions.
- added dfa.c.
- rewrote guess functions for ar, gr, hw and tr scripts with dfa utilities.
- guess functions for cjk scripts too.
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 12 Jun 2008 20:20:43 +0900 |
parents | d9b6ff839eab |
children |
line wrap: on
line diff
--- a/hebrew_impl.c Wed Jun 11 00:11:30 2008 +0900
+++ b/hebrew_impl.c Thu Jun 12 20:20:43 2008 +0900
@@ -1,23 +1,56 @@
-const char *_guess_hw(const unsigned char *ptr, int size)
+#include "libguess.h"
+#include "dfa.h"
+#include "guess_tab.c"
+
+/* precedence order */
+#define ORDER &utf8, &iso8859_8, &cp1255
+
+/* encodings */
+static guess_dfa cp1255 = DFA_INIT(guess_cp1255_st, guess_cp1255_ar, "CP1255");
+static guess_dfa iso8859_8 = DFA_INIT(guess_iso8859_8_st, guess_iso8859_8_ar, "ISO-8859-8-I");
+static guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8");
+
+
+/* common */
+const char *guess_hw(const char *buf, int buflen)
 {
     int i;
+    const char *rv = NULL;
+    guess_dfa *top = NULL;
+    guess_dfa *order[] = { ORDER, NULL };
 
-    for (i = 0; i < size; i++)
-    {
-        if (ptr[i] == 0x80 || (ptr[i] >= 0x82 && ptr[i] <= 0x89) || ptr[i] == 0x8B ||
-            (ptr[i] >= 0x91 && ptr[i] <= 0x99) || ptr[i] == 0x9B || ptr[i] == 0xA1 ||
-            (ptr[i] >= 0xBF && ptr[i] <= 0xC9) ||
-            (ptr[i] >= 0xCB && ptr[i] <= 0xD8))
-            return "CP1255";
+    for (i = 0; i < buflen; i++) {
+        int c = (unsigned char) buf[i];
 
-        if (ptr[i] == 0xDF)
-            return "ISO-8859-8-I";
+        /* special treatment of BOM */
+        if (i == 0 && c == 0xff) {
+            if (i < buflen - 1) {
+                c = (unsigned char) buf[i + 1];
+                if (c == 0xfe)
+                    return UCS_2LE;
+            }
+        }
+        if (i == 0 && c == 0xfe) {
+            if (i < buflen - 1) {
+                c = (unsigned char) buf[i + 1];
+                if (c == 0xff)
+                    return UCS_2BE;
+            }
+        }
+
+        rv = dfa_process(order, c);
+        if(rv)
+            return rv;
+
+        if (dfa_none(order)) {
+            /* we ran out the possibilities */
+            return NULL;
+        }
     }
 
-    return "ISO-8859-8-I";
+    top = dfa_top(order);
+    if (top)
+        return top->name;
+    else
+        return NULL;
 }
-
-const char *guess_hw(const char *ptr, int size)
-{
-    return _guess_hw((const unsigned char *) ptr, size);
-}