Mercurial > libguess
comparison hebrew_impl.c @ 3:70e2c306231e
- implemented dfa utility functions.
- added dfa.c.
- rewrote guess functions for ar, gr, hw and tr scripts with dfa utilities.
- rewrote guess functions for cjk scripts with dfa utilities too.
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 12 Jun 2008 20:20:43 +0900 |
parents | d9b6ff839eab |
children |
comparison
equal
deleted
inserted
replaced
2:754a4550c64e | 3:70e2c306231e |
---|---|
1 const char *_guess_hw(const unsigned char *ptr, int size) | 1 #include "libguess.h" |
2 #include "dfa.h" | |
3 #include "guess_tab.c" | |
4 | |
5 /* precedence order */ | |
6 #define ORDER &utf8, &iso8859_8, &cp1255 | |
7 | |
8 /* encodings */ | |
9 static guess_dfa cp1255 = DFA_INIT(guess_cp1255_st, guess_cp1255_ar, "CP1255"); | |
10 static guess_dfa iso8859_8 = DFA_INIT(guess_iso8859_8_st, guess_iso8859_8_ar, "ISO-8859-8-I"); | |
11 static guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8"); | |
12 | |
13 | |
14 /* common */ | |
15 const char *guess_hw(const char *buf, int buflen) | |
2 { | 16 { |
3 int i; | 17 int i; |
18 const char *rv = NULL; | |
19 guess_dfa *top = NULL; | |
20 guess_dfa *order[] = { ORDER, NULL }; | |
4 | 21 |
5 for (i = 0; i < size; i++) | 22 for (i = 0; i < buflen; i++) { |
6 { | 23 int c = (unsigned char) buf[i]; |
7 if (ptr[i] == 0x80 || (ptr[i] >= 0x82 && ptr[i] <= 0x89) || ptr[i] == 0x8B || | |
8 (ptr[i] >= 0x91 && ptr[i] <= 0x99) || ptr[i] == 0x9B || ptr[i] == 0xA1 || | |
9 (ptr[i] >= 0xBF && ptr[i] <= 0xC9) || | |
10 (ptr[i] >= 0xCB && ptr[i] <= 0xD8)) | |
11 return "CP1255"; | |
12 | 24 |
13 if (ptr[i] == 0xDF) | 25 /* special treatment of BOM */ |
14 return "ISO-8859-8-I"; | 26 if (i == 0 && c == 0xff) { |
27 if (i < buflen - 1) { | |
28 c = (unsigned char) buf[i + 1]; | |
29 if (c == 0xfe) | |
30 return UCS_2LE; | |
31 } | |
32 } | |
33 if (i == 0 && c == 0xfe) { | |
34 if (i < buflen - 1) { | |
35 c = (unsigned char) buf[i + 1]; | |
36 if (c == 0xff) | |
37 return UCS_2BE; | |
38 } | |
39 } | |
40 | |
41 rv = dfa_process(order, c); | |
42 if(rv) | |
43 return rv; | |
44 | |
45 if (dfa_none(order)) { | |
46 /* we ran out of possibilities */ | |
47 return NULL; | |
48 } | |
15 } | 49 } |
16 | 50 |
17 return "ISO-8859-8-I"; | 51 top = dfa_top(order); |
52 if (top) | |
53 return top->name; | |
54 else | |
55 return NULL; | |
18 } | 56 } |
19 | |
20 const char *guess_hw(const char *ptr, int size) | |
21 { | |
22 return _guess_hw((const unsigned char *) ptr, size); | |
23 } |