comparison hebrew_impl.c @ 3:70e2c306231e

- implemented dfa utility functions. - added dfa.c. - rewrote guess functions for ar, gr, hw and tr scripts with dfa utilities. - guess functions for cjk scripts too.
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 12 Jun 2008 20:20:43 +0900
parents d9b6ff839eab
children
comparison
equal deleted inserted replaced
2:754a4550c64e 3:70e2c306231e
1 const char *_guess_hw(const unsigned char *ptr, int size) 1 #include "libguess.h"
2 #include "dfa.h"
3 #include "guess_tab.c"
4
5 /* precedence order */
6 #define ORDER &utf8, &iso8859_8, &cp1255
7
8 /* encodings */
9 static guess_dfa cp1255 = DFA_INIT(guess_cp1255_st, guess_cp1255_ar, "CP1255");
10 static guess_dfa iso8859_8 = DFA_INIT(guess_iso8859_8_st, guess_iso8859_8_ar, "ISO-8859-8-I");
11 static guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8");
12
13
14 /* common */
15 const char *guess_hw(const char *buf, int buflen)
2 { 16 {
3 int i; 17 int i;
18 const char *rv = NULL;
19 guess_dfa *top = NULL;
20 guess_dfa *order[] = { ORDER, NULL };
4 21
5 for (i = 0; i < size; i++) 22 for (i = 0; i < buflen; i++) {
6 { 23 int c = (unsigned char) buf[i];
7 if (ptr[i] == 0x80 || (ptr[i] >= 0x82 && ptr[i] <= 0x89) || ptr[i] == 0x8B ||
8 (ptr[i] >= 0x91 && ptr[i] <= 0x99) || ptr[i] == 0x9B || ptr[i] == 0xA1 ||
9 (ptr[i] >= 0xBF && ptr[i] <= 0xC9) ||
10 (ptr[i] >= 0xCB && ptr[i] <= 0xD8))
11 return "CP1255";
12 24
13 if (ptr[i] == 0xDF) 25 /* special treatment of BOM */
14 return "ISO-8859-8-I"; 26 if (i == 0 && c == 0xff) {
27 if (i < buflen - 1) {
28 c = (unsigned char) buf[i + 1];
29 if (c == 0xfe)
30 return UCS_2LE;
31 }
32 }
33 if (i == 0 && c == 0xfe) {
34 if (i < buflen - 1) {
35 c = (unsigned char) buf[i + 1];
36 if (c == 0xff)
37 return UCS_2BE;
38 }
39 }
40
41 rv = dfa_process(order, c);
42 if(rv)
43 return rv;
44
45 if (dfa_none(order)) {
46 /* we ran out of possibilities */
47 return NULL;
48 }
15 } 49 }
16 50
17 return "ISO-8859-8-I"; 51 top = dfa_top(order);
52 if (top)
53 return top->name;
54 else
55 return NULL;
18 } 56 }
19
20 const char *guess_hw(const char *ptr, int size)
21 {
22 return _guess_hw((const unsigned char *) ptr, size);
23 }