annotate arabic_impl.c @ 2:754a4550c64e

- added arabic, greek, hebrew and turkish DFAs - new UCS-2LE/BE DFAs - now arabic_impl.c uses arabic DFAs - dfa common macros have been moved to dfa.h - minor cleanups
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Wed, 11 Jun 2008 00:11:30 +0900
parents d9b6ff839eab
children 70e2c306231e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
1 #include "libguess.h"
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
2 #include "dfa.h"
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
3 #include "guess_tab.c"
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
4
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
5 #define ORDER_AR &utf8, &iso8859_6, &cp1256
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
6
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
7 const char *guess_ar(const char *buf, int buflen)
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
8 {
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
9 int i;
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
10 guess_dfa cp1256 = DFA_INIT(guess_cp1256_st, guess_cp1256_ar);
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
11 guess_dfa iso8859_6 = DFA_INIT(guess_iso8859_6_st, guess_iso8859_6_ar);
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
12 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar);
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
13 guess_dfa *top = NULL;
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
14
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
15 guess_dfa *order[] = { ORDER_AR, NULL };
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
16
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
17 for (i = 0; i < buflen; i++) {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
18 int c = (unsigned char) buf[i];
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
19
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
20 if (DFA_ALIVE(cp1256)) {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
21 if (!DFA_ALIVE(iso8859_6) && !DFA_ALIVE(utf8))
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
22 return "CP1256";
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
23 DFA_NEXT(cp1256, c);
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
24 }
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
25 if (DFA_ALIVE(iso8859_6)) {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
26 if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(utf8))
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
27 return "ISO-8859-6";
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
28 DFA_NEXT(iso8859_6, c);
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
29 }
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
30 if (DFA_ALIVE(utf8)) {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
31 if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(iso8859_6))
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
32 return "UTF-8";
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
33 DFA_NEXT(utf8, c);
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
34 }
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
35
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
36 if (!DFA_ALIVE(cp1256) && !DFA_ALIVE(iso8859_6) && !DFA_ALIVE(utf8)) {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
37 /* we ran out of possibilities */
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
38 return NULL;
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
39 }
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
40 }
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
41
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
42 /* Now, we have ambiguous code. Pick the highest score. If more than
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
43 one candidate ties, pick the default encoding (earliest in ORDER_AR). */
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
44 for (i = 0; order[i] != NULL; i++) {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
45 if (order[i]->state >= 0) { //DFA_ALIVE()
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
46 if (top == NULL || order[i]->score > top->score)
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
47 top = order[i];
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
48 }
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
49 }
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
50
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
51 if (top == &cp1256)
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
52 return "CP1256";
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
53 if (top == &utf8)
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
54 return "UTF-8";
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
55 if (top == &iso8859_6)
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
56 return "ISO-8859-6";
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 0
diff changeset
57 return NULL;
0
d9b6ff839eab initial import
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
58 }