Mercurial > libguess
annotate arabic_impl.c @ 6:c61a7765c8f5 default tip
added COPYING to make the licence and copyrights clear.
author | Yoshiki Yazawa <yaz@honeyplanet.jp> |
---|---|
date | Thu, 08 Mar 2012 11:08:07 +0900 |
parents | 70e2c306231e |
children |
rev | line source |
---|---|
0 | 1 #include "libguess.h" |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
2 #include "dfa.h" |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
3 #include "guess_tab.c" |
0 | 4 |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
5 /* precedence order */ |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
6 #define ORDER &utf8, &iso8859_6, &cp1256 |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
7 |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
8 /* encodings */ |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
9 static guess_dfa cp1256 = DFA_INIT(guess_cp1256_st, guess_cp1256_ar, "CP1256"); |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
10 static guess_dfa iso8859_6 = DFA_INIT(guess_iso8859_6_st, guess_iso8859_6_ar, "ISO-8859-6"); |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
11 static guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8"); |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
12 |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
13 const char *guess_ar(const char *buf, int buflen) |
0 | 14 { |
15 int i; | |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
16 const char *rv = NULL; |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
17 guess_dfa *top = NULL; |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
18 guess_dfa *order[] = { ORDER, NULL }; |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
19 |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
20 for (i = 0; i < buflen; i++) { |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
21 int c = (unsigned char) buf[i]; |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
22 |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
23 /* special treatment of BOM */ |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
24 if (i == 0 && c == 0xff) { |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
25 if (i < buflen - 1) { |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
26 c = (unsigned char) buf[i + 1]; |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
27 if (c == 0xfe) |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
28 return UCS_2LE; |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
29 } |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
30 } |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
31 if (i == 0 && c == 0xfe) { |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
32 if (i < buflen - 1) { |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
33 c = (unsigned char) buf[i + 1]; |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
34 if (c == 0xff) |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
35 return UCS_2BE; |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
36 } |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
37 } |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
38 |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
39 rv = dfa_process(order, c); |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
40 if(rv) |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
41 return rv; |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
42 |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
43 if (dfa_none(order)) { |
2
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
44 /* we ran out the possibilities */ |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
45 return NULL; |
754a4550c64e
- added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
46 } |
0 | 47 } |
48 | |
3
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
49 top = dfa_top(order); |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
50 if (top) |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
51 return top->name; |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
52 else |
70e2c306231e
- implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
2
diff
changeset
|
53 return NULL; |
0 | 54 } |