annotate dfa.h @ 3:70e2c306231e

- implemented dfa utility functions. - added dfa.c. - rewrote guess functions for ar, gr, hw and tr scripts with dfa utilities. - guess functions for cjk scripts too.
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 12 Jun 2008 20:20:43 +0900
parents 754a4550c64e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
1 #ifndef __DFA_H__
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
2 #define __DFA_H__
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
3
3
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
/* Minimal boolean support used by the DFA helpers declared in this header. */
typedef int boolean;

/* Define TRUE/FALSE only when no previously included header (for example
   glib's) has already done so; an unconditional #define with a different
   replacement list would be an incompatible macro redefinition. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
7
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
/* Workaround: glib's g_convert() cannot properly convert UCS-2BE/LE text
   that follows a BOM.  While WITH_G_CONVERT is defined, both UCS-2 names
   therefore expand to "UTF-16". */
#define WITH_G_CONVERT 1
/* #undef WITH_G_CONVERT */

#ifdef WITH_G_CONVERT
#define UCS_2BE "UTF-16"
#define UCS_2LE "UTF-16"
#else
/* Hyphenated spelling, as in the note above; the previous "UCS_2BE" /
   "UCS_2LE" spelling used an underscore. */
#define UCS_2BE "UCS-2BE"
#define UCS_2LE "UCS-2LE"
#endif
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
20
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
21 /* data types */
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
/* One arc (transition) of a guess DFA.  DFA_NEXT/DFA_NEXT_P copy `next`
   into the DFA's current state and multiply `score` into its running
   score when the arc is taken. */
struct guess_arc_rec
{
    unsigned int next;  /* next state */
    double score;       /* score */
};

typedef struct guess_arc_rec guess_arc;
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
27
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
28 typedef struct guess_dfa_rec
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
29 {
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
30 signed char (*states)[256];
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
31 guess_arc *arcs;
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
32 int state;
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
33 double score;
3
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
34 char *name;
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
35 } guess_dfa;
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
36
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
37 /* macros */
3
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
/* Brace initializer for a guess_dfa: transition table `st`, arc table `ar`
   and label `name`, starting in state 0 with a score of 1.0.  The values
   are positional and follow the field order of guess_dfa. */
#define DFA_INIT(st, ar, name) { (st), (ar), 0, 1.0, (name) }
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
40
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
/*
 * Advance a DFA, given as a guess_dfa lvalue, by one input byte.
 *
 * If the DFA is still alive (state >= 0), the entry for `ch` in the row of
 * the current state is looked up.  A negative entry kills the DFA
 * (state = -1); otherwise the entry selects an arc whose `next` becomes the
 * new state and whose `score` is multiplied into the running score.
 * A dead DFA is left untouched.
 *
 * `ch` is converted to unsigned char before indexing, so a plain `char`
 * holding a byte >= 0x80 cannot produce a negative index into the
 * 256-entry row on platforms where char is signed.
 *
 * `dfa` is evaluated several times and must not have side effects.
 */
#define DFA_NEXT(dfa, ch)                                                   \
    do {                                                                    \
        int arc__;                                                          \
        if ((dfa).state >= 0) {                                             \
            arc__ = (dfa).states[(dfa).state][(unsigned char)(ch)];         \
            if (arc__ < 0) {                                                \
                (dfa).state = -1;                                           \
            } else {                                                        \
                (dfa).state = (dfa).arcs[arc__].next;                       \
                (dfa).score *= (dfa).arcs[arc__].score;                     \
            }                                                               \
        }                                                                   \
    } while (0)
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
54
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
/* Non-zero while the DFA (a guess_dfa lvalue) has not been killed, i.e.
   its state is not negative.  The argument is parenthesized so that
   expressions such as `*p` can be passed. */
#define DFA_ALIVE(dfa) ((dfa).state >= 0)
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
56
3
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
/*
 * Pointer variant of DFA_NEXT: advance the DFA pointed to by `dfa` by one
 * input byte.
 *
 * If the DFA is still alive (state >= 0), the entry for `ch` in the row of
 * the current state is looked up.  A negative entry kills the DFA
 * (state = -1); otherwise the entry selects an arc whose `next` becomes the
 * new state and whose `score` is multiplied into the running score.
 * A dead DFA is left untouched.
 *
 * `ch` is converted to unsigned char before indexing, so a plain `char`
 * holding a byte >= 0x80 cannot produce a negative index into the
 * 256-entry row on platforms where char is signed.
 *
 * `dfa` is evaluated several times and must not have side effects.
 */
#define DFA_NEXT_P(dfa, ch)                                                 \
    do {                                                                    \
        int arc__;                                                          \
        if ((dfa)->state >= 0) {                                            \
            arc__ = (dfa)->states[(dfa)->state][(unsigned char)(ch)];       \
            if (arc__ < 0) {                                                \
                (dfa)->state = -1;                                          \
            } else {                                                        \
                (dfa)->state = (dfa)->arcs[arc__].next;                     \
                (dfa)->score *= (dfa)->arcs[arc__].score;                   \
            }                                                               \
        }                                                                   \
    } while (0)
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
70
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
/* Pointer variant of DFA_ALIVE: non-zero while the DFA pointed to by `dfa`
   has a non-negative state.  The argument is parenthesized so that
   expressions such as `&d` can be passed. */
#define DFA_ALIVE_P(dfa) ((dfa)->state >= 0)
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
72
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
73 /* prototypes */
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
74 boolean dfa_alone(guess_dfa *dfa, guess_dfa *order[]);
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
75 boolean dfa_none(guess_dfa *order[]);
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
76 guess_dfa *dfa_top(guess_dfa *order[]);
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
77 const char *dfa_process(guess_dfa *order[], int c);
70e2c306231e - implemented dfa utility functions.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents: 2
diff changeset
78
2
754a4550c64e - added arabic, greek, hebrew and turkish DFAs
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
diff changeset
79 #endif