comparison turkish_impl.c @ 3:70e2c306231e

Implemented DFA utility functions and added dfa.c. Rewrote the guess functions for the ar, gr, hw and tr scripts with the DFA utilities, and the guess functions for the CJK scripts too.
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 12 Jun 2008 20:20:43 +0900
parents 754a4550c64e
children
comparison
equal deleted inserted replaced
2:754a4550c64e 3:70e2c306231e
1 #include "libguess.h" 1 #include "libguess.h"
2 #include "dfa.h"
3 #include "guess_tab.c"
2 4
3 static const char *_guess_tr(const unsigned char *ptr, int size) 5 /* precedence order */
6 #define ORDER &utf8, &iso8859_9, &cp1254
7
8 /* encodings */
9 static guess_dfa cp1254 = DFA_INIT(guess_cp1253_st, guess_cp1253_ar, "CP1254");
10 static guess_dfa iso8859_9 = DFA_INIT(guess_iso8859_9_st, guess_iso8859_9_ar, "ISO-8859-9");
11 static guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar, "UTF-8");
12
13 /* common */
14 const char *guess_tr(const char *buf, int buflen)
4 { 15 {
5 int i; 16 int i;
17 const char *rv = NULL;
18 guess_dfa *top = NULL;
19 guess_dfa *order[] = { ORDER, NULL };
6 20
7 for (i = 0; i < size; i++) 21 for (i = 0; i < buflen; i++) {
8 { 22 int c = (unsigned char) buf[i];
9 if (ptr[i] == 0x80 || 23
10 (ptr[i] >= 0x82 && ptr[i] <= 0x8C) || 24 /* special treatment of BOM */
11 (ptr[i] >= 0x91 && ptr[i] <= 0x9C) || 25 if (i == 0 && c == 0xff) {
12 ptr[ i ] == 0x9F) 26 if (i < buflen - 1) {
13 return "CP1254"; 27 c = (unsigned char) buf[i + 1];
28 if (c == 0xfe)
29 return UCS_2LE;
30 }
31 }
32 if (i == 0 && c == 0xfe) {
33 if (i < buflen - 1) {
34 c = (unsigned char) buf[i + 1];
35 if (c == 0xff)
36 return UCS_2BE;
37 }
38 }
39
40 rv = dfa_process(order, c);
41 if(rv)
42 return rv;
43
44 if (dfa_none(order)) {
45 /* we ran out of possibilities */
46 return NULL;
47 }
14 } 48 }
15 49
16 return "ISO-8859-9"; 50 top = dfa_top(order);
51 if (top)
52 return top->name;
53 else
54 return NULL;
17 } 55 }
18
19 const char *guess_tr(const char *ptr, int size)
20 {
21 if (dfa_validate_utf8(ptr, size))
22 return "UTF-8";
23
24 return _guess_tr((const unsigned char *)ptr, size);
25 }