comparison russian_impl.c @ 0:d9b6ff839eab

initial import
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Fri, 30 Nov 2007 19:34:51 +0900
parents
children 754a4550c64e
comparison
equal deleted inserted replaced
-1:000000000000 0:d9b6ff839eab
1 /*
2 * This code is a derivative of librcd.
3 * No copyright notice was found.
4 */
5
6 #include <stdio.h>
7 #include <string.h>
8
9 #include "libguess.h"
10
/* Score charged for a byte pair that is absent from an encoding's table. */
#define NF_VALUE (-2)

/*
 * Generic helpers.  Every argument and the whole expansion are parenthesized
 * so that expression arguments (e.g. `x & y`) keep their intended precedence.
 * Note: max/min still evaluate their arguments more than once, so do not
 * pass expressions with side effects.
 */
#define max(a,b) (((a)>(b))?(a):(b))
#define min(a,b) (((a)<(b))?(a):(b))
#define bit(i) (1<<(i))
15
/*
 * One entry of a byte-pair statistics table.
 *
 * (a, b) is the two-byte key that bfind() compares against the input pair;
 * the three rates are the scores is_win_charset2() adds up depending on
 * where the pair sits inside a word.
 */
struct lng_stat2 {
    unsigned char a;    /* first byte of the pair */
    unsigned char b;    /* second byte of the pair */
    double rate;        /* score for a pair in the middle of a word */
    double srate;       /* score for a pair at the start of a word */
    double erate;       /* score for a pair at the end of a word */
};

typedef struct lng_stat2 lng_stat2;
23
24 #include "russian_tab.c"
25
26
/*
 * Return 1 when `ch` is treated as terminating a word: line breaks, NUL,
 * blanks, the listed punctuation marks, quotes or a closing parenthesis.
 * Return 0 for every other character.
 */
static int end_symbol(char ch) {
    switch (ch) {
    case '\r':
    case '\n':
    case '\0':
    case ' ':
    case '\t':
    case ',':
    case '.':
    case '!':
    case '?':
    case ';':
    case '-':
    case ':':
    case '"':
    case '\'':
    case ')':
        return 1;
    default:
        return 0;
    }
}
31
/*
 * Return 1 when `ch` is treated as preceding the start of a word: blanks,
 * line breaks, an opening parenthesis or a quote.  Return 0 otherwise
 * (including for NUL).
 */
static int start_symbol(char ch) {
    static const char openers[] = { '\t', '\r', '\n', ' ', '(', '"', '\'' };
    size_t idx;

    for (idx = 0; idx < sizeof openers / sizeof openers[0]; idx++) {
        if (openers[idx] == ch) {
            return 1;
        }
    }
    return 0;
}
36
37 typedef const struct lng_stat2 *lng_stat2_ptr;
38
39 static void bfind(const unsigned char *a, lng_stat2_ptr *w, lng_stat2_ptr *k, lng_stat2_ptr *al) {
40 const struct lng_stat2 *winptr, *koiptr,*altptr;
41 int ki,wi,ai,d,ws=0,ks=0,as=0;
42 d=npow2>>1;
43 wi=d;
44 ki=d;
45 ai=d;
46 winptr=0;
47 koiptr=0;
48 altptr=0;
49 do{
50 d>>=1;
51
52 if(!ws){
53 if (wi>indexes2) wi-=d;
54 else {
55 winptr=enc_win+wi-1;
56 if(a[0]==winptr->a){
57 if(a[1]==winptr->b){
58 ws=1;
59 }else if(a[1]<winptr->b){
60 wi-=d;
61 }else{ //b>win[wi].b
62 wi+=d;
63 }
64 }else if(a[0]<winptr->a){
65 wi-=d;
66 }else{ //a>win[wi].a
67 wi+=d;
68 }
69 }
70 }
71 if(!ks){
72 if (ki>indexes2) ki-=d;
73 else {
74 koiptr=enc_koi+ki-1;
75 if(a[0]==koiptr->a){
76 if(a[1]==koiptr->b){
77 ks=1;
78 }else if(a[1]<koiptr->b){
79 ki-=d;
80 }else{ //b>win[wi].b
81 ki+=d;
82 }
83 }else if(a[0]<koiptr->a){
84 ki-=d;
85 }else{ //a>win[wi].a
86 ki+=d;
87 }
88 }
89 }
90 if(!as){
91 if (ai>indexes2) ai-=d;
92 else {
93 altptr=enc_alt+ai-1;
94 if(a[0]==altptr->a){
95 if(a[1]==altptr->b){
96 as=1;
97 }else if(a[1]<altptr->b){
98 ai-=d;
99 }else{ //b>win[wi].b
100 ai+=d;
101 }
102 }else if(a[0]<altptr->a){
103 ai-=d;
104 }else{ //a>win[wi].a
105 ai+=d;
106 }
107 }
108 }
109 }while(d);
110 if (ws) *w=winptr;
111 else *w=NULL;
112 if (ks) *k=koiptr;
113 else *k=NULL;
114 if (as) *al=altptr;
115 else *al=NULL;
116 }
117
/*
 * Combine the word-start (s), mid-word (m) and word-end (e) scores of one
 * encoding into its final score: a plain sum, added left to right.
 */
static double calculate(double s, double m, double e) {
    double total = s + m;

    total = total + e;
    return total;
}
121
122 static const char *is_win_charset2(const unsigned char *txt, int len){
123 const struct lng_stat2 *winptr, *koiptr,*altptr;
124 double winstep,koistep,altstep,winestep,koiestep,altestep,winsstep,koisstep,altsstep;
125 double winstat=0,koistat=0,altstat=0,winestat=0,koiestat=0,altestat=0,winsstat=0,koisstat=0,altsstat=0;
126 long j;
127
128 #ifdef _AUTO_DEBUG
129 fprintf(stderr,"Word: %s\n",txt);
130 #endif
131 for(j=0;j<len-1;j++){
132 //skip bottom half of table
133 if(txt[j]<128 || txt[j+1]<128) continue;
134 #ifdef _AUTO_DEBUG
135 fprintf(stderr,"Pair: %c%c",txt[j],txt[j+1]);
136 #endif
137 bfind(txt+j,&winptr,&koiptr,&altptr);
138
139 if ((j==0)||(start_symbol(txt[j-1]))) {
140 if (winptr) winsstep=winptr->srate;
141 else winsstep=NF_VALUE;
142 if (koiptr) koisstep=koiptr->srate;
143 else koisstep=NF_VALUE;
144 if (altptr) altsstep=altptr->srate;
145 else altsstep=NF_VALUE;
146 winestep=0;
147 koiestep=0;
148 altestep=0;
149 winstep=0;
150 koistep=0;
151 altstep=0;
152 #ifdef _AUTO_DEBUG
153 fprintf(stderr,", Win %lf, Koi %lf, Alt: %lf\n",winsstep,koisstep,altsstep);
154 #endif
155 } else if ((j==len-2)||(end_symbol(txt[j+2]))) {
156 if (winptr) winestep=winptr->erate;
157 else winestep=NF_VALUE;
158 if (koiptr) koiestep=koiptr->erate;
159 else koiestep=NF_VALUE;
160 if (altptr) altestep=altptr->erate;
161 else altestep=NF_VALUE;
162 winsstep=0;
163 koisstep=0;
164 altsstep=0;
165 winstep=0;
166 koistep=0;
167 altstep=0;
168 #ifdef _AUTO_DEBUG
169 fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winestep,koiestep,altestep);
170 #endif
171 } else {
172 if (winptr) winstep=winptr->rate;
173 else winstep=NF_VALUE;
174 if (koiptr) koistep=koiptr->rate;
175 else koistep=NF_VALUE;
176 if (altptr) altstep=altptr->rate;
177 else altstep=NF_VALUE;
178 winsstep=0;
179 winestep=0;
180 koisstep=0;
181 koiestep=0;
182 altsstep=0;
183 altestep=0;
184 #ifdef _AUTO_DEBUG
185 fprintf(stderr,", Win %lf, Koi %lf, Alt %lf\n",winstep,koistep,altstep);
186 #endif
187 }
188
189 winstat+=winstep;
190 koistat+=koistep;
191 altstat+=altstep;
192 winsstat+=winsstep;
193 koisstat+=koisstep;
194 altsstat+=altsstep;
195 winestat+=winestep;
196 koiestat+=koiestep;
197 altestat+=altestep;
198 }
199
200 #ifdef _AUTO_DEBUG
201 fprintf(stderr,"Start. Win: %lf, Koi: %lf, Alt: %lf\n",winsstat,koisstat,altsstat);
202 fprintf(stderr,"Middle. Win: %lf, Koi: %lf, Alt: %lf\n",winstat,koistat,altstat);
203 fprintf(stderr,"End. Win: %lf, Koi: %lf, Alt: %lf\n",winestat,koiestat,altestat);
204 fprintf(stderr,"Final. Win: %lf, Koi: %lf, Alt: %lf\n",calculate(winsstat,winstat,winestat),calculate(koisstat,koistat,koiestat),calculate(altsstat,altstat,altestat));
205 #endif
206 if ((calculate(altsstat,altstat,altestat)>calculate(koisstat,koistat,koiestat))&&(calculate(altsstat,altstat,altestat)>calculate(winsstat,winstat,winestat))) return "CP866";
207 if (calculate(koisstat,koistat,koiestat)>calculate(winsstat,winstat,winestat)) return "KOI8-R";
208 return "CP1251";
209 }
210
/*
 * Guess the encoding of the `len` bytes at `buf`.
 *
 * Returns "UTF-8" when dfa_validate_utf8() accepts the buffer; otherwise
 * returns whatever is_win_charset2() picks for the same bytes.
 */
const char *guess_ru(const char *buf, int len)
{
    const unsigned char *bytes = (const unsigned char *) buf;

    return dfa_validate_utf8(buf, len) ? "UTF-8" : is_win_charset2(bytes, len);
}
218