Mercurial > audlegacy
annotate src/libguess/guess.c @ 3206:6bcfc6561711 trunk
Implement support for Arabic and Turkish.
author | William Pitcock <nenolod@atheme-project.org> |
---|---|
date | Wed, 01 Aug 2007 08:08:13 -0500 |
parents | e9f66c3905ec |
children | 1b9251ab3655 |
rev | line source |
---|---|
2313 | 1 /* |
2 * This code is derivative of guess.c of Gauche-0.8.3. | |
3 * The following is the original copyright notice. | |
4 */ | |
5 | |
6 /* | |
7 * guess.c - guessing character encoding | |
8 * | |
9 * Copyright (c) 2000-2003 Shiro Kawai, All rights reserved. | |
10 * | |
11 * Redistribution and use in source and binary forms, with or without | |
12 * modification, are permitted provided that the following conditions | |
13 * are met: | |
14 * | |
15 * 1. Redistributions of source code must retain the above copyright | |
16 * notice, this list of conditions and the following disclaimer. | |
17 * | |
18 * 2. Redistributions in binary form must reproduce the above copyright | |
19 * notice, this list of conditions and the following disclaimer in the | |
20 * documentation and/or other materials provided with the distribution. | |
21 * | |
22 * 3. Neither the name of the authors nor the names of its contributors | |
23 * may be used to endorse or promote products derived from this | |
24 * software without specific prior written permission. | |
25 * | |
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | |
32 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
33 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
34 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
35 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
36 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
37 * | |
38 */ | |
39 | |
40 #include "libguess.h" | |
41 #define NULL ((void *)0) | |
42 | |
/*
 * Tie-break order per region.  When several candidate encodings finish
 * with the same score, the one listed first is chosen.  Customize as:
 *     ORDER_** &highest, &second, ... &lowest
 * Each entry is the address of the like-named guess_dfa local in the
 * matching guess_*() function.
 */
#define ORDER_JP    &utf8, &sjis, &eucj
#define ORDER_TW    &utf8, &big5
#define ORDER_CN    &utf8, &gb2312, &gb18030
#define ORDER_KR    &utf8, &euck, &johab
49 | |
/*
 * Workaround: glib's g_convert() cannot properly convert UCS-2BE/LE data
 * that follows a BOM.  While WITH_G_CONVERT is defined, both byte orders
 * are therefore reported under the name "UTF-16".
 */
#define WITH_G_CONVERT 1
/* #undef WITH_G_CONVERT */

#if defined(WITH_G_CONVERT)
const char UCS_2BE[] = "UTF-16";
const char UCS_2LE[] = "UTF-16";
#else
const char UCS_2BE[] = "UCS-2BE";
const char UCS_2LE[] = "UCS-2LE";
#endif
62 | |
63 /* data types */ | |
/* One transition of a DFA. */
typedef struct guess_arc_rec {
    unsigned int next;  /* state entered when this arc is taken */
    double score;       /* factor multiplied into the running score */
} guess_arc;

/* A table-driven DFA plus its current matching progress. */
typedef struct guess_dfa_rec {
    signed char (*states)[256]; /* per state: arc index for each byte value,
                                   negative when the byte is not accepted */
    guess_arc *arcs;            /* arcs addressed by the indices in states */
    int state;                  /* current state; negative once rejected */
    double score;               /* product of the scores of all arcs taken */
} guess_dfa;
77 | |
78 /* macros */ | |
/*
 * Initializer for a guess_dfa using state table `st` and arc table `ar`.
 * The DFA starts in state 0 with a score of 1.0.
 */
#define DFA_INIT(st, ar) \
    { (st), (ar), 0, 1.0 }

/*
 * Feed one byte value `ch` (0..255) to `dfa`.
 *
 * Nothing happens if the DFA is already dead.  Otherwise the arc for `ch`
 * in the current state is looked up: a negative entry kills the DFA
 * (state becomes -1), any other entry moves to the arc's target state and
 * multiplies the arc's score into the running score.
 *
 * `dfa` must be a modifiable lvalue and is evaluated several times, so it
 * must not have side effects.  All arguments are parenthesized so that
 * expressions such as `*ptr` can be passed safely.
 */
#define DFA_NEXT(dfa, ch)                                           \
    do {                                                            \
        if ((dfa).state >= 0) {                                     \
            int arc__ = (dfa).states[(dfa).state][(ch)];            \
            if (arc__ < 0) {                                        \
                (dfa).state = -1;                                   \
            } else {                                                \
                (dfa).state = (dfa).arcs[arc__].next;               \
                (dfa).score *= (dfa).arcs[arc__].score;             \
            }                                                       \
        }                                                           \
    } while (0)

/* Non-zero while `dfa` has not rejected its input. */
#define DFA_ALIVE(dfa) ((dfa).state >= 0)
97 | |
98 /* include DFA table generated by guess.scm */ | |
99 #include "guess_tab.c" | |
100 | |
2559 | 101 |
102 int dfa_validate_utf8(const char *buf, int buflen) | |
103 { | |
104 int i; | |
105 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); | |
106 | |
107 for (i = 0; i < buflen; i++) { | |
108 int c = (unsigned char) buf[i]; | |
109 | |
110 if (DFA_ALIVE(utf8)) | |
111 DFA_NEXT(utf8, c); | |
112 else | |
2599 | 113 break; |
2559 | 114 } |
2599 | 115 |
116 if(DFA_ALIVE(utf8)) | |
117 return 1; | |
118 else | |
119 return 0; | |
2559 | 120 } |
121 | |
2313 | 122 const char *guess_jp(const char *buf, int buflen) |
123 { | |
124 int i; | |
125 guess_dfa eucj = DFA_INIT(guess_eucj_st, guess_eucj_ar); | |
126 guess_dfa sjis = DFA_INIT(guess_sjis_st, guess_sjis_ar); | |
127 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); | |
128 guess_dfa *top = NULL; | |
129 | |
130 guess_dfa *order[] = { ORDER_JP, NULL }; | |
131 | |
132 for (i = 0; i < buflen; i++) { | |
133 int c = (unsigned char) buf[i]; | |
134 | |
135 /* special treatment of iso-2022 escape sequence */ | |
136 if (c == 0x1b) { | |
137 if (i < buflen - 1) { | |
138 c = (unsigned char) buf[++i]; | |
139 if (c == '$' || c == '(') | |
140 return "ISO-2022-JP"; | |
141 } | |
142 } | |
143 | |
144 /* special treatment of BOM */ | |
145 if (i == 0 && c == 0xff) { | |
146 if (i < buflen - 1) { | |
147 c = (unsigned char) buf[i + 1]; | |
148 if (c == 0xfe) | |
149 return UCS_2LE; | |
150 } | |
151 } | |
152 if (i == 0 && c == 0xfe) { | |
153 if (i < buflen - 1) { | |
154 c = (unsigned char) buf[i + 1]; | |
155 if (c == 0xff) | |
156 return UCS_2BE; | |
157 } | |
158 } | |
159 | |
160 if (DFA_ALIVE(eucj)) { | |
161 if (!DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) | |
162 return "EUC-JP"; | |
163 DFA_NEXT(eucj, c); | |
164 } | |
165 if (DFA_ALIVE(sjis)) { | |
166 if (!DFA_ALIVE(eucj) && !DFA_ALIVE(utf8)) | |
167 return "SJIS"; | |
168 DFA_NEXT(sjis, c); | |
169 } | |
170 if (DFA_ALIVE(utf8)) { | |
171 if (!DFA_ALIVE(sjis) && !DFA_ALIVE(eucj)) | |
172 return "UTF-8"; | |
173 DFA_NEXT(utf8, c); | |
174 } | |
175 | |
176 if (!DFA_ALIVE(eucj) && !DFA_ALIVE(sjis) && !DFA_ALIVE(utf8)) { | |
177 /* we ran out the possibilities */ | |
178 return NULL; | |
179 } | |
180 } | |
181 | |
182 /* Now, we have ambigous code. Pick the highest score. If more than | |
183 one candidate tie, pick the default encoding. */ | |
184 for (i = 0; order[i] != NULL; i++) { | |
185 if (order[i]->state >= 0) { //DFA_ALIVE() | |
186 if (top == NULL || order[i]->score > top->score) | |
187 top = order[i]; | |
188 } | |
189 } | |
190 | |
191 if (top == &eucj) | |
192 return "EUC-JP"; | |
193 if (top == &utf8) | |
194 return "UTF-8"; | |
195 if (top == &sjis) | |
196 return "SJIS"; | |
197 return NULL; | |
198 } | |
199 | |
200 const char *guess_tw(const char *buf, int buflen) | |
201 { | |
202 int i; | |
203 guess_dfa big5 = DFA_INIT(guess_big5_st, guess_big5_ar); | |
204 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); | |
205 guess_dfa *top = NULL; | |
206 | |
207 guess_dfa *order[] = { ORDER_TW, NULL }; | |
208 | |
209 for (i = 0; i < buflen; i++) { | |
210 int c = (unsigned char) buf[i]; | |
211 | |
212 /* special treatment of iso-2022 escape sequence */ | |
213 if (c == 0x1b) { | |
214 if (i < buflen - 1) { | |
215 c = (unsigned char) buf[++i]; | |
216 if (c == '$' || c == '(') | |
217 return "ISO-2022-TW"; | |
218 } | |
219 } | |
220 | |
221 /* special treatment of BOM */ | |
222 if (i == 0 && c == 0xff) { | |
223 if (i < buflen - 1) { | |
224 c = (unsigned char) buf[i + 1]; | |
225 if (c == 0xfe) | |
226 return UCS_2LE; | |
227 } | |
228 } | |
229 if (i == 0 && c == 0xfe) { | |
230 if (i < buflen - 1) { | |
231 c = (unsigned char) buf[i + 1]; | |
232 if (c == 0xff) | |
233 return UCS_2BE; | |
234 } | |
235 } | |
236 | |
237 if (DFA_ALIVE(big5)) { | |
238 if (!DFA_ALIVE(utf8)) | |
239 return "BIG5"; | |
240 DFA_NEXT(big5, c); | |
241 } | |
242 if (DFA_ALIVE(utf8)) { | |
243 if (!DFA_ALIVE(big5)) | |
244 return "UTF-8"; | |
245 DFA_NEXT(utf8, c); | |
246 } | |
247 | |
248 if (!DFA_ALIVE(big5) && !DFA_ALIVE(utf8)) { | |
249 /* we ran out the possibilities */ | |
250 return NULL; | |
251 } | |
252 } | |
253 | |
254 /* Now, we have ambigous code. Pick the highest score. If more than | |
255 one candidate tie, pick the default encoding. */ | |
256 for (i = 0; order[i] != NULL; i++) { | |
257 if (order[i]->state >= 0) { //DFA_ALIVE() | |
258 if (top == NULL || order[i]->score > top->score) | |
259 top = order[i]; | |
260 } | |
261 } | |
262 | |
263 if (top == &big5) | |
264 return "BIG5"; | |
265 if (top == &utf8) | |
266 return "UTF-8"; | |
267 return NULL; | |
268 } | |
269 | |
270 const char *guess_cn(const char *buf, int buflen) | |
271 { | |
272 int i; | |
273 guess_dfa gb2312 = DFA_INIT(guess_gb2312_st, guess_gb2312_ar); | |
274 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); | |
275 guess_dfa gb18030 = DFA_INIT(guess_gb18030_st, guess_gb18030_ar); | |
276 guess_dfa *top = NULL; | |
277 | |
278 guess_dfa *order[] = { ORDER_CN, NULL }; | |
279 | |
280 for (i = 0; i < buflen; i++) { | |
281 int c = (unsigned char) buf[i]; | |
282 int c2; | |
283 | |
284 /* special treatment of iso-2022 escape sequence */ | |
285 if (c == 0x1b) { | |
286 if (i < buflen - 1) { | |
287 c = (unsigned char) buf[i + 1]; | |
288 c2 = (unsigned char) buf[i + 2]; | |
289 if (c == '$' && (c2 == ')' || c2 == '+')) | |
290 return "ISO-2022-CN"; | |
291 } | |
292 } | |
293 | |
294 /* special treatment of BOM */ | |
295 if (i == 0 && c == 0xff) { | |
296 if (i < buflen - 1) { | |
297 c = (unsigned char) buf[i + 1]; | |
298 if (c == 0xfe) | |
299 return UCS_2LE; | |
300 } | |
301 } | |
302 if (i == 0 && c == 0xfe) { | |
303 if (i < buflen - 1) { | |
304 c = (unsigned char) buf[i + 1]; | |
305 if (c == 0xff) | |
306 return UCS_2BE; | |
307 } | |
308 } | |
309 | |
310 if (DFA_ALIVE(gb2312)) { | |
311 if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) | |
312 return "GB2312"; | |
313 DFA_NEXT(gb2312, c); | |
314 } | |
315 if (DFA_ALIVE(utf8)) { | |
316 if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(gb18030)) | |
317 return "UTF-8"; | |
318 DFA_NEXT(utf8, c); | |
319 } | |
320 if (DFA_ALIVE(gb18030)) { | |
321 if (!DFA_ALIVE(utf8) && !DFA_ALIVE(gb2312)) | |
322 return "GB18030"; | |
323 DFA_NEXT(gb18030, c); | |
324 } | |
325 | |
326 if (!DFA_ALIVE(gb2312) && !DFA_ALIVE(utf8) && !DFA_ALIVE(gb18030)) { | |
327 /* we ran out the possibilities */ | |
328 return NULL; | |
329 } | |
330 } | |
331 | |
332 /* Now, we have ambigous code. Pick the highest score. If more than | |
333 one candidate tie, pick the default encoding. */ | |
334 for (i = 0; order[i] != NULL; i++) { | |
335 if (order[i]->state >= 0) { //DFA_ALIVE() | |
336 if (top == NULL || order[i]->score > top->score) | |
337 top = order[i]; | |
338 } | |
339 } | |
340 | |
341 if (top == &gb2312) | |
342 return "GB2312"; | |
343 if (top == &utf8) | |
344 return "UTF-8"; | |
345 if (top == &gb18030) | |
346 return "GB18030"; | |
347 return NULL; | |
348 } | |
349 | |
350 const char *guess_kr(const char *buf, int buflen) | |
351 { | |
352 int i; | |
353 guess_dfa euck = DFA_INIT(guess_euck_st, guess_euck_ar); | |
354 guess_dfa utf8 = DFA_INIT(guess_utf8_st, guess_utf8_ar); | |
355 guess_dfa johab = DFA_INIT(guess_johab_st, guess_johab_ar); | |
356 guess_dfa *top = NULL; | |
357 | |
358 guess_dfa *order[] = { ORDER_KR, NULL }; | |
359 | |
360 for (i = 0; i < buflen; i++) { | |
361 int c = (unsigned char) buf[i]; | |
362 int c2; | |
363 | |
364 /* special treatment of iso-2022 escape sequence */ | |
365 if (c == 0x1b) { | |
366 if (i < buflen - 1) { | |
367 c = (unsigned char) buf[i + 1]; | |
368 c2 = (unsigned char) buf[i + 2]; | |
369 if (c == '$' && c2 == ')') | |
370 return "ISO-2022-KR"; | |
371 } | |
372 } | |
373 | |
374 /* special treatment of BOM */ | |
375 if (i == 0 && c == 0xff) { | |
376 if (i < buflen - 1) { | |
377 c = (unsigned char) buf[i + 1]; | |
378 if (c == 0xfe) | |
379 return UCS_2LE; | |
380 } | |
381 } | |
382 if (i == 0 && c == 0xfe) { | |
383 if (i < buflen - 1) { | |
384 c = (unsigned char) buf[i + 1]; | |
385 if (c == 0xff) | |
386 return UCS_2BE; | |
387 } | |
388 } | |
389 | |
390 if (DFA_ALIVE(euck)) { | |
391 if (!DFA_ALIVE(johab) && !DFA_ALIVE(utf8)) | |
392 return "EUC-KR"; | |
393 DFA_NEXT(euck, c); | |
394 } | |
395 if (DFA_ALIVE(johab)) { | |
396 if (!DFA_ALIVE(euck) && !DFA_ALIVE(utf8)) | |
397 return "JOHAB"; | |
398 DFA_NEXT(johab, c); | |
399 } | |
400 if (DFA_ALIVE(utf8)) { | |
401 if (!DFA_ALIVE(euck) && !DFA_ALIVE(johab)) | |
402 return "UTF-8"; | |
403 DFA_NEXT(utf8, c); | |
404 } | |
405 | |
406 if (!DFA_ALIVE(euck) && !DFA_ALIVE(johab) && !DFA_ALIVE(utf8)) { | |
407 /* we ran out the possibilities */ | |
408 return NULL; | |
409 } | |
410 } | |
411 | |
412 /* Now, we have ambigous code. Pick the highest score. If more than | |
413 one candidate tie, pick the default encoding. */ | |
414 for (i = 0; order[i] != NULL; i++) { | |
415 if (order[i]->state >= 0) { //DFA_ALIVE() | |
416 if (top == NULL || order[i]->score > top->score) | |
417 top = order[i]; | |
418 } | |
419 } | |
420 | |
421 if (top == &euck) | |
422 return "EUC-KR"; | |
423 if (top == &utf8) | |
424 return "UTF-8"; | |
425 if (top == &johab) | |
426 return "JOHAB"; | |
427 return NULL; | |
428 } | |
3201
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
429 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
/* One registered guesser: a language name and the function handling it. */
typedef struct _guess_impl {
    struct _guess_impl *next;   /* next entry in the list, or NULL */
    const char *name;           /* language name as passed to guess_impl_register() */
    const char *(*impl)(const char *buf, int len);  /* guesser for that language */
} guess_impl;

/* Head of the singly linked list of registered guessers. */
static guess_impl *guess_impl_list = NULL;

/*
 * Register `impl` as the guesser for language `lang`.
 *
 * The new entry is pushed onto the front of guess_impl_list, so entries
 * registered later are found first.  The `lang` pointer is stored as-is
 * (the string is not copied) and must stay valid for as long as the entry
 * is in use.  If memory for the entry cannot be allocated, the list is
 * left unchanged and the registration is dropped.
 */
void guess_impl_register(const char *lang,
    const char *(*impl)(const char *buf, int len))
{
    guess_impl *iptr = calloc(1, sizeof *iptr);

    /* Out of memory: do not dereference the NULL result. */
    if (iptr == NULL)
        return;

    iptr->name = lang;
    iptr->impl = impl;
    iptr->next = guess_impl_list;

    guess_impl_list = iptr;
}
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
449 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
450 void guess_init(void) |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
451 { |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
452 /* check if already initialized */ |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
453 if (guess_impl_list != NULL) |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
454 return; |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
455 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
456 guess_impl_register(GUESS_REGION_JP, guess_jp); |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
457 guess_impl_register(GUESS_REGION_TW, guess_tw); |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
458 guess_impl_register(GUESS_REGION_CN, guess_cn); |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
459 guess_impl_register(GUESS_REGION_KR, guess_kr); |
3202
e9f66c3905ec
Migrate relevant libRCD code to libguess.
William Pitcock <nenolod@atheme-project.org>
parents:
3201
diff
changeset
|
460 guess_impl_register(GUESS_REGION_RU, guess_ru); |
3206
6bcfc6561711
Implement support for Arabic and Turkish.
William Pitcock <nenolod@atheme-project.org>
parents:
3202
diff
changeset
|
461 guess_impl_register(GUESS_REGION_AR, guess_ar); |
6bcfc6561711
Implement support for Arabic and Turkish.
William Pitcock <nenolod@atheme-project.org>
parents:
3202
diff
changeset
|
462 guess_impl_register(GUESS_REGION_TR, guess_tr); |
3201
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
463 } |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
464 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
465 const char *guess_encoding(const char *inbuf, int buflen, const char *lang) |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
466 { |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
467 guess_impl *iter; |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
468 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
469 guess_init(); |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
470 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
471 for (iter = guess_impl_list; iter != NULL; iter = iter->next) |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
472 { |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
473 if (!strcasecmp(lang, iter->name)) |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
474 return iter->impl(inbuf, buflen); |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
475 } |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
476 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
477 /* TODO: try other languages as fallback? */ |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
478 |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
479 return NULL; |
e1470a536417
Allow libguess to be extended for other languages using a common API.
William Pitcock <nenolod@atheme-project.org>
parents:
2599
diff
changeset
|
480 } |