0
|
1 /*
|
|
2 * $Id: yincoding.c,v 1.7 2005/04/10 15:26:37 aonoto Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2002
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This library is free software; you can redistribute it and/or
|
|
18 * modify it under the terms of the GNU Lesser General Public
|
|
19 * License as published by the Free Software Foundation; either
|
|
20 * version 2 of the License, or (at your option) any later version.
|
|
21 *
|
|
22 * This library is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
25 * Lesser General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU Lesser General Public
|
|
28 * License along with this library; if not, write to the
|
|
29 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
30 * Boston, MA 02111-1307, USA.
|
|
31 */
|
|
32
|
|
33 /** cWnn Version 1.1 **/
|
|
34 #ifdef HAVE_CONFIG_H
|
|
35 # include <config.h>
|
|
36 #endif
|
|
37
|
|
38 #include <stdio.h>
|
|
39 #include <ctype.h>
|
|
40 #include "commonhd.h"
|
|
41 #ifdef CHINESE
|
|
42 #if STDC_HEADERS
|
|
43 # include <string.h>
|
|
44 #elif HAVE_STRINGS_H
|
|
45 # include <strings.h>
|
|
46 #endif /* STDC_HEADERS */
|
|
47
|
|
48 #include "cplib.h"
|
|
49 #include "rk_spclval.h"
|
|
50 #include "jh.h"
|
|
51 #include "wnn_string.h"
|
|
52
|
|
extern char *py_table[];
extern char *zy_table[];
/* Per the original note this remembers the previous auto_state(); none of
   the functions visible in this file read or write it. */
unsigned char last_mark;
|
|
56
|
|
57 /* copied from old sstrings.c */
|
|
58 static int
|
|
59 cwnn_sStrcpy (c, w)
|
|
60 register char *c;
|
|
61 register w_char *w;
|
|
62 {
|
|
63 char *c0 = c;
|
|
64 for (; *w != 0; w++)
|
|
65 {
|
|
66 if ((*w & 0x8080) == 0x8000)
|
|
67 {
|
|
68 *c++ = 0x8f;
|
|
69 *c++ = (char) ((*w & 0xff00) >> 8);
|
|
70 *c++ = (char) ((*w & 0x007f) | 0x0080);
|
|
71 }
|
|
72 else
|
|
73 {
|
|
74 if (*w & 0xff00)
|
|
75 *c++ = (char) ((*w & 0xff00) >> 8);
|
|
76 else if (*w & 0x80)
|
|
77 *c++ = 0x8e;
|
|
78 *c++ = (char) (*w & 0x00ff);
|
|
79 }
|
|
80 }
|
|
81 *c = '\0';
|
|
82 return (c - c0);
|
|
83 }
|
|
84
|
|
85
|
|
86 static int
|
|
87 cwnn_Sstrcpy (w, c)
|
|
88 w_char *w;
|
|
89 unsigned char *c;
|
|
90 {
|
|
91 w_char *w0 = w;
|
|
92
|
|
93 for (; *c;)
|
|
94 {
|
|
95 if (*c & 0x80)
|
|
96 {
|
|
97 if (*c == 0x8e)
|
|
98 {
|
|
99 c++;
|
|
100 *w++ = (unsigned short) *c++;
|
|
101 }
|
|
102 else if (*c == 0x8f)
|
|
103 {
|
|
104 c++;
|
|
105 *w = (unsigned short) (*c++ << 8);
|
|
106 *w++ |= (unsigned short) (*c++ & 0x7f);
|
|
107 }
|
|
108 else
|
|
109 {
|
|
110 *w = (unsigned short) (*c++ << 8);
|
|
111 *w++ |= (unsigned short) *c++;
|
|
112 }
|
|
113 }
|
|
114 else
|
|
115 {
|
|
116 *w++ = (unsigned short) *c++;
|
|
117 }
|
|
118 }
|
|
119 *w = 0;
|
|
120 return (w - w0);
|
|
121 }
|
|
122
|
|
123 static int
|
|
124 cwnn_Sstrcat (w, c)
|
|
125 w_char *w;
|
|
126 unsigned char *c;
|
|
127 {
|
|
128 w_char *w0 = w;
|
|
129
|
|
130 for (; *w; w++);
|
|
131
|
|
132 for (; *c;)
|
|
133 {
|
|
134 if (*c & 0x80)
|
|
135 {
|
|
136 if (*c == 0x8e)
|
|
137 {
|
|
138 c++;
|
|
139 *w++ = (unsigned short) *c++;
|
|
140 }
|
|
141 else if (*c == 0x8f)
|
|
142 {
|
|
143 c++;
|
|
144 *w = (unsigned short) (*c++ << 8);
|
|
145 *w++ |= (unsigned short) (*c++ & 0x7f);
|
|
146 }
|
|
147 else
|
|
148 {
|
|
149 *w = (unsigned short) (*c++ << 8);
|
|
150 *w++ |= (unsigned short) *c++;
|
|
151 }
|
|
152 }
|
|
153 else
|
|
154 {
|
|
155 *w++ = (unsigned short) *c++;
|
|
156 }
|
|
157 }
|
|
158 *w = 0;
|
|
159 return (w - w0);
|
|
160 }
|
|
161
|
|
/* py_yunmu(), zy_yunmu(): if `yuyin' begins with a YunMu, return that
   YunMu's index in the YunMu table; otherwise return -1.  The table is
   scanned over five entries per YunMu, so the caller still has to derive
   the real yun_raw (index / 5) and the tone (index % 5) from the result. */
|
|
166 static int
|
|
167 py_yunmu (yuyin)
|
|
168 register char *yuyin;
|
|
169 {
|
|
170 register int i;
|
|
171 for (i = (PY_NUM_YUNMU * 5) - 1; i >= 0; i--)
|
|
172 {
|
|
173 if (strncmp (yuyin, py_yunmu_tbl[i], strlen (py_yunmu_tbl[i])) == 0)
|
|
174 return (i);
|
|
175 }
|
|
176 return (-1);
|
|
177 }
|
|
178
|
|
179 static int
|
|
180 zy_yunmu (yuyin)
|
|
181 register char *yuyin;
|
|
182 {
|
|
183 register int i;
|
|
184 for (i = (ZY_NUM_YUNMU * 5) - 1; i >= 0; i--)
|
|
185 {
|
|
186 if (strncmp (yuyin, zy_yunmu_tbl[i], strlen (zy_yunmu_tbl[i])) == 0)
|
|
187 return (i);
|
|
188 }
|
|
189 return (-1);
|
|
190 }
|
|
191
|
|
192 /* is_pinyin(): if is PinYin with Shengmu, return 1 */
|
|
193 /* if is PinYin without Shengmu , return 0 */
|
|
194 /* else return -1 */
|
|
195
|
|
196 static int
|
|
197 is_pinyin (sheng_raw, yun_raw)
|
|
198 register int sheng_raw;
|
|
199 register int yun_raw;
|
|
200 {
|
|
201
|
|
202 if ((sheng_raw >= 0) && (sheng_raw < PY_NUM_SHENGMU) && (yun_raw >= 0) && (yun_raw < PY_NUM_YUNMU) && (pinyin_tbl[sheng_raw * PY_NUM_YUNMU + yun_raw] == 1))
|
|
203 {
|
|
204 if (sheng_raw == EMPTY_SHENG_RAW)
|
|
205 return (0);
|
|
206 else
|
|
207 return (1);
|
|
208 }
|
|
209 else
|
|
210 return (-1);
|
|
211
|
|
212 }
|
|
213
|
|
214 /* is_zhuyin(): if is ZhuYin with Shengmu: return 1 */
|
|
215 /* if is ZhuYin without Shengmu: return 0 */
|
|
216 /* else: return -1 */
|
|
217
|
|
218 static int
|
|
219 is_zhuyin (sheng_raw, yun_raw)
|
|
220 register int sheng_raw;
|
|
221 register int yun_raw;
|
|
222 {
|
|
223
|
|
224 if ((sheng_raw >= 0)
|
|
225 && (sheng_raw < ZY_NUM_SHENGMU)
|
|
226 && (yun_raw >= 0)
|
|
227 && (yun_raw < ZY_NUM_YUNMU)
|
|
228 && ((zhuyin_tbl[sheng_raw * ZY_NUM_YUNMU + yun_raw] & 0x8000) == 0x8000))
|
|
229 {
|
|
230 if (sheng_raw == EMPTY_SHENG_RAW)
|
|
231 return (0);
|
|
232 else
|
|
233 return (1);
|
|
234 }
|
|
235 return (-1);
|
|
236 }
|
|
237
|
|
/* py_shengmu(), zy_shengmu(): if `yuyin' begins with a ShengMu, return
   that ShengMu's index in the ShengMu table; otherwise return -1.
   Entry 0 of the table is never tried. */
|
|
242 static int
|
|
243 py_shengmu (yuyin)
|
|
244 register char *yuyin;
|
|
245 {
|
|
246 register int i;
|
|
247 for (i = PY_NUM_SHENGMU - 1; i > 0; i--)
|
|
248 {
|
|
249 if (strncmp (yuyin, py_shengmu_tbl[i], strlen (py_shengmu_tbl[i])) == 0)
|
|
250 return (i);
|
|
251 }
|
|
252 return (-1);
|
|
253 }
|
|
254
|
|
255 static int
|
|
256 zy_shengmu (yuyin)
|
|
257 register char *yuyin;
|
|
258 {
|
|
259 register int i;
|
|
260 for (i = ZY_NUM_SHENGMU - 1; i > 0; i--)
|
|
261 {
|
|
262 if (strncmp (yuyin, zy_shengmu_tbl[i], strlen (zy_shengmu_tbl[i])) == 0)
|
|
263 return (i);
|
|
264 }
|
|
265 return (-1);
|
|
266 }
|
|
267
|
|
#ifdef nodef
/*
 * getstr_pzy(): (compiled only when `nodef' is defined)
 * Scan `lstr' up to the first PinYin/ZhuYin end marker selected by
 * `which', then rewrite the string in place as `yincod' followed by the
 * letters that came after the marker.
 * NOTE(review): when no marker is found the scan stops on the terminating
 * 0 and the code still steps one letter past it -- confirm before
 * re-enabling this function.
 */
static void
getstr_pzy (lstr, yincod, which)
     letter *lstr;
     w_char yincod;
     int which;
{
  letter *dst = lstr;

  while (*lstr)
    {
      if ((which == CWNN_PINYIN) && ((*lstr & 0x0000ffff) == PY_EOF))
        break;
      if ((which == CWNN_ZHUYIN) && (isZY_EOF (*lstr & 0x0000ffff)))
        break;
      lstr++;
    }
  lstr++;			/* skip the end marker itself */

  *dst++ = yincod;
  while (*lstr)
    *dst++ = *lstr++;
  *dst = 0;
}
#endif
|
|
291
|
|
292 /* create_yincod(): input:
|
|
293 raw in ShengMu table of PinYin, raw in YunMu table of PinYin,
|
|
294 sisheng output: yincod: if is PinYin, otherwise:0.
|
|
295 */
|
|
296
|
|
297 static w_char
|
|
298 create_yincod (sheng_raw, yun_raw, ss)
|
|
299 register int sheng_raw;
|
|
300 register int yun_raw;
|
|
301 register int ss;
|
|
302 {
|
|
303 int ret = 0;
|
|
304 if (is_pinyin (sheng_raw, yun_raw) == 1) /*Has Shengmu */
|
|
305 ret = 0x0080 + (((yun_raw << 1) + 0x20) << 8) + ((sheng_raw - 1) << 2) + 0x20;
|
|
306 else if (is_pinyin (sheng_raw, yun_raw) == 0) /*Not Shengmu */
|
|
307 ret = 0x0080 + (((yun_raw << 1) + 0x20) << 8) + ((X_SHENG_RAW - 1) << 2) + 0x20; /*Rent this */
|
|
308 else
|
|
309 return (ret);
|
|
310 if ((ss > 0) && (ss <= 4))
|
|
311 ret += 0x0100 + ss - 1;
|
|
312 return (ret);
|
|
313 }
|
|
314
|
|
315 /* pzy_yincod()
|
|
316 in: The param is expected to be a PinYin or a ZhuYin.
|
|
317 return: a Yin_code is returned, if it is a PinYin or a ZhuYin.
|
|
318 otherwise, 0 is returned,
|
|
319 */
|
|
320 static int
|
|
321 pzy_get_sheng_yun (yuyin, ss, sheng_raw, yun_raw, which)
|
|
322 register char *yuyin; /* one PinYin or ZhuYin with end of character */
|
|
323 register int *ss; /* get SiSheng from PinYin or ZhuYin */
|
|
324 register int *sheng_raw; /* position in ShengMu table */
|
|
325 int *yun_raw; /* position in YunMu table */
|
|
326 int which;
|
|
327 {
|
|
328 /*
|
|
329 register int j;
|
|
330 */
|
|
331 register char *pzytmp;
|
|
332
|
|
333 *ss = -1;
|
|
334 *sheng_raw = -1;
|
|
335 *yun_raw = -1;
|
|
336
|
|
337 pzytmp = yuyin;
|
|
338
|
|
339 if (which == CWNN_PINYIN)
|
|
340 { /* for Pinyin case */
|
|
341
|
|
342 if ((*sheng_raw = py_shengmu (pzytmp)) == -1)
|
|
343 { /* no ShengMu */
|
|
344 if ((*yun_raw = py_yunmu (pzytmp)) == -1)
|
|
345 return (0);
|
|
346 else
|
|
347 {
|
|
348 pzytmp += strlen (py_yunmu_tbl[*yun_raw]);
|
|
349 *sheng_raw = 0;
|
|
350 *ss = *yun_raw % 5;
|
|
351 *yun_raw = *yun_raw / 5;
|
|
352 return (pzytmp - yuyin);
|
|
353 }
|
|
354 }
|
|
355 else
|
|
356 { /* has ShengMu */
|
|
357 /*
|
|
358 for ( j = 0; (int)j < (int)strlen(py_shengmu_tbl[*sheng_raw]); j++)
|
|
359 pzytmp++;
|
|
360 */
|
|
361 pzytmp += strlen (py_shengmu_tbl[*sheng_raw]);
|
|
362 if (strlen (pzytmp) == 0)
|
|
363 return (0);
|
|
364 if ((*yun_raw = py_yunmu (pzytmp)) != -1)
|
|
365 {
|
|
366 pzytmp += strlen (py_yunmu_tbl[*yun_raw]);
|
|
367 *ss = *yun_raw % 5;
|
|
368 *yun_raw = *yun_raw / 5;
|
|
369 return (pzytmp - yuyin);
|
|
370 }
|
|
371 else
|
|
372 {
|
|
373 pzytmp = yuyin;
|
|
374 if ((*yun_raw = py_yunmu (pzytmp)) == -1)
|
|
375 return (0);
|
|
376 else
|
|
377 {
|
|
378 pzytmp += strlen (py_yunmu_tbl[*yun_raw]);
|
|
379 *sheng_raw = 0;
|
|
380 *ss = *yun_raw % 5;
|
|
381 *yun_raw = *yun_raw / 5;
|
|
382 return (pzytmp - yuyin);
|
|
383 }
|
|
384 }
|
|
385 } /* has ShengMu when Pinyin */
|
|
386 }
|
|
387 else
|
|
388 { /* for Zhuyin case */
|
|
389
|
|
390 if ((*sheng_raw = zy_shengmu (pzytmp)) == -1)
|
|
391 { /* no ShengMu */
|
|
392 if ((*yun_raw = zy_yunmu (pzytmp)) == -1)
|
|
393 return (0);
|
|
394 else
|
|
395 {
|
|
396 pzytmp += strlen (zy_yunmu_tbl[*yun_raw]);
|
|
397 *sheng_raw = 0;
|
|
398 *ss = *yun_raw % 5;
|
|
399 *yun_raw = *yun_raw / 5;
|
|
400 return (pzytmp - yuyin);
|
|
401 }
|
|
402 }
|
|
403 else
|
|
404 { /* has ShengMu */
|
|
405 /*
|
|
406 for ( j = 0; (int)j < (int)strlen(zy_shengmu_tbl[*sheng_raw]); j++)
|
|
407 pzytmp++;
|
|
408 */
|
|
409 pzytmp += strlen (zy_shengmu_tbl[*sheng_raw]);
|
|
410 if (strlen (pzytmp) == 0)
|
|
411 return (0);
|
|
412 if ((*yun_raw = zy_yunmu (pzytmp)) != -1)
|
|
413 {
|
|
414 pzytmp += strlen (zy_yunmu_tbl[*yun_raw]);
|
|
415 *ss = *yun_raw % 5;
|
|
416 *yun_raw = *yun_raw / 5;
|
|
417 return (pzytmp - yuyin);
|
|
418 }
|
|
419 else
|
|
420 {
|
|
421 pzytmp = yuyin;
|
|
422 if ((*yun_raw = zy_yunmu (pzytmp)) == -1)
|
|
423 return (0);
|
|
424 else
|
|
425 {
|
|
426 pzytmp += strlen (zy_yunmu_tbl[*yun_raw]);
|
|
427 *sheng_raw = 0;
|
|
428 *ss = *yun_raw % 5;
|
|
429 *yun_raw = *yun_raw / 5;
|
|
430 return (pzytmp - yuyin);
|
|
431 }
|
|
432 }
|
|
433 } /* has ShengMu when Zhuyin */
|
|
434 } /* which */
|
|
435 }
|
|
436
|
|
437 static w_char
|
|
438 pzy_yincod (one_yuyin, len)
|
|
439 register char *one_yuyin;
|
|
440 register int *len;
|
|
441 {
|
|
442 int ss[1];
|
|
443 int sheng_raw[1];
|
|
444 int yun_raw[1];
|
|
445 register int zytmp;
|
|
446 register int ret;
|
|
447
|
|
448 *len = 0;
|
|
449 /* for Pinyin */
|
|
450 if (ret = pzy_get_sheng_yun (one_yuyin, ss, sheng_raw, yun_raw, CWNN_PINYIN))
|
|
451 if (is_pinyin (sheng_raw[0], yun_raw[0]) != -1)
|
|
452 {
|
|
453 *len = ret;
|
|
454 return (create_yincod (sheng_raw[0], yun_raw[0], ss[0]));
|
|
455 }
|
|
456 /* for Zhuyin */
|
|
457 if (ret = pzy_get_sheng_yun (one_yuyin, ss, sheng_raw, yun_raw, CWNN_ZHUYIN))
|
|
458 {
|
|
459 zytmp = zhuyin_tbl[sheng_raw[0] * ZY_NUM_YUNMU + yun_raw[0]];
|
|
460 if (is_zhuyin (sheng_raw[0], yun_raw[0]) != -1)
|
|
461 {
|
|
462 if ((zytmp & 0x0080) == 0x0080)
|
|
463 {
|
|
464 sheng_raw[0] = (zytmp >> 8) & 0x7f;
|
|
465 yun_raw[0] = zytmp & 0x7f;
|
|
466 }
|
|
467 *len = ret;
|
|
468 return (create_yincod (sheng_raw[0], yun_raw[0], ss[0]));
|
|
469 }
|
|
470 }
|
|
471 return (0); /* Otherwise, Not a Pinyin nor Zhuyin */
|
|
472 }
|
|
473
|
|
474 /* ltoScpy(): copy strings from letter type to w_char type */
|
|
475
|
|
476 static int
|
|
477 ltoScpy (w, l)
|
|
478 register w_char *w;
|
|
479 register letter *l;
|
|
480 {
|
|
481 register w_char *w0 = w;
|
|
482
|
|
483 for (; *l; l++)
|
|
484 {
|
|
485 if ( /* ((*l & 0x0000ffff) == PY_EOF) || isZY_EOF(*l & 0x0000ffff)
|
|
486 || */ (*l == EOLTTR))
|
|
487 /* add by Kuwari */
|
|
488 break;
|
|
489 *w++ = (*l & 0x0000ffff);
|
|
490 }
|
|
491 *w = (w_char) 0;
|
|
492 return (w - w0);
|
|
493 }
|
|
494
|
|
495 /* find_pinyin(): find a YuYin in a string. if there is a YuYin.
|
|
496 it must be at the tail of string. return point of start YuYin
|
|
497 else return -1 eg. ;abcdHuang. 'Huang.' is a PinYin & return 5
|
|
498 012345
|
|
499 */
|
|
500 static int
|
|
501 find_pinyin (str)
|
|
502 register char *str;
|
|
503 {
|
|
504 register char *py_zy_tmp;
|
|
505 register int i;
|
|
506 register int pnt;
|
|
507 int len;
|
|
508 pnt = -1;
|
|
509 if ((((*(str + strlen (str) - 2) << 8) & 0xff00) | (*(str + strlen (str) - 1) & 0x00ff)) != PY_EOF)
|
|
510 return (-1);
|
|
511 for (i = strlen (str) - 1; i >= 0; i--)
|
|
512 {
|
|
513 if ((int) (strlen (str) - i) > PY_LEN)
|
|
514 return (pnt);
|
|
515 py_zy_tmp = str + i;
|
|
516 if (pzy_yincod (py_zy_tmp, &len) != 0)
|
|
517 pnt = i;
|
|
518 }
|
|
519 return (pnt);
|
|
520 }
|
|
521
|
|
522 static int
|
|
523 find_zhuyin (str)
|
|
524 register char *str;
|
|
525 {
|
|
526 register char *py_zy_tmp;
|
|
527 register int i;
|
|
528 register int pnt;
|
|
529 int len;
|
|
530 pnt = -1;
|
|
531 if (!isZY_EOF (((*(str + strlen (str) - 2) << 8) & 0xff00) | (*(str + strlen (str) - 1) & 0x00ff)))
|
|
532 return (-1);
|
|
533 for (i = strlen (str) - 1; i >= 0; i--)
|
|
534 {
|
|
535 if ((int) (strlen (str) - i) > PY_LEN)
|
|
536 return (pnt);
|
|
537 py_zy_tmp = str + i;
|
|
538 if (pzy_yincod (py_zy_tmp, &len) != 0)
|
|
539 pnt = i;
|
|
540 }
|
|
541 return (pnt);
|
|
542 }
|
|
543
|
|
544 /* get_one_zhuyin(): get one ZhuYin from ZhuYin strings */
|
|
545 /* get_one_pinyin(): get one PinYin from PinYin strings */
|
|
546 static int
|
|
547 get_one_pinyin (pinzhuyin_str, one_pinzhuyin)
|
|
548 register unsigned char *pinzhuyin_str;
|
|
549 register char *one_pinzhuyin;
|
|
550 {
|
|
551 register w_char chrtmp;
|
|
552 for (; (chrtmp = (((*pinzhuyin_str << 8) & 0xff00) | (*(pinzhuyin_str + 1) & 0x00ff))) != PY_EOF && *pinzhuyin_str != 0; pinzhuyin_str++)
|
|
553 *one_pinzhuyin++ = *pinzhuyin_str;
|
|
554 if (chrtmp == PY_EOF)
|
|
555 {
|
|
556 *one_pinzhuyin++ = *pinzhuyin_str;
|
|
557 pinzhuyin_str++;
|
|
558 *one_pinzhuyin++ = *pinzhuyin_str;
|
|
559 *one_pinzhuyin = 0;
|
|
560 return (1);
|
|
561 }
|
|
562 else
|
|
563 {
|
|
564 *one_pinzhuyin = 0;
|
|
565 return (0);
|
|
566 }
|
|
567 }
|
|
568
|
|
569 static int
|
|
570 get_one_zhuyin (pinzhuyin_str, one_pinzhuyin)
|
|
571 register unsigned char *pinzhuyin_str;
|
|
572 register char *one_pinzhuyin;
|
|
573 {
|
|
574 register w_char chrtmp;
|
|
575 for (; !isZY_EOF (chrtmp = (((*pinzhuyin_str << 8) & 0xff00) | (*(pinzhuyin_str + 1) & 0x00ff))) && *pinzhuyin_str != 0; pinzhuyin_str++)
|
|
576 *one_pinzhuyin++ = *pinzhuyin_str;
|
|
577 if (isZY_EOF (chrtmp))
|
|
578 {
|
|
579 *one_pinzhuyin++ = *pinzhuyin_str;
|
|
580 pinzhuyin_str++;
|
|
581 *one_pinzhuyin++ = *pinzhuyin_str;
|
|
582 *one_pinzhuyin = 0;
|
|
583 return (1);
|
|
584 }
|
|
585 else
|
|
586 {
|
|
587 *one_pinzhuyin = 0;
|
|
588 return (0);
|
|
589 }
|
|
590 }
|
|
591
|
|
592 /* cwnn_is_yincod(c) To check is "c"is a yincod.
|
|
593 if so, return(1) otherwise return 0*/
|
|
594 int
|
|
595 cwnn_is_yincod (c)
|
|
596 register w_char c;
|
|
597 {
|
|
598 register int sheng_raw;
|
|
599 register int yun_raw;
|
|
600
|
|
601 if (!_cwnn_isyincod_d (c))
|
|
602 return (0);
|
|
603
|
|
604 sheng_raw = Shengraw (c);
|
|
605 yun_raw = Yunraw (c);
|
|
606 if (is_pinyin (sheng_raw, yun_raw))
|
|
607 return (1);
|
|
608 if (sheng_raw == X_SHENG_RAW && is_pinyin (EMPTY_SHENG_RAW, yun_raw) == 0)
|
|
609 return (1);
|
|
610 else
|
|
611 return (0);
|
|
612 }
|
|
613
|
|
614 /* For a given 'yincod', creat the corresponding Pinyin or Zhuyin
|
|
615 to pzy_buf as a w_char string. If the given 'yincod' is not a yincod,
|
|
616 'yincod', followed by a NULL is created to pzy_fub.
|
|
617 Return: the lenth of pzy_buf is returned.
|
|
618 Lenth means the lenth in console but not num of character.
|
|
619 */
|
|
620 int
|
|
621 cwnn_yincod_pzy (pzy_buf, c, which)
|
|
622 register w_char *pzy_buf; /* out: a Pinyin or Zhuyin */
|
|
623 register w_char c; /* input: a yincod */
|
|
624 int which; /* option Pinyin or Zhuyin */
|
|
625 {
|
|
626 register int sheng_raw;
|
|
627 register int yun_raw;
|
|
628 register int ss; /* for Sisheng */
|
|
629 register int zytmp;
|
|
630
|
|
631 if (!cwnn_is_yincod (c))
|
|
632 {
|
|
633 *pzy_buf = c;
|
|
634 *(pzy_buf + 1) = 0;
|
|
635 return (1);
|
|
636
|
|
637 /* if ( ((c&0x00ff)>0xa0) && ((c&0x00ff)< 0xff) &&
|
|
638 ((c>>8) > 0xa0) && ((c>>8) < 0xff) )
|
|
639 return(2);
|
|
640 else return(1);
|
|
641 */
|
|
642 }
|
|
643
|
|
644 sheng_raw = Shengraw (c);
|
|
645 yun_raw = Yunraw (c);
|
|
646 ss = _cwnn_sisheng (c);
|
|
647
|
|
648 if (which == CWNN_PINYIN)
|
|
649 { /* For Pinyin case */
|
|
650 if (sheng_raw == X_SHENG_RAW && is_pinyin (sheng_raw, yun_raw) == -1)
|
|
651 if (is_pinyin (EMPTY_SHENG_RAW, yun_raw) == 0)
|
|
652 sheng_raw = EMPTY_SHENG_RAW;
|
|
653 cwnn_Sstrcpy (pzy_buf, py_shengmu_tbl[sheng_raw]);
|
|
654 if (_cwnn_has_sisheng (c))
|
|
655 cwnn_Sstrcat (pzy_buf, py_yunmu_tbl[yun_raw * 5 + ss]);
|
|
656 else
|
|
657 cwnn_Sstrcat (pzy_buf, py_yunmu_tbl[yun_raw * 5]);
|
|
658 }
|
|
659 else
|
|
660 { /* For Zhuyin case */
|
|
661
|
|
662 zytmp = zhuyin_tbl[sheng_raw * ZY_NUM_YUNMU + yun_raw];
|
|
663 if (is_zhuyin (sheng_raw, yun_raw) == -1)
|
|
664 {
|
|
665 if ((zytmp & 0x0080) == 0x0080)
|
|
666 {
|
|
667 sheng_raw = (zytmp >> 8) & 0x7f;
|
|
668 yun_raw = zytmp & 0x7f;
|
|
669 }
|
|
670 else
|
|
671 {
|
|
672 if ((sheng_raw == X_SHENG_RAW) && (is_zhuyin (EMPTY_SHENG_RAW, yun_raw) == 0))
|
|
673 sheng_raw = EMPTY_SHENG_RAW;
|
|
674 }
|
|
675 }
|
|
676 cwnn_Sstrcpy (pzy_buf, zy_shengmu_tbl[sheng_raw]);
|
|
677 if (yun_raw == EMPTY_YUN_RAW)
|
|
678 {
|
|
679 w_char tmp_w;
|
|
680 if (_cwnn_has_sisheng (c))
|
|
681 {
|
|
682 switch (ss)
|
|
683 {
|
|
684 case 1:
|
|
685 tmp_w = ZY_EOF_1;
|
|
686 break;
|
|
687 case 2:
|
|
688 tmp_w = ZY_EOF_2;
|
|
689 break;
|
|
690 case 3:
|
|
691 tmp_w = ZY_EOF_3;
|
|
692 break;
|
|
693 case 4:
|
|
694 tmp_w = ZY_EOF_4;
|
|
695 break;
|
|
696 }
|
|
697 }
|
|
698 else
|
|
699 {
|
|
700 tmp_w = ZY_EOF_0;
|
|
701 }
|
|
702 wnn_Strncat (pzy_buf, &tmp_w, 1);
|
|
703 }
|
|
704 else
|
|
705 {
|
|
706 if (_cwnn_has_sisheng (c))
|
|
707 cwnn_Sstrcat (pzy_buf, zy_yunmu_tbl[yun_raw * 5 + ss]);
|
|
708 else
|
|
709 cwnn_Sstrcat (pzy_buf, zy_yunmu_tbl[yun_raw * 5]);
|
|
710 }
|
|
711 }
|
|
712 return (wnn_Strlen (pzy_buf));
|
|
713 }
|
|
714
|
|
715 /* Copy s2 which having yincod to s1 in which yincod are replaced by
|
|
716 the corresponding Pinyin or Zhuyin. Lenth of s2 is returned
|
|
717 */
|
|
718 int
|
|
719 cwnn_yincod_pzy_str (s1, s2, n, which)
|
|
720 register w_char *s1; /* result string having Pinyin or Zhuyin */
|
|
721 register w_char *s2; /* input string having Yincod */
|
|
722 int n;
|
|
723 int which;
|
|
724 {
|
|
725 w_char s2tmp[LINE_SIZE];
|
|
726 register int i, j;
|
|
727 w_char pzy_buf[10];
|
|
728 int len, sum_len;
|
|
729
|
|
730 len = 0;
|
|
731 sum_len = 0;
|
|
732 for (i = 0; i < n; i++)
|
|
733 s2tmp[i] = s2[i];
|
|
734 for (i = 0; i < n; i++)
|
|
735 {
|
|
736
|
|
737 /* len = cwnn_yincod_pzy(pzy_buf, s2tmp[i], which);
|
|
738 for (j = 0; j < len; j++)
|
|
739 *s1++ = pzy_buf[j];
|
|
740 sum_len += len;
|
|
741 */
|
|
742 /* Strlen(pzy_buf) is the num of w_char , but "len" means the width */
|
|
743
|
|
744 len = cwnn_yincod_pzy (pzy_buf, s2tmp[i], which);
|
|
745 for (j = 0; j < wnn_Strlen (pzy_buf); j++)
|
|
746 *s1++ = pzy_buf[j];
|
|
747 sum_len += wnn_Strlen (pzy_buf);
|
|
748 }
|
|
749 *s1 = 0;
|
|
750 return (sum_len);
|
|
751 }
|
|
752
|
|
753 /* cwnn_pzy_yincod(s1, s2, which):
|
|
754 After the matching in automaton, the string may be a Pinyin or a Zhuyin
|
|
755 If so, it will be replace by the coreesponding Yincod */
|
|
756
|
|
757 int
|
|
758 cwnn_pzy_yincod (s1, s2, which)
|
|
759 letter *s1, *s2;
|
|
760 int which;
|
|
761 {
|
|
762 /*
|
|
763 register w_char codetmp2[PY_LEN];
|
|
764 register char *codetmp1 = {" "};
|
|
765 */
|
|
766 w_char codetmp2_buf[PY_LEN * 10 + 1];
|
|
767 char codetmp1_buf[PY_LEN * 20 + 2];
|
|
768 register w_char *codetmp2 = codetmp2_buf;
|
|
769 register char *codetmp1 = codetmp1_buf;
|
|
770 register letter *lettertmp = s2, *s1tmp = s1;
|
|
771 register w_char yincod;
|
|
772 int len;
|
|
773 int conv = 0;
|
|
774 w_char save_w[2];
|
|
775 char save, tmp[6];
|
|
776
|
|
777 save_w[0] = save_w[1] = 0;
|
|
778 ltoScpy (codetmp2, lettertmp);
|
|
779 if (cwnn_sStrcpy (codetmp1, codetmp2) <= 0)
|
|
780 return (0);
|
|
781
|
|
782 /* if ((yincod = pzy_yincod(codetmp1)) != 0)
|
|
783 getstr_pzy(s1, yincod, which);
|
|
784
|
|
785 Jun 13 Zhong */
|
|
786 for (; *lettertmp && *lettertmp != EOLTTR;)
|
|
787 {
|
|
788 if ((yincod = pzy_yincod (codetmp1, &len)) != 0)
|
|
789 {
|
|
790 conv++;
|
|
791 *s1tmp++ = (letter) yincod;
|
|
792 save = codetmp1[len];
|
|
793 codetmp1[len] = '\0';
|
|
794 lettertmp += cwnn_Sstrcpy (codetmp2, codetmp1);
|
|
795 codetmp1[len] = save;
|
|
796 codetmp1 += len;
|
|
797 }
|
|
798 else
|
|
799 {
|
|
800 save_w[0] = (w_char) (*lettertmp & 0xffff);
|
|
801 *s1tmp++ = *lettertmp++;
|
|
802 codetmp1 += cwnn_sStrcpy (tmp, save_w);
|
|
803 }
|
|
804 }
|
|
805 if (*lettertmp == EOLTTR)
|
|
806 *s1tmp++ = *lettertmp++;
|
|
807 if (conv)
|
|
808 {
|
|
809 return (s1tmp - s1);
|
|
810 }
|
|
811 else
|
|
812 {
|
|
813 return (0);
|
|
814 }
|
|
815 }
|
|
816
|
|
817 /* cwnn_py_yincod_str(), cwnn_zy_yincod_str():HUANG: for atod
|
|
818 we get yomi as PinYin or ZhuYin strings from ascii-dictionary and
|
|
819 translate it to YINcode
|
|
820 */
|
|
821 void
|
|
822 cwnn_py_yincod_str (yuyin_str, css, un_sisheng_yincod_str, yincod_str)
|
|
823 register char *yuyin_str; /* yomi: PinYin or ZhuYin strings */
|
|
824 register char *css; /* get sisheng strings from PinYin strings */
|
|
825 register w_char *un_sisheng_yincod_str; /* no-sisheng Yincod strings */
|
|
826 register w_char *yincod_str; /* Yincod strings with sisheng */
|
|
827 {
|
|
828 /*
|
|
829 register char one_yuyin[LINE_SIZE];
|
|
830 register w_char not_yuyin[LINE_SIZE];
|
|
831 */
|
|
832 char one_yuyin_buf[LINE_SIZE];
|
|
833 w_char not_yuyin_buf[LINE_SIZE];
|
|
834 register char *one_yuyin = one_yuyin_buf;
|
|
835 register w_char *not_yuyin = not_yuyin_buf;
|
|
836 register int yin_eof;
|
|
837 register w_char yincod;
|
|
838 register int i, pst;
|
|
839 int len;
|
|
840
|
|
841 for (; *yuyin_str;)
|
|
842 {
|
|
843 yin_eof = get_one_pinyin (yuyin_str, one_yuyin);
|
|
844 yuyin_str += strlen (one_yuyin);
|
|
845 cwnn_Sstrcpy (not_yuyin, one_yuyin);
|
|
846 pst = find_pinyin (one_yuyin);
|
|
847 if (yin_eof == 1 && pst != -1)
|
|
848 {
|
|
849 for (i = 0; i < pst; i++)
|
|
850 {
|
|
851 *yincod_str++ = not_yuyin[i];
|
|
852 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
853 *css++ = '5';
|
|
854 }
|
|
855 yincod = pzy_yincod (one_yuyin, &len);
|
|
856 *yincod_str++ = yincod;
|
|
857 *un_sisheng_yincod_str++ = _cwnn_yincod_0 (yincod);
|
|
858 *css++ = (char) (_cwnn_sisheng (yincod) + 0x30);
|
|
859 }
|
|
860 else
|
|
861 {
|
|
862 for (i = 0; not_yuyin[i]; i++)
|
|
863 {
|
|
864 *yincod_str++ = not_yuyin[i];
|
|
865 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
866 *css++ = '5';
|
|
867 }
|
|
868 }
|
|
869 }
|
|
870 *yincod_str = 0;
|
|
871 *un_sisheng_yincod_str = 0;
|
|
872 *css = 0;
|
|
873 }
|
|
874
|
|
875 void
|
|
876 cwnn_zy_yincod_str (yuyin_str, css, un_sisheng_yincod_str, yincod_str)
|
|
877 register char *yuyin_str; /* yomi: PinYin or ZhuYin strings */
|
|
878 register char *css; /* get sisheng strings from PinYin strings */
|
|
879 register w_char *un_sisheng_yincod_str; /* no-sisheng Yincod strings */
|
|
880 register w_char *yincod_str; /* Yincod strings with sisheng */
|
|
881 {
|
|
882 /*
|
|
883 register char one_yuyin[LINE_SIZE];
|
|
884 register w_char not_yuyin[LINE_SIZE];
|
|
885 */
|
|
886 char one_yuyin_buf[LINE_SIZE];
|
|
887 w_char not_yuyin_buf[LINE_SIZE];
|
|
888 register char *one_yuyin = one_yuyin_buf;
|
|
889 register w_char *not_yuyin = not_yuyin_buf;
|
|
890 register int yin_eof;
|
|
891 register w_char yincod;
|
|
892 register int i, pst;
|
|
893 int len;
|
|
894
|
|
895 for (; *yuyin_str;)
|
|
896 {
|
|
897 yin_eof = get_one_zhuyin (yuyin_str, one_yuyin);
|
|
898 yuyin_str += strlen (one_yuyin);
|
|
899 cwnn_Sstrcpy (not_yuyin, one_yuyin);
|
|
900 pst = find_zhuyin (one_yuyin);
|
|
901 if (yin_eof == 1 && pst != -1)
|
|
902 {
|
|
903 for (i = 0; i < pst; i++)
|
|
904 {
|
|
905 *yincod_str++ = not_yuyin[i];
|
|
906 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
907 *css++ = '5';
|
|
908 }
|
|
909 yincod = pzy_yincod (one_yuyin, &len);
|
|
910 *yincod_str++ = yincod;
|
|
911 *un_sisheng_yincod_str++ = _cwnn_yincod_0 (yincod);
|
|
912 *css++ = (char) (_cwnn_sisheng (yincod) + 0x30);
|
|
913 }
|
|
914 else
|
|
915 {
|
|
916 for (i = 0; not_yuyin[i]; i++)
|
|
917 {
|
|
918 *yincod_str++ = not_yuyin[i];
|
|
919 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
920 *css++ = '5';
|
|
921 }
|
|
922 }
|
|
923 }
|
|
924 *yincod_str = 0;
|
|
925 *un_sisheng_yincod_str = 0;
|
|
926 *css = 0;
|
|
927 }
|
|
928
|
|
929 /* cwnn_py_str_analysis(), cp_zy_str_analysis(): HUANG: for atod
|
|
930 we get yomi as PinYin or ZhuYin strings from ascii-dictionary
|
|
931 and translate it to YINcode
|
|
932 */
|
|
933 void
|
|
934 cwnn_py_str_analysis (yuyin_str, css, un_sisheng_yincod_str, yincod_str)
|
|
935 register char *yuyin_str; /* yomi: PinYin or ZhuYin strings */
|
|
936 register char *css; /* get sisheng strings from PinYin strings */
|
|
937 register w_char *un_sisheng_yincod_str; /* no-sisheng Yincod strings */
|
|
938 register w_char *yincod_str; /* Yincod strings with sisheng */
|
|
939 {
|
|
940 /*
|
|
941 register char one_yuyin[LINE_SIZE];
|
|
942 register w_char not_yuyin[LINE_SIZE];
|
|
943 */
|
|
944 char one_yuyin_buf[LINE_SIZE];
|
|
945 w_char not_yuyin_buf[LINE_SIZE];
|
|
946 register char *one_yuyin = one_yuyin_buf;
|
|
947 register w_char *not_yuyin = not_yuyin_buf;
|
|
948 register int yin_eof;
|
|
949 register w_char yincod;
|
|
950 register int i, pst;
|
|
951 int len;
|
|
952 for (; *yuyin_str;)
|
|
953 {
|
|
954 yin_eof = get_one_pinyin (yuyin_str, one_yuyin);
|
|
955 yuyin_str += strlen (one_yuyin);
|
|
956 cwnn_Sstrcpy (not_yuyin, one_yuyin);
|
|
957 pst = find_pinyin (one_yuyin);
|
|
958 if (yin_eof == 1 && pst != -1)
|
|
959 {
|
|
960 for (i = 0; i < pst; i++)
|
|
961 {
|
|
962 *yincod_str++ = not_yuyin[i];
|
|
963 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
964 *css++ = '5';
|
|
965 }
|
|
966 yincod = pzy_yincod (one_yuyin, &len);
|
|
967 *yincod_str++ = yincod;
|
|
968 *un_sisheng_yincod_str++ = _cwnn_yincod_0 (yincod);
|
|
969 *css++ = (char) (_cwnn_sisheng (yincod) + 0x30);
|
|
970 }
|
|
971 else
|
|
972 {
|
|
973 for (i = 0; not_yuyin[i]; i++)
|
|
974 {
|
|
975 *yincod_str++ = not_yuyin[i];
|
|
976 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
977 *css++ = '5';
|
|
978 }
|
|
979 }
|
|
980 }
|
|
981 *yincod_str = 0;
|
|
982 *un_sisheng_yincod_str = 0;
|
|
983 *css = 0;
|
|
984 }
|
|
985
|
|
986 void
|
|
987 cwnn_zy_str_analysis (yuyin_str, css, un_sisheng_yincod_str, yincod_str)
|
|
988 register char *yuyin_str; /* yomi: PinYin or ZhuYin strings */
|
|
989 register char *css; /* get sisheng strings from PinYin strings */
|
|
990 register w_char *un_sisheng_yincod_str; /* no-sisheng Yincod strings */
|
|
991 register w_char *yincod_str; /* Yincod strings with sisheng */
|
|
992 {
|
|
993 /*
|
|
994 register char one_yuyin[LINE_SIZE];
|
|
995 register w_char not_yuyin[LINE_SIZE];
|
|
996 */
|
|
997 char one_yuyin_buf[LINE_SIZE];
|
|
998 w_char not_yuyin_buf[LINE_SIZE];
|
|
999 register char *one_yuyin = one_yuyin_buf;
|
|
1000 register w_char *not_yuyin = not_yuyin_buf;
|
|
1001 register int yin_eof;
|
|
1002 register w_char yincod;
|
|
1003 register int i, pst;
|
|
1004 int len;
|
|
1005 for (; *yuyin_str;)
|
|
1006 {
|
|
1007 yin_eof = get_one_zhuyin (yuyin_str, one_yuyin);
|
|
1008 yuyin_str += strlen (one_yuyin);
|
|
1009 cwnn_Sstrcpy (not_yuyin, one_yuyin);
|
|
1010 pst = find_zhuyin (one_yuyin);
|
|
1011 if (yin_eof == 1 && pst != -1)
|
|
1012 {
|
|
1013 for (i = 0; i < pst; i++)
|
|
1014 {
|
|
1015 *yincod_str++ = not_yuyin[i];
|
|
1016 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
1017 *css++ = '5';
|
|
1018 }
|
|
1019 yincod = pzy_yincod (one_yuyin, &len);
|
|
1020 *yincod_str++ = yincod;
|
|
1021 *un_sisheng_yincod_str++ = _cwnn_yincod_0 (yincod);
|
|
1022 *css++ = (char) (_cwnn_sisheng (yincod) + 0x30);
|
|
1023 }
|
|
1024 else
|
|
1025 {
|
|
1026 for (i = 0; not_yuyin[i]; i++)
|
|
1027 {
|
|
1028 *yincod_str++ = not_yuyin[i];
|
|
1029 *un_sisheng_yincod_str++ = not_yuyin[i];
|
|
1030 *css++ = '5';
|
|
1031 }
|
|
1032 }
|
|
1033 }
|
|
1034 *yincod_str = 0;
|
|
1035 *un_sisheng_yincod_str = 0;
|
|
1036 *css = 0;
|
|
1037 }
|
|
1038 #endif /* CHINESE */
|