Mercurial > freewnn
comparison Wnn/jutil/ujisf.c @ 0:bbc77ca4def5
initial import
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 13 Dec 2007 04:30:14 +0900 |
parents | |
children | 6ab41ec6f895 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bbc77ca4def5 |
---|---|
1 /* | |
2 * $Id: ujisf.c,v 1.7 2002/07/14 04:26:57 hiroo Exp $ | |
3 */ | |
4 | |
5 /* | |
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system. | |
7 * This file is part of FreeWnn. | |
8 * | |
9 * Copyright Kyoto University Research Institute for Mathematical Sciences | |
10 * 1987, 1988, 1989, 1990, 1991, 1992 | |
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 | |
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 | |
13 * Copyright FreeWnn Project 1999, 2000, 2002 | |
14 * | |
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> | |
16 * | |
17 * This program is free software; you can redistribute it and/or modify | |
18 * it under the terms of the GNU General Public License as published by | |
19 * the Free Software Foundation; either version 2 of the License, or | |
20 * (at your option) any later version. | |
21 * | |
22 * This program is distributed in the hope that it will be useful, | |
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 * GNU General Public License for more details. | |
26 * | |
27 * You should have received a copy of the GNU General Public License | |
28 * along with this program; if not, write to the Free Software | |
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
30 */ | |
31 | |
32 /* | |
33 * Ujis format <--> internal data. | |
34 */ | |
35 | |
36 #ifdef HAVE_CONFIG_H | |
37 # include <config.h> | |
38 #endif | |
39 | |
40 #include <stdio.h> | |
41 #if STDC_HEADERS | |
42 # include <stdlib.h> | |
43 # include <string.h> | |
44 #else | |
45 # if HAVE_MALLOC_H | |
46 # include <malloc.h> | |
47 # endif | |
48 # if HAVE_STRINGS_H | |
49 # include <strings.h> | |
50 # endif | |
51 #endif /* STDC_HEADERS */ | |
52 | |
53 #include "commonhd.h" | |
54 #include "jslib.h" | |
55 #include "jh.h" | |
56 #include "jdata.h" | |
57 #include "wnn_os.h" | |
58 #include "wnn_string.h" | |
59 | |
#ifdef CHINESE
#include "cplib.h"

/* Romanisation used for the reading column: Pinyin or Zhuyin. */
int pzy_flag = CWNN_PINYIN;

/* Chinese-only helpers defined further down in this file. */
static void sisheng_num ();
static void read_kanji_str_w ();
#endif
65 | |
/*
 * Smaller / larger of two values.  Every argument and the whole expansion
 * are parenthesized so that operands containing lower-precedence operators
 * (|, &, ?:, ...) compare correctly.  Arguments are still evaluated twice;
 * do not pass expressions with side effects.
 */
#ifndef min
#define min(a, b) (((a) > (b)) ? (b) : (a))
#define max(a, b) (((a) < (b)) ? (b) : (a))
#endif
70 | |
71 extern unsigned char kanjiaddr (); | |
72 extern void Print_entry (); | |
73 extern int wnn_find_hinsi_by_name (); | |
74 #ifdef CHINESE | |
75 extern void cwnn_zy_str_analysis (), cwnn_py_str_analysis (); | |
76 #endif | |
77 int sort_func (), Sorted (), w_stradd (); | |
78 static void Kanjistradd (), bunpou_num (), read_kanji_str (), toesc (); | |
79 void exit1 (); | |
80 | |
81 extern struct JT jt; | |
82 | |
83 /* extern variables */ | |
84 | |
85 struct je **jeary; | |
86 int wnnerror; | |
87 | |
88 #define WNN_HINSI_LEN 4096 | |
89 | |
90 w_char file_comment[WNN_COMMENT_LEN]; | |
91 w_char hinsi_list[WNN_HINSI_LEN]; | |
92 | |
93 | |
94 | |
95 /* static variables */ | |
96 static UCHAR *heap, *hp, *heapend; | |
97 static w_char *yomi_heap, *yhp, *yheapend; | |
98 static struct je *je_heap, *jehp, *jeheapend; | |
99 static FILE *ifpter; | |
100 static int maxline; | |
101 | |
102 /* extern functions of this file are | |
103 ujis_header(); | |
104 read_ujis(reversep, to_esc, which_dict); | |
105 reverse_yomi(); | |
106 sort(); | |
107 uniq_je(func); | |
108 output_ujis(opter, serial_out, esc_exp); | |
109 */ | |
110 | |
111 int lc; | |
112 | |
113 static char stack[LINE_SIZE] = { 0 }; | |
114 | |
115 int | |
116 get_line (c) | |
117 register char *c; | |
118 { | |
119 if (stack[0]) | |
120 { | |
121 strcpy (c, stack); | |
122 stack[0] = 0; | |
123 } | |
124 else | |
125 { | |
126 if (fgets (c, LINE_SIZE, ifpter) == NULL) | |
127 { | |
128 return (EOF); | |
129 } | |
130 } | |
131 return (0); | |
132 } | |
133 | |
134 void | |
135 unget_line (c) | |
136 char *c; | |
137 { | |
138 strcpy (stack, c); | |
139 } | |
140 | |
141 | |
/*
 * Copy the next blank-delimited token of buf into str.
 *
 * Leading spaces and tabs are skipped; the token ends at the next space,
 * tab, newline or NUL.  str is always NUL-terminated.
 * Returns a pointer to the character following the token, or NULL (with
 * str set to the empty string) when only blanks remain on the line.
 */
char *
get_string (str, buf)
     register char *str;
     char *buf;
{
  register char *src = buf;

  while (*src == ' ' || *src == '\t')
    src++;

  if (*src == '\n' || *src == '\0')
    {
      *str = '\0';
      return (NULL);
    }

  while (*src != '\0' && *src != '\n' && *src != ' ' && *src != '\t')
    *str++ = *src++;

  *str = '\0';
  return (src);
}
161 | |
162 void | |
163 bad_line (bf) | |
164 char *bf; | |
165 { | |
166 static int badl = 0; | |
167 | |
168 fprintf (stderr, "Bad line \"%s\"\n", bf); | |
169 fprintf (stderr, "Bad line omitted\n"); | |
170 if (++badl > BADLMAX) | |
171 { | |
172 fprintf (stderr, "Too many bad lines.\n"); | |
173 exit1 (); | |
174 } | |
175 } | |
176 | |
/* Report that the entry table is full and terminate through exit1(). */
void
error_no_heap ()
{
  fputs ("Heap area is exhausted.\n", stderr);
  exit1 ();
}
183 | |
184 static int | |
185 get_one_line (buffer, jep, rev, to_esc, which_dict) | |
186 char *buffer; | |
187 register struct je **jep; | |
188 int rev; | |
189 int to_esc; | |
190 int which_dict; | |
191 { | |
192 register char *c = buffer; | |
193 static char tmp[LINE_SIZE]; | |
194 static char ckanji[LINE_SIZE]; | |
195 static char cyomi[LINE_SIZE]; | |
196 static w_char yomi[LINE_SIZE]; | |
197 static w_char kanji[LINE_SIZE]; | |
198 static w_char comm[LINE_SIZE]; | |
199 #ifdef CHINESE | |
200 static w_char un_sisheng_yincod_str[LINE_SIZE]; | |
201 static w_char yincod_str[LINE_SIZE]; | |
202 static char csisheng[LINE_SIZE]; | |
203 static w_char wtmp[LINE_SIZE]; | |
204 #endif | |
205 char *c1; | |
206 | |
207 if (jehp == jeheapend) | |
208 { | |
209 if ((jehp = je_heap = (struct je *) malloc ((HEAPINC * sizeof (struct je)))) == NULL) | |
210 { | |
211 fprintf (stderr, "Malloc Failed\n"); | |
212 return (-1); | |
213 } | |
214 jeheapend = je_heap + HEAPINC; | |
215 } | |
216 *jep = jehp; | |
217 jehp++; | |
218 | |
219 if (rev == REVERSE) | |
220 { | |
221 if ((c = get_string (ckanji, c)) == NULL) | |
222 return (1); | |
223 } | |
224 else | |
225 { | |
226 if ((c = get_string (cyomi, c)) == NULL) | |
227 return (1); | |
228 } | |
229 if (rev == REVERSE) | |
230 { | |
231 if ((c = get_string (cyomi, c)) == NULL) | |
232 return (-1); | |
233 } | |
234 else | |
235 { | |
236 if ((c = get_string (ckanji, c)) == NULL) | |
237 return (-1); | |
238 } | |
239 #ifdef CHINESE | |
240 /* here ,should seperate pinyin to two part */ | |
241 /* one is usually pinyin string like Zhong.Guo. */ | |
242 /* the is sisheng string like 23 */ | |
243 | |
244 if (which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT) | |
245 { | |
246 if (pzy_flag == CWNN_ZHUYIN) | |
247 cwnn_zy_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str); | |
248 else | |
249 cwnn_py_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str); | |
250 | |
251 sisheng_num (csisheng, &((*jep)->ss)); | |
252 read_kanji_str_w (wtmp, un_sisheng_yincod_str); | |
253 wnn_Strcpy (yomi, wtmp); | |
254 } | |
255 else | |
256 { | |
257 read_kanji_str (tmp, cyomi); | |
258 wnn_Sstrcpy (yomi, tmp); | |
259 } | |
260 #else | |
261 read_kanji_str (tmp, cyomi); | |
262 wnn_Sstrcpy (yomi, tmp); | |
263 #endif | |
264 if (wnn_Strlen (yomi) >= LENGTHYOMI) | |
265 { | |
266 fprintf (stderr, "YOMI is longer in line %d.\n", lc); | |
267 return (-1); | |
268 } | |
269 w_stradd (yomi, &((*jep)->yomi)); | |
270 | |
271 read_kanji_str (tmp, ckanji); | |
272 wnn_Sstrcpy (kanji, tmp); | |
273 if (wnn_Strlen (kanji) >= LENGTHYOMI) | |
274 { | |
275 fprintf (stderr, "KANJI is longer in line %d.\n", lc); | |
276 return (-1); | |
277 } | |
278 w_stradd (kanji, &((*jep)->kan)); | |
279 | |
280 if ((c = get_string (tmp, c)) == NULL) | |
281 return (-1); | |
282 bunpou_num (tmp, &((*jep)->hinsi)); | |
283 | |
284 if ((c = get_string (tmp, c)) == NULL) | |
285 return (-1); | |
286 if (tmp[0] == '-') | |
287 { | |
288 (*jep)->hindo = -1; /* Real hindo == -1 means Not to use it */ | |
289 } | |
290 else | |
291 { | |
292 sscanf (tmp, "%d", &((*jep)->hindo)); | |
293 } | |
294 | |
295 if ((get_string (tmp, c)) == NULL) | |
296 { | |
297 c1 = NULL; | |
298 (*jep)->comm = NULL; | |
299 comm[0] = 0; | |
300 } | |
301 else | |
302 { | |
303 /* left entries are all considered as comment */ | |
304 for (; *c == '\t' || *c == ' '; c++); | |
305 if (c[strlen (c) - 1] == '\n') | |
306 c[strlen (c) - 1] = '\0'; | |
307 c1 = c; | |
308 wnn_Sstrcpy (comm, c1); | |
309 if (wnn_Strlen (comm) >= LENGTHYOMI) | |
310 { | |
311 fprintf (stderr, "COMMENT is longer in line %d.\n", lc); | |
312 return (-1); | |
313 } | |
314 w_stradd (comm, &((*jep)->comm)); | |
315 } | |
316 | |
317 if (to_esc) | |
318 { | |
319 toesc (ckanji, cyomi); | |
320 } | |
321 /* | |
322 if(strchr(ckanji, DIC_COMMENT_CHAR) || | |
323 strchr(ckanji, DIC_YOMI_CHAR)){ | |
324 fprintf(stderr, "Bad character in kanji\n"); | |
325 return(-1); | |
326 } | |
327 if(which_dict){ | |
328 if(strchr(cyomi, DIC_COMMENT_CHAR) || | |
329 strchr(cyomi, DIC_YOMI_CHAR)){ | |
330 fprintf(stderr, "Bad character in yomi\n"); | |
331 return(-1); | |
332 } | |
333 } | |
334 */ | |
335 Kanjistradd (kanji, | |
336 #ifdef CHINESE | |
337 ((which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT) ? yincod_str : ((which_dict == WNN_REV_DICT) ? yomi : NULL)), | |
338 #else | |
339 (which_dict) ? yomi : NULL, | |
340 #endif | |
341 comm, &(*jep)->kanji); | |
342 return (0); | |
343 } | |
344 | |
345 static void | |
346 Kanjistradd (k, y, c, cp) | |
347 register UCHAR **cp; | |
348 w_char *k, *y, *c; | |
349 { | |
350 int len; | |
351 if (hp + LENGTHKANJI >= heapend) | |
352 { | |
353 if ((hp = heap = (UCHAR *) malloc ((HEAPINC * HEAP_PER_LINE))) == NULL) | |
354 { | |
355 fprintf (stderr, "Malloc Failed\n"); | |
356 exit (1); | |
357 } | |
358 heapend = heap + (HEAPINC * HEAP_PER_LINE); | |
359 } | |
360 *cp = hp; | |
361 if ((len = kanjiaddr (hp, k, y, c)) >= LENGTHKANJI) | |
362 { | |
363 fprintf (stderr, "KANJI is longer in line %d.\n", lc); | |
364 exit (1); | |
365 } | |
366 hp += len; | |
367 } | |
368 | |
369 int | |
370 w_stradd (str, cp) | |
371 register w_char **cp; | |
372 register w_char *str; | |
373 { | |
374 register int len = wnn_Strlen (str); | |
375 | |
376 if (yhp + len + 1 >= yheapend) | |
377 { | |
378 if ((yhp = yomi_heap = (w_char *) malloc ((HEAPINC * sizeof (w_char)))) == NULL) | |
379 { | |
380 fprintf (stderr, "Malloc Failed\n"); | |
381 return (-1); | |
382 } | |
383 yheapend = yomi_heap + HEAPINC; | |
384 } | |
385 *cp = yhp; | |
386 wnn_Strcpy (yhp, str); | |
387 yhp += len + 1; | |
388 return (0); | |
389 } | |
390 | |
391 void | |
392 #ifdef CHINESE | |
393 ujis_header (which_dict) | |
394 int *which_dict; | |
395 #else | |
396 ujis_header () | |
397 #endif | |
398 { | |
399 char buffer[LINE_SIZE]; | |
400 char *c = buffer; | |
401 char str[LINE_SIZE]; | |
402 | |
403 jt.total = 0; | |
404 file_comment[0] = 0; | |
405 hinsi_list[0] = 0; | |
406 | |
407 for (;;) | |
408 { | |
409 if (get_line (buffer) == EOF) | |
410 { | |
411 goto EOF_HEAD; | |
412 } | |
413 c = buffer; | |
414 if ((c = get_string (str, c)) == NULL) | |
415 continue; | |
416 if (strcmp (str, COMMENT) == 0) | |
417 { | |
418 /* for(;;){ | |
419 if(get_line(buffer) == EOF){ | |
420 goto EOF_EHAD; | |
421 } | |
422 if(buffer[0] == '\\'){ | |
423 unget_line(buffer); | |
424 break; | |
425 } | |
426 if(wnn_Strlen(file_comment) + strlen(buffer) | |
427 >= WNN_COMMENT_LEN){ | |
428 fprintf(stderr, "Too Long Comment.\n"); | |
429 exit1(); | |
430 } | |
431 wnn_Sstrcpy(file_comment + wnn_Strlen(file_comment), buffer); | |
432 } | |
433 */ | |
434 get_string (str, c); | |
435 /* | |
436 if(str[strlen(str) - 1] == '\n'){ | |
437 c[strlen(str) - 1] = '\0'; | |
438 } | |
439 */ | |
440 wnn_Sstrcpy (file_comment, str); | |
441 #ifdef CHINESE | |
442 } | |
443 else if (strcmp (str, PINYIN) == 0) | |
444 { | |
445 *which_dict = CWNN_REV_DICT; | |
446 pzy_flag = CWNN_PINYIN; | |
447 } | |
448 else if (strcmp (str, ZHUYIN) == 0) | |
449 { | |
450 *which_dict = CWNN_REV_DICT; | |
451 pzy_flag = CWNN_ZHUYIN; | |
452 } | |
453 else if (strcmp (str, BIXING) == 0) | |
454 { | |
455 *which_dict = BWNN_REV_DICT; | |
456 #endif | |
457 } | |
458 else if (strcmp (str, HINSI) == 0 | |
459 #ifdef CHINESE | |
460 || strcmp (str, CHINSI) == 0 | |
461 #endif | |
462 ) | |
463 { | |
464 for (;;) | |
465 { | |
466 if (get_line (buffer) == EOF) | |
467 { | |
468 goto EOF_HEAD; | |
469 } | |
470 if (buffer[0] == '\\' || buffer[0] == '\n') | |
471 { | |
472 unget_line (buffer); | |
473 break; | |
474 } | |
475 wnn_Sstrcpy (hinsi_list + wnn_Strlen (hinsi_list), buffer); | |
476 } | |
477 } | |
478 else if (strcmp (str, TOTAL) == 0) | |
479 { | |
480 get_string (str, c); | |
481 jt.total = atoi (str); | |
482 } | |
483 else if (strcmp (str, DIC_NO) == 0) | |
484 { /* for word_reg.c */ | |
485 get_string (str, c); | |
486 jt.total = atoi (str); | |
487 } | |
488 else | |
489 { | |
490 unget_line (buffer); | |
491 break; | |
492 } | |
493 } | |
494 EOF_HEAD: | |
495 jt.maxcomment = wnn_Strlen (file_comment); | |
496 jt.maxhinsi_list = wnn_Strlen (hinsi_list) + 1; | |
497 } | |
498 | |
499 void | |
500 read_ujis (rev, to_esc, which_dict) | |
501 int rev; | |
502 int to_esc; | |
503 int which_dict; | |
504 { | |
505 char buffer[LINE_SIZE]; | |
506 register int tmp; | |
507 | |
508 for (lc = 0; get_line (buffer) != EOF;) | |
509 { | |
510 if ((tmp = get_one_line (buffer, jeary + lc, rev, to_esc, which_dict)) == -1) | |
511 { | |
512 bad_line (buffer); | |
513 } | |
514 else if (tmp == 0) | |
515 { /* succeed */ | |
516 lc++; | |
517 if (lc > maxline) | |
518 { | |
519 error_no_heap (); | |
520 } | |
521 } | |
522 } | |
523 jt.maxserial = lc; /* i starts with 1 in order to leave 0 unused */ | |
524 #ifdef CHINESE | |
525 jt.syurui = which_dict; | |
526 #endif | |
527 } | |
528 | |
529 void | |
530 reverse_yomi () | |
531 { | |
532 register int i; | |
533 w_char ytmp[LINE_SIZE]; | |
534 | |
535 for (i = 0; i < jt.maxserial; i++) | |
536 { | |
537 if (jeary[i]->yomi != 0) | |
538 { /* ºï½ü¤µ¤ì¤Æ¤Ê¤¤¤â¤Î¤À¤± */ | |
539 wnn_Sreverse (ytmp, jeary[i]->yomi); | |
540 wnn_Strcpy (jeary[i]->yomi, ytmp); | |
541 wnn_Sreverse (ytmp, jeary[i]->kan); | |
542 wnn_Strcpy (jeary[i]->kan, ytmp); | |
543 } | |
544 | |
545 } | |
546 } | |
547 | |
548 extern char *wnn_get_hinsi_name (); | |
549 | |
550 void | |
551 print_je (jep, opter, serial_out, esc_exp) | |
552 register FILE *opter; | |
553 register struct je *jep; | |
554 int serial_out; | |
555 int esc_exp; | |
556 { | |
557 /* if (jep->yomi != 0) { */ | |
558 if (jep->hinsi != SAKUJO_HINSI) | |
559 { | |
560 Print_entry (jep->yomi, jep->kan, jep->comm, jep->hindo, 0, jep->hinsi, serial_out ? jep->serial : -1, opter, esc_exp); | |
561 } | |
562 } | |
563 | |
564 #ifdef nodef | |
565 kprint (fp, kpter) | |
566 register FILE *fp; | |
567 register w_char *kpter; | |
568 { | |
569 char out_str[LENGTHKANJI]; | |
570 register int out_len; | |
571 char tmp[LENGTHKANJI]; | |
572 | |
573 wnn_sStrcpy (tmp, kpter); | |
574 out_len = make_kanji_str (out_str, tmp); | |
575 fprintf (fp, "%s", out_str); | |
576 if (out_len < 8) | |
577 putc ('\t', fp); | |
578 if (out_len < 16) | |
579 putc ('\t', fp); | |
580 putc ('\t', fp); | |
581 } | |
582 #endif | |
583 | |
584 void | |
585 output_ujis (opter, serial_out, esc_exp) | |
586 register FILE *opter; | |
587 int serial_out; | |
588 int esc_exp; | |
589 { | |
590 register struct je **jep; | |
591 char buffer[WNN_COMMENT_LEN + WNN_HINSI_LEN]; | |
592 register int i; | |
593 | |
594 wnn_sStrcpy (buffer, file_comment); | |
595 fprintf (opter, "%s\t%s\n", COMMENT, buffer); | |
596 fprintf (opter, "%s\t%d\n", TOTAL, jt.total); | |
597 wnn_sStrcpy (buffer, hinsi_list); | |
598 #ifdef CHINESE | |
599 fprintf (opter, "%s\n", CHINSI); | |
600 if (jt.syurui == CWNN_REV_DICT) | |
601 { | |
602 if (pzy_flag == CWNN_PINYIN) | |
603 { | |
604 fprintf (opter, "%s\n", PINYIN); | |
605 } | |
606 else | |
607 { | |
608 fprintf (opter, "%s\n", ZHUYIN); | |
609 } | |
610 } | |
611 else if (jt.syurui == BWNN_REV_DICT) | |
612 { | |
613 fprintf (opter, "%s\n", BIXING); | |
614 } | |
615 #else | |
616 fprintf (opter, "%s\n", HINSI); | |
617 #endif | |
618 fprintf (opter, "%s", buffer); | |
619 fprintf (opter, "\n"); | |
620 for (jep = jeary, i = 0; i < jt.maxserial; i++, jep++) | |
621 { | |
622 print_je (*jep, opter, serial_out, esc_exp); | |
623 } | |
624 } | |
625 | |
626 int | |
627 init_heap (hpb, yhpb, l, rl, ipf) | |
628 int hpb, yhpb, l, rl; | |
629 FILE *ipf; | |
630 { | |
631 jehp = je_heap = (struct je *) malloc ((rl * sizeof (struct je))); | |
632 hp = heap = (UCHAR *) malloc (hpb); | |
633 yhp = yomi_heap = (w_char *) malloc ((yhpb * sizeof (w_char))); | |
634 if ((jeary = (struct je **) calloc (l, sizeof (struct je *))) == NULL) | |
635 { | |
636 fprintf (stderr, "Malloc Failed\n"); | |
637 return (-1); | |
638 } | |
639 ifpter = ipf; | |
640 maxline = l; | |
641 heapend = heap + hpb; | |
642 yheapend = yomi_heap + yhpb; | |
643 jeheapend = je_heap + rl; | |
644 return (0); | |
645 } | |
646 | |
647 void | |
648 init_jeary () | |
649 { | |
650 int k; | |
651 for (k = 0; k < jt.maxserial; k++) | |
652 { | |
653 jeary[k] = je_heap + k; | |
654 } | |
655 } | |
656 | |
657 /* test program | |
658 main() | |
659 { | |
660 yhp = yomi_heap = (w_char *)malloc(100000); | |
661 jeary = (struct je *)malloc(100000); | |
662 | |
663 ifpter = stdin; | |
664 ujis_header(); | |
665 read_ujis(); | |
666 | |
667 sort(); | |
668 uniq_je(func); | |
669 output_ujis(stdout, 0, 1); | |
670 } | |
671 */ | |
672 | |
/* Terminate the program with exit status 1. */
void
exit1 ()
{
  const int failure_status = 1;

  exit (failure_status);
}
678 | |
679 /* must be updated later */ | |
680 | |
/*
 * Convert the hinsi field a into a number stored in *p.
 *
 * The name is first looked up with wnn_find_hinsi_by_name(); when that
 * returns -1 the field is parsed as a decimal number instead.  If neither
 * works the program is terminated through exit1().
 */
static void
bunpou_num (a, p)
     register char *a;
     register int *p;
{
  int found = wnn_find_hinsi_by_name (a);

  if (found != -1)
    {
      *p = found;
      return;
    }

  /* sscanf() yields 1 only when a number was stored; 0 and EOF both mean
     that *p was not written. */
  if (sscanf (a, "%d", p) != 1)
    {
      fprintf (stderr, "Bad hinsi name \"%s\".\n", a);
      exit1 ();
    }
}
703 | |
704 #ifdef CHINESE | |
/*
 * Parse the decimal tone string a into *p.
 * When a does not start with a number, *p is set to 0 so that the caller
 * never sees an uninitialized value.
 * NOTE(review): 0 is assumed to be an acceptable "no tone" value - confirm.
 */
static void
sisheng_num (a, p)
     register char *a;
     register int *p;
{
  if (sscanf (a, "%d", p) != 1)
    *p = 0;
}
712 #endif | |
713 | |
714 int | |
715 sort_func_je (a, b) | |
716 char *a, *b; | |
717 { | |
718 return (sort_func (a, b, D_YOMI)); | |
719 } | |
720 | |
721 int | |
722 sort_func_je_kanji (a, b) | |
723 char *a, *b; | |
724 { | |
725 return (sort_func (a, b, D_KANJI)); | |
726 } | |
727 | |
728 int | |
729 sort_func (a, b, which) | |
730 register char *a, *b; | |
731 int which; | |
732 { | |
733 register int tmp; | |
734 register struct je *pa, *pb; | |
735 pa = *((struct je **) a); | |
736 pb = *((struct je **) b); | |
737 if (pa->hinsi == SAKUJO_HINSI) | |
738 { | |
739 if (pb->hinsi == SAKUJO_HINSI) | |
740 return (0); | |
741 return (-1); | |
742 } | |
743 if (pb->hinsi == SAKUJO_HINSI) | |
744 return (1); | |
745 | |
746 if (which == D_YOMI) | |
747 { | |
748 | |
749 if (!(pa->yomi) || !(pb->yomi)) | |
750 return (0); | |
751 tmp = wnn_Strcmp (pa->yomi, pb->yomi); | |
752 if (tmp) | |
753 return (tmp); | |
754 | |
755 if (pa->hinsi != pb->hinsi) | |
756 { | |
757 return ((int) (pa->hinsi) - (int) (pb->hinsi)); | |
758 } | |
759 | |
760 #ifdef CHINESE | |
761 if (jt.syurui == CWNN_REV_DICT) | |
762 { | |
763 if (pa->ss != pb->ss) | |
764 { | |
765 return ((int) (pa->ss) - (int) (pb->ss)); | |
766 } | |
767 } | |
768 #endif | |
769 if (!(pa->kan) || !(pb->kan)) | |
770 return (0); | |
771 tmp = wnn_Strcmp (pa->kan, pb->kan); | |
772 if (tmp) | |
773 return (tmp); | |
774 } | |
775 else | |
776 { | |
777 if (!(pa->kan) || !(pb->kan)) | |
778 return (0); | |
779 tmp = wnn_Strcmp (pa->kan, pb->kan); | |
780 if (tmp) | |
781 return (tmp); | |
782 | |
783 if (pa->hinsi != pb->hinsi) | |
784 { | |
785 return ((int) (pa->hinsi) - (int) (pb->hinsi)); | |
786 } | |
787 #ifdef CHINESE | |
788 if (jt.syurui == CWNN_REV_DICT) | |
789 { | |
790 if (pa->ss != pb->ss) | |
791 { | |
792 return ((int) (pa->ss) - (int) (pb->ss)); | |
793 } | |
794 } | |
795 #endif | |
796 if (!(pa->yomi) || !(pb->yomi)) | |
797 return (0); | |
798 tmp = wnn_Strcmp (pa->yomi, pb->yomi); | |
799 if (tmp) | |
800 return (tmp); | |
801 } | |
802 return (0); | |
803 } | |
804 | |
805 void | |
806 sort () | |
807 { | |
808 qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je); | |
809 } | |
810 | |
811 void | |
812 sort_if_not_sorted () | |
813 { | |
814 if (!Sorted ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je)) | |
815 { | |
816 sort (); | |
817 } | |
818 } | |
819 | |
820 void | |
821 sort_kanji () | |
822 { | |
823 qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je_kanji); | |
824 } | |
825 | |
826 void | |
827 uniq_je (func) | |
828 int (*func) (); | |
829 { | |
830 int k; | |
831 struct je **prev, **jep; | |
832 | |
833 if (jt.maxserial == 0) | |
834 return; | |
835 prev = &jeary[0]; | |
836 for (k = 1; k < jt.maxserial; k++) | |
837 { | |
838 jep = &jeary[k]; | |
839 if (func ((char *) jep, (char *) prev) == 0) | |
840 { | |
841 w_char tmp[LENGTHYOMI]; | |
842 char tmp1[LENGTHYOMI]; | |
843 char tmp2[LENGTHKANJI]; | |
844 #ifdef CHINESE | |
845 char tmp3[LENGTHKANJI]; | |
846 if (jt.syurui == BWNN_REV_DICT || jt.syurui == CWNN_REV_DICT) | |
847 { | |
848 wnn_Strcpy (tmp, (*jep)->yomi); | |
849 wnn_sStrcpy (tmp1, tmp); | |
850 wnn_Strcpy (tmp, (*jep)->kan); | |
851 wnn_sStrcpy (tmp2, tmp); | |
852 sprintf (tmp3, "%d", (*jep)->ss); | |
853 } | |
854 else | |
855 { | |
856 #endif | |
857 wnn_Sreverse (tmp, (*jep)->yomi); | |
858 wnn_sStrcpy (tmp1, tmp); | |
859 wnn_Sreverse (tmp, (*jep)->kan); | |
860 wnn_sStrcpy (tmp2, tmp); | |
861 #ifdef CHINESE | |
862 } | |
863 if (jt.syurui == CWNN_REV_DICT) | |
864 fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s),sisheng(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi), tmp3); | |
865 else | |
866 #endif | |
867 fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi)); | |
868 } | |
869 else | |
870 { | |
871 prev++; | |
872 if (prev != jep) | |
873 { | |
874 *prev = *jep; | |
875 } | |
876 } | |
877 } | |
878 prev++; | |
879 jt.maxserial = prev - &jeary[0]; | |
880 } | |
881 | |
882 #ifdef nodef | |
883 make_kanji_str (o, c) | |
884 register UCHAR *o, *c; | |
885 { | |
886 register UCHAR *o0 = o; | |
887 | |
888 for (; *c; c++) | |
889 { | |
890 if (*c == '\\') | |
891 { | |
892 *o++ = '\\'; | |
893 *o++ = '\\'; | |
894 } | |
895 else if (*c > 0x20) | |
896 { | |
897 *o++ = *c; | |
898 } | |
899 else | |
900 { | |
901 sprintf (o, "\\0%o", *c); | |
902 for (; *o; o++); | |
903 } | |
904 } | |
905 *o = 0; | |
906 return (o - o0); | |
907 } | |
908 #endif | |
909 | |
/*
 * Decode the escaped byte string o into c.
 *
 *   "\0" followed by one or two octal digits -> the byte with that value
 *   "\" followed by any other character      -> that character
 *   anything else                            -> copied unchanged
 *
 * Malformed input is handled without touching memory outside the strings:
 * a backslash that is the last character is dropped, and "\0" with no
 * octal digit after it produces no output.  c is always NUL-terminated and
 * never becomes longer than o.
 */
static void
read_kanji_str (c, o)
     register char *c, *o;
{
  while (*o != '\0')
    {
      if (*o != '\\')
        {
          *c++ = *o++;
          continue;
        }

      o++;                      /* skip the backslash */
      if (*o == '\0')
        break;                  /* dangling backslash at end of input */

      if (*o != '0')
        {
          *c++ = *o++;          /* "\x" stands for x itself */
          continue;
        }

      o++;                      /* skip the '0' that introduces octal */
      if (*o < '0' || *o > '7')
        continue;               /* no digits: nothing to emit */

      *c = (*o++ - '0');
      if (*o >= '0' && *o <= '7')
        {
          *c *= 8;
          *c |= (*o++ - '0');
        }
      c++;
    }
  *c = 0;
}
947 | |
948 #ifdef CHINESE | |
949 static void | |
950 read_kanji_str_w (c, o) | |
951 register w_char *c, *o; | |
952 { | |
953 for (; *o; c++) | |
954 { | |
955 if (*o == (w_char) '\\') | |
956 { | |
957 if (*++o == (w_char) '0') | |
958 { | |
959 o += 1; | |
960 if (*o >= (w_char) '0' && *o <= (w_char) '7') | |
961 { | |
962 *c = (*o++ - (w_char) '0'); | |
963 } | |
964 else | |
965 continue; | |
966 if (*o >= (w_char) '0' && *o <= (w_char) '7') | |
967 { | |
968 *c *= 8; | |
969 *c |= (*o++ - (w_char) '0'); | |
970 } | |
971 else | |
972 continue; | |
973 } | |
974 else | |
975 { | |
976 *c = *o++; | |
977 } | |
978 } | |
979 else | |
980 { | |
981 *c = *o++; | |
982 } | |
983 } | |
984 *c = 0; | |
985 } | |
986 #endif | |
987 | |
/*
 * Check whether an array is already in order.
 *
 *   st       - first element
 *   lc       - number of elements
 *   size     - size of one element in bytes
 *   sort_fun - comparator called with pointers to two neighbouring
 *              elements; a result greater than 0 means "out of order"
 *
 * Returns 1 when no neighbouring pair is out of order (including arrays
 * with fewer than two elements), otherwise 0.
 */
int
Sorted (st, lc, size, sort_fun)
     register char *st;
     register int lc;
     int size;
     int (*sort_fun) ();
{
  char *next = st + size;
  int pairs;

  for (pairs = lc - 1; pairs > 0; pairs--)
    {
      if (sort_fun (st, next) > 0)
        return (0);
      st = next;
      next += size;
    }
  return (1);
}
1005 | |
/*
 * Test whether k is the katakana spelling of y.
 *
 * Both strings are scanned two bytes at a time.  A pair matches when
 *   - both strings contain the long-vowel mark (bytes 0xa1 0xbc), or
 *   - y has lead byte 0xa4, k has lead byte 0xa5 and the second bytes are
 *     equal.
 * Returns 1 when every pair matches and both strings end together,
 * otherwise 0.  A string that ends in the middle of a pair yields 0.
 */
int
is_katakana (k, y)
     register char *k, *y;
{
  while (*k && *y)
    {
      /* Long-vowel mark: it must be present in BOTH strings. */
      if (*y == (char) 0xa1 && *k == (char) 0xa1 && *(y + 1) == (char) 0xbc && *(k + 1) == (char) 0xbc)
        {
          y += 2;
          k += 2;
          continue;
        }
      if (*y != (char) 0xa4 || *k != (char) 0xa5)
        return (0);
      /* be careful, char comparison.  A missing second byte is rejected
         here so that the scan never steps over a terminator. */
      if (y[1] == '\0' || y[1] != k[1])
        {
          return (0);
        }
      y += 2;
      k += 2;
    }
  return (!(*k | *y));
}
1028 | |
1029 static void | |
1030 toesc (ckanji, cyomi) | |
1031 char *ckanji, *cyomi; | |
1032 { | |
1033 if (strcmp (ckanji, cyomi) == 0) | |
1034 { | |
1035 strcpy (ckanji, DIC_HIRAGANA); | |
1036 } | |
1037 else if (is_katakana (ckanji, cyomi)) | |
1038 { | |
1039 strcpy (ckanji, DIC_KATAKANA); | |
1040 } | |
1041 } |