Mercurial > freewnn
annotate Wnn/jutil/ujisf.c @ 18:e7e2aba67cb3
disabled build for cWnn and kWnn by default
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Mon, 14 Apr 2008 17:33:53 +0900 |
parents | 6ab41ec6f895 |
children | c966456648ad |
rev | line source |
---|---|
0 | 1 /* |
2 * $Id: ujisf.c,v 1.7 2002/07/14 04:26:57 hiroo Exp $ | |
3 */ | |
4 | |
5 /* | |
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system. | |
7 * This file is part of FreeWnn. | |
8 * | |
9 * Copyright Kyoto University Research Institute for Mathematical Sciences | |
10 * 1987, 1988, 1989, 1990, 1991, 1992 | |
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 | |
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 | |
13 * Copyright FreeWnn Project 1999, 2000, 2002 | |
14 * | |
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> | |
16 * | |
17 * This program is free software; you can redistribute it and/or modify | |
18 * it under the terms of the GNU General Public License as published by | |
19 * the Free Software Foundation; either version 2 of the License, or | |
20 * (at your option) any later version. | |
21 * | |
22 * This program is distributed in the hope that it will be useful, | |
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 * GNU General Public License for more details. | |
26 * | |
27 * You should have received a copy of the GNU General Public License | |
28 * along with this program; if not, write to the Free Software | |
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
30 */ | |
31 | |
32 /* | |
33 * Ujis format <--> internal data. | |
34 */ | |
35 | |
36 #ifdef HAVE_CONFIG_H | |
37 # include <config.h> | |
38 #endif | |
39 | |
40 #include <stdio.h> | |
41 #if STDC_HEADERS | |
42 # include <stdlib.h> | |
43 # include <string.h> | |
44 #else | |
45 # if HAVE_MALLOC_H | |
46 # include <malloc.h> | |
47 # endif | |
48 # if HAVE_STRINGS_H | |
49 # include <strings.h> | |
50 # endif | |
51 #endif /* STDC_HEADERS */ | |
52 | |
53 #include "commonhd.h" | |
54 #include "jslib.h" | |
55 #include "jh.h" | |
56 #include "jdata.h" | |
57 #include "wnn_os.h" | |
58 #include "wnn_string.h" | |
59 | |
60 #ifdef CHINESE | |
61 #include "cplib.h" | |
62 int pzy_flag = CWNN_PINYIN; /* Pinyin or Zhuyin */ | |
63 static void sisheng_num (), read_kanji_str_w (); | |
64 #endif | |
65 | |
#ifndef min
/*
 * Two-argument minimum / maximum.
 * Every argument and the whole expansion are parenthesised so that
 * operands containing lower-precedence operators (e.g. `a & b') are
 * compared as a unit.  NOTE: each argument is evaluated twice, so do not
 * pass expressions with side effects.
 */
#define min(a, b) (((a) > (b)) ? (b) : (a))
#define max(a, b) (((a) < (b)) ? (b) : (a))
#endif
70 | |
71 extern unsigned char kanjiaddr (); | |
72 extern void Print_entry (); | |
73 extern int wnn_find_hinsi_by_name (); | |
74 #ifdef CHINESE | |
75 extern void cwnn_zy_str_analysis (), cwnn_py_str_analysis (); | |
76 #endif | |
77 int sort_func (), Sorted (), w_stradd (); | |
78 static void Kanjistradd (), bunpou_num (), read_kanji_str (), toesc (); | |
79 void exit1 (); | |
80 | |
81 extern struct JT jt; | |
82 | |
83 /* extern variables */ | |
84 | |
85 struct je **jeary; | |
86 int wnnerror; | |
87 | |
88 #define WNN_HINSI_LEN 4096 | |
89 | |
90 w_char file_comment[WNN_COMMENT_LEN]; | |
91 w_char hinsi_list[WNN_HINSI_LEN]; | |
92 | |
93 | |
94 | |
95 /* static variables */ | |
96 static UCHAR *heap, *hp, *heapend; | |
97 static w_char *yomi_heap, *yhp, *yheapend; | |
98 static struct je *je_heap, *jehp, *jeheapend; | |
99 static FILE *ifpter; | |
100 static int maxline; | |
101 | |
102 /* extern functions of this file are | |
103 ujis_header(); | |
104 read_ujis(reversep, to_esc, which_dict); | |
105 reverse_yomi(); | |
106 sort(); | |
107 uniq_je(func); | |
108 output_ujis(opter, serial_out, esc_exp); | |
109 */ | |
110 | |
111 int lc; | |
112 | |
113 static char stack[LINE_SIZE] = { 0 }; | |
114 | |
115 int | |
116 get_line (c) | |
117 register char *c; | |
118 { | |
119 if (stack[0]) | |
120 { | |
121 strcpy (c, stack); | |
122 stack[0] = 0; | |
123 } | |
124 else | |
125 { | |
126 if (fgets (c, LINE_SIZE, ifpter) == NULL) | |
127 { | |
128 return (EOF); | |
129 } | |
130 } | |
131 return (0); | |
132 } | |
133 | |
134 void | |
135 unget_line (c) | |
136 char *c; | |
137 { | |
138 strcpy (stack, c); | |
139 } | |
140 | |
141 | |
/*
 * Copy the next blank-delimited token of buf into str.
 * Leading spaces and tabs are skipped; the token ends at a space, tab,
 * newline or NUL.  str is always NUL-terminated.
 * Returns a pointer to the character following the token, or NULL (with
 * str set to the empty string) when no token is left on the line.
 */
char *
get_string (str, buf)
     register char *str;
     char *buf;
{
  register char *p = buf;

  while (*p == ' ' || *p == '\t')
    p++;

  if (*p == '\n' || *p == '\0')
    {
      *str = 0;
      return (NULL);
    }

  while (*p != '\0' && *p != '\n' && *p != ' ' && *p != '\t')
    *str++ = *p++;

  *str = 0;
  return (p);
}
161 | |
162 void | |
163 bad_line (bf) | |
164 char *bf; | |
165 { | |
166 static int badl = 0; | |
167 | |
168 fprintf (stderr, "Bad line \"%s\"\n", bf); | |
169 fprintf (stderr, "Bad line omitted\n"); | |
170 if (++badl > BADLMAX) | |
171 { | |
172 fprintf (stderr, "Too many bad lines.\n"); | |
173 exit1 (); | |
174 } | |
175 } | |
176 | |
/*
 * Print the "heap exhausted" diagnostic on stderr and terminate
 * through exit1().
 */
void
error_no_heap ()
{
  fputs ("Heap area is exhausted.\n", stderr);
  exit1 ();
}
183 | |
184 static int | |
185 get_one_line (buffer, jep, rev, to_esc, which_dict) | |
186 char *buffer; | |
187 register struct je **jep; | |
188 int rev; | |
189 int to_esc; | |
190 int which_dict; | |
191 { | |
192 register char *c = buffer; | |
193 static char tmp[LINE_SIZE]; | |
194 static char ckanji[LINE_SIZE]; | |
195 static char cyomi[LINE_SIZE]; | |
196 static w_char yomi[LINE_SIZE]; | |
197 static w_char kanji[LINE_SIZE]; | |
198 static w_char comm[LINE_SIZE]; | |
199 #ifdef CHINESE | |
200 static w_char un_sisheng_yincod_str[LINE_SIZE]; | |
201 static w_char yincod_str[LINE_SIZE]; | |
202 static char csisheng[LINE_SIZE]; | |
203 static w_char wtmp[LINE_SIZE]; | |
204 #endif | |
205 char *c1; | |
206 | |
207 if (jehp == jeheapend) | |
208 { | |
209 if ((jehp = je_heap = (struct je *) malloc ((HEAPINC * sizeof (struct je)))) == NULL) | |
210 { | |
211 fprintf (stderr, "Malloc Failed\n"); | |
212 return (-1); | |
213 } | |
214 jeheapend = je_heap + HEAPINC; | |
215 } | |
216 *jep = jehp; | |
217 jehp++; | |
218 | |
219 if (rev == REVERSE) | |
220 { | |
221 if ((c = get_string (ckanji, c)) == NULL) | |
222 return (1); | |
223 } | |
224 else | |
225 { | |
226 if ((c = get_string (cyomi, c)) == NULL) | |
227 return (1); | |
228 } | |
229 if (rev == REVERSE) | |
230 { | |
231 if ((c = get_string (cyomi, c)) == NULL) | |
232 return (-1); | |
233 } | |
234 else | |
235 { | |
236 if ((c = get_string (ckanji, c)) == NULL) | |
237 return (-1); | |
238 } | |
239 #ifdef CHINESE | |
240 /* here ,should seperate pinyin to two part */ | |
241 /* one is usually pinyin string like Zhong.Guo. */ | |
242 /* the is sisheng string like 23 */ | |
243 | |
244 if (which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT) | |
245 { | |
246 if (pzy_flag == CWNN_ZHUYIN) | |
247 cwnn_zy_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str); | |
248 else | |
249 cwnn_py_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str); | |
250 | |
251 sisheng_num (csisheng, &((*jep)->ss)); | |
252 read_kanji_str_w (wtmp, un_sisheng_yincod_str); | |
253 wnn_Strcpy (yomi, wtmp); | |
254 } | |
255 else | |
256 { | |
257 read_kanji_str (tmp, cyomi); | |
258 wnn_Sstrcpy (yomi, tmp); | |
259 } | |
260 #else | |
261 read_kanji_str (tmp, cyomi); | |
262 wnn_Sstrcpy (yomi, tmp); | |
263 #endif | |
264 if (wnn_Strlen (yomi) >= LENGTHYOMI) | |
265 { | |
266 fprintf (stderr, "YOMI is longer in line %d.\n", lc); | |
267 return (-1); | |
268 } | |
269 w_stradd (yomi, &((*jep)->yomi)); | |
270 | |
271 read_kanji_str (tmp, ckanji); | |
272 wnn_Sstrcpy (kanji, tmp); | |
273 if (wnn_Strlen (kanji) >= LENGTHYOMI) | |
274 { | |
275 fprintf (stderr, "KANJI is longer in line %d.\n", lc); | |
276 return (-1); | |
277 } | |
278 w_stradd (kanji, &((*jep)->kan)); | |
279 | |
280 if ((c = get_string (tmp, c)) == NULL) | |
281 return (-1); | |
282 bunpou_num (tmp, &((*jep)->hinsi)); | |
283 | |
284 if ((c = get_string (tmp, c)) == NULL) | |
285 return (-1); | |
286 if (tmp[0] == '-') | |
287 { | |
288 (*jep)->hindo = -1; /* Real hindo == -1 means Not to use it */ | |
289 } | |
290 else | |
291 { | |
292 sscanf (tmp, "%d", &((*jep)->hindo)); | |
293 } | |
294 | |
295 if ((get_string (tmp, c)) == NULL) | |
296 { | |
297 c1 = NULL; | |
298 (*jep)->comm = NULL; | |
299 comm[0] = 0; | |
300 } | |
301 else | |
302 { | |
303 /* left entries are all considered as comment */ | |
304 for (; *c == '\t' || *c == ' '; c++); | |
305 if (c[strlen (c) - 1] == '\n') | |
306 c[strlen (c) - 1] = '\0'; | |
307 c1 = c; | |
308 wnn_Sstrcpy (comm, c1); | |
309 if (wnn_Strlen (comm) >= LENGTHYOMI) | |
310 { | |
311 fprintf (stderr, "COMMENT is longer in line %d.\n", lc); | |
312 return (-1); | |
313 } | |
314 w_stradd (comm, &((*jep)->comm)); | |
315 } | |
316 | |
317 if (to_esc) | |
318 { | |
319 toesc (ckanji, cyomi); | |
320 } | |
321 /* | |
322 if(strchr(ckanji, DIC_COMMENT_CHAR) || | |
323 strchr(ckanji, DIC_YOMI_CHAR)){ | |
324 fprintf(stderr, "Bad character in kanji\n"); | |
325 return(-1); | |
326 } | |
327 if(which_dict){ | |
328 if(strchr(cyomi, DIC_COMMENT_CHAR) || | |
329 strchr(cyomi, DIC_YOMI_CHAR)){ | |
330 fprintf(stderr, "Bad character in yomi\n"); | |
331 return(-1); | |
332 } | |
333 } | |
334 */ | |
335 Kanjistradd (kanji, | |
336 #ifdef CHINESE | |
337 ((which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT) ? yincod_str : ((which_dict == WNN_REV_DICT) ? yomi : NULL)), | |
338 #else | |
339 (which_dict) ? yomi : NULL, | |
340 #endif | |
341 comm, &(*jep)->kanji); | |
342 return (0); | |
343 } | |
344 | |
345 static void | |
346 Kanjistradd (k, y, c, cp) | |
347 register UCHAR **cp; | |
348 w_char *k, *y, *c; | |
349 { | |
350 int len; | |
351 if (hp + LENGTHKANJI >= heapend) | |
352 { | |
353 if ((hp = heap = (UCHAR *) malloc ((HEAPINC * HEAP_PER_LINE))) == NULL) | |
354 { | |
355 fprintf (stderr, "Malloc Failed\n"); | |
356 exit (1); | |
357 } | |
358 heapend = heap + (HEAPINC * HEAP_PER_LINE); | |
359 } | |
360 *cp = hp; | |
361 if ((len = kanjiaddr (hp, k, y, c)) >= LENGTHKANJI) | |
362 { | |
363 fprintf (stderr, "KANJI is longer in line %d.\n", lc); | |
364 exit (1); | |
365 } | |
366 hp += len; | |
367 } | |
368 | |
369 int | |
370 w_stradd (str, cp) | |
371 register w_char **cp; | |
372 register w_char *str; | |
373 { | |
374 register int len = wnn_Strlen (str); | |
375 | |
376 if (yhp + len + 1 >= yheapend) | |
377 { | |
378 if ((yhp = yomi_heap = (w_char *) malloc ((HEAPINC * sizeof (w_char)))) == NULL) | |
379 { | |
380 fprintf (stderr, "Malloc Failed\n"); | |
381 return (-1); | |
382 } | |
383 yheapend = yomi_heap + HEAPINC; | |
384 } | |
385 *cp = yhp; | |
386 wnn_Strcpy (yhp, str); | |
387 yhp += len + 1; | |
388 return (0); | |
389 } | |
390 | |
391 void | |
392 #ifdef CHINESE | |
393 ujis_header (which_dict) | |
394 int *which_dict; | |
395 #else | |
396 ujis_header () | |
397 #endif | |
398 { | |
399 char buffer[LINE_SIZE]; | |
400 char *c = buffer; | |
401 char str[LINE_SIZE]; | |
402 | |
403 jt.total = 0; | |
404 file_comment[0] = 0; | |
405 hinsi_list[0] = 0; | |
406 | |
407 for (;;) | |
408 { | |
409 if (get_line (buffer) == EOF) | |
410 { | |
411 goto EOF_HEAD; | |
412 } | |
413 c = buffer; | |
414 if ((c = get_string (str, c)) == NULL) | |
415 continue; | |
416 if (strcmp (str, COMMENT) == 0) | |
417 { | |
418 /* for(;;){ | |
419 if(get_line(buffer) == EOF){ | |
420 goto EOF_EHAD; | |
421 } | |
422 if(buffer[0] == '\\'){ | |
423 unget_line(buffer); | |
424 break; | |
425 } | |
426 if(wnn_Strlen(file_comment) + strlen(buffer) | |
427 >= WNN_COMMENT_LEN){ | |
428 fprintf(stderr, "Too Long Comment.\n"); | |
429 exit1(); | |
430 } | |
431 wnn_Sstrcpy(file_comment + wnn_Strlen(file_comment), buffer); | |
432 } | |
433 */ | |
434 get_string (str, c); | |
435 /* | |
436 if(str[strlen(str) - 1] == '\n'){ | |
437 c[strlen(str) - 1] = '\0'; | |
438 } | |
439 */ | |
440 wnn_Sstrcpy (file_comment, str); | |
441 #ifdef CHINESE | |
442 } | |
443 else if (strcmp (str, PINYIN) == 0) | |
444 { | |
445 *which_dict = CWNN_REV_DICT; | |
446 pzy_flag = CWNN_PINYIN; | |
447 } | |
448 else if (strcmp (str, ZHUYIN) == 0) | |
449 { | |
450 *which_dict = CWNN_REV_DICT; | |
451 pzy_flag = CWNN_ZHUYIN; | |
452 } | |
453 else if (strcmp (str, BIXING) == 0) | |
454 { | |
455 *which_dict = BWNN_REV_DICT; | |
456 #endif | |
457 } | |
458 else if (strcmp (str, HINSI) == 0 | |
459 #ifdef CHINESE | |
460 || strcmp (str, CHINSI) == 0 | |
461 #endif | |
462 ) | |
463 { | |
464 for (;;) | |
465 { | |
466 if (get_line (buffer) == EOF) | |
467 { | |
468 goto EOF_HEAD; | |
469 } | |
470 if (buffer[0] == '\\' || buffer[0] == '\n') | |
471 { | |
472 unget_line (buffer); | |
473 break; | |
474 } | |
475 wnn_Sstrcpy (hinsi_list + wnn_Strlen (hinsi_list), buffer); | |
476 } | |
477 } | |
478 else if (strcmp (str, TOTAL) == 0) | |
479 { | |
480 get_string (str, c); | |
481 jt.total = atoi (str); | |
482 } | |
483 else if (strcmp (str, DIC_NO) == 0) | |
484 { /* for word_reg.c */ | |
485 get_string (str, c); | |
486 jt.total = atoi (str); | |
487 } | |
488 else | |
489 { | |
490 unget_line (buffer); | |
491 break; | |
492 } | |
493 } | |
494 EOF_HEAD: | |
495 jt.maxcomment = wnn_Strlen (file_comment); | |
496 jt.maxhinsi_list = wnn_Strlen (hinsi_list) + 1; | |
497 } | |
498 | |
499 void | |
500 read_ujis (rev, to_esc, which_dict) | |
501 int rev; | |
502 int to_esc; | |
503 int which_dict; | |
504 { | |
505 char buffer[LINE_SIZE]; | |
506 register int tmp; | |
507 | |
508 for (lc = 0; get_line (buffer) != EOF;) | |
509 { | |
510 if ((tmp = get_one_line (buffer, jeary + lc, rev, to_esc, which_dict)) == -1) | |
511 { | |
512 bad_line (buffer); | |
513 } | |
514 else if (tmp == 0) | |
515 { /* succeed */ | |
516 lc++; | |
517 if (lc > maxline) | |
518 { | |
519 error_no_heap (); | |
520 } | |
521 } | |
522 } | |
523 jt.maxserial = lc; /* i starts with 1 in order to leave 0 unused */ | |
524 #ifdef CHINESE | |
525 jt.syurui = which_dict; | |
526 #endif | |
527 } | |
528 | |
529 void | |
530 reverse_yomi () | |
531 { | |
532 register int i; | |
533 w_char ytmp[LINE_SIZE]; | |
534 | |
535 for (i = 0; i < jt.maxserial; i++) | |
536 { | |
537 if (jeary[i]->yomi != 0) | |
538 { /* ºï½ü¤µ¤ì¤Æ¤Ê¤¤¤â¤Î¤À¤± */ | |
539 wnn_Sreverse (ytmp, jeary[i]->yomi); | |
540 wnn_Strcpy (jeary[i]->yomi, ytmp); | |
541 wnn_Sreverse (ytmp, jeary[i]->kan); | |
542 wnn_Strcpy (jeary[i]->kan, ytmp); | |
543 } | |
544 | |
545 } | |
546 } | |
547 | |
548 extern char *wnn_get_hinsi_name (); | |
549 | |
550 void | |
551 print_je (jep, opter, serial_out, esc_exp) | |
552 register FILE *opter; | |
553 register struct je *jep; | |
554 int serial_out; | |
555 int esc_exp; | |
556 { | |
557 /* if (jep->yomi != 0) { */ | |
558 if (jep->hinsi != SAKUJO_HINSI) | |
559 { | |
560 Print_entry (jep->yomi, jep->kan, jep->comm, jep->hindo, 0, jep->hinsi, serial_out ? jep->serial : -1, opter, esc_exp); | |
561 } | |
562 } | |
563 | |
#ifdef nodef
/*
 * (Compiled only when `nodef' is defined.)
 * Print kpter, escaped by make_kanji_str(), followed by one to three
 * tabs depending on the escaped length (below 8, below 16, otherwise).
 */
kprint (fp, kpter)
     register FILE *fp;
     register w_char *kpter;
{
  char out_str[LENGTHKANJI];
  register int out_len;
  char tmp[LENGTHKANJI];

  wnn_sStrcpy (tmp, kpter);
  out_len = make_kanji_str (out_str, tmp);
  fputs (out_str, fp);

  /* Short strings get extra tabs.  */
  if (out_len < 8)
    putc ('\t', fp);
  if (out_len < 16)
    putc ('\t', fp);
  putc ('\t', fp);
}
#endif
583 | |
584 void | |
585 output_ujis (opter, serial_out, esc_exp) | |
586 register FILE *opter; | |
587 int serial_out; | |
588 int esc_exp; | |
589 { | |
590 register struct je **jep; | |
591 char buffer[WNN_COMMENT_LEN + WNN_HINSI_LEN]; | |
592 register int i; | |
593 | |
594 wnn_sStrcpy (buffer, file_comment); | |
595 fprintf (opter, "%s\t%s\n", COMMENT, buffer); | |
596 fprintf (opter, "%s\t%d\n", TOTAL, jt.total); | |
597 wnn_sStrcpy (buffer, hinsi_list); | |
598 #ifdef CHINESE | |
599 fprintf (opter, "%s\n", CHINSI); | |
600 if (jt.syurui == CWNN_REV_DICT) | |
601 { | |
602 if (pzy_flag == CWNN_PINYIN) | |
603 { | |
604 fprintf (opter, "%s\n", PINYIN); | |
605 } | |
606 else | |
607 { | |
608 fprintf (opter, "%s\n", ZHUYIN); | |
609 } | |
610 } | |
611 else if (jt.syurui == BWNN_REV_DICT) | |
612 { | |
613 fprintf (opter, "%s\n", BIXING); | |
614 } | |
615 #else | |
616 fprintf (opter, "%s\n", HINSI); | |
617 #endif | |
618 fprintf (opter, "%s", buffer); | |
619 fprintf (opter, "\n"); | |
7
6ab41ec6f895
fix dtoa crash when it encounters malformed entry.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
620 for (jep = jeary, i = 0; i < jt.maxserial; i++, jep++) { |
6ab41ec6f895
fix dtoa crash when it encounters malformed entry.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
621 if (jep && opter && (*jep)->yomi) { |
6ab41ec6f895
fix dtoa crash when it encounters malformed entry.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
622 print_je (*jep, opter, serial_out, esc_exp); |
6ab41ec6f895
fix dtoa crash when it encounters malformed entry.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
623 } |
6ab41ec6f895
fix dtoa crash when it encounters malformed entry.
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
624 } |
0 | 625 } |
626 | |
627 int | |
628 init_heap (hpb, yhpb, l, rl, ipf) | |
629 int hpb, yhpb, l, rl; | |
630 FILE *ipf; | |
631 { | |
632 jehp = je_heap = (struct je *) malloc ((rl * sizeof (struct je))); | |
633 hp = heap = (UCHAR *) malloc (hpb); | |
634 yhp = yomi_heap = (w_char *) malloc ((yhpb * sizeof (w_char))); | |
635 if ((jeary = (struct je **) calloc (l, sizeof (struct je *))) == NULL) | |
636 { | |
637 fprintf (stderr, "Malloc Failed\n"); | |
638 return (-1); | |
639 } | |
640 ifpter = ipf; | |
641 maxline = l; | |
642 heapend = heap + hpb; | |
643 yheapend = yomi_heap + yhpb; | |
644 jeheapend = je_heap + rl; | |
645 return (0); | |
646 } | |
647 | |
648 void | |
649 init_jeary () | |
650 { | |
651 int k; | |
652 for (k = 0; k < jt.maxserial; k++) | |
653 { | |
654 jeary[k] = je_heap + k; | |
655 } | |
656 } | |
657 | |
658 /* test program | |
659 main() | |
660 { | |
661 yhp = yomi_heap = (w_char *)malloc(100000); | |
662 jeary = (struct je *)malloc(100000); | |
663 | |
664 ifpter = stdin; | |
665 ujis_header(); | |
666 read_ujis(); | |
667 | |
668 sort(); | |
669 uniq_je(func); | |
670 output_ujis(stdout, 0, 1); | |
671 } | |
672 */ | |
673 | |
/*
 * Common failure exit of this module: terminate with status 1.
 */
void
exit1 ()
{
  exit (1);                     /* does not return */
}
679 | |
680 /* must be updated later */ | |
681 | |
/*
 * Translate the hinsi field `a' into its number and store it in *p.
 * The name is looked up with wnn_find_hinsi_by_name(); when that fails
 * the field is read as a decimal number instead.  If neither works the
 * program is terminated through exit1().
 *
 * The sscanf() result is compared against 1 because sscanf() returns EOF
 * rather than 0 when the input runs out before a conversion, which would
 * otherwise leave *p unset.
 */
static void
bunpou_num (a, p)
     register char *a;
     register int *p;
{
  int tmp = wnn_find_hinsi_by_name (a);

  if (tmp != -1)
    {
      *p = tmp;
      return;
    }
  if (sscanf (a, "%d", p) != 1)
    {
      fprintf (stderr, "Bad hinsi name \"%s\".\n", a);
      exit1 ();
    }
}
704 | |
705 #ifdef CHINESE | |
/*
 * Read the sisheng digit string `a' as one decimal number into *p.
 * *p is left untouched when `a' does not start with a number.
 */
static void
sisheng_num (a, p)
     register char *a;
     register int *p;
{
  (void) sscanf (a, "%d", p);
}
713 #endif | |
714 | |
715 int | |
716 sort_func_je (a, b) | |
717 char *a, *b; | |
718 { | |
719 return (sort_func (a, b, D_YOMI)); | |
720 } | |
721 | |
722 int | |
723 sort_func_je_kanji (a, b) | |
724 char *a, *b; | |
725 { | |
726 return (sort_func (a, b, D_KANJI)); | |
727 } | |
728 | |
729 int | |
730 sort_func (a, b, which) | |
731 register char *a, *b; | |
732 int which; | |
733 { | |
734 register int tmp; | |
735 register struct je *pa, *pb; | |
736 pa = *((struct je **) a); | |
737 pb = *((struct je **) b); | |
738 if (pa->hinsi == SAKUJO_HINSI) | |
739 { | |
740 if (pb->hinsi == SAKUJO_HINSI) | |
741 return (0); | |
742 return (-1); | |
743 } | |
744 if (pb->hinsi == SAKUJO_HINSI) | |
745 return (1); | |
746 | |
747 if (which == D_YOMI) | |
748 { | |
749 | |
750 if (!(pa->yomi) || !(pb->yomi)) | |
751 return (0); | |
752 tmp = wnn_Strcmp (pa->yomi, pb->yomi); | |
753 if (tmp) | |
754 return (tmp); | |
755 | |
756 if (pa->hinsi != pb->hinsi) | |
757 { | |
758 return ((int) (pa->hinsi) - (int) (pb->hinsi)); | |
759 } | |
760 | |
761 #ifdef CHINESE | |
762 if (jt.syurui == CWNN_REV_DICT) | |
763 { | |
764 if (pa->ss != pb->ss) | |
765 { | |
766 return ((int) (pa->ss) - (int) (pb->ss)); | |
767 } | |
768 } | |
769 #endif | |
770 if (!(pa->kan) || !(pb->kan)) | |
771 return (0); | |
772 tmp = wnn_Strcmp (pa->kan, pb->kan); | |
773 if (tmp) | |
774 return (tmp); | |
775 } | |
776 else | |
777 { | |
778 if (!(pa->kan) || !(pb->kan)) | |
779 return (0); | |
780 tmp = wnn_Strcmp (pa->kan, pb->kan); | |
781 if (tmp) | |
782 return (tmp); | |
783 | |
784 if (pa->hinsi != pb->hinsi) | |
785 { | |
786 return ((int) (pa->hinsi) - (int) (pb->hinsi)); | |
787 } | |
788 #ifdef CHINESE | |
789 if (jt.syurui == CWNN_REV_DICT) | |
790 { | |
791 if (pa->ss != pb->ss) | |
792 { | |
793 return ((int) (pa->ss) - (int) (pb->ss)); | |
794 } | |
795 } | |
796 #endif | |
797 if (!(pa->yomi) || !(pb->yomi)) | |
798 return (0); | |
799 tmp = wnn_Strcmp (pa->yomi, pb->yomi); | |
800 if (tmp) | |
801 return (tmp); | |
802 } | |
803 return (0); | |
804 } | |
805 | |
806 void | |
807 sort () | |
808 { | |
809 qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je); | |
810 } | |
811 | |
812 void | |
813 sort_if_not_sorted () | |
814 { | |
815 if (!Sorted ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je)) | |
816 { | |
817 sort (); | |
818 } | |
819 } | |
820 | |
821 void | |
822 sort_kanji () | |
823 { | |
824 qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je_kanji); | |
825 } | |
826 | |
827 void | |
828 uniq_je (func) | |
829 int (*func) (); | |
830 { | |
831 int k; | |
832 struct je **prev, **jep; | |
833 | |
834 if (jt.maxserial == 0) | |
835 return; | |
836 prev = &jeary[0]; | |
837 for (k = 1; k < jt.maxserial; k++) | |
838 { | |
839 jep = &jeary[k]; | |
840 if (func ((char *) jep, (char *) prev) == 0) | |
841 { | |
842 w_char tmp[LENGTHYOMI]; | |
843 char tmp1[LENGTHYOMI]; | |
844 char tmp2[LENGTHKANJI]; | |
845 #ifdef CHINESE | |
846 char tmp3[LENGTHKANJI]; | |
847 if (jt.syurui == BWNN_REV_DICT || jt.syurui == CWNN_REV_DICT) | |
848 { | |
849 wnn_Strcpy (tmp, (*jep)->yomi); | |
850 wnn_sStrcpy (tmp1, tmp); | |
851 wnn_Strcpy (tmp, (*jep)->kan); | |
852 wnn_sStrcpy (tmp2, tmp); | |
853 sprintf (tmp3, "%d", (*jep)->ss); | |
854 } | |
855 else | |
856 { | |
857 #endif | |
858 wnn_Sreverse (tmp, (*jep)->yomi); | |
859 wnn_sStrcpy (tmp1, tmp); | |
860 wnn_Sreverse (tmp, (*jep)->kan); | |
861 wnn_sStrcpy (tmp2, tmp); | |
862 #ifdef CHINESE | |
863 } | |
864 if (jt.syurui == CWNN_REV_DICT) | |
865 fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s),sisheng(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi), tmp3); | |
866 else | |
867 #endif | |
868 fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi)); | |
869 } | |
870 else | |
871 { | |
872 prev++; | |
873 if (prev != jep) | |
874 { | |
875 *prev = *jep; | |
876 } | |
877 } | |
878 } | |
879 prev++; | |
880 jt.maxserial = prev - &jeary[0]; | |
881 } | |
882 | |
#ifdef nodef
/*
 * (Compiled only when `nodef' is defined.)
 * Copy c to o in escaped form: a backslash is doubled, bytes above 0x20
 * are copied unchanged, and every other byte is written as a backslash,
 * a '0' and the byte's octal value.  Returns the length written, not
 * counting the terminating NUL.
 */
make_kanji_str (o, c)
     register UCHAR *o, *c;
{
  register UCHAR *start = o;

  while (*c)
    {
      if (*c == '\\')
        {
          *o++ = '\\';
          *o++ = '\\';
        }
      else if (*c > 0x20)
        {
          *o++ = *c;
        }
      else
        {
          sprintf (o, "\\0%o", *c);
          /* Advance to the NUL that sprintf() wrote.  */
          while (*o)
            o++;
        }
      c++;
    }
  *o = 0;
  return (o - start);
}
#endif
910 | |
/*
 * Undo the backslash escapes of the text form: copy o to c, where
 *   \0d, \0dd  (d = octal digit)  become the byte with that octal value,
 *   \x         (x anything else)  becomes x itself,
 * and every other character is copied unchanged.  c is NUL-terminated
 * and never gets longer than o.
 *
 * Malformed input is handled explicitly:
 *   - "\0" not followed by an octal digit produces no output byte
 *     (it used to leave an uninitialised byte in c);
 *   - a backslash at the very end of o is dropped
 *     (it used to make the scan continue past the terminating NUL).
 */
static void
read_kanji_str (c, o)
     register char *c, *o;
{
  int value;

  while (*o != '\0')
    {
      if (*o != '\\')
        {
          *c++ = *o++;
          continue;
        }

      o++;                      /* skip the backslash */
      if (*o == '\0')
        break;                  /* dangling backslash */
      if (*o != '0')
        {
          *c++ = *o++;          /* \x stands for x */
          continue;
        }

      o++;                      /* skip the '0' marker */
      if (*o < '0' || *o > '7')
        continue;               /* no digit follows: emit nothing */
      value = *o++ - '0';
      if (*o >= '0' && *o <= '7')
        value = (value * 8) | (*o++ - '0');
      *c++ = (char) value;
    }
  *c = 0;
}
948 | |
949 #ifdef CHINESE | |
950 static void | |
951 read_kanji_str_w (c, o) | |
952 register w_char *c, *o; | |
953 { | |
954 for (; *o; c++) | |
955 { | |
956 if (*o == (w_char) '\\') | |
957 { | |
958 if (*++o == (w_char) '0') | |
959 { | |
960 o += 1; | |
961 if (*o >= (w_char) '0' && *o <= (w_char) '7') | |
962 { | |
963 *c = (*o++ - (w_char) '0'); | |
964 } | |
965 else | |
966 continue; | |
967 if (*o >= (w_char) '0' && *o <= (w_char) '7') | |
968 { | |
969 *c *= 8; | |
970 *c |= (*o++ - (w_char) '0'); | |
971 } | |
972 else | |
973 continue; | |
974 } | |
975 else | |
976 { | |
977 *c = *o++; | |
978 } | |
979 } | |
980 else | |
981 { | |
982 *c = *o++; | |
983 } | |
984 } | |
985 *c = 0; | |
986 } | |
987 #endif | |
988 | |
/*
 * Check whether an array is already in non-descending order.
 *
 *   st        address of the first element
 *   lc        number of elements
 *   size      size of one element in bytes
 *   sort_fun  comparison function, called with the addresses of two
 *             neighbouring elements
 *
 * Returns 0 as soon as sort_fun() reports a neighbouring pair as out of
 * order (result > 0), and 1 otherwise (including for 0 or 1 elements).
 */
int
Sorted (st, lc, size, sort_fun)
     register char *st;
     register int lc;
     int size;
     int (*sort_fun) ();
{
  char *next = st + size;
  int pairs = lc - 1;           /* neighbouring pairs still to compare */

  while (pairs > 0)
    {
      if (sort_fun (st, next) > 0)
        return (0);
      st = next;
      next += size;
      pairs--;
    }
  return (1);
}
1006 | |
/*
 * Tell whether k is the same text as y with every two-byte character
 * moved from the 0xa4 row to the 0xa5 row.
 *
 * Both strings are scanned as two-byte characters.  A pair matches when
 *   - both are the character 0xa1 0xbc, or
 *   - y's first byte is 0xa4, k's first byte is 0xa5 and the second
 *     bytes are equal.
 * Returns 1 when all pairs match and both strings end together,
 * 0 otherwise.
 *
 * Fixes: the 0xa1 0xbc test looked at y's second byte twice and never at
 * k's; a string cut off after a lead byte is now rejected instead of
 * being scanned past its terminating NUL.
 */
int
is_katakana (k, y)
     register char *k, *y;
{
  while (*k && *y)
    {
      if (y[0] == (char) 0xa1 && k[0] == (char) 0xa1
          && y[1] == (char) 0xbc && k[1] == (char) 0xbc)
        {
          y += 2;
          k += 2;
          continue;
        }
      if (y[0] != (char) 0xa4 || k[0] != (char) 0xa5)
        return (0);
      /* be careful, char comparison. */
      if (y[1] == '\0' || y[1] != k[1])
        return (0);
      y += 2;
      k += 2;
    }
  return (!(*k | *y));
}
1029 | |
1030 static void | |
1031 toesc (ckanji, cyomi) | |
1032 char *ckanji, *cyomi; | |
1033 { | |
1034 if (strcmp (ckanji, cyomi) == 0) | |
1035 { | |
1036 strcpy (ckanji, DIC_HIRAGANA); | |
1037 } | |
1038 else if (is_katakana (ckanji, cyomi)) | |
1039 { | |
1040 strcpy (ckanji, DIC_KATAKANA); | |
1041 } | |
1042 } |