Mercurial > freewnn
comparison PubdicPlus/pod.c @ 0:bbc77ca4def5
initial import
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 13 Dec 2007 04:30:14 +0900 |
parents | |
children | 466fe6732d8d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bbc77ca4def5 |
---|---|
1 /* Copyright 1994 Pubdic Project. | |
2 * | |
3 * Permission to use, copy, modify, distribute and sell this software | |
4 * and its documentation for any purpose is hereby granted without | |
5 * fee, provided that the above copyright notice appear in all copies | |
6 * and that both that copyright notice and this permission notice | |
7 * appear in supporting documentation, and that the name of Pubdic | |
8 * Project not be used in advertising or publicity pertaining to | |
9 * distribution of the software without specific, written prior | |
10 * permission. Pubdic Project makes no representations about the | |
11 * suitability of this software for any purpose. It is provided "as | |
12 * is" without express or implied warranty. | |
13 * | |
14 * PUBDIC PROJECT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN | |
16 * NO EVENT SHALL PUBDIC PROJECT BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | |
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
20 * PERFORMANCE OF THIS SOFTWARE. | |
21 */ | |
22 | |
23 #ifndef lint | |
24 static char rcsid[] = "$Id: pod.c,v 1.7 2005/12/10 18:50:43 aonoto Exp $"; | |
25 #endif | |
26 | |
27 #ifdef HAVE_CONFIG_H | |
28 # include <config.h> | |
29 #endif | |
30 | |
31 #include <stdio.h> | |
32 #if STDC_HEADERS | |
33 # include <stdlib.h> | |
34 # include <stddef.h> | |
35 # include <string.h> | |
36 #else | |
37 # if HAVE_MALLOC_H | |
38 # include <malloc.h> | |
39 # endif | |
40 # if HAVE_STRINGS_H | |
41 # include <strings.h> | |
42 # endif | |
43 #endif /* STDC_HEADERS */ | |
44 | |
45 #define POD_WCHAR | |
46 #ifdef POD_WCHAR | |
47 typedef unsigned short Wchar; | |
48 #else | |
49 #include <locale.h> | |
50 #include <widec.h> | |
51 #define Wchar wchar_t | |
52 #endif | |
53 | |
54 #if !(HAVE_BZERO) && (HAVE_MEMSET) | |
55 # define bzero(a, c) memset(a, 0, c) | |
56 #endif | |
57 | |
58 static char *program; | |
59 static int compare, ignore_hinshi_to_compare, sort_by_frequency, merge_sj3; | |
60 static int merge_kind, wnn_type_output, canna_type_output, sj3_type_output; | |
61 static int list_kinds; | |
62 static int copy_frequency, extract_kana = 0; | |
63 static long specific_kind; | |
64 static FILE *in1, *in2; | |
65 static char *common_out, *old_out, *new_out, *hinshi_table, *bunrui; | |
66 static char *description_table; | |
67 static int selhinshi = 0; | |
68 | |
69 /* hinshi_direction */ | |
70 #define INORDER 0 | |
71 #define REVERSE 1 | |
72 | |
73 static int hinshi_direction = INORDER; /* see above */ | |
74 | |
75 #define READBUFSIZE 128 | |
76 #define DICBUFSIZE (2 << 13) | |
77 #define DICBUFINDEXMASK (DICBUFSIZE - 1) | |
78 #define HINSHIBUFSIZE (2 << 13) | |
79 #define HINSHIBUFINDEXMASK (HINSHIBUFSIZE - 1) | |
80 | |
81 /* status of intern() */ | |
82 #define FOUND 0 | |
83 #define CREATE 1 | |
84 | |
85 /* 品詞を表す構造体 */ | |
86 | |
87 struct hinshipack | |
88 { | |
89 int nhinshis; | |
90 Wchar *hinshi; | |
91 unsigned flags; /* see below */ | |
92 struct hinshipack *next; | |
93 }; | |
94 | |
95 /* values of (struct hinshipack.)flags */ | |
96 #define REPLACED 1 | |
97 | |
98 /* 終止形を追加するためのルールファイルの内部表現(だと思う) */ | |
99 | |
100 struct descpack | |
101 { | |
102 Wchar *hinshi, *tandesc, *yomdesc; | |
103 struct descpack *next; | |
104 }; | |
105 | |
106 /* エントリの種別を表す構造体その他 */ | |
107 | |
108 struct kindpack | |
109 { | |
110 Wchar *kind; | |
111 long kindbit; | |
112 }; | |
113 | |
114 /* 辞書を表す構造体 */ | |
115 | |
116 struct dicpack | |
117 { | |
118 Wchar *yomi, *tango; | |
119 struct hinshipack *hinshi; | |
120 int hindo; | |
121 long kind; | |
122 Wchar *extdata; | |
123 unsigned flags; /* SEE BELOW */ | |
124 struct dicpack *next; | |
125 }; | |
126 | |
127 /* values of (struct dicpack.)flags */ | |
128 #define COMMON 001 | |
129 #define NEW 002 | |
130 | |
131 #if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) | |
132 /* Prototype for C89 (or later) */ | |
133 #ifdef POD_WCHAR | |
134 size_t Mbstowcs (Wchar *d, char *ss, int n); | |
135 size_t Wcstombs (char *d, Wchar *s, int n); | |
136 int Wscmp (register Wchar *s1, register Wchar *s2); | |
137 Wchar *Wscpy (Wchar *d, register Wchar *s); | |
138 int Wslen (Wchar *s); | |
139 int Watoi (Wchar *s); | |
140 static void Fputws (Wchar *s, FILE *f); | |
141 Wchar *Fgetws (Wchar *buf, int siz, FILE *f); | |
142 #endif /* POD_WCHAR */ | |
143 | |
144 static int all_kana (Wchar *s); | |
145 static Wchar *findslash (Wchar *s); | |
146 static Wchar *extstr (Wchar *p, Wchar **pp, int *key_return); | |
147 static void malloc_failed (void); | |
148 static struct hinshipack *internhinshi (Wchar *str, int flag); | |
149 static void replace_hinshi (void); | |
150 static void select_hinshi (int n); | |
151 static void freedesc (struct descpack *p); | |
152 static struct descpack *interndesc (Wchar *hin, Wchar *tan, Wchar *yom); | |
153 static struct descpack *searchdesc (Wchar *hin); | |
154 static void store_description (void); | |
155 static long internkind (Wchar *s); | |
156 static void listkinds (void); | |
157 static int kindcompar (struct kindpack *k1, struct kindpack *k2); | |
158 static void sortkind (void); | |
159 static struct dicpack *intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags); | |
160 static void storepd (FILE *file); | |
161 static void comparepd (FILE *file); | |
162 static void canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n); | |
163 static void entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex); | |
164 static void printentry (FILE *cf, struct dicpack *p); | |
165 static void showentry (struct dicpack **pd, int n); | |
166 static int diccompar (struct dicpack **p1, struct dicpack **p2); | |
167 static int dichindocompar (struct dicpack **p1, struct dicpack **p2); | |
168 void shrinkargs (char **argv, int n, int count); | |
169 static void parseargs (int argc, char *argv[]); | |
170 #endif | |
171 | |
172 #ifndef POD_WCHAR | |
173 # define Mbstowcs mbstowcs | |
174 # define Wcstombs wcstombs | |
175 # define Wscmp wscmp | |
176 # define Wscpy wscpy | |
177 # define Wslen wslen | |
178 # define Fgetws fgetws | |
179 # define Fputws fputws | |
180 #else | |
181 # define SS2 0x8e | |
182 # define SS3 0x8f | |
183 # define MSB 0x80 | |
184 # define MSK 0x7f | |
185 | |
186 # define WCG0 0x0000 | |
187 # define WCG1 0x8080 | |
188 # define WCG2 0x0080 | |
189 # define WCG3 0x8000 | |
190 # define WCMSK 0x8080 | |
191 | |
192 size_t | |
193 Mbstowcs (d, ss, n) | |
194 Wchar *d; | |
195 char *ss; | |
196 int n; | |
197 { | |
198 register Wchar *p = d; | |
199 register int ch; | |
200 register unsigned char *s = (unsigned char *) ss; | |
201 | |
202 while ((ch = *s++) && (p - d < n)) | |
203 { | |
204 if (ch & MSB) | |
205 { | |
206 if (ch == SS2) | |
207 { /* kana */ | |
208 *p++ = (Wchar) * s++; | |
209 } | |
210 else if (ch == SS3) | |
211 { | |
212 *p++ = (Wchar) ((*s << 8) | (*(s + 1) & MSK)); | |
213 s += 2; | |
214 } | |
215 else | |
216 { | |
217 *p++ = (Wchar) ((ch << 8) | (*s++ & 0xff)); | |
218 } | |
219 } | |
220 else | |
221 { | |
222 *p++ = (Wchar) ch; | |
223 } | |
224 } | |
225 *p = (Wchar) 0; | |
226 return p - d; | |
227 } | |
228 | |
229 size_t | |
230 Wcstombs (d, s, n) | |
231 char *d; | |
232 Wchar *s; | |
233 int n; | |
234 { | |
235 register char *p = d; | |
236 register Wchar ch; | |
237 | |
238 while ((ch = *s++) && (p - d + 2 < n)) | |
239 { | |
240 switch (ch & WCMSK) | |
241 { | |
242 case WCG0: | |
243 *p++ = (char) ch; | |
244 break; | |
245 | |
246 case WCG1: | |
247 *p++ = (char) ((ch >> 8) & 0xff); | |
248 *p++ = (char) (ch & 0xff); | |
249 break; | |
250 | |
251 case WCG2: | |
252 *p++ = SS2; | |
253 *p++ = (char) ch; | |
254 break; | |
255 | |
256 case WCG3: | |
257 *p++ = SS3; | |
258 *p++ = (char) ((ch >> 8) & 0xff); | |
259 *p++ = (char) ((ch & 0xff) | MSB); | |
260 break; | |
261 } | |
262 } | |
263 *p = '\0'; | |
264 return p - d; | |
265 } | |
266 | |
267 int | |
268 Wscmp (s1, s2) | |
269 register Wchar *s1, *s2; | |
270 { | |
271 register int res; | |
272 | |
273 /* 以下のコードはいささかトリッキーなので、説明を加えておこう。 | |
274 以下ではこのコメント内にあるようなことをしたいわけである。 | |
275 | |
276 while (*s1 && *s2 && && *s1 == *s2) { | |
277 s1++; s2++; | |
278 } | |
279 return *s1 - *s2; | |
280 | |
281 すなわち、s1 も s2 も EOS ('\0') を指していなくて、しかも値が | |
282 異なる間はそれぞれのポインタを進める。いずれかが EOS になるか、 | |
283 値が違ってきた場合には、*s1 - *s2 を返す。 | |
284 */ | |
285 | |
286 while (!(res = *s1 - *s2++) && *s1++) | |
287 ; | |
288 return res; | |
289 } | |
290 | |
291 Wchar * | |
292 Wscpy (d, s) | |
293 Wchar *d; | |
294 register Wchar *s; | |
295 { | |
296 register Wchar *p = d, ch; | |
297 | |
298 while (ch = *s++) | |
299 { | |
300 *p++ = ch; | |
301 } | |
302 *p = (Wchar) 0; | |
303 return d; | |
304 } | |
305 | |
306 int | |
307 Wslen (s) | |
308 Wchar *s; | |
309 { | |
310 register Wchar *p = s; | |
311 | |
312 while (*p) | |
313 p++; | |
314 return p - s; | |
315 } | |
316 | |
317 int | |
318 Watoi (s) | |
319 Wchar *s; | |
320 { | |
321 register int res = 0; | |
322 register Wchar ch; | |
323 | |
324 while ((ch = *s++) && ((Wchar) '0' <= ch) && (ch <= (Wchar) '9')) | |
325 { | |
326 res *= 10; | |
327 res += ch - (Wchar) '0'; | |
328 } | |
329 return res; | |
330 } | |
331 | |
332 static void | |
333 Fputws (s, f) | |
334 Wchar *s; | |
335 FILE *f; | |
336 { | |
337 char buf[READBUFSIZE]; | |
338 | |
339 if (Wcstombs (buf, s, READBUFSIZE)) | |
340 { | |
341 (void) fputs (buf, f); | |
342 } | |
343 } | |
344 | |
345 Wchar * | |
346 Fgetws (buf, siz, f) | |
347 Wchar *buf; | |
348 int siz; | |
349 FILE *f; | |
350 { | |
351 char mbuf[READBUFSIZE], *p; | |
352 | |
353 p = fgets (mbuf, READBUFSIZE, f); | |
354 if (p) | |
355 { | |
356 if (Mbstowcs (buf, mbuf, siz)) | |
357 { | |
358 return buf; | |
359 } | |
360 } | |
361 return (Wchar *) 0; | |
362 } | |
363 #endif | |
364 | |
365 /* s が全てカタカナから構成されているかどうかを返す関数 */ | |
366 | |
367 static int | |
368 all_kana (s) | |
369 Wchar *s; | |
370 { | |
371 static Wchar xa = 0, xke, aa, *p; | |
372 | |
373 if (!xa) | |
374 { | |
375 Mbstowcs (&xa, "\045\041", 1); | |
376 Mbstowcs (&xke, "\045\166", 1); | |
377 Mbstowcs (&aa, "\041\074", 1); | |
378 } | |
379 | |
380 for (p = s; *p; p++) | |
381 { | |
382 if (!(*p == aa || (xa <= *p && *p <= xke))) | |
383 { | |
384 return 0; | |
385 } | |
386 } | |
387 | |
388 return 1; | |
389 } | |
390 | |
391 /* スラッシュを探す */ | |
392 | |
393 static Wchar * | |
394 findslash (s) | |
395 Wchar *s; | |
396 { | |
397 while (*s) | |
398 { | |
399 if (*s == (Wchar) '/') | |
400 { | |
401 return s; | |
402 } | |
403 s++; | |
404 } | |
405 return (Wchar *) 0; | |
406 } | |
407 | |
408 /* トークンを一個取り出す */ | |
409 | |
410 static Wchar * | |
411 extstr (p, pp, key_return) | |
412 Wchar *p, **pp; | |
413 int *key_return; | |
414 { | |
415 Wchar *res; | |
416 int key = 0; | |
417 | |
418 while (*p == (Wchar) ' ' || *p == (Wchar) '\t') | |
419 p++; | |
420 res = p; | |
421 while (*p && *p != (Wchar) ' ' && *p != (Wchar) '\t' && *p != (Wchar) '\n') | |
422 { | |
423 key += (int) *p++; | |
424 } | |
425 *p++ = (Wchar) '\0'; | |
426 if (pp) | |
427 *pp = p; | |
428 if (key_return) | |
429 *key_return = key; | |
430 return res; | |
431 } | |
432 | |
433 static struct hinshipack *partsofspeech[HINSHIBUFSIZE]; | |
434 | |
435 static void | |
436 malloc_failed () | |
437 { | |
438 (void) fprintf (stderr, "%s: malloc failed.\n", program); | |
439 } | |
440 | |
441 /* 品詞名を品詞名テーブルに登録する */ | |
442 | |
443 static struct hinshipack * | |
444 internhinshi (str, flag) | |
445 Wchar *str; | |
446 int flag; | |
447 { | |
448 struct hinshipack *p, **pp; | |
449 Wchar *s; | |
450 int key = 0; | |
451 | |
452 for (s = str; *s; s++) | |
453 key += (int) *s; | |
454 key = ((unsigned) key & HINSHIBUFINDEXMASK); | |
455 for (pp = partsofspeech + key; p = *pp; pp = &(p->next)) | |
456 { | |
457 if (!Wscmp (p->hinshi, str)) | |
458 { | |
459 return p; | |
460 } | |
461 } | |
462 if (flag) | |
463 { | |
464 p = (struct hinshipack *) malloc (sizeof (struct hinshipack)); | |
465 if (p) | |
466 { | |
467 *pp = p; | |
468 (void) bzero (p, sizeof (struct hinshipack)); | |
469 p->hinshi = (Wchar *) malloc ((Wslen (str) + 1) * sizeof (Wchar)); | |
470 if (p->hinshi) | |
471 { | |
472 (void) Wscpy (p->hinshi, str); | |
473 p->nhinshis = 1; | |
474 return p; | |
475 } | |
476 free (p); | |
477 } | |
478 malloc_failed (); | |
479 } | |
480 return (struct hinshipack *) 0; | |
481 } | |
482 | |
483 /* 品詞名を置き換える */ | |
484 | |
485 static void | |
486 replace_hinshi () | |
487 { | |
488 FILE *f; | |
489 Wchar readbuf[READBUFSIZE], *to, *from, *s; | |
490 struct hinshipack *hinshientry, *p; | |
491 int i, err = 0; | |
492 | |
493 f = fopen (hinshi_table, "r"); | |
494 if (!f) | |
495 { | |
496 (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, hinshi_table); | |
497 exit (1); | |
498 } | |
499 while (s = Fgetws (readbuf, READBUFSIZE, f)) | |
500 { | |
501 from = extstr (s, &s, 0); | |
502 to = extstr (s, &s, 0); | |
503 if (hinshi_direction == REVERSE) | |
504 { | |
505 Wchar *xx = from; | |
506 from = to; | |
507 to = xx; | |
508 } | |
509 | |
510 hinshientry = internhinshi (from, 0); | |
511 if (hinshientry) | |
512 { | |
513 Wchar *xx; | |
514 | |
515 xx = (Wchar *) malloc ((Wslen (to) + 1) * sizeof (Wchar)); | |
516 if (xx) | |
517 { | |
518 Wchar *cp; | |
519 int n = 1; | |
520 | |
521 (void) Wscpy (xx, to); | |
522 free (hinshientry->hinshi); | |
523 hinshientry->hinshi = xx; | |
524 for (cp = xx; *cp; cp++) | |
525 { | |
526 if (*cp == (Wchar) '/') | |
527 { | |
528 *cp = (Wchar) 0; | |
529 n++; | |
530 } | |
531 } | |
532 hinshientry->nhinshis = n; | |
533 hinshientry->flags |= REPLACED; | |
534 } | |
535 else | |
536 { | |
537 malloc_failed (); | |
538 } | |
539 } | |
540 } | |
541 (void) fclose (f); | |
542 | |
543 for (i = 0; i < HINSHIBUFSIZE; i++) | |
544 { | |
545 for (p = partsofspeech[i]; p; p = p->next) | |
546 { | |
547 if (!(p->flags & REPLACED)) | |
548 { | |
549 (void) fprintf (stderr, "%s: The replacement for \"", program); | |
550 Fputws (p->hinshi, stderr); | |
551 (void) fprintf (stderr, "\" is not mentioned in the table.\n"); | |
552 err = 1; | |
553 } | |
554 } | |
555 } | |
556 if (err) | |
557 { | |
558 exit (1); | |
559 } | |
560 } | |
561 | |
562 static void | |
563 select_hinshi (n) | |
564 int n; | |
565 { | |
566 Wchar *s, *t, *xx; | |
567 struct hinshipack *p; | |
568 int i; | |
569 | |
570 if (!n) | |
571 return; | |
572 | |
573 for (i = 0; i < HINSHIBUFSIZE; i++) | |
574 { | |
575 for (p = partsofspeech[i]; p; p = p->next) | |
576 { | |
577 switch (n) | |
578 { | |
579 case 1: | |
580 s = findslash (p->hinshi); | |
581 if (s) | |
582 { | |
583 *s = (Wchar) 0; | |
584 } | |
585 break; | |
586 | |
587 case 2: | |
588 s = findslash (p->hinshi); | |
589 if (s) | |
590 { | |
591 s++; | |
592 t = findslash (s); | |
593 if (t) | |
594 { | |
595 xx = (Wchar *) malloc ((t - s + 1) * sizeof (Wchar)); | |
596 if (xx) | |
597 { | |
598 *t = (Wchar) 0; | |
599 Wscpy (xx, s); | |
600 t = p->hinshi; | |
601 p->hinshi = xx; | |
602 (void) free ((char *) t); | |
603 } | |
604 } | |
605 } | |
606 break; | |
607 | |
608 case 3: | |
609 s = findslash (p->hinshi); | |
610 if (s) | |
611 { | |
612 t = findslash (s + 1); | |
613 if (t) | |
614 { | |
615 t++; | |
616 xx = (Wchar *) malloc ((Wslen (t) + 1) * sizeof (Wchar)); | |
617 if (xx) | |
618 { | |
619 Wscpy (xx, t); | |
620 t = p->hinshi; | |
621 p->hinshi = xx; | |
622 (void) free ((char *) t); | |
623 } | |
624 } | |
625 } | |
626 break; | |
627 | |
628 default: | |
629 break; | |
630 } | |
631 } | |
632 } | |
633 } | |
634 | |
635 static void | |
636 freedesc (p) | |
637 struct descpack *p; | |
638 { | |
639 free (p->hinshi); | |
640 free (p->tandesc); | |
641 free (p->yomdesc); | |
642 free (p); | |
643 } | |
644 | |
645 static struct descpack *description[HINSHIBUFSIZE]; | |
646 | |
647 /* ルールの登録 */ | |
648 | |
649 static struct descpack * | |
650 interndesc (hin, tan, yom) | |
651 Wchar *hin, *tan, *yom; | |
652 { | |
653 struct descpack *p, **pp, *next = (struct descpack *) 0; | |
654 Wchar *s; | |
655 int key = 0; | |
656 | |
657 for (s = hin; *s; s++) | |
658 key += (int) *s; | |
659 key = ((unsigned) key & HINSHIBUFINDEXMASK); | |
660 for (pp = description + key; p = *pp; pp = &(p->next)) | |
661 { | |
662 if (!Wscmp (p->hinshi, hin)) | |
663 { | |
664 if (!Wscmp (p->tandesc, tan) && !Wscmp (p->yomdesc, yom)) | |
665 { | |
666 return p; | |
667 } | |
668 else | |
669 { | |
670 *pp = next = p->next; | |
671 freedesc (p); | |
672 break; | |
673 } | |
674 } | |
675 } | |
676 p = (struct descpack *) malloc (sizeof (struct descpack)); | |
677 if (p) | |
678 { | |
679 *pp = p; | |
680 (void) bzero (p, sizeof (struct descpack)); | |
681 p->next = next; | |
682 p->hinshi = (Wchar *) malloc ((Wslen (hin) + 1) * sizeof (Wchar)); | |
683 if (p->hinshi) | |
684 { | |
685 (void) Wscpy (p->hinshi, hin); | |
686 p->tandesc = (Wchar *) malloc ((Wslen (tan) + 1) * sizeof (Wchar)); | |
687 if (p->tandesc) | |
688 { | |
689 (void) Wscpy (p->tandesc, tan); | |
690 p->yomdesc = (Wchar *) malloc ((Wslen (yom) + 1) * sizeof (Wchar)); | |
691 if (p->yomdesc) | |
692 { | |
693 (void) Wscpy (p->yomdesc, yom); | |
694 return p; | |
695 } | |
696 free (p->tandesc); | |
697 } | |
698 free (p->hinshi); | |
699 } | |
700 free (p); | |
701 } | |
702 malloc_failed (); | |
703 return (struct descpack *) 0; | |
704 } | |
705 | |
706 /* ルールの探索 */ | |
707 | |
708 static struct descpack * | |
709 searchdesc (hin) | |
710 Wchar *hin; | |
711 { | |
712 struct descpack *p, **pp; | |
713 Wchar *s; | |
714 int key = 0; | |
715 | |
716 for (s = hin; *s; s++) | |
717 key += (int) *s; | |
718 key = ((unsigned) key & HINSHIBUFINDEXMASK); | |
719 for (pp = description + key; p = *pp; pp = &(p->next)) | |
720 { | |
721 if (!Wscmp (p->hinshi, hin)) | |
722 { | |
723 return p; | |
724 } | |
725 } | |
726 return (struct descpack *) 0; | |
727 } | |
728 | |
729 static void | |
730 store_description () | |
731 { | |
732 FILE *f; | |
733 Wchar readbuf[READBUFSIZE], *hin, *tan, *yom, *s; | |
734 | |
735 if (!description_table) | |
736 { | |
737 return; | |
738 } | |
739 | |
740 f = fopen (description_table, "r"); | |
741 if (!f) | |
742 { | |
743 (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, description_table); | |
744 exit (1); | |
745 } | |
746 while (s = Fgetws (readbuf, READBUFSIZE, f)) | |
747 { | |
748 Wchar nl[1]; | |
749 | |
750 nl[0] = (Wchar) 0; | |
751 hin = tan = yom = nl; | |
752 hin = extstr (s, &s, 0); | |
753 if (*hin) | |
754 { | |
755 tan = extstr (s, &s, 0); | |
756 if (*tan) | |
757 { | |
758 yom = extstr (s, &s, 0); | |
759 } | |
760 } | |
761 | |
762 interndesc (hin, tan, yom); | |
763 } | |
764 (void) fclose (f); | |
765 } | |
766 | |
767 struct kindpack kinds[sizeof (long) * 8]; | |
768 static int nkinds; | |
769 | |
770 #define KIHONBIT 1L | |
771 | |
772 /* 種別の登録 */ | |
773 | |
774 static long | |
775 internkind (s) | |
776 Wchar *s; | |
777 { | |
778 int i; | |
779 Wchar *p; | |
780 | |
781 p = findslash (s); | |
782 if (p) | |
783 { | |
784 long res; | |
785 | |
786 *p = (Wchar) '\0'; | |
787 res = internkind (s); | |
788 res |= internkind (p + 1); | |
789 return res; | |
790 } | |
791 else | |
792 { | |
793 for (i = 0; i < nkinds; i++) | |
794 { | |
795 if (!Wscmp (s, kinds[i].kind)) | |
796 { | |
797 return kinds[i].kindbit; | |
798 } | |
799 } | |
800 if (nkinds < (sizeof (long) * 8) && (kinds[nkinds].kind = (Wchar *) malloc ((Wslen (s) + 1) * sizeof (Wchar)))) | |
801 { | |
802 (void) Wscpy (kinds[nkinds].kind, s); | |
803 kinds[nkinds].kindbit = 1 << nkinds; | |
804 return kinds[nkinds++].kindbit; | |
805 } | |
806 return 0; | |
807 } | |
808 } | |
809 | |
810 /* 種別の一覧の出力 */ | |
811 | |
812 static void | |
813 listkinds () | |
814 { | |
815 int i; | |
816 | |
817 for (i = 0; i < nkinds; i++) | |
818 { | |
819 Fputws (kinds[i].kind, stdout); | |
820 putchar ('\n'); | |
821 } | |
822 } | |
823 | |
824 static int | |
825 kindcompar (k1, k2) | |
826 struct kindpack *k1, *k2; | |
827 { | |
828 return Wscmp (k1->kind, k2->kind); | |
829 } | |
830 | |
831 static void | |
832 sortkind () | |
833 { | |
834 qsort (kinds, nkinds, sizeof (struct kindpack), kindcompar); | |
835 } | |
836 | |
837 static struct dicpack *dic[DICBUFSIZE], **pdic; | |
838 static int ndicentries = 0; | |
839 | |
840 /* | |
841 | |
842 intern -- 辞書エントリの検索/登録 | |
843 | |
844 第6引数の stat としてヌルでないアドレスが指定された場合には、同じエントリ | |
845 が登録されていない場合には登録を行う。アドレスがヌルの場合には登録しない。 | |
846 | |
847 flags によっていろいろと指定をする。(以下を見てね)。 | |
848 | |
849 hinshi に 0 を渡してはいけない。kind は 0 を渡しても可だが、-m の時じゃない | |
850 マッチはしないので注意。 | |
851 | |
852 */ | |
853 | |
854 /* flags */ | |
855 #define IGNORE_HINSHI 1L | |
856 #define IGNORE_KIND 2L | |
857 | |
858 static struct dicpack * | |
859 intern (key, yomi, kouho, hinshi, hindo, kind, stat, flags) | |
860 int key, hindo, *stat; | |
861 Wchar *yomi, *kouho, *hinshi; | |
862 long kind, flags; | |
863 { | |
864 struct dicpack *p, **pp; | |
865 struct descpack *dp; | |
866 Wchar nl[1], *yomdesc = nl, *tandesc = nl; | |
867 Wchar *yom = (Wchar *) 0, *tan = (Wchar *) 0, *dhinshi, *dh; | |
868 | |
869 nl[0] = (Wchar) '\0'; | |
870 | |
871 if (description_table) | |
872 { | |
873 dhinshi = dh = hinshi; /* かんなの品詞を探す */ | |
874 while (*dh) | |
875 { | |
876 if (*dh++ == (Wchar) '/') | |
877 { | |
878 dhinshi = dh; | |
879 } | |
880 } | |
881 dp = searchdesc (dhinshi); | |
882 if (dp) | |
883 { | |
884 yomdesc = dp->yomdesc; | |
885 tandesc = dp->tandesc; | |
886 if (Wslen (yomdesc)) | |
887 { | |
888 Wchar *t; | |
889 t = (Wchar *) malloc ((Wslen (yomi) + Wslen (yomdesc) + 1) * sizeof (Wchar)); | |
890 if (t) | |
891 { | |
892 Wscpy (t, yomi); | |
893 yom = yomi = t; | |
894 Wscpy (yomi + Wslen (yomi), yomdesc); | |
895 } | |
896 } | |
897 if (Wslen (tandesc)) | |
898 { | |
899 Wchar *t; | |
900 t = (Wchar *) malloc ((Wslen (kouho) + Wslen (tandesc) + 1) * sizeof (Wchar)); | |
901 if (t) | |
902 { | |
903 Wscpy (t, kouho); | |
904 tan = kouho = t; | |
905 Wscpy (kouho + Wslen (kouho), tandesc); | |
906 } | |
907 } | |
908 } | |
909 else | |
910 { | |
911 char foo[64]; | |
912 | |
913 fprintf (stderr, "no description rule for "); | |
914 Wcstombs (foo, dhinshi, 64); | |
915 fprintf (stderr, "%s.\n", foo); | |
916 } | |
917 } | |
918 | |
919 key = ((unsigned) key & DICBUFINDEXMASK); | |
920 for (pp = dic + key; p = *pp; pp = &(p->next)) | |
921 { | |
922 if (!Wscmp (p->yomi, yomi) && !Wscmp (p->tango, kouho) && ((flags & IGNORE_HINSHI) || !Wscmp (p->hinshi->hinshi, hinshi)) && ((flags & IGNORE_KIND) || ((p->kind & kind) == kind))) | |
923 { | |
924 /* match */ | |
925 if (stat) | |
926 *stat = FOUND; | |
927 if (yom) | |
928 free (yom); | |
929 if (tan) | |
930 free (tan); | |
931 return p; | |
932 } | |
933 } | |
934 if (stat) | |
935 { | |
936 p = (struct dicpack *) malloc (sizeof (struct dicpack)); | |
937 if (p) | |
938 { | |
939 *pp = p; | |
940 (void) bzero (p, sizeof (struct dicpack)); | |
941 p->yomi = (Wchar *) malloc ((Wslen (yomi) + 1) * sizeof (Wchar)); | |
942 if (p->yomi) | |
943 { | |
944 (void) Wscpy (p->yomi, yomi); | |
945 p->tango = (Wchar *) malloc ((Wslen (kouho) + 1) * sizeof (Wchar)); | |
946 if (p->tango) | |
947 { | |
948 (void) Wscpy (p->tango, kouho); | |
949 p->hinshi = internhinshi (hinshi, 1); | |
950 if (p->hinshi) | |
951 { | |
952 p->hindo = hindo; | |
953 *stat = CREATE; | |
954 ndicentries++; | |
955 p->kind = kind; | |
956 p->extdata = (Wchar *) 0; | |
957 if (yom) | |
958 free (yom); | |
959 if (tan) | |
960 free (tan); | |
961 return p; | |
962 } | |
963 free (p->tango); | |
964 } | |
965 free (p->yomi); | |
966 } | |
967 free (p); | |
968 } | |
969 malloc_failed (); | |
970 } | |
971 if (yom) | |
972 free (yom); | |
973 if (tan) | |
974 free (tan); | |
975 return (struct dicpack *) 0; | |
976 } | |
977 | |
978 /* 登録されているエントリに対して fn を実行する */ | |
979 | |
980 static void | |
981 for_all_interned (fn) | |
982 void (*fn) (); | |
983 { | |
984 int i; | |
985 struct dicpack *p; | |
986 | |
987 for (i = 0; i < DICBUFSIZE; i++) | |
988 { | |
989 for (p = dic[i]; p; p = p->next) | |
990 { | |
991 (*fn) (p); | |
992 } | |
993 } | |
994 } | |
995 | |
996 static void | |
997 storepd (file) | |
998 FILE *file; | |
999 { | |
1000 Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind; | |
1001 int nhindo, key, tkey, stat; | |
1002 long kindbit; | |
1003 struct dicpack *dicentry; | |
1004 | |
1005 while (p = Fgetws (readbuf, READBUFSIZE, file)) | |
1006 { | |
1007 key = 0; | |
1008 yomi = extstr (p, &p, &tkey); | |
1009 key += tkey; | |
1010 kouho = extstr (p, &p, &tkey); | |
1011 key += tkey; | |
1012 hinshi = extstr (p, &p, 0); | |
1013 hindo = extstr (p, &p, 0); | |
1014 nhindo = Watoi (hindo); | |
1015 | |
1016 kind = extstr (p, 0, 0); | |
1017 if (*kind) | |
1018 { | |
1019 kindbit = internkind (kind); | |
1020 } | |
1021 else | |
1022 { | |
1023 kindbit = KIHONBIT; | |
1024 } | |
1025 | |
1026 dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, &stat, IGNORE_KIND); | |
1027 if (dicentry) | |
1028 { | |
1029 dicentry->kind |= kindbit; | |
1030 } | |
1031 } | |
1032 } | |
1033 | |
1034 static void | |
1035 comparepd (file) | |
1036 FILE *file; | |
1037 { | |
1038 Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind; | |
1039 int nhindo, key, tkey, stat, *statp = &stat; | |
1040 struct dicpack *dicentry; | |
1041 long kindbit, flags = 0L; | |
1042 | |
1043 while (p = Fgetws (readbuf, READBUFSIZE, file)) | |
1044 { | |
1045 key = 0; | |
1046 yomi = extstr (p, &p, &tkey); | |
1047 key += tkey; | |
1048 kouho = extstr (p, &p, &tkey); | |
1049 key += tkey; | |
1050 hinshi = extstr (p, &p, 0); | |
1051 if (ignore_hinshi_to_compare) | |
1052 { | |
1053 flags |= IGNORE_HINSHI; | |
1054 } | |
1055 hindo = extstr (p, &p, 0); | |
1056 nhindo = Watoi (hindo); | |
1057 | |
1058 kind = extstr (p, 0, 0); | |
1059 if (*kind) | |
1060 { | |
1061 kindbit = internkind (kind); | |
1062 } | |
1063 else | |
1064 { | |
1065 kindbit = KIHONBIT; | |
1066 } | |
1067 if (merge_kind || merge_sj3) | |
1068 { | |
1069 flags |= IGNORE_KIND; | |
1070 } | |
1071 if (copy_frequency) | |
1072 { | |
1073 statp = (int *) 0; | |
1074 } | |
1075 | |
1076 dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, statp, flags); | |
1077 | |
1078 if (dicentry) | |
1079 { | |
1080 if (copy_frequency) | |
1081 { | |
1082 dicentry->hindo = nhindo; | |
1083 dicentry->flags &= ~COMMON; | |
1084 } | |
1085 else if (ignore_hinshi_to_compare && stat == FOUND) | |
1086 { | |
1087 /* この場合、同じキーのチェーンが返る */ | |
1088 struct dicpack *pd; | |
1089 | |
1090 for (pd = dicentry; pd; pd = pd->next) | |
1091 { | |
1092 if (!Wscmp (pd->yomi, yomi) && !Wscmp (pd->tango, kouho)) | |
1093 { | |
1094 pd->flags |= COMMON; | |
1095 if (!merge_sj3) | |
1096 { | |
1097 pd->kind |= kindbit; | |
1098 } | |
1099 | |
1100 if (merge_sj3) | |
1101 { | |
1102 int len = 0; | |
1103 Wchar *dat; | |
1104 | |
1105 if (pd->extdata) | |
1106 { | |
1107 len = Wslen (pd->extdata); | |
1108 } | |
1109 dat = (Wchar *) malloc ((Wslen (hinshi) + 1 + len) * sizeof (Wchar)); | |
1110 if (dat) | |
1111 { | |
1112 if (len) | |
1113 { | |
1114 (void) Wscpy (dat, pd->extdata); | |
1115 (void) free ((char *) pd->extdata); | |
1116 } | |
1117 (void) Wscpy (dat + len, hinshi); | |
1118 pd->extdata = dat; | |
1119 } | |
1120 } | |
1121 } | |
1122 } | |
1123 } | |
1124 else | |
1125 { | |
1126 dicentry->kind |= kindbit; | |
1127 if (stat == FOUND) | |
1128 { | |
1129 dicentry->flags |= COMMON; | |
1130 } | |
1131 else | |
1132 { /* CREATE */ | |
1133 dicentry->flags |= NEW; | |
1134 } | |
1135 } | |
1136 } | |
1137 } | |
1138 } | |
1139 | |
1140 static void | |
1141 canna_output (cf, p, h, n) | |
1142 FILE *cf; | |
1143 struct dicpack *p; | |
1144 Wchar *h; | |
1145 int n; | |
1146 { | |
1147 for (; n-- > 0; h += Wslen (h) + 1) | |
1148 { | |
1149 Fputws (p->yomi, cf); | |
1150 (void) putc (' ', cf); | |
1151 Fputws (h, cf); | |
1152 if (p->hindo) | |
1153 { | |
1154 (void) fprintf (cf, "*%d", p->hindo); | |
1155 } | |
1156 (void) putc (' ', cf); | |
1157 Fputws (p->tango, cf); | |
1158 (void) putc ('\n', cf); | |
1159 } | |
1160 } | |
1161 | |
1162 static void | |
1163 entry_out (cf, p, h, n, ex) | |
1164 FILE *cf; | |
1165 struct dicpack *p; | |
1166 Wchar *h; | |
1167 int n; | |
1168 Wchar *ex; | |
1169 { | |
1170 int i, f = 1; | |
1171 long b; | |
1172 | |
1173 for (; n-- > 0; h += Wslen (h) + 1) | |
1174 { | |
1175 Fputws (p->yomi, cf); | |
1176 (void) putc (' ', cf); | |
1177 Fputws (p->tango, cf); | |
1178 (void) putc (' ', cf); | |
1179 if (merge_sj3 && ex) | |
1180 { | |
1181 Fputws (ex, cf); | |
1182 (void) putc ('/', cf); | |
1183 } | |
1184 Fputws (h, cf); | |
1185 if (!sj3_type_output) | |
1186 { | |
1187 (void) fprintf (cf, " %d", p->hindo); | |
1188 } | |
1189 | |
1190 if (!wnn_type_output) | |
1191 { | |
1192 if (bunrui) | |
1193 { | |
1194 (void) printf (" %s", bunrui); | |
1195 } | |
1196 else | |
1197 { | |
1198 if (specific_kind) | |
1199 { | |
1200 b = (specific_kind & p->kind); | |
1201 } | |
1202 else | |
1203 { | |
1204 b = p->kind; | |
1205 } | |
1206 if (b != KIHONBIT) | |
1207 { /* 基本だけだったら何も書かない */ | |
1208 for (i = 0; i < nkinds; i++) | |
1209 { | |
1210 if (b & kinds[i].kindbit) | |
1211 { | |
1212 if (f) | |
1213 { | |
1214 (void) putc (' ', cf); | |
1215 f = 0; | |
1216 } | |
1217 else | |
1218 { | |
1219 (void) putc ('/', cf); | |
1220 } | |
1221 Fputws (kinds[i].kind, cf); | |
1222 } | |
1223 } | |
1224 } | |
1225 } | |
1226 } | |
1227 (void) putc ('\n', cf); | |
1228 } | |
1229 } | |
1230 | |
1231 /* p で表されるエントリをファイル cf に出力する */ | |
1232 | |
1233 static void | |
1234 printentry (cf, p) | |
1235 FILE *cf; | |
1236 struct dicpack *p; | |
1237 { | |
1238 if (specific_kind && !(p->kind & specific_kind)) | |
1239 { | |
1240 return; | |
1241 } | |
1242 | |
1243 if (extract_kana && !all_kana (p->tango)) | |
1244 { | |
1245 return; | |
1246 } | |
1247 | |
1248 if (selhinshi && !p->hinshi->hinshi[0]) | |
1249 { | |
1250 return; | |
1251 } | |
1252 | |
1253 if (canna_type_output) | |
1254 { | |
1255 canna_output (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis); | |
1256 } | |
1257 else | |
1258 { | |
1259 entry_out (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis, p->extdata); | |
1260 } | |
1261 } | |
1262 | |
1263 static void | |
1264 showdeleted (p) | |
1265 struct dicpack *p; | |
1266 { | |
1267 if (!(p->flags & COMMON)) | |
1268 { | |
1269 (void) printf ("- "); | |
1270 printentry (stdout, p); | |
1271 } | |
1272 } | |
1273 | |
1274 static void | |
1275 showentry (pd, n) | |
1276 struct dicpack **pd; | |
1277 int n; | |
1278 { | |
1279 FILE *cf = (FILE *) 0, *of = (FILE *) 0, *nf = (FILE *) 0; | |
1280 struct dicpack *p; | |
1281 int i; | |
1282 | |
1283 if (common_out) | |
1284 { | |
1285 if (common_out[0] != '-' || common_out[1]) | |
1286 { | |
1287 cf = fopen (common_out, "w"); | |
1288 if (!cf) | |
1289 { | |
1290 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, common_out); | |
1291 exit (1); | |
1292 } | |
1293 } | |
1294 else | |
1295 { | |
1296 cf = stdout; | |
1297 } | |
1298 } | |
1299 if (old_out) | |
1300 { | |
1301 if (old_out[0] != '-' || old_out[1]) | |
1302 { | |
1303 of = fopen (old_out, "w"); | |
1304 if (!of) | |
1305 { | |
1306 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, old_out); | |
1307 exit (1); | |
1308 } | |
1309 } | |
1310 else | |
1311 { | |
1312 of = stdout; | |
1313 } | |
1314 } | |
1315 if (new_out) | |
1316 { | |
1317 if (new_out[0] != '-' || new_out[1]) | |
1318 { | |
1319 nf = fopen (new_out, "w"); | |
1320 if (!nf) | |
1321 { | |
1322 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, new_out); | |
1323 exit (1); | |
1324 } | |
1325 } | |
1326 else | |
1327 { | |
1328 nf = stdout; | |
1329 } | |
1330 } | |
1331 | |
1332 for (i = 0; i < n; i++) | |
1333 { | |
1334 p = pd[i]; | |
1335 if (compare) | |
1336 { | |
1337 if (p->flags & COMMON) | |
1338 { | |
1339 if (cf) | |
1340 { | |
1341 printentry (cf, p); | |
1342 } | |
1343 } | |
1344 else if (p->flags & NEW) | |
1345 { | |
1346 if (nf) | |
1347 { | |
1348 printentry (nf, p); | |
1349 } | |
1350 } | |
1351 else | |
1352 { | |
1353 if (of) | |
1354 { | |
1355 printentry (of, p); | |
1356 } | |
1357 } | |
1358 } | |
1359 else | |
1360 { /* just print the normalized dictionary */ | |
1361 printentry (stdout, p); | |
1362 } | |
1363 } | |
1364 } | |
1365 | |
1366 static int | |
1367 diccompar (p1, p2) | |
1368 struct dicpack **p1, **p2; | |
1369 { | |
1370 int n; | |
1371 if (n = Wscmp ((*p1)->yomi, (*p2)->yomi)) | |
1372 { | |
1373 return n; | |
1374 } | |
1375 else if (n = Wscmp ((*p1)->tango, (*p2)->tango)) | |
1376 { | |
1377 return n; | |
1378 } | |
1379 else if (n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi)) | |
1380 { | |
1381 return n; | |
1382 } | |
1383 else | |
1384 { /* impossible */ | |
1385 return 0; | |
1386 } | |
1387 } | |
1388 | |
1389 static int | |
1390 dichindocompar (p1, p2) | |
1391 struct dicpack **p1, **p2; | |
1392 { | |
1393 int n; | |
1394 if (n = Wscmp ((*p1)->yomi, (*p2)->yomi)) | |
1395 { | |
1396 return n; | |
1397 } | |
1398 else if (n = ((*p2)->hindo - (*p1)->hindo)) | |
1399 { | |
1400 return n; | |
1401 } | |
1402 else if (n = Wscmp ((*p1)->tango, (*p2)->tango)) | |
1403 { | |
1404 return n; | |
1405 } | |
1406 else if (n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi)) | |
1407 { | |
1408 return n; | |
1409 } | |
1410 else | |
1411 { /* impossible */ | |
1412 return 0; | |
1413 } | |
1414 } | |
1415 | |
/*
 * Drop the first n elements of the count-element vector argv by moving
 * argv[n] .. argv[count - 1] down to argv[0] .. argv[count - n - 1].
 * The last n slots keep their previous contents; nothing is moved when
 * n >= count.
 */
void
shrinkargs (argv, n, count)
     char **argv;
     int n, count;
{
  int from;

  for (from = n; from < count; from++)
    {
      argv[from - n] = argv[from];
    }
}
1428 | |
1429 static void | |
1430 parseargs (argc, argv) | |
1431 int argc; | |
1432 char *argv[]; | |
1433 { | |
1434 int i; | |
1435 | |
1436 for (program = argv[0] + strlen (argv[0]); argv[0] < program; program--) | |
1437 { | |
1438 if (program[0] == '/') | |
1439 { | |
1440 program++; | |
1441 break; | |
1442 } | |
1443 } | |
1444 | |
1445 for (i = 1; i < argc;) | |
1446 { | |
1447 if (argv[i][0] == '-' && argv[i][2] == '\0') | |
1448 { | |
1449 switch (argv[i][1]) | |
1450 { | |
1451 case '1': | |
1452 case '2': | |
1453 case '3': | |
1454 selhinshi = argv[i][1] - '0'; | |
1455 shrinkargs (argv + i, 1, argc - i); | |
1456 argc -= 1; | |
1457 break; | |
1458 | |
1459 case 'b': | |
1460 bunrui = argv[i + 1]; | |
1461 shrinkargs (argv + i, 2, argc - i); | |
1462 argc -= 2; | |
1463 break; | |
1464 | |
1465 case 'c': | |
1466 common_out = argv[i + 1]; | |
1467 shrinkargs (argv + i, 2, argc - i); | |
1468 argc -= 2; | |
1469 break; | |
1470 | |
1471 case 'd': | |
1472 description_table = argv[i + 1]; | |
1473 shrinkargs (argv + i, 2, argc - i); | |
1474 argc -= 2; | |
1475 break; | |
1476 | |
1477 case 'f': | |
1478 copy_frequency = 1; | |
1479 shrinkargs (argv + i, 1, argc - i); | |
1480 argc -= 1; | |
1481 break; | |
1482 | |
1483 case 'h': | |
1484 ignore_hinshi_to_compare = 1; | |
1485 shrinkargs (argv + i, 1, argc - i); | |
1486 argc -= 1; | |
1487 break; | |
1488 | |
1489 case 'i': | |
1490 canna_type_output = 1; | |
1491 wnn_type_output = 0; | |
1492 shrinkargs (argv + i, 1, argc - i); | |
1493 argc -= 1; | |
1494 break; | |
1495 | |
1496 case 'j': | |
1497 extract_kana = 1; | |
1498 shrinkargs (argv + i, 1, argc - i); | |
1499 argc -= 1; | |
1500 break; | |
1501 | |
1502 case 'k': | |
1503 { | |
1504 Wchar buf[READBUFSIZE]; | |
1505 | |
1506 (void) Mbstowcs (buf, argv[i + 1], READBUFSIZE); | |
1507 specific_kind |= internkind (buf); | |
1508 } | |
1509 shrinkargs (argv + i, 2, argc - i); | |
1510 argc -= 2; | |
1511 break; | |
1512 | |
1513 case 'l': | |
1514 list_kinds = 1; | |
1515 shrinkargs (argv + i, 1, argc - i); | |
1516 argc -= 1; | |
1517 break; | |
1518 | |
1519 case 'm': | |
1520 merge_kind = 1; | |
1521 shrinkargs (argv + i, 1, argc - 1); | |
1522 argc -= 1; | |
1523 break; | |
1524 | |
1525 case 'n': | |
1526 new_out = argv[i + 1]; | |
1527 shrinkargs (argv + i, 2, argc - i); | |
1528 argc -= 2; | |
1529 break; | |
1530 | |
1531 case 'o': | |
1532 old_out = argv[i + 1]; | |
1533 shrinkargs (argv + i, 2, argc - i); | |
1534 argc -= 2; | |
1535 break; | |
1536 | |
1537 case 'p': | |
1538 sort_by_frequency = 1; | |
1539 shrinkargs (argv + i, 1, argc - i); | |
1540 argc -= 1; | |
1541 break; | |
1542 | |
1543 case 'r': | |
1544 hinshi_table = argv[i + 1]; | |
1545 shrinkargs (argv + i, 2, argc - i); | |
1546 argc -= 2; | |
1547 hinshi_direction = REVERSE; | |
1548 break; | |
1549 | |
1550 case 's': | |
1551 hinshi_table = argv[i + 1]; | |
1552 shrinkargs (argv + i, 2, argc - i); | |
1553 argc -= 2; | |
1554 break; | |
1555 | |
1556 case 'v': | |
1557 sj3_type_output = 1; | |
1558 wnn_type_output = 1; /* Wnn 形式と似ているので立てる */ | |
1559 shrinkargs (argv + i, 1, argc - i); | |
1560 argc -= 1; | |
1561 break; | |
1562 | |
1563 case 'w': | |
1564 canna_type_output = 0; | |
1565 sj3_type_output = 0; | |
1566 wnn_type_output = 1; | |
1567 shrinkargs (argv + i, 1, argc - i); | |
1568 argc -= 1; | |
1569 break; | |
1570 | |
1571 case 'x': | |
1572 merge_sj3 = 1; | |
1573 ignore_hinshi_to_compare = 1; | |
1574 shrinkargs (argv + i, 1, argc - i); | |
1575 argc -= 1; | |
1576 break; | |
1577 | |
1578 default: | |
1579 i++; | |
1580 break; | |
1581 } | |
1582 } | |
1583 else | |
1584 { | |
1585 i++; | |
1586 } | |
1587 } | |
1588 | |
1589 if (argc < 2) | |
1590 { | |
1591 (void) fprintf (stderr, "Usage: %s dic1 [dic2] [-c filecommon] ...\n", program); | |
1592 exit (1); | |
1593 } | |
1594 | |
1595 if (argv[1][0] != '-' || argv[1][1]) | |
1596 { | |
1597 in1 = fopen (argv[1], "r"); | |
1598 if (!in1) | |
1599 { | |
1600 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[1]); | |
1601 exit (1); | |
1602 } | |
1603 } | |
1604 if (argc == 3) | |
1605 { | |
1606 if (argv[2][0] != '-' || argv[2][1]) | |
1607 { | |
1608 in2 = fopen (argv[2], "r"); | |
1609 if (!in2) | |
1610 { | |
1611 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[2]); | |
1612 exit (1); | |
1613 } | |
1614 } | |
1615 } | |
1616 else | |
1617 { | |
1618 in2 = (FILE *) 0; | |
1619 } | |
1620 if (description_table) | |
1621 { | |
1622 store_description (); | |
1623 } | |
1624 } | |
1625 | |
1626 static Wchar kihonh[] = { | |
1627 (Wchar) 'k', (Wchar) 'i', (Wchar) 'h', (Wchar) 'o', (Wchar) 'n', (Wchar) 0, | |
1628 }; | |
1629 | |
1630 int | |
1631 main (argc, argv) | |
1632 int argc; | |
1633 char *argv[]; | |
1634 { | |
1635 #ifndef POD_WCHAR | |
1636 setlocale (LC_ALL, ""); | |
1637 #endif | |
1638 | |
1639 in1 = in2 = stdin; | |
1640 (void) internkind (kihonh); /* 基本辞書用。1L として登録 */ | |
1641 parseargs (argc, argv); | |
1642 storepd (in1); | |
1643 (void) fclose (in1); | |
1644 | |
1645 if (in2) | |
1646 { | |
1647 compare = 1; | |
1648 comparepd (in2); | |
1649 (void) fclose (in2); | |
1650 } | |
1651 | |
1652 if (list_kinds) | |
1653 { | |
1654 listkinds (); | |
1655 exit (0); | |
1656 } | |
1657 | |
1658 if (selhinshi) | |
1659 { | |
1660 select_hinshi (selhinshi); | |
1661 } | |
1662 else if (hinshi_table) | |
1663 { | |
1664 replace_hinshi (); | |
1665 } | |
1666 | |
1667 pdic = (struct dicpack **) malloc (ndicentries * sizeof (struct dicpack *)); | |
1668 if (pdic) | |
1669 { | |
1670 int i, j; | |
1671 struct dicpack *p; | |
1672 | |
1673 for (i = 0, j = 0; i < DICBUFSIZE; i++) | |
1674 { | |
1675 for (p = dic[i]; p; p = p->next) | |
1676 { | |
1677 pdic[j++] = p; | |
1678 } | |
1679 } | |
1680 if (sort_by_frequency) | |
1681 { | |
1682 qsort (pdic, ndicentries, sizeof (struct dicpack *), dichindocompar); | |
1683 } | |
1684 else | |
1685 { | |
1686 qsort (pdic, ndicentries, sizeof (struct dicpack *), diccompar); | |
1687 } | |
1688 sortkind (); | |
1689 showentry (pdic, ndicentries); | |
1690 } | |
1691 else | |
1692 { | |
1693 malloc_failed (); | |
1694 } | |
1695 exit (0); | |
1696 } |