Mercurial > freewnn
annotate PubdicPlus/pod.c @ 29:35bc1f2e3f14 default tip
minor fix
author | Yoshiki Yazawa <yaz@honeyplanet.jp> |
---|---|
date | Sat, 06 Mar 2010 23:55:24 +0900 |
parents | a7ccf412ba02 |
children |
rev | line source |
---|---|
0 | 1 /* Copyright 1994 Pubdic Project. |
2 * | |
3 * Permission to use, copy, modify, distribute and sell this software | |
4 * and its documentation for any purpose is hereby granted without | |
5 * fee, provided that the above copyright notice appear in all copies | |
6 * and that both that copyright notice and this permission notice | |
7 * appear in supporting documentation, and that the name of Pubdic | |
8 * Project not be used in advertising or publicity pertaining to | |
9 * distribution of the software without specific, written prior | |
10 * permission. Pubdic Project makes no representations about the | |
11 * suitability of this software for any purpose. It is provided "as | |
12 * is" without express or implied warranty. | |
13 * | |
14 * PUBDIC PROJECT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, | |
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN | |
16 * NO EVENT SHALL PUBDIC PROJECT BE LIABLE FOR ANY SPECIAL, INDIRECT OR | |
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF | |
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR | |
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
20 * PERFORMANCE OF THIS SOFTWARE. | |
21 */ | |
22 | |
23 #ifdef HAVE_CONFIG_H | |
24 # include <config.h> | |
25 #endif | |
26 | |
27 #include <stdio.h> | |
28 #if STDC_HEADERS | |
29 # include <stdlib.h> | |
30 # include <stddef.h> | |
31 # include <string.h> | |
32 #else | |
33 # if HAVE_MALLOC_H | |
34 # include <malloc.h> | |
35 # endif | |
36 # if HAVE_STRINGS_H | |
37 # include <strings.h> | |
38 # endif | |
39 #endif /* STDC_HEADERS */ | |
40 | |
41 #define POD_WCHAR | |
42 #ifdef POD_WCHAR | |
43 typedef unsigned short Wchar; | |
44 #else | |
45 #include <locale.h> | |
46 #include <widec.h> | |
47 #define Wchar wchar_t | |
48 #endif | |
49 | |
50 #if !(HAVE_BZERO) && (HAVE_MEMSET) | |
51 # define bzero(a, c) memset(a, 0, c) | |
52 #endif | |
53 | |
54 static char *program; | |
55 static int compare, ignore_hinshi_to_compare, sort_by_frequency, merge_sj3; | |
56 static int merge_kind, wnn_type_output, canna_type_output, sj3_type_output; | |
57 static int list_kinds; | |
58 static int copy_frequency, extract_kana = 0; | |
59 static long specific_kind; | |
60 static FILE *in1, *in2; | |
61 static char *common_out, *old_out, *new_out, *hinshi_table, *bunrui; | |
62 static char *description_table; | |
63 static int selhinshi = 0; | |
64 | |
65 /* hinshi_direction */ | |
66 #define INORDER 0 | |
67 #define REVERSE 1 | |
68 | |
69 static int hinshi_direction = INORDER; /* see above */ | |
70 | |
71 #define READBUFSIZE 128 | |
72 #define DICBUFSIZE (2 << 13) | |
73 #define DICBUFINDEXMASK (DICBUFSIZE - 1) | |
74 #define HINSHIBUFSIZE (2 << 13) | |
75 #define HINSHIBUFINDEXMASK (HINSHIBUFSIZE - 1) | |
76 | |
77 /* status of intern() */ | |
78 #define FOUND 0 | |
79 #define CREATE 1 | |
80 | |
/* A part-of-speech ("hinshi") name record. */

struct hinshipack
{
  /* Number of NUL-separated names packed into `hinshi' (1 when created). */
  int nhinshis;
  /* The name text; after replacement it may hold several names back to back. */
  Wchar *hinshi;
  /* Bit flags; see the values defined below. */
  unsigned flags;
  /* Next record in the same hash chain. */
  struct hinshipack *next;
};
90 | |
91 /* values of (struct hinshipack.)flags */ | |
92 #define REPLACED 1 | |
93 | |
/* Internal form of one line of the rule file used to append the
   dictionary-form endings (the original author adds: "I think"). */

struct descpack
{
  Wchar *hinshi;		/* part of speech the rule applies to */
  Wchar *tandesc;		/* suffix appended to the word */
  Wchar *yomdesc;		/* suffix appended to the reading */
  struct descpack *next;	/* next rule in the same hash chain */
};
101 | |
/* Entry "kind" (category) bookkeeping. */

struct kindpack
{
  Wchar *kind;			/* name of the kind */
  long kindbit;			/* bit assigned to this kind in a kind mask */
};
109 | |
/* One dictionary entry. */

struct dicpack
{
  Wchar *yomi;			/* reading */
  Wchar *tango;			/* word (candidate) */
  struct hinshipack *hinshi;	/* shared part-of-speech record */
  int hindo;			/* frequency value */
  long kind;			/* mask of kind bits */
  Wchar *extdata;		/* extra text emitted in front of the part of
				   speech when merging for SJ3; may be null */
  unsigned flags;		/* SEE BELOW */
  struct dicpack *next;		/* next entry in the same hash chain */
};
122 | |
123 /* values of (struct dicpack.)flags */ | |
124 #define COMMON 001 | |
125 #define NEW 002 | |
126 | |
127 #if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) | |
128 /* Prototype for C89 (or later) */ | |
129 #ifdef POD_WCHAR | |
130 size_t Mbstowcs (Wchar *d, char *ss, int n); | |
131 size_t Wcstombs (char *d, Wchar *s, int n); | |
132 int Wscmp (register Wchar *s1, register Wchar *s2); | |
133 Wchar *Wscpy (Wchar *d, register Wchar *s); | |
134 int Wslen (Wchar *s); | |
135 int Watoi (Wchar *s); | |
136 static void Fputws (Wchar *s, FILE *f); | |
137 Wchar *Fgetws (Wchar *buf, int siz, FILE *f); | |
138 #endif /* POD_WCHAR */ | |
139 | |
140 static int all_kana (Wchar *s); | |
141 static Wchar *findslash (Wchar *s); | |
142 static Wchar *extstr (Wchar *p, Wchar **pp, int *key_return); | |
143 static void malloc_failed (void); | |
144 static struct hinshipack *internhinshi (Wchar *str, int flag); | |
145 static void replace_hinshi (void); | |
146 static void select_hinshi (int n); | |
147 static void freedesc (struct descpack *p); | |
148 static struct descpack *interndesc (Wchar *hin, Wchar *tan, Wchar *yom); | |
149 static struct descpack *searchdesc (Wchar *hin); | |
150 static void store_description (void); | |
151 static long internkind (Wchar *s); | |
152 static void listkinds (void); | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
153 static int kindcompar (const void *p1, const void *p2); |
0 | 154 static void sortkind (void); |
155 static struct dicpack *intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags); | |
156 static void storepd (FILE *file); | |
157 static void comparepd (FILE *file); | |
158 static void canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n); | |
159 static void entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex); | |
160 static void printentry (FILE *cf, struct dicpack *p); | |
161 static void showentry (struct dicpack **pd, int n); | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
162 static int diccompar (const void *pp1, const void *pp2); |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
163 static int dichindocompar (const void *pp1, const void *pp2); |
0 | 164 void shrinkargs (char **argv, int n, int count); |
165 static void parseargs (int argc, char *argv[]); | |
166 #endif | |
167 | |
168 #ifndef POD_WCHAR | |
169 # define Mbstowcs mbstowcs | |
170 # define Wcstombs wcstombs | |
171 # define Wscmp wscmp | |
172 # define Wscpy wscpy | |
173 # define Wslen wslen | |
174 # define Fgetws fgetws | |
175 # define Fputws fputws | |
176 #else | |
177 # define SS2 0x8e | |
178 # define SS3 0x8f | |
179 # define MSB 0x80 | |
180 # define MSK 0x7f | |
181 | |
182 # define WCG0 0x0000 | |
183 # define WCG1 0x8080 | |
184 # define WCG2 0x0080 | |
185 # define WCG3 0x8000 | |
186 # define WCMSK 0x8080 | |
187 | |
/*
 * Mbstowcs -- convert the EUC multibyte string SS into Wchar codes in D.
 *
 * Encoding produced:
 *   7-bit byte              -> the byte itself
 *   SS2 + byte              -> that byte (kana)
 *   SS3 + b1 + b2           -> (b1 << 8) | (b2 & MSK)
 *   other 8-bit lead + b2   -> (lead << 8) | b2
 *
 * At most N Wchars are stored.  A terminating 0 is stored only when
 * fewer than N characters were produced, so D never needs more than N
 * elements (the same contract as the C library mbstowcs() that this
 * function stands in for).  A multibyte sequence cut short by the end
 * of SS is dropped instead of being read past the terminator.
 *
 * Returns the number of Wchars stored, not counting the terminator.
 */
size_t
Mbstowcs (Wchar *d, char *ss, int n)
{
  Wchar *out = d;
  unsigned char *in = (unsigned char *) ss;
  int ch;

  while ((out - d < n) && (ch = *in) != 0)
    {
      in++;
      if (!(ch & MSB))
	{
	  *out++ = (Wchar) ch;
	}
      else if (ch == SS2)
	{			/* kana */
	  if (!in[0])
	    break;		/* truncated sequence */
	  *out++ = (Wchar) *in++;
	}
      else if (ch == SS3)
	{
	  if (!in[0] || !in[1])
	    break;		/* truncated sequence */
	  *out++ = (Wchar) ((in[0] << 8) | (in[1] & MSK));
	  in += 2;
	}
      else
	{
	  if (!in[0])
	    break;		/* truncated sequence */
	  *out++ = (Wchar) ((ch << 8) | (in[0] & 0xff));
	  in++;
	}
    }
  /* Only terminate when the terminator fits inside the N-element limit. */
  if (out - d < n)
    *out = (Wchar) 0;
  return out - d;
}
224 | |
/*
 * Wcstombs -- convert the Wchar string S into EUC bytes in D.
 *
 * The code set of each character is chosen from its 0x8080 bits:
 *   WCG0 -> one byte, WCG1 -> two bytes,
 *   WCG2 -> SS2 + one byte, WCG3 -> SS3 + two bytes.
 *
 * N is the size of D in bytes.  Conversion stops before the first
 * character that would not leave room for the terminating NUL, so at
 * most N bytes (terminator included) are written.  Nothing is written
 * when N is not positive.
 *
 * Returns the number of bytes stored, not counting the terminator.
 */
size_t
Wcstombs (char *d, Wchar *s, int n)
{
  char *out = d;
  Wchar ch;

  if (n <= 0)
    return 0;

  while ((ch = *s++) != 0)
    {
      int need;

      switch (ch & WCMSK)
	{
	case WCG0:
	  need = 1;
	  break;
	case WCG3:
	  need = 3;
	  break;
	default:		/* WCG1 and WCG2 */
	  need = 2;
	  break;
	}
      /* Keep one byte in reserve for the terminator. */
      if ((out - d) + need >= n)
	break;

      switch (ch & WCMSK)
	{
	case WCG0:
	  *out++ = (char) ch;
	  break;

	case WCG1:
	  *out++ = (char) ((ch >> 8) & 0xff);
	  *out++ = (char) (ch & 0xff);
	  break;

	case WCG2:
	  *out++ = (char) SS2;
	  *out++ = (char) ch;
	  break;

	case WCG3:
	  *out++ = (char) SS3;
	  *out++ = (char) ((ch >> 8) & 0xff);
	  *out++ = (char) ((ch & 0xff) | MSB);
	  break;
	}
    }
  *out = '\0';
  return out - d;
}
262 | |
/*
 * Wscmp -- compare two Wchar strings.
 *
 * Walks both strings in step for as long as the current characters are
 * equal and the end of the string has not been reached, then returns
 * *s1 - *s2 at the stopping position: zero when the strings are
 * identical, otherwise the difference of the first mismatching pair.
 */
int
Wscmp (Wchar *s1, Wchar *s2)
{
  for (;; s1++, s2++)
    {
      int diff = *s1 - *s2;

      if (diff != 0 || *s1 == 0)
	return diff;
    }
}
286 | |
/* Wscpy -- copy the Wchar string S, terminator included, to D.
   Returns D. */
Wchar *
Wscpy (Wchar *d, Wchar *s)
{
  Wchar *dst = d;

  do
    {
      *dst = *s++;
    }
  while (*dst++);
  return d;
}
301 | |
/* Wslen -- number of Wchars in S before the terminating 0. */
int
Wslen (Wchar *s)
{
  int len = 0;

  while (s[len])
    len++;
  return len;
}
312 | |
/* Watoi -- value of the run of ASCII decimal digits at the start of S.
   Stops at the first non-digit; returns 0 when S does not start with a
   digit.  No sign and no leading white space are accepted. */
int
Watoi (Wchar *s)
{
  int value = 0;

  for (; (Wchar) '0' <= *s && *s <= (Wchar) '9'; s++)
    {
      value *= 10;
      value += *s - (Wchar) '0';
    }
  return value;
}
327 | |
328 static void | |
329 Fputws (s, f) | |
330 Wchar *s; | |
331 FILE *f; | |
332 { | |
333 char buf[READBUFSIZE]; | |
334 | |
335 if (Wcstombs (buf, s, READBUFSIZE)) | |
336 { | |
337 (void) fputs (buf, f); | |
338 } | |
339 } | |
340 | |
341 Wchar * | |
342 Fgetws (buf, siz, f) | |
343 Wchar *buf; | |
344 int siz; | |
345 FILE *f; | |
346 { | |
347 char mbuf[READBUFSIZE], *p; | |
348 | |
349 p = fgets (mbuf, READBUFSIZE, f); | |
350 if (p) | |
351 { | |
352 if (Mbstowcs (buf, mbuf, siz)) | |
353 { | |
354 return buf; | |
355 } | |
356 } | |
357 return (Wchar *) 0; | |
358 } | |
359 #endif | |
360 | |
361 /* s が全てカタカナから構成されているかどうかを返す関数 */ | |
362 | |
363 static int | |
364 all_kana (s) | |
365 Wchar *s; | |
366 { | |
367 static Wchar xa = 0, xke, aa, *p; | |
368 | |
369 if (!xa) | |
370 { | |
371 Mbstowcs (&xa, "\045\041", 1); | |
372 Mbstowcs (&xke, "\045\166", 1); | |
373 Mbstowcs (&aa, "\041\074", 1); | |
374 } | |
375 | |
376 for (p = s; *p; p++) | |
377 { | |
378 if (!(*p == aa || (xa <= *p && *p <= xke))) | |
379 { | |
380 return 0; | |
381 } | |
382 } | |
383 | |
384 return 1; | |
385 } | |
386 | |
/* findslash -- return a pointer to the first '/' in S, or a null
   pointer when S contains none. */

static Wchar *
findslash (Wchar *s)
{
  for (; *s; s++)
    {
      if (*s == (Wchar) '/')
	return s;
    }
  return (Wchar *) 0;
}
403 | |
/*
 * extstr -- cut one token out of the line at P.
 *
 * Leading blanks and tabs are skipped; the token runs up to the next
 * blank, tab, newline or end of string, and is terminated in place by
 * overwriting that delimiter with 0.
 *
 *   pp          when not null, receives the position at which the next
 *               call should continue
 *   key_return  when not null, receives the sum of the token's
 *               character codes (used by callers as a hash seed)
 *
 * When the token ends at the end of the string the continuation
 * position stays ON the terminator instead of moving past it, so
 * further calls return empty tokens rather than scanning whatever
 * happens to follow the string in the buffer.
 *
 * Returns a pointer to the start of the token (possibly empty).
 */

static Wchar *
extstr (Wchar *p, Wchar **pp, int *key_return)
{
  Wchar *res;
  int key = 0;

  while (*p == (Wchar) ' ' || *p == (Wchar) '\t')
    p++;
  res = p;
  while (*p && *p != (Wchar) ' ' && *p != (Wchar) '\t' && *p != (Wchar) '\n')
    {
      key += (int) *p++;
    }
  if (*p)
    {
      /* Replace the delimiter and step over it. */
      *p++ = (Wchar) '\0';
    }
  if (pp)
    *pp = p;
  if (key_return)
    *key_return = key;
  return res;
}
428 | |
429 static struct hinshipack *partsofspeech[HINSHIBUFSIZE]; | |
430 | |
431 static void | |
432 malloc_failed () | |
433 { | |
434 (void) fprintf (stderr, "%s: malloc failed.\n", program); | |
435 } | |
436 | |
437 /* 品詞名を品詞名テーブルに登録する */ | |
438 | |
439 static struct hinshipack * | |
440 internhinshi (str, flag) | |
441 Wchar *str; | |
442 int flag; | |
443 { | |
444 struct hinshipack *p, **pp; | |
445 Wchar *s; | |
446 int key = 0; | |
447 | |
448 for (s = str; *s; s++) | |
449 key += (int) *s; | |
450 key = ((unsigned) key & HINSHIBUFINDEXMASK); | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
451 for (pp = partsofspeech + key; (p = *pp); pp = &(p->next)) |
0 | 452 { |
453 if (!Wscmp (p->hinshi, str)) | |
454 { | |
455 return p; | |
456 } | |
457 } | |
458 if (flag) | |
459 { | |
460 p = (struct hinshipack *) malloc (sizeof (struct hinshipack)); | |
461 if (p) | |
462 { | |
463 *pp = p; | |
464 (void) bzero (p, sizeof (struct hinshipack)); | |
465 p->hinshi = (Wchar *) malloc ((Wslen (str) + 1) * sizeof (Wchar)); | |
466 if (p->hinshi) | |
467 { | |
468 (void) Wscpy (p->hinshi, str); | |
469 p->nhinshis = 1; | |
470 return p; | |
471 } | |
472 free (p); | |
473 } | |
474 malloc_failed (); | |
475 } | |
476 return (struct hinshipack *) 0; | |
477 } | |
478 | |
479 /* 品詞名を置き換える */ | |
480 | |
481 static void | |
482 replace_hinshi () | |
483 { | |
484 FILE *f; | |
485 Wchar readbuf[READBUFSIZE], *to, *from, *s; | |
486 struct hinshipack *hinshientry, *p; | |
487 int i, err = 0; | |
488 | |
489 f = fopen (hinshi_table, "r"); | |
490 if (!f) | |
491 { | |
492 (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, hinshi_table); | |
493 exit (1); | |
494 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
495 while ((s = Fgetws (readbuf, READBUFSIZE, f))) |
0 | 496 { |
497 from = extstr (s, &s, 0); | |
498 to = extstr (s, &s, 0); | |
499 if (hinshi_direction == REVERSE) | |
500 { | |
501 Wchar *xx = from; | |
502 from = to; | |
503 to = xx; | |
504 } | |
505 | |
506 hinshientry = internhinshi (from, 0); | |
507 if (hinshientry) | |
508 { | |
509 Wchar *xx; | |
510 | |
511 xx = (Wchar *) malloc ((Wslen (to) + 1) * sizeof (Wchar)); | |
512 if (xx) | |
513 { | |
514 Wchar *cp; | |
515 int n = 1; | |
516 | |
517 (void) Wscpy (xx, to); | |
518 free (hinshientry->hinshi); | |
519 hinshientry->hinshi = xx; | |
520 for (cp = xx; *cp; cp++) | |
521 { | |
522 if (*cp == (Wchar) '/') | |
523 { | |
524 *cp = (Wchar) 0; | |
525 n++; | |
526 } | |
527 } | |
528 hinshientry->nhinshis = n; | |
529 hinshientry->flags |= REPLACED; | |
530 } | |
531 else | |
532 { | |
533 malloc_failed (); | |
534 } | |
535 } | |
536 } | |
537 (void) fclose (f); | |
538 | |
539 for (i = 0; i < HINSHIBUFSIZE; i++) | |
540 { | |
541 for (p = partsofspeech[i]; p; p = p->next) | |
542 { | |
543 if (!(p->flags & REPLACED)) | |
544 { | |
545 (void) fprintf (stderr, "%s: The replacement for \"", program); | |
546 Fputws (p->hinshi, stderr); | |
547 (void) fprintf (stderr, "\" is not mentioned in the table.\n"); | |
548 err = 1; | |
549 } | |
550 } | |
551 } | |
552 if (err) | |
553 { | |
554 exit (1); | |
555 } | |
556 } | |
557 | |
558 static void | |
559 select_hinshi (n) | |
560 int n; | |
561 { | |
562 Wchar *s, *t, *xx; | |
563 struct hinshipack *p; | |
564 int i; | |
565 | |
566 if (!n) | |
567 return; | |
568 | |
569 for (i = 0; i < HINSHIBUFSIZE; i++) | |
570 { | |
571 for (p = partsofspeech[i]; p; p = p->next) | |
572 { | |
573 switch (n) | |
574 { | |
575 case 1: | |
576 s = findslash (p->hinshi); | |
577 if (s) | |
578 { | |
579 *s = (Wchar) 0; | |
580 } | |
581 break; | |
582 | |
583 case 2: | |
584 s = findslash (p->hinshi); | |
585 if (s) | |
586 { | |
587 s++; | |
588 t = findslash (s); | |
589 if (t) | |
590 { | |
591 xx = (Wchar *) malloc ((t - s + 1) * sizeof (Wchar)); | |
592 if (xx) | |
593 { | |
594 *t = (Wchar) 0; | |
595 Wscpy (xx, s); | |
596 t = p->hinshi; | |
597 p->hinshi = xx; | |
598 (void) free ((char *) t); | |
599 } | |
600 } | |
601 } | |
602 break; | |
603 | |
604 case 3: | |
605 s = findslash (p->hinshi); | |
606 if (s) | |
607 { | |
608 t = findslash (s + 1); | |
609 if (t) | |
610 { | |
611 t++; | |
612 xx = (Wchar *) malloc ((Wslen (t) + 1) * sizeof (Wchar)); | |
613 if (xx) | |
614 { | |
615 Wscpy (xx, t); | |
616 t = p->hinshi; | |
617 p->hinshi = xx; | |
618 (void) free ((char *) t); | |
619 } | |
620 } | |
621 } | |
622 break; | |
623 | |
624 default: | |
625 break; | |
626 } | |
627 } | |
628 } | |
629 } | |
630 | |
631 static void | |
632 freedesc (p) | |
633 struct descpack *p; | |
634 { | |
635 free (p->hinshi); | |
636 free (p->tandesc); | |
637 free (p->yomdesc); | |
638 free (p); | |
639 } | |
640 | |
641 static struct descpack *description[HINSHIBUFSIZE]; | |
642 | |
643 /* ルールの登録 */ | |
644 | |
645 static struct descpack * | |
646 interndesc (hin, tan, yom) | |
647 Wchar *hin, *tan, *yom; | |
648 { | |
649 struct descpack *p, **pp, *next = (struct descpack *) 0; | |
650 Wchar *s; | |
651 int key = 0; | |
652 | |
653 for (s = hin; *s; s++) | |
654 key += (int) *s; | |
655 key = ((unsigned) key & HINSHIBUFINDEXMASK); | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
656 for (pp = description + key; (p = *pp); pp = &(p->next)) |
0 | 657 { |
658 if (!Wscmp (p->hinshi, hin)) | |
659 { | |
660 if (!Wscmp (p->tandesc, tan) && !Wscmp (p->yomdesc, yom)) | |
661 { | |
662 return p; | |
663 } | |
664 else | |
665 { | |
666 *pp = next = p->next; | |
667 freedesc (p); | |
668 break; | |
669 } | |
670 } | |
671 } | |
672 p = (struct descpack *) malloc (sizeof (struct descpack)); | |
673 if (p) | |
674 { | |
675 *pp = p; | |
676 (void) bzero (p, sizeof (struct descpack)); | |
677 p->next = next; | |
678 p->hinshi = (Wchar *) malloc ((Wslen (hin) + 1) * sizeof (Wchar)); | |
679 if (p->hinshi) | |
680 { | |
681 (void) Wscpy (p->hinshi, hin); | |
682 p->tandesc = (Wchar *) malloc ((Wslen (tan) + 1) * sizeof (Wchar)); | |
683 if (p->tandesc) | |
684 { | |
685 (void) Wscpy (p->tandesc, tan); | |
686 p->yomdesc = (Wchar *) malloc ((Wslen (yom) + 1) * sizeof (Wchar)); | |
687 if (p->yomdesc) | |
688 { | |
689 (void) Wscpy (p->yomdesc, yom); | |
690 return p; | |
691 } | |
692 free (p->tandesc); | |
693 } | |
694 free (p->hinshi); | |
695 } | |
696 free (p); | |
697 } | |
698 malloc_failed (); | |
699 return (struct descpack *) 0; | |
700 } | |
701 | |
702 /* ルールの探索 */ | |
703 | |
704 static struct descpack * | |
705 searchdesc (hin) | |
706 Wchar *hin; | |
707 { | |
708 struct descpack *p, **pp; | |
709 Wchar *s; | |
710 int key = 0; | |
711 | |
712 for (s = hin; *s; s++) | |
713 key += (int) *s; | |
714 key = ((unsigned) key & HINSHIBUFINDEXMASK); | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
715 for (pp = description + key; (p = *pp); pp = &(p->next)) |
0 | 716 { |
717 if (!Wscmp (p->hinshi, hin)) | |
718 { | |
719 return p; | |
720 } | |
721 } | |
722 return (struct descpack *) 0; | |
723 } | |
724 | |
725 static void | |
726 store_description () | |
727 { | |
728 FILE *f; | |
729 Wchar readbuf[READBUFSIZE], *hin, *tan, *yom, *s; | |
730 | |
731 if (!description_table) | |
732 { | |
733 return; | |
734 } | |
735 | |
736 f = fopen (description_table, "r"); | |
737 if (!f) | |
738 { | |
739 (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, description_table); | |
740 exit (1); | |
741 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
742 while ((s = Fgetws (readbuf, READBUFSIZE, f))) |
0 | 743 { |
744 Wchar nl[1]; | |
745 | |
746 nl[0] = (Wchar) 0; | |
747 hin = tan = yom = nl; | |
748 hin = extstr (s, &s, 0); | |
749 if (*hin) | |
750 { | |
751 tan = extstr (s, &s, 0); | |
752 if (*tan) | |
753 { | |
754 yom = extstr (s, &s, 0); | |
755 } | |
756 } | |
757 | |
758 interndesc (hin, tan, yom); | |
759 } | |
760 (void) fclose (f); | |
761 } | |
762 | |
763 struct kindpack kinds[sizeof (long) * 8]; | |
764 static int nkinds; | |
765 | |
766 #define KIHONBIT 1L | |
767 | |
768 /* 種別の登録 */ | |
769 | |
770 static long | |
771 internkind (s) | |
772 Wchar *s; | |
773 { | |
774 int i; | |
775 Wchar *p; | |
776 | |
777 p = findslash (s); | |
778 if (p) | |
779 { | |
780 long res; | |
781 | |
782 *p = (Wchar) '\0'; | |
783 res = internkind (s); | |
784 res |= internkind (p + 1); | |
785 return res; | |
786 } | |
787 else | |
788 { | |
789 for (i = 0; i < nkinds; i++) | |
790 { | |
791 if (!Wscmp (s, kinds[i].kind)) | |
792 { | |
793 return kinds[i].kindbit; | |
794 } | |
795 } | |
796 if (nkinds < (sizeof (long) * 8) && (kinds[nkinds].kind = (Wchar *) malloc ((Wslen (s) + 1) * sizeof (Wchar)))) | |
797 { | |
798 (void) Wscpy (kinds[nkinds].kind, s); | |
799 kinds[nkinds].kindbit = 1 << nkinds; | |
800 return kinds[nkinds++].kindbit; | |
801 } | |
802 return 0; | |
803 } | |
804 } | |
805 | |
806 /* 種別の一覧の出力 */ | |
807 | |
808 static void | |
809 listkinds () | |
810 { | |
811 int i; | |
812 | |
813 for (i = 0; i < nkinds; i++) | |
814 { | |
815 Fputws (kinds[i].kind, stdout); | |
816 putchar ('\n'); | |
817 } | |
818 } | |
819 | |
820 static int | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
821 kindcompar (const void *p1, const void *p2) |
0 | 822 { |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
823 struct kindpack *k1, *k2; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
824 k1 = (struct kindpack *)p1; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
825 k2 = (struct kindpack *)p2; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
826 |
0 | 827 return Wscmp (k1->kind, k2->kind); |
828 } | |
829 | |
830 static void | |
831 sortkind () | |
832 { | |
833 qsort (kinds, nkinds, sizeof (struct kindpack), kindcompar); | |
834 } | |
835 | |
836 static struct dicpack *dic[DICBUFSIZE], **pdic; | |
837 static int ndicentries = 0; | |
838 | |
839 /* | |
840 | |
841 intern -- 辞書エントリの検索/登録 | |
842 | |
843 第6引数の stat としてヌルでないアドレスが指定された場合には、同じエントリ | |
844 が登録されていない場合には登録を行う。アドレスがヌルの場合には登録しない。 | |
845 | |
846 flags によっていろいろと指定をする。(以下を見てね)。 | |
847 | |
848 hinshi に 0 を渡してはいけない。kind は 0 を渡しても可だが、-m の時じゃない | |
849 マッチはしないので注意。 | |
850 | |
851 */ | |
852 | |
853 /* flags */ | |
854 #define IGNORE_HINSHI 1L | |
855 #define IGNORE_KIND 2L | |
856 | |
857 static struct dicpack * | |
858 intern (key, yomi, kouho, hinshi, hindo, kind, stat, flags) | |
859 int key, hindo, *stat; | |
860 Wchar *yomi, *kouho, *hinshi; | |
861 long kind, flags; | |
862 { | |
863 struct dicpack *p, **pp; | |
864 struct descpack *dp; | |
865 Wchar nl[1], *yomdesc = nl, *tandesc = nl; | |
866 Wchar *yom = (Wchar *) 0, *tan = (Wchar *) 0, *dhinshi, *dh; | |
867 | |
868 nl[0] = (Wchar) '\0'; | |
869 | |
870 if (description_table) | |
871 { | |
872 dhinshi = dh = hinshi; /* かんなの品詞を探す */ | |
873 while (*dh) | |
874 { | |
875 if (*dh++ == (Wchar) '/') | |
876 { | |
877 dhinshi = dh; | |
878 } | |
879 } | |
880 dp = searchdesc (dhinshi); | |
881 if (dp) | |
882 { | |
883 yomdesc = dp->yomdesc; | |
884 tandesc = dp->tandesc; | |
885 if (Wslen (yomdesc)) | |
886 { | |
887 Wchar *t; | |
888 t = (Wchar *) malloc ((Wslen (yomi) + Wslen (yomdesc) + 1) * sizeof (Wchar)); | |
889 if (t) | |
890 { | |
891 Wscpy (t, yomi); | |
892 yom = yomi = t; | |
893 Wscpy (yomi + Wslen (yomi), yomdesc); | |
894 } | |
895 } | |
896 if (Wslen (tandesc)) | |
897 { | |
898 Wchar *t; | |
899 t = (Wchar *) malloc ((Wslen (kouho) + Wslen (tandesc) + 1) * sizeof (Wchar)); | |
900 if (t) | |
901 { | |
902 Wscpy (t, kouho); | |
903 tan = kouho = t; | |
904 Wscpy (kouho + Wslen (kouho), tandesc); | |
905 } | |
906 } | |
907 } | |
908 else | |
909 { | |
910 char foo[64]; | |
911 | |
912 fprintf (stderr, "no description rule for "); | |
913 Wcstombs (foo, dhinshi, 64); | |
914 fprintf (stderr, "%s.\n", foo); | |
915 } | |
916 } | |
917 | |
918 key = ((unsigned) key & DICBUFINDEXMASK); | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
919 for (pp = dic + key; (p = *pp); pp = &(p->next)) |
0 | 920 { |
921 if (!Wscmp (p->yomi, yomi) && !Wscmp (p->tango, kouho) && ((flags & IGNORE_HINSHI) || !Wscmp (p->hinshi->hinshi, hinshi)) && ((flags & IGNORE_KIND) || ((p->kind & kind) == kind))) | |
922 { | |
923 /* match */ | |
924 if (stat) | |
925 *stat = FOUND; | |
926 if (yom) | |
927 free (yom); | |
928 if (tan) | |
929 free (tan); | |
930 return p; | |
931 } | |
932 } | |
933 if (stat) | |
934 { | |
935 p = (struct dicpack *) malloc (sizeof (struct dicpack)); | |
936 if (p) | |
937 { | |
938 *pp = p; | |
939 (void) bzero (p, sizeof (struct dicpack)); | |
940 p->yomi = (Wchar *) malloc ((Wslen (yomi) + 1) * sizeof (Wchar)); | |
941 if (p->yomi) | |
942 { | |
943 (void) Wscpy (p->yomi, yomi); | |
944 p->tango = (Wchar *) malloc ((Wslen (kouho) + 1) * sizeof (Wchar)); | |
945 if (p->tango) | |
946 { | |
947 (void) Wscpy (p->tango, kouho); | |
948 p->hinshi = internhinshi (hinshi, 1); | |
949 if (p->hinshi) | |
950 { | |
951 p->hindo = hindo; | |
952 *stat = CREATE; | |
953 ndicentries++; | |
954 p->kind = kind; | |
955 p->extdata = (Wchar *) 0; | |
956 if (yom) | |
957 free (yom); | |
958 if (tan) | |
959 free (tan); | |
960 return p; | |
961 } | |
962 free (p->tango); | |
963 } | |
964 free (p->yomi); | |
965 } | |
966 free (p); | |
967 } | |
968 malloc_failed (); | |
969 } | |
970 if (yom) | |
971 free (yom); | |
972 if (tan) | |
973 free (tan); | |
974 return (struct dicpack *) 0; | |
975 } | |
976 | |
/* Call fn on every registered entry (currently compiled out). */
#if 0
static void
for_all_interned (void (*fn) ())
{
  struct dicpack **bucket, *p;

  for (bucket = dic; bucket < dic + DICBUFSIZE; bucket++)
    {
      for (p = *bucket; p; p = p->next)
	(*fn) (p);
    }
}
#endif
0 | 995 |
996 static void | |
997 storepd (file) | |
998 FILE *file; | |
999 { | |
1000 Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind; | |
1001 int nhindo, key, tkey, stat; | |
1002 long kindbit; | |
1003 struct dicpack *dicentry; | |
1004 | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1005 while ((p = Fgetws (readbuf, READBUFSIZE, file))) |
0 | 1006 { |
1007 key = 0; | |
1008 yomi = extstr (p, &p, &tkey); | |
1009 key += tkey; | |
1010 kouho = extstr (p, &p, &tkey); | |
1011 key += tkey; | |
1012 hinshi = extstr (p, &p, 0); | |
1013 hindo = extstr (p, &p, 0); | |
1014 nhindo = Watoi (hindo); | |
1015 | |
1016 kind = extstr (p, 0, 0); | |
1017 if (*kind) | |
1018 { | |
1019 kindbit = internkind (kind); | |
1020 } | |
1021 else | |
1022 { | |
1023 kindbit = KIHONBIT; | |
1024 } | |
1025 | |
1026 dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, &stat, IGNORE_KIND); | |
1027 if (dicentry) | |
1028 { | |
1029 dicentry->kind |= kindbit; | |
1030 } | |
1031 } | |
1032 } | |
1033 | |
1034 static void | |
1035 comparepd (file) | |
1036 FILE *file; | |
1037 { | |
1038 Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind; | |
1039 int nhindo, key, tkey, stat, *statp = &stat; | |
1040 struct dicpack *dicentry; | |
1041 long kindbit, flags = 0L; | |
1042 | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1043 while ((p = Fgetws (readbuf, READBUFSIZE, file))) |
0 | 1044 { |
1045 key = 0; | |
1046 yomi = extstr (p, &p, &tkey); | |
1047 key += tkey; | |
1048 kouho = extstr (p, &p, &tkey); | |
1049 key += tkey; | |
1050 hinshi = extstr (p, &p, 0); | |
1051 if (ignore_hinshi_to_compare) | |
1052 { | |
1053 flags |= IGNORE_HINSHI; | |
1054 } | |
1055 hindo = extstr (p, &p, 0); | |
1056 nhindo = Watoi (hindo); | |
1057 | |
1058 kind = extstr (p, 0, 0); | |
1059 if (*kind) | |
1060 { | |
1061 kindbit = internkind (kind); | |
1062 } | |
1063 else | |
1064 { | |
1065 kindbit = KIHONBIT; | |
1066 } | |
1067 if (merge_kind || merge_sj3) | |
1068 { | |
1069 flags |= IGNORE_KIND; | |
1070 } | |
1071 if (copy_frequency) | |
1072 { | |
1073 statp = (int *) 0; | |
1074 } | |
1075 | |
1076 dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, statp, flags); | |
1077 | |
1078 if (dicentry) | |
1079 { | |
1080 if (copy_frequency) | |
1081 { | |
1082 dicentry->hindo = nhindo; | |
1083 dicentry->flags &= ~COMMON; | |
1084 } | |
1085 else if (ignore_hinshi_to_compare && stat == FOUND) | |
1086 { | |
1087 /* この場合、同じキーのチェーンが返る */ | |
1088 struct dicpack *pd; | |
1089 | |
1090 for (pd = dicentry; pd; pd = pd->next) | |
1091 { | |
1092 if (!Wscmp (pd->yomi, yomi) && !Wscmp (pd->tango, kouho)) | |
1093 { | |
1094 pd->flags |= COMMON; | |
1095 if (!merge_sj3) | |
1096 { | |
1097 pd->kind |= kindbit; | |
1098 } | |
1099 | |
1100 if (merge_sj3) | |
1101 { | |
1102 int len = 0; | |
1103 Wchar *dat; | |
1104 | |
1105 if (pd->extdata) | |
1106 { | |
1107 len = Wslen (pd->extdata); | |
1108 } | |
1109 dat = (Wchar *) malloc ((Wslen (hinshi) + 1 + len) * sizeof (Wchar)); | |
1110 if (dat) | |
1111 { | |
1112 if (len) | |
1113 { | |
1114 (void) Wscpy (dat, pd->extdata); | |
1115 (void) free ((char *) pd->extdata); | |
1116 } | |
1117 (void) Wscpy (dat + len, hinshi); | |
1118 pd->extdata = dat; | |
1119 } | |
1120 } | |
1121 } | |
1122 } | |
1123 } | |
1124 else | |
1125 { | |
1126 dicentry->kind |= kindbit; | |
1127 if (stat == FOUND) | |
1128 { | |
1129 dicentry->flags |= COMMON; | |
1130 } | |
1131 else | |
1132 { /* CREATE */ | |
1133 dicentry->flags |= NEW; | |
1134 } | |
1135 } | |
1136 } | |
1137 } | |
1138 } | |
1139 | |
1140 static void | |
1141 canna_output (cf, p, h, n) | |
1142 FILE *cf; | |
1143 struct dicpack *p; | |
1144 Wchar *h; | |
1145 int n; | |
1146 { | |
1147 for (; n-- > 0; h += Wslen (h) + 1) | |
1148 { | |
1149 Fputws (p->yomi, cf); | |
1150 (void) putc (' ', cf); | |
1151 Fputws (h, cf); | |
1152 if (p->hindo) | |
1153 { | |
1154 (void) fprintf (cf, "*%d", p->hindo); | |
1155 } | |
1156 (void) putc (' ', cf); | |
1157 Fputws (p->tango, cf); | |
1158 (void) putc ('\n', cf); | |
1159 } | |
1160 } | |
1161 | |
1162 static void | |
1163 entry_out (cf, p, h, n, ex) | |
1164 FILE *cf; | |
1165 struct dicpack *p; | |
1166 Wchar *h; | |
1167 int n; | |
1168 Wchar *ex; | |
1169 { | |
1170 int i, f = 1; | |
1171 long b; | |
1172 | |
1173 for (; n-- > 0; h += Wslen (h) + 1) | |
1174 { | |
1175 Fputws (p->yomi, cf); | |
1176 (void) putc (' ', cf); | |
1177 Fputws (p->tango, cf); | |
1178 (void) putc (' ', cf); | |
1179 if (merge_sj3 && ex) | |
1180 { | |
1181 Fputws (ex, cf); | |
1182 (void) putc ('/', cf); | |
1183 } | |
1184 Fputws (h, cf); | |
1185 if (!sj3_type_output) | |
1186 { | |
1187 (void) fprintf (cf, " %d", p->hindo); | |
1188 } | |
1189 | |
1190 if (!wnn_type_output) | |
1191 { | |
1192 if (bunrui) | |
1193 { | |
1194 (void) printf (" %s", bunrui); | |
1195 } | |
1196 else | |
1197 { | |
1198 if (specific_kind) | |
1199 { | |
1200 b = (specific_kind & p->kind); | |
1201 } | |
1202 else | |
1203 { | |
1204 b = p->kind; | |
1205 } | |
1206 if (b != KIHONBIT) | |
1207 { /* 基本だけだったら何も書かない */ | |
1208 for (i = 0; i < nkinds; i++) | |
1209 { | |
1210 if (b & kinds[i].kindbit) | |
1211 { | |
1212 if (f) | |
1213 { | |
1214 (void) putc (' ', cf); | |
1215 f = 0; | |
1216 } | |
1217 else | |
1218 { | |
1219 (void) putc ('/', cf); | |
1220 } | |
1221 Fputws (kinds[i].kind, cf); | |
1222 } | |
1223 } | |
1224 } | |
1225 } | |
1226 } | |
1227 (void) putc ('\n', cf); | |
1228 } | |
1229 } | |
1230 | |
1231 /* p で表されるエントリをファイル cf に出力する */ | |
1232 | |
1233 static void | |
1234 printentry (cf, p) | |
1235 FILE *cf; | |
1236 struct dicpack *p; | |
1237 { | |
1238 if (specific_kind && !(p->kind & specific_kind)) | |
1239 { | |
1240 return; | |
1241 } | |
1242 | |
1243 if (extract_kana && !all_kana (p->tango)) | |
1244 { | |
1245 return; | |
1246 } | |
1247 | |
1248 if (selhinshi && !p->hinshi->hinshi[0]) | |
1249 { | |
1250 return; | |
1251 } | |
1252 | |
1253 if (canna_type_output) | |
1254 { | |
1255 canna_output (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis); | |
1256 } | |
1257 else | |
1258 { | |
1259 entry_out (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis, p->extdata); | |
1260 } | |
1261 } | |
1262 | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
#if 0
/*
 * Disabled helper: print "- " followed by entry P on stdout, unless the
 * entry carries the COMMON flag.  Kept out of the build by the
 * surrounding #if 0.
 */
static void
showdeleted (p)
     struct dicpack *p;
{
  if (p->flags & COMMON)
    {
      return;
    }

  (void) printf ("- ");
  printentry (stdout, p);
}
#endif
0 | 1275 |
1276 static void | |
1277 showentry (pd, n) | |
1278 struct dicpack **pd; | |
1279 int n; | |
1280 { | |
1281 FILE *cf = (FILE *) 0, *of = (FILE *) 0, *nf = (FILE *) 0; | |
1282 struct dicpack *p; | |
1283 int i; | |
1284 | |
1285 if (common_out) | |
1286 { | |
1287 if (common_out[0] != '-' || common_out[1]) | |
1288 { | |
1289 cf = fopen (common_out, "w"); | |
1290 if (!cf) | |
1291 { | |
1292 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, common_out); | |
1293 exit (1); | |
1294 } | |
1295 } | |
1296 else | |
1297 { | |
1298 cf = stdout; | |
1299 } | |
1300 } | |
1301 if (old_out) | |
1302 { | |
1303 if (old_out[0] != '-' || old_out[1]) | |
1304 { | |
1305 of = fopen (old_out, "w"); | |
1306 if (!of) | |
1307 { | |
1308 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, old_out); | |
1309 exit (1); | |
1310 } | |
1311 } | |
1312 else | |
1313 { | |
1314 of = stdout; | |
1315 } | |
1316 } | |
1317 if (new_out) | |
1318 { | |
1319 if (new_out[0] != '-' || new_out[1]) | |
1320 { | |
1321 nf = fopen (new_out, "w"); | |
1322 if (!nf) | |
1323 { | |
1324 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, new_out); | |
1325 exit (1); | |
1326 } | |
1327 } | |
1328 else | |
1329 { | |
1330 nf = stdout; | |
1331 } | |
1332 } | |
1333 | |
1334 for (i = 0; i < n; i++) | |
1335 { | |
1336 p = pd[i]; | |
1337 if (compare) | |
1338 { | |
1339 if (p->flags & COMMON) | |
1340 { | |
1341 if (cf) | |
1342 { | |
1343 printentry (cf, p); | |
1344 } | |
1345 } | |
1346 else if (p->flags & NEW) | |
1347 { | |
1348 if (nf) | |
1349 { | |
1350 printentry (nf, p); | |
1351 } | |
1352 } | |
1353 else | |
1354 { | |
1355 if (of) | |
1356 { | |
1357 printentry (of, p); | |
1358 } | |
1359 } | |
1360 } | |
1361 else | |
1362 { /* just print the normalized dictionary */ | |
1363 printentry (stdout, p); | |
1364 } | |
1365 } | |
1366 } | |
1367 | |
1368 static int | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1369 diccompar (const void *pp1, const void *pp2) |
0 | 1370 { |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1371 struct dicpack **p1, **p2; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1372 p1 = (struct dicpack **)pp1; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1373 p2 = (struct dicpack **)pp2; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1374 |
0 | 1375 int n; |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1376 if ((n = Wscmp ((*p1)->yomi, (*p2)->yomi))) |
0 | 1377 { |
1378 return n; | |
1379 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1380 else if ((n = Wscmp ((*p1)->tango, (*p2)->tango))) |
0 | 1381 { |
1382 return n; | |
1383 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1384 else if ((n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi))) |
0 | 1385 { |
1386 return n; | |
1387 } | |
1388 else | |
1389 { /* impossible */ | |
1390 return 0; | |
1391 } | |
1392 } | |
1393 | |
1394 static int | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1395 dichindocompar (const void *pp1, const void *pp2) |
0 | 1396 { |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1397 struct dicpack **p1, **p2; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1398 p1 = (struct dicpack **)pp1; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1399 p2 = (struct dicpack **)pp2; |
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
0
diff
changeset
|
1400 |
0 | 1401 int n; |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1402 if ((n = Wscmp ((*p1)->yomi, (*p2)->yomi))) |
0 | 1403 { |
1404 return n; | |
1405 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1406 else if ((n = ((*p2)->hindo - (*p1)->hindo))) |
0 | 1407 { |
1408 return n; | |
1409 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1410 else if ((n = Wscmp ((*p1)->tango, (*p2)->tango))) |
0 | 1411 { |
1412 return n; | |
1413 } | |
28
a7ccf412ba02
more warning suppression
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
25
diff
changeset
|
1414 else if ((n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi))) |
0 | 1415 { |
1416 return n; | |
1417 } | |
1418 else | |
1419 { /* impossible */ | |
1420 return 0; | |
1421 } | |
1422 } | |
1423 | |
/*
 * Remove the first N elements of the COUNT-element vector ARGV by
 * sliding the remaining COUNT - N elements down to the front.
 * The trailing N slots are left untouched (they keep their old values),
 * and nothing is moved when COUNT <= N.
 */
void
shrinkargs (argv, n, count)
     char **argv;
     int n, count;
{
  int kept = count - n;         /* number of elements that survive */
  int k;

  for (k = 0; k < kept; ++k)
    {
      argv[k] = argv[k + n];
    }
}
1436 | |
1437 static void | |
1438 parseargs (argc, argv) | |
1439 int argc; | |
1440 char *argv[]; | |
1441 { | |
1442 int i; | |
1443 | |
1444 for (program = argv[0] + strlen (argv[0]); argv[0] < program; program--) | |
1445 { | |
1446 if (program[0] == '/') | |
1447 { | |
1448 program++; | |
1449 break; | |
1450 } | |
1451 } | |
1452 | |
1453 for (i = 1; i < argc;) | |
1454 { | |
1455 if (argv[i][0] == '-' && argv[i][2] == '\0') | |
1456 { | |
1457 switch (argv[i][1]) | |
1458 { | |
1459 case '1': | |
1460 case '2': | |
1461 case '3': | |
1462 selhinshi = argv[i][1] - '0'; | |
1463 shrinkargs (argv + i, 1, argc - i); | |
1464 argc -= 1; | |
1465 break; | |
1466 | |
1467 case 'b': | |
1468 bunrui = argv[i + 1]; | |
1469 shrinkargs (argv + i, 2, argc - i); | |
1470 argc -= 2; | |
1471 break; | |
1472 | |
1473 case 'c': | |
1474 common_out = argv[i + 1]; | |
1475 shrinkargs (argv + i, 2, argc - i); | |
1476 argc -= 2; | |
1477 break; | |
1478 | |
1479 case 'd': | |
1480 description_table = argv[i + 1]; | |
1481 shrinkargs (argv + i, 2, argc - i); | |
1482 argc -= 2; | |
1483 break; | |
1484 | |
1485 case 'f': | |
1486 copy_frequency = 1; | |
1487 shrinkargs (argv + i, 1, argc - i); | |
1488 argc -= 1; | |
1489 break; | |
1490 | |
1491 case 'h': | |
1492 ignore_hinshi_to_compare = 1; | |
1493 shrinkargs (argv + i, 1, argc - i); | |
1494 argc -= 1; | |
1495 break; | |
1496 | |
1497 case 'i': | |
1498 canna_type_output = 1; | |
1499 wnn_type_output = 0; | |
1500 shrinkargs (argv + i, 1, argc - i); | |
1501 argc -= 1; | |
1502 break; | |
1503 | |
1504 case 'j': | |
1505 extract_kana = 1; | |
1506 shrinkargs (argv + i, 1, argc - i); | |
1507 argc -= 1; | |
1508 break; | |
1509 | |
1510 case 'k': | |
1511 { | |
1512 Wchar buf[READBUFSIZE]; | |
1513 | |
1514 (void) Mbstowcs (buf, argv[i + 1], READBUFSIZE); | |
1515 specific_kind |= internkind (buf); | |
1516 } | |
1517 shrinkargs (argv + i, 2, argc - i); | |
1518 argc -= 2; | |
1519 break; | |
1520 | |
1521 case 'l': | |
1522 list_kinds = 1; | |
1523 shrinkargs (argv + i, 1, argc - i); | |
1524 argc -= 1; | |
1525 break; | |
1526 | |
1527 case 'm': | |
1528 merge_kind = 1; | |
1529 shrinkargs (argv + i, 1, argc - 1); | |
1530 argc -= 1; | |
1531 break; | |
1532 | |
1533 case 'n': | |
1534 new_out = argv[i + 1]; | |
1535 shrinkargs (argv + i, 2, argc - i); | |
1536 argc -= 2; | |
1537 break; | |
1538 | |
1539 case 'o': | |
1540 old_out = argv[i + 1]; | |
1541 shrinkargs (argv + i, 2, argc - i); | |
1542 argc -= 2; | |
1543 break; | |
1544 | |
1545 case 'p': | |
1546 sort_by_frequency = 1; | |
1547 shrinkargs (argv + i, 1, argc - i); | |
1548 argc -= 1; | |
1549 break; | |
1550 | |
1551 case 'r': | |
1552 hinshi_table = argv[i + 1]; | |
1553 shrinkargs (argv + i, 2, argc - i); | |
1554 argc -= 2; | |
1555 hinshi_direction = REVERSE; | |
1556 break; | |
1557 | |
1558 case 's': | |
1559 hinshi_table = argv[i + 1]; | |
1560 shrinkargs (argv + i, 2, argc - i); | |
1561 argc -= 2; | |
1562 break; | |
1563 | |
1564 case 'v': | |
1565 sj3_type_output = 1; | |
1566 wnn_type_output = 1; /* Wnn 形式と似ているので立てる */ | |
1567 shrinkargs (argv + i, 1, argc - i); | |
1568 argc -= 1; | |
1569 break; | |
1570 | |
1571 case 'w': | |
1572 canna_type_output = 0; | |
1573 sj3_type_output = 0; | |
1574 wnn_type_output = 1; | |
1575 shrinkargs (argv + i, 1, argc - i); | |
1576 argc -= 1; | |
1577 break; | |
1578 | |
1579 case 'x': | |
1580 merge_sj3 = 1; | |
1581 ignore_hinshi_to_compare = 1; | |
1582 shrinkargs (argv + i, 1, argc - i); | |
1583 argc -= 1; | |
1584 break; | |
1585 | |
1586 default: | |
1587 i++; | |
1588 break; | |
1589 } | |
1590 } | |
1591 else | |
1592 { | |
1593 i++; | |
1594 } | |
1595 } | |
1596 | |
1597 if (argc < 2) | |
1598 { | |
1599 (void) fprintf (stderr, "Usage: %s dic1 [dic2] [-c filecommon] ...\n", program); | |
1600 exit (1); | |
1601 } | |
1602 | |
1603 if (argv[1][0] != '-' || argv[1][1]) | |
1604 { | |
1605 in1 = fopen (argv[1], "r"); | |
1606 if (!in1) | |
1607 { | |
1608 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[1]); | |
1609 exit (1); | |
1610 } | |
1611 } | |
1612 if (argc == 3) | |
1613 { | |
1614 if (argv[2][0] != '-' || argv[2][1]) | |
1615 { | |
1616 in2 = fopen (argv[2], "r"); | |
1617 if (!in2) | |
1618 { | |
1619 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[2]); | |
1620 exit (1); | |
1621 } | |
1622 } | |
1623 } | |
1624 else | |
1625 { | |
1626 in2 = (FILE *) 0; | |
1627 } | |
1628 if (description_table) | |
1629 { | |
1630 store_description (); | |
1631 } | |
1632 } | |
1633 | |
1634 static Wchar kihonh[] = { | |
1635 (Wchar) 'k', (Wchar) 'i', (Wchar) 'h', (Wchar) 'o', (Wchar) 'n', (Wchar) 0, | |
1636 }; | |
1637 | |
1638 int | |
1639 main (argc, argv) | |
1640 int argc; | |
1641 char *argv[]; | |
1642 { | |
1643 #ifndef POD_WCHAR | |
1644 setlocale (LC_ALL, ""); | |
1645 #endif | |
1646 | |
1647 in1 = in2 = stdin; | |
1648 (void) internkind (kihonh); /* 基本辞書用。1L として登録 */ | |
1649 parseargs (argc, argv); | |
1650 storepd (in1); | |
1651 (void) fclose (in1); | |
1652 | |
1653 if (in2) | |
1654 { | |
1655 compare = 1; | |
1656 comparepd (in2); | |
1657 (void) fclose (in2); | |
1658 } | |
1659 | |
1660 if (list_kinds) | |
1661 { | |
1662 listkinds (); | |
1663 exit (0); | |
1664 } | |
1665 | |
1666 if (selhinshi) | |
1667 { | |
1668 select_hinshi (selhinshi); | |
1669 } | |
1670 else if (hinshi_table) | |
1671 { | |
1672 replace_hinshi (); | |
1673 } | |
1674 | |
1675 pdic = (struct dicpack **) malloc (ndicentries * sizeof (struct dicpack *)); | |
1676 if (pdic) | |
1677 { | |
1678 int i, j; | |
1679 struct dicpack *p; | |
1680 | |
1681 for (i = 0, j = 0; i < DICBUFSIZE; i++) | |
1682 { | |
1683 for (p = dic[i]; p; p = p->next) | |
1684 { | |
1685 pdic[j++] = p; | |
1686 } | |
1687 } | |
1688 if (sort_by_frequency) | |
1689 { | |
1690 qsort (pdic, ndicentries, sizeof (struct dicpack *), dichindocompar); | |
1691 } | |
1692 else | |
1693 { | |
1694 qsort (pdic, ndicentries, sizeof (struct dicpack *), diccompar); | |
1695 } | |
1696 sortkind (); | |
1697 showentry (pdic, ndicentries); | |
1698 } | |
1699 else | |
1700 { | |
1701 malloc_failed (); | |
1702 } | |
1703 exit (0); | |
1704 } |