0
|
1 /* Copyright 1994 Pubdic Project.
|
|
2 *
|
|
3 * Permission to use, copy, modify, distribute and sell this software
|
|
4 * and its documentation for any purpose is hereby granted without
|
|
5 * fee, provided that the above copyright notice appear in all copies
|
|
6 * and that both that copyright notice and this permission notice
|
|
7 * appear in supporting documentation, and that the name of Pubdic
|
|
8 * Project not be used in advertising or publicity pertaining to
|
|
9 * distribution of the software without specific, written prior
|
|
10 * permission. Pubdic Project makes no representations about the
|
|
11 * suitability of this software for any purpose. It is provided "as
|
|
12 * is" without express or implied warranty.
|
|
13 *
|
|
14 * PUBDIC PROJECT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
|
|
15 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
|
16 * NO EVENT SHALL PUBDIC PROJECT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
|
|
17 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
|
|
18 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
|
19 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
|
20 * PERFORMANCE OF THIS SOFTWARE.
|
|
21 */
|
|
22
|
|
23 #ifndef lint
|
|
24 static char rcsid[] = "$Id: pod.c,v 1.7 2005/12/10 18:50:43 aonoto Exp $";
|
|
25 #endif
|
|
26
|
|
27 #ifdef HAVE_CONFIG_H
|
|
28 # include <config.h>
|
|
29 #endif
|
|
30
|
|
31 #include <stdio.h>
|
|
32 #if STDC_HEADERS
|
|
33 # include <stdlib.h>
|
|
34 # include <stddef.h>
|
|
35 # include <string.h>
|
|
36 #else
|
|
37 # if HAVE_MALLOC_H
|
|
38 # include <malloc.h>
|
|
39 # endif
|
|
40 # if HAVE_STRINGS_H
|
|
41 # include <strings.h>
|
|
42 # endif
|
|
43 #endif /* STDC_HEADERS */
|
|
44
|
|
/*
 * Wide-character configuration.
 *
 * POD_WCHAR is defined unconditionally right here, so this file always
 * uses its own Wchar type (an unsigned short) together with the
 * hand-written W-prefixed and F-prefixed helpers below.  The #else
 * branch maps Wchar onto the system wchar_t instead.
 */
#define POD_WCHAR
#ifdef POD_WCHAR
typedef unsigned short Wchar;
#else
#include <locale.h>
#include <widec.h>
#define Wchar wchar_t
#endif

/* Provide bzero() in terms of memset() when only the latter exists. */
#if !(HAVE_BZERO) && (HAVE_MEMSET)
# define bzero(a, c) memset(a, 0, c)
#endif
|
|
57
|
|
/*
 * File-scope state.  The values are assigned outside this section
 * (presumably by the command-line parser -- confirm against parseargs).
 */

/* Program name; used as the prefix of every diagnostic message. */
static char *program;

/* Mode and output-format switches. */
static int compare;
static int ignore_hinshi_to_compare;
static int sort_by_frequency;
static int merge_sj3;
static int merge_kind;
static int wnn_type_output;
static int canna_type_output;
static int sj3_type_output;
static int list_kinds;
static int copy_frequency;
static int extract_kana = 0;

/* When non-zero, only entries sharing a bit with this mask are printed. */
static long specific_kind;

/* Input streams. */
static FILE *in1, *in2;

/* Output file names ("-" selects stdout) and table file names. */
static char *common_out;
static char *old_out;
static char *new_out;
static char *hinshi_table;
static char *bunrui;
static char *description_table;

/* Which slash-separated field select_hinshi() keeps (0 = leave as is). */
static int selhinshi = 0;

/* Values of hinshi_direction. */
#define INORDER 0
#define REVERSE 1

/* REVERSE swaps the two columns read by replace_hinshi(). */
static int hinshi_direction = INORDER;

/* Size of the line buffers used for reading and writing text. */
#define READBUFSIZE 128

/* Hash-table sizes (powers of two) and the matching index masks. */
#define DICBUFSIZE (2 << 13)
#define DICBUFINDEXMASK (DICBUFSIZE - 1)
#define HINSHIBUFSIZE (2 << 13)
#define HINSHIBUFINDEXMASK (HINSHIBUFSIZE - 1)

/* Status codes stored through the stat argument of intern(). */
#define FOUND 0
#define CREATE 1
|
|
84
|
|
85 /* 品詞を表す構造体 */
|
|
86
|
|
87 struct hinshipack
|
|
88 {
|
|
89 int nhinshis;
|
|
90 Wchar *hinshi;
|
|
91 unsigned flags; /* see below */
|
|
92 struct hinshipack *next;
|
|
93 };
|
|
94
|
|
95 /* values of (struct hinshipack.)flags */
|
|
96 #define REPLACED 1
|
|
97
|
|
98 /* 終止形を追加するためのルールファイルの内部表現(だと思う) */
|
|
99
|
|
100 struct descpack
|
|
101 {
|
|
102 Wchar *hinshi, *tandesc, *yomdesc;
|
|
103 struct descpack *next;
|
|
104 };
|
|
105
|
|
106 /* エントリの種別を表す構造体その他 */
|
|
107
|
|
108 struct kindpack
|
|
109 {
|
|
110 Wchar *kind;
|
|
111 long kindbit;
|
|
112 };
|
|
113
|
|
114 /* 辞書を表す構造体 */
|
|
115
|
|
116 struct dicpack
|
|
117 {
|
|
118 Wchar *yomi, *tango;
|
|
119 struct hinshipack *hinshi;
|
|
120 int hindo;
|
|
121 long kind;
|
|
122 Wchar *extdata;
|
|
123 unsigned flags; /* SEE BELOW */
|
|
124 struct dicpack *next;
|
|
125 };
|
|
126
|
|
127 /* values of (struct dicpack.)flags */
|
|
128 #define COMMON 001
|
|
129 #define NEW 002
|
|
130
|
|
131 #if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32)
|
|
132 /* Prototype for C89 (or later) */
|
|
133 #ifdef POD_WCHAR
|
|
134 size_t Mbstowcs (Wchar *d, char *ss, int n);
|
|
135 size_t Wcstombs (char *d, Wchar *s, int n);
|
|
136 int Wscmp (register Wchar *s1, register Wchar *s2);
|
|
137 Wchar *Wscpy (Wchar *d, register Wchar *s);
|
|
138 int Wslen (Wchar *s);
|
|
139 int Watoi (Wchar *s);
|
|
140 static void Fputws (Wchar *s, FILE *f);
|
|
141 Wchar *Fgetws (Wchar *buf, int siz, FILE *f);
|
|
142 #endif /* POD_WCHAR */
|
|
143
|
|
144 static int all_kana (Wchar *s);
|
|
145 static Wchar *findslash (Wchar *s);
|
|
146 static Wchar *extstr (Wchar *p, Wchar **pp, int *key_return);
|
|
147 static void malloc_failed (void);
|
|
148 static struct hinshipack *internhinshi (Wchar *str, int flag);
|
|
149 static void replace_hinshi (void);
|
|
150 static void select_hinshi (int n);
|
|
151 static void freedesc (struct descpack *p);
|
|
152 static struct descpack *interndesc (Wchar *hin, Wchar *tan, Wchar *yom);
|
|
153 static struct descpack *searchdesc (Wchar *hin);
|
|
154 static void store_description (void);
|
|
155 static long internkind (Wchar *s);
|
|
156 static void listkinds (void);
|
|
157 static int kindcompar (struct kindpack *k1, struct kindpack *k2);
|
|
158 static void sortkind (void);
|
|
159 static struct dicpack *intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags);
|
|
160 static void storepd (FILE *file);
|
|
161 static void comparepd (FILE *file);
|
|
162 static void canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n);
|
|
163 static void entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex);
|
|
164 static void printentry (FILE *cf, struct dicpack *p);
|
|
165 static void showentry (struct dicpack **pd, int n);
|
|
166 static int diccompar (struct dicpack **p1, struct dicpack **p2);
|
|
167 static int dichindocompar (struct dicpack **p1, struct dicpack **p2);
|
|
168 void shrinkargs (char **argv, int n, int count);
|
|
169 static void parseargs (int argc, char *argv[]);
|
|
170 #endif
|
|
171
|
|
172 #ifndef POD_WCHAR
|
|
173 # define Mbstowcs mbstowcs
|
|
174 # define Wcstombs wcstombs
|
|
175 # define Wscmp wscmp
|
|
176 # define Wscpy wscpy
|
|
177 # define Wslen wslen
|
|
178 # define Fgetws fgetws
|
|
179 # define Fputws fputws
|
|
180 #else
|
|
181 # define SS2 0x8e
|
|
182 # define SS3 0x8f
|
|
183 # define MSB 0x80
|
|
184 # define MSK 0x7f
|
|
185
|
|
186 # define WCG0 0x0000
|
|
187 # define WCG1 0x8080
|
|
188 # define WCG2 0x0080
|
|
189 # define WCG3 0x8000
|
|
190 # define WCMSK 0x8080
|
|
191
|
|
192 size_t
|
|
193 Mbstowcs (d, ss, n)
|
|
194 Wchar *d;
|
|
195 char *ss;
|
|
196 int n;
|
|
197 {
|
|
198 register Wchar *p = d;
|
|
199 register int ch;
|
|
200 register unsigned char *s = (unsigned char *) ss;
|
|
201
|
|
202 while ((ch = *s++) && (p - d < n))
|
|
203 {
|
|
204 if (ch & MSB)
|
|
205 {
|
|
206 if (ch == SS2)
|
|
207 { /* kana */
|
|
208 *p++ = (Wchar) * s++;
|
|
209 }
|
|
210 else if (ch == SS3)
|
|
211 {
|
|
212 *p++ = (Wchar) ((*s << 8) | (*(s + 1) & MSK));
|
|
213 s += 2;
|
|
214 }
|
|
215 else
|
|
216 {
|
|
217 *p++ = (Wchar) ((ch << 8) | (*s++ & 0xff));
|
|
218 }
|
|
219 }
|
|
220 else
|
|
221 {
|
|
222 *p++ = (Wchar) ch;
|
|
223 }
|
|
224 }
|
|
225 *p = (Wchar) 0;
|
|
226 return p - d;
|
|
227 }
|
|
228
|
|
229 size_t
|
|
230 Wcstombs (d, s, n)
|
|
231 char *d;
|
|
232 Wchar *s;
|
|
233 int n;
|
|
234 {
|
|
235 register char *p = d;
|
|
236 register Wchar ch;
|
|
237
|
|
238 while ((ch = *s++) && (p - d + 2 < n))
|
|
239 {
|
|
240 switch (ch & WCMSK)
|
|
241 {
|
|
242 case WCG0:
|
|
243 *p++ = (char) ch;
|
|
244 break;
|
|
245
|
|
246 case WCG1:
|
|
247 *p++ = (char) ((ch >> 8) & 0xff);
|
|
248 *p++ = (char) (ch & 0xff);
|
|
249 break;
|
|
250
|
|
251 case WCG2:
|
|
252 *p++ = SS2;
|
|
253 *p++ = (char) ch;
|
|
254 break;
|
|
255
|
|
256 case WCG3:
|
|
257 *p++ = SS3;
|
|
258 *p++ = (char) ((ch >> 8) & 0xff);
|
|
259 *p++ = (char) ((ch & 0xff) | MSB);
|
|
260 break;
|
|
261 }
|
|
262 }
|
|
263 *p = '\0';
|
|
264 return p - d;
|
|
265 }
|
|
266
|
|
267 int
|
|
268 Wscmp (s1, s2)
|
|
269 register Wchar *s1, *s2;
|
|
270 {
|
|
271 register int res;
|
|
272
|
|
273 /* 以下のコードはいささかトリッキーなので、説明を加えておこう。
|
|
274 以下ではこのコメント内にあるようなことをしたいわけである。
|
|
275
|
|
276 while (*s1 && *s2 && && *s1 == *s2) {
|
|
277 s1++; s2++;
|
|
278 }
|
|
279 return *s1 - *s2;
|
|
280
|
|
281 すなわち、s1 も s2 も EOS ('\0') を指していなくて、しかも値が
|
|
282 異なる間はそれぞれのポインタを進める。いずれかが EOS になるか、
|
|
283 値が違ってきた場合には、*s1 - *s2 を返す。
|
|
284 */
|
|
285
|
|
286 while (!(res = *s1 - *s2++) && *s1++)
|
|
287 ;
|
|
288 return res;
|
|
289 }
|
|
290
|
|
291 Wchar *
|
|
292 Wscpy (d, s)
|
|
293 Wchar *d;
|
|
294 register Wchar *s;
|
|
295 {
|
|
296 register Wchar *p = d, ch;
|
|
297
|
|
298 while (ch = *s++)
|
|
299 {
|
|
300 *p++ = ch;
|
|
301 }
|
|
302 *p = (Wchar) 0;
|
|
303 return d;
|
|
304 }
|
|
305
|
|
306 int
|
|
307 Wslen (s)
|
|
308 Wchar *s;
|
|
309 {
|
|
310 register Wchar *p = s;
|
|
311
|
|
312 while (*p)
|
|
313 p++;
|
|
314 return p - s;
|
|
315 }
|
|
316
|
|
317 int
|
|
318 Watoi (s)
|
|
319 Wchar *s;
|
|
320 {
|
|
321 register int res = 0;
|
|
322 register Wchar ch;
|
|
323
|
|
324 while ((ch = *s++) && ((Wchar) '0' <= ch) && (ch <= (Wchar) '9'))
|
|
325 {
|
|
326 res *= 10;
|
|
327 res += ch - (Wchar) '0';
|
|
328 }
|
|
329 return res;
|
|
330 }
|
|
331
|
|
332 static void
|
|
333 Fputws (s, f)
|
|
334 Wchar *s;
|
|
335 FILE *f;
|
|
336 {
|
|
337 char buf[READBUFSIZE];
|
|
338
|
|
339 if (Wcstombs (buf, s, READBUFSIZE))
|
|
340 {
|
|
341 (void) fputs (buf, f);
|
|
342 }
|
|
343 }
|
|
344
|
|
345 Wchar *
|
|
346 Fgetws (buf, siz, f)
|
|
347 Wchar *buf;
|
|
348 int siz;
|
|
349 FILE *f;
|
|
350 {
|
|
351 char mbuf[READBUFSIZE], *p;
|
|
352
|
|
353 p = fgets (mbuf, READBUFSIZE, f);
|
|
354 if (p)
|
|
355 {
|
|
356 if (Mbstowcs (buf, mbuf, siz))
|
|
357 {
|
|
358 return buf;
|
|
359 }
|
|
360 }
|
|
361 return (Wchar *) 0;
|
|
362 }
|
|
363 #endif
|
|
364
|
|
365 /* s が全てカタカナから構成されているかどうかを返す関数 */
|
|
366
|
|
367 static int
|
|
368 all_kana (s)
|
|
369 Wchar *s;
|
|
370 {
|
|
371 static Wchar xa = 0, xke, aa, *p;
|
|
372
|
|
373 if (!xa)
|
|
374 {
|
|
375 Mbstowcs (&xa, "\045\041", 1);
|
|
376 Mbstowcs (&xke, "\045\166", 1);
|
|
377 Mbstowcs (&aa, "\041\074", 1);
|
|
378 }
|
|
379
|
|
380 for (p = s; *p; p++)
|
|
381 {
|
|
382 if (!(*p == aa || (xa <= *p && *p <= xke)))
|
|
383 {
|
|
384 return 0;
|
|
385 }
|
|
386 }
|
|
387
|
|
388 return 1;
|
|
389 }
|
|
390
|
|
391 /* スラッシュを探す */
|
|
392
|
|
393 static Wchar *
|
|
394 findslash (s)
|
|
395 Wchar *s;
|
|
396 {
|
|
397 while (*s)
|
|
398 {
|
|
399 if (*s == (Wchar) '/')
|
|
400 {
|
|
401 return s;
|
|
402 }
|
|
403 s++;
|
|
404 }
|
|
405 return (Wchar *) 0;
|
|
406 }
|
|
407
|
|
408 /* トークンを一個取り出す */
|
|
409
|
|
410 static Wchar *
|
|
411 extstr (p, pp, key_return)
|
|
412 Wchar *p, **pp;
|
|
413 int *key_return;
|
|
414 {
|
|
415 Wchar *res;
|
|
416 int key = 0;
|
|
417
|
|
418 while (*p == (Wchar) ' ' || *p == (Wchar) '\t')
|
|
419 p++;
|
|
420 res = p;
|
|
421 while (*p && *p != (Wchar) ' ' && *p != (Wchar) '\t' && *p != (Wchar) '\n')
|
|
422 {
|
|
423 key += (int) *p++;
|
|
424 }
|
|
425 *p++ = (Wchar) '\0';
|
|
426 if (pp)
|
|
427 *pp = p;
|
|
428 if (key_return)
|
|
429 *key_return = key;
|
|
430 return res;
|
|
431 }
|
|
432
|
|
433 static struct hinshipack *partsofspeech[HINSHIBUFSIZE];
|
|
434
|
|
435 static void
|
|
436 malloc_failed ()
|
|
437 {
|
|
438 (void) fprintf (stderr, "%s: malloc failed.\n", program);
|
|
439 }
|
|
440
|
|
441 /* 品詞名を品詞名テーブルに登録する */
|
|
442
|
|
443 static struct hinshipack *
|
|
444 internhinshi (str, flag)
|
|
445 Wchar *str;
|
|
446 int flag;
|
|
447 {
|
|
448 struct hinshipack *p, **pp;
|
|
449 Wchar *s;
|
|
450 int key = 0;
|
|
451
|
|
452 for (s = str; *s; s++)
|
|
453 key += (int) *s;
|
|
454 key = ((unsigned) key & HINSHIBUFINDEXMASK);
|
|
455 for (pp = partsofspeech + key; p = *pp; pp = &(p->next))
|
|
456 {
|
|
457 if (!Wscmp (p->hinshi, str))
|
|
458 {
|
|
459 return p;
|
|
460 }
|
|
461 }
|
|
462 if (flag)
|
|
463 {
|
|
464 p = (struct hinshipack *) malloc (sizeof (struct hinshipack));
|
|
465 if (p)
|
|
466 {
|
|
467 *pp = p;
|
|
468 (void) bzero (p, sizeof (struct hinshipack));
|
|
469 p->hinshi = (Wchar *) malloc ((Wslen (str) + 1) * sizeof (Wchar));
|
|
470 if (p->hinshi)
|
|
471 {
|
|
472 (void) Wscpy (p->hinshi, str);
|
|
473 p->nhinshis = 1;
|
|
474 return p;
|
|
475 }
|
|
476 free (p);
|
|
477 }
|
|
478 malloc_failed ();
|
|
479 }
|
|
480 return (struct hinshipack *) 0;
|
|
481 }
|
|
482
|
|
483 /* 品詞名を置き換える */
|
|
484
|
|
485 static void
|
|
486 replace_hinshi ()
|
|
487 {
|
|
488 FILE *f;
|
|
489 Wchar readbuf[READBUFSIZE], *to, *from, *s;
|
|
490 struct hinshipack *hinshientry, *p;
|
|
491 int i, err = 0;
|
|
492
|
|
493 f = fopen (hinshi_table, "r");
|
|
494 if (!f)
|
|
495 {
|
|
496 (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, hinshi_table);
|
|
497 exit (1);
|
|
498 }
|
|
499 while (s = Fgetws (readbuf, READBUFSIZE, f))
|
|
500 {
|
|
501 from = extstr (s, &s, 0);
|
|
502 to = extstr (s, &s, 0);
|
|
503 if (hinshi_direction == REVERSE)
|
|
504 {
|
|
505 Wchar *xx = from;
|
|
506 from = to;
|
|
507 to = xx;
|
|
508 }
|
|
509
|
|
510 hinshientry = internhinshi (from, 0);
|
|
511 if (hinshientry)
|
|
512 {
|
|
513 Wchar *xx;
|
|
514
|
|
515 xx = (Wchar *) malloc ((Wslen (to) + 1) * sizeof (Wchar));
|
|
516 if (xx)
|
|
517 {
|
|
518 Wchar *cp;
|
|
519 int n = 1;
|
|
520
|
|
521 (void) Wscpy (xx, to);
|
|
522 free (hinshientry->hinshi);
|
|
523 hinshientry->hinshi = xx;
|
|
524 for (cp = xx; *cp; cp++)
|
|
525 {
|
|
526 if (*cp == (Wchar) '/')
|
|
527 {
|
|
528 *cp = (Wchar) 0;
|
|
529 n++;
|
|
530 }
|
|
531 }
|
|
532 hinshientry->nhinshis = n;
|
|
533 hinshientry->flags |= REPLACED;
|
|
534 }
|
|
535 else
|
|
536 {
|
|
537 malloc_failed ();
|
|
538 }
|
|
539 }
|
|
540 }
|
|
541 (void) fclose (f);
|
|
542
|
|
543 for (i = 0; i < HINSHIBUFSIZE; i++)
|
|
544 {
|
|
545 for (p = partsofspeech[i]; p; p = p->next)
|
|
546 {
|
|
547 if (!(p->flags & REPLACED))
|
|
548 {
|
|
549 (void) fprintf (stderr, "%s: The replacement for \"", program);
|
|
550 Fputws (p->hinshi, stderr);
|
|
551 (void) fprintf (stderr, "\" is not mentioned in the table.\n");
|
|
552 err = 1;
|
|
553 }
|
|
554 }
|
|
555 }
|
|
556 if (err)
|
|
557 {
|
|
558 exit (1);
|
|
559 }
|
|
560 }
|
|
561
|
|
562 static void
|
|
563 select_hinshi (n)
|
|
564 int n;
|
|
565 {
|
|
566 Wchar *s, *t, *xx;
|
|
567 struct hinshipack *p;
|
|
568 int i;
|
|
569
|
|
570 if (!n)
|
|
571 return;
|
|
572
|
|
573 for (i = 0; i < HINSHIBUFSIZE; i++)
|
|
574 {
|
|
575 for (p = partsofspeech[i]; p; p = p->next)
|
|
576 {
|
|
577 switch (n)
|
|
578 {
|
|
579 case 1:
|
|
580 s = findslash (p->hinshi);
|
|
581 if (s)
|
|
582 {
|
|
583 *s = (Wchar) 0;
|
|
584 }
|
|
585 break;
|
|
586
|
|
587 case 2:
|
|
588 s = findslash (p->hinshi);
|
|
589 if (s)
|
|
590 {
|
|
591 s++;
|
|
592 t = findslash (s);
|
|
593 if (t)
|
|
594 {
|
|
595 xx = (Wchar *) malloc ((t - s + 1) * sizeof (Wchar));
|
|
596 if (xx)
|
|
597 {
|
|
598 *t = (Wchar) 0;
|
|
599 Wscpy (xx, s);
|
|
600 t = p->hinshi;
|
|
601 p->hinshi = xx;
|
|
602 (void) free ((char *) t);
|
|
603 }
|
|
604 }
|
|
605 }
|
|
606 break;
|
|
607
|
|
608 case 3:
|
|
609 s = findslash (p->hinshi);
|
|
610 if (s)
|
|
611 {
|
|
612 t = findslash (s + 1);
|
|
613 if (t)
|
|
614 {
|
|
615 t++;
|
|
616 xx = (Wchar *) malloc ((Wslen (t) + 1) * sizeof (Wchar));
|
|
617 if (xx)
|
|
618 {
|
|
619 Wscpy (xx, t);
|
|
620 t = p->hinshi;
|
|
621 p->hinshi = xx;
|
|
622 (void) free ((char *) t);
|
|
623 }
|
|
624 }
|
|
625 }
|
|
626 break;
|
|
627
|
|
628 default:
|
|
629 break;
|
|
630 }
|
|
631 }
|
|
632 }
|
|
633 }
|
|
634
|
|
635 static void
|
|
636 freedesc (p)
|
|
637 struct descpack *p;
|
|
638 {
|
|
639 free (p->hinshi);
|
|
640 free (p->tandesc);
|
|
641 free (p->yomdesc);
|
|
642 free (p);
|
|
643 }
|
|
644
|
|
645 static struct descpack *description[HINSHIBUFSIZE];
|
|
646
|
|
647 /* ルールの登録 */
|
|
648
|
|
649 static struct descpack *
|
|
650 interndesc (hin, tan, yom)
|
|
651 Wchar *hin, *tan, *yom;
|
|
652 {
|
|
653 struct descpack *p, **pp, *next = (struct descpack *) 0;
|
|
654 Wchar *s;
|
|
655 int key = 0;
|
|
656
|
|
657 for (s = hin; *s; s++)
|
|
658 key += (int) *s;
|
|
659 key = ((unsigned) key & HINSHIBUFINDEXMASK);
|
|
660 for (pp = description + key; p = *pp; pp = &(p->next))
|
|
661 {
|
|
662 if (!Wscmp (p->hinshi, hin))
|
|
663 {
|
|
664 if (!Wscmp (p->tandesc, tan) && !Wscmp (p->yomdesc, yom))
|
|
665 {
|
|
666 return p;
|
|
667 }
|
|
668 else
|
|
669 {
|
|
670 *pp = next = p->next;
|
|
671 freedesc (p);
|
|
672 break;
|
|
673 }
|
|
674 }
|
|
675 }
|
|
676 p = (struct descpack *) malloc (sizeof (struct descpack));
|
|
677 if (p)
|
|
678 {
|
|
679 *pp = p;
|
|
680 (void) bzero (p, sizeof (struct descpack));
|
|
681 p->next = next;
|
|
682 p->hinshi = (Wchar *) malloc ((Wslen (hin) + 1) * sizeof (Wchar));
|
|
683 if (p->hinshi)
|
|
684 {
|
|
685 (void) Wscpy (p->hinshi, hin);
|
|
686 p->tandesc = (Wchar *) malloc ((Wslen (tan) + 1) * sizeof (Wchar));
|
|
687 if (p->tandesc)
|
|
688 {
|
|
689 (void) Wscpy (p->tandesc, tan);
|
|
690 p->yomdesc = (Wchar *) malloc ((Wslen (yom) + 1) * sizeof (Wchar));
|
|
691 if (p->yomdesc)
|
|
692 {
|
|
693 (void) Wscpy (p->yomdesc, yom);
|
|
694 return p;
|
|
695 }
|
|
696 free (p->tandesc);
|
|
697 }
|
|
698 free (p->hinshi);
|
|
699 }
|
|
700 free (p);
|
|
701 }
|
|
702 malloc_failed ();
|
|
703 return (struct descpack *) 0;
|
|
704 }
|
|
705
|
|
706 /* ルールの探索 */
|
|
707
|
|
708 static struct descpack *
|
|
709 searchdesc (hin)
|
|
710 Wchar *hin;
|
|
711 {
|
|
712 struct descpack *p, **pp;
|
|
713 Wchar *s;
|
|
714 int key = 0;
|
|
715
|
|
716 for (s = hin; *s; s++)
|
|
717 key += (int) *s;
|
|
718 key = ((unsigned) key & HINSHIBUFINDEXMASK);
|
|
719 for (pp = description + key; p = *pp; pp = &(p->next))
|
|
720 {
|
|
721 if (!Wscmp (p->hinshi, hin))
|
|
722 {
|
|
723 return p;
|
|
724 }
|
|
725 }
|
|
726 return (struct descpack *) 0;
|
|
727 }
|
|
728
|
|
729 static void
|
|
730 store_description ()
|
|
731 {
|
|
732 FILE *f;
|
|
733 Wchar readbuf[READBUFSIZE], *hin, *tan, *yom, *s;
|
|
734
|
|
735 if (!description_table)
|
|
736 {
|
|
737 return;
|
|
738 }
|
|
739
|
|
740 f = fopen (description_table, "r");
|
|
741 if (!f)
|
|
742 {
|
|
743 (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, description_table);
|
|
744 exit (1);
|
|
745 }
|
|
746 while (s = Fgetws (readbuf, READBUFSIZE, f))
|
|
747 {
|
|
748 Wchar nl[1];
|
|
749
|
|
750 nl[0] = (Wchar) 0;
|
|
751 hin = tan = yom = nl;
|
|
752 hin = extstr (s, &s, 0);
|
|
753 if (*hin)
|
|
754 {
|
|
755 tan = extstr (s, &s, 0);
|
|
756 if (*tan)
|
|
757 {
|
|
758 yom = extstr (s, &s, 0);
|
|
759 }
|
|
760 }
|
|
761
|
|
762 interndesc (hin, tan, yom);
|
|
763 }
|
|
764 (void) fclose (f);
|
|
765 }
|
|
766
|
|
767 struct kindpack kinds[sizeof (long) * 8];
|
|
768 static int nkinds;
|
|
769
|
|
770 #define KIHONBIT 1L
|
|
771
|
|
772 /* 種別の登録 */
|
|
773
|
|
774 static long
|
|
775 internkind (s)
|
|
776 Wchar *s;
|
|
777 {
|
|
778 int i;
|
|
779 Wchar *p;
|
|
780
|
|
781 p = findslash (s);
|
|
782 if (p)
|
|
783 {
|
|
784 long res;
|
|
785
|
|
786 *p = (Wchar) '\0';
|
|
787 res = internkind (s);
|
|
788 res |= internkind (p + 1);
|
|
789 return res;
|
|
790 }
|
|
791 else
|
|
792 {
|
|
793 for (i = 0; i < nkinds; i++)
|
|
794 {
|
|
795 if (!Wscmp (s, kinds[i].kind))
|
|
796 {
|
|
797 return kinds[i].kindbit;
|
|
798 }
|
|
799 }
|
|
800 if (nkinds < (sizeof (long) * 8) && (kinds[nkinds].kind = (Wchar *) malloc ((Wslen (s) + 1) * sizeof (Wchar))))
|
|
801 {
|
|
802 (void) Wscpy (kinds[nkinds].kind, s);
|
|
803 kinds[nkinds].kindbit = 1 << nkinds;
|
|
804 return kinds[nkinds++].kindbit;
|
|
805 }
|
|
806 return 0;
|
|
807 }
|
|
808 }
|
|
809
|
|
810 /* 種別の一覧の出力 */
|
|
811
|
|
812 static void
|
|
813 listkinds ()
|
|
814 {
|
|
815 int i;
|
|
816
|
|
817 for (i = 0; i < nkinds; i++)
|
|
818 {
|
|
819 Fputws (kinds[i].kind, stdout);
|
|
820 putchar ('\n');
|
|
821 }
|
|
822 }
|
|
823
|
|
824 static int
|
|
825 kindcompar (k1, k2)
|
|
826 struct kindpack *k1, *k2;
|
|
827 {
|
|
828 return Wscmp (k1->kind, k2->kind);
|
|
829 }
|
|
830
|
|
831 static void
|
|
832 sortkind ()
|
|
833 {
|
|
834 qsort (kinds, nkinds, sizeof (struct kindpack), kindcompar);
|
|
835 }
|
|
836
|
|
837 static struct dicpack *dic[DICBUFSIZE], **pdic;
|
|
838 static int ndicentries = 0;
|
|
839
|
|
840 /*
|
|
841
|
|
842 intern -- 辞書エントリの検索/登録
|
|
843
|
|
844 第6引数の stat としてヌルでないアドレスが指定された場合には、同じエントリ
|
|
845 が登録されていない場合には登録を行う。アドレスがヌルの場合には登録しない。
|
|
846
|
|
847 flags によっていろいろと指定をする。(以下を見てね)。
|
|
848
|
|
849 hinshi に 0 を渡してはいけない。kind は 0 を渡しても可だが、-m の時じゃない
|
|
850 マッチはしないので注意。
|
|
851
|
|
852 */
|
|
853
|
|
854 /* flags */
|
|
855 #define IGNORE_HINSHI 1L
|
|
856 #define IGNORE_KIND 2L
|
|
857
|
|
858 static struct dicpack *
|
|
859 intern (key, yomi, kouho, hinshi, hindo, kind, stat, flags)
|
|
860 int key, hindo, *stat;
|
|
861 Wchar *yomi, *kouho, *hinshi;
|
|
862 long kind, flags;
|
|
863 {
|
|
864 struct dicpack *p, **pp;
|
|
865 struct descpack *dp;
|
|
866 Wchar nl[1], *yomdesc = nl, *tandesc = nl;
|
|
867 Wchar *yom = (Wchar *) 0, *tan = (Wchar *) 0, *dhinshi, *dh;
|
|
868
|
|
869 nl[0] = (Wchar) '\0';
|
|
870
|
|
871 if (description_table)
|
|
872 {
|
|
873 dhinshi = dh = hinshi; /* かんなの品詞を探す */
|
|
874 while (*dh)
|
|
875 {
|
|
876 if (*dh++ == (Wchar) '/')
|
|
877 {
|
|
878 dhinshi = dh;
|
|
879 }
|
|
880 }
|
|
881 dp = searchdesc (dhinshi);
|
|
882 if (dp)
|
|
883 {
|
|
884 yomdesc = dp->yomdesc;
|
|
885 tandesc = dp->tandesc;
|
|
886 if (Wslen (yomdesc))
|
|
887 {
|
|
888 Wchar *t;
|
|
889 t = (Wchar *) malloc ((Wslen (yomi) + Wslen (yomdesc) + 1) * sizeof (Wchar));
|
|
890 if (t)
|
|
891 {
|
|
892 Wscpy (t, yomi);
|
|
893 yom = yomi = t;
|
|
894 Wscpy (yomi + Wslen (yomi), yomdesc);
|
|
895 }
|
|
896 }
|
|
897 if (Wslen (tandesc))
|
|
898 {
|
|
899 Wchar *t;
|
|
900 t = (Wchar *) malloc ((Wslen (kouho) + Wslen (tandesc) + 1) * sizeof (Wchar));
|
|
901 if (t)
|
|
902 {
|
|
903 Wscpy (t, kouho);
|
|
904 tan = kouho = t;
|
|
905 Wscpy (kouho + Wslen (kouho), tandesc);
|
|
906 }
|
|
907 }
|
|
908 }
|
|
909 else
|
|
910 {
|
|
911 char foo[64];
|
|
912
|
|
913 fprintf (stderr, "no description rule for ");
|
|
914 Wcstombs (foo, dhinshi, 64);
|
|
915 fprintf (stderr, "%s.\n", foo);
|
|
916 }
|
|
917 }
|
|
918
|
|
919 key = ((unsigned) key & DICBUFINDEXMASK);
|
|
920 for (pp = dic + key; p = *pp; pp = &(p->next))
|
|
921 {
|
|
922 if (!Wscmp (p->yomi, yomi) && !Wscmp (p->tango, kouho) && ((flags & IGNORE_HINSHI) || !Wscmp (p->hinshi->hinshi, hinshi)) && ((flags & IGNORE_KIND) || ((p->kind & kind) == kind)))
|
|
923 {
|
|
924 /* match */
|
|
925 if (stat)
|
|
926 *stat = FOUND;
|
|
927 if (yom)
|
|
928 free (yom);
|
|
929 if (tan)
|
|
930 free (tan);
|
|
931 return p;
|
|
932 }
|
|
933 }
|
|
934 if (stat)
|
|
935 {
|
|
936 p = (struct dicpack *) malloc (sizeof (struct dicpack));
|
|
937 if (p)
|
|
938 {
|
|
939 *pp = p;
|
|
940 (void) bzero (p, sizeof (struct dicpack));
|
|
941 p->yomi = (Wchar *) malloc ((Wslen (yomi) + 1) * sizeof (Wchar));
|
|
942 if (p->yomi)
|
|
943 {
|
|
944 (void) Wscpy (p->yomi, yomi);
|
|
945 p->tango = (Wchar *) malloc ((Wslen (kouho) + 1) * sizeof (Wchar));
|
|
946 if (p->tango)
|
|
947 {
|
|
948 (void) Wscpy (p->tango, kouho);
|
|
949 p->hinshi = internhinshi (hinshi, 1);
|
|
950 if (p->hinshi)
|
|
951 {
|
|
952 p->hindo = hindo;
|
|
953 *stat = CREATE;
|
|
954 ndicentries++;
|
|
955 p->kind = kind;
|
|
956 p->extdata = (Wchar *) 0;
|
|
957 if (yom)
|
|
958 free (yom);
|
|
959 if (tan)
|
|
960 free (tan);
|
|
961 return p;
|
|
962 }
|
|
963 free (p->tango);
|
|
964 }
|
|
965 free (p->yomi);
|
|
966 }
|
|
967 free (p);
|
|
968 }
|
|
969 malloc_failed ();
|
|
970 }
|
|
971 if (yom)
|
|
972 free (yom);
|
|
973 if (tan)
|
|
974 free (tan);
|
|
975 return (struct dicpack *) 0;
|
|
976 }
|
|
977
|
|
978 /* 登録されているエントリに対して fn を実行する */
|
|
979
|
|
980 static void
|
|
981 for_all_interned (fn)
|
|
982 void (*fn) ();
|
|
983 {
|
|
984 int i;
|
|
985 struct dicpack *p;
|
|
986
|
|
987 for (i = 0; i < DICBUFSIZE; i++)
|
|
988 {
|
|
989 for (p = dic[i]; p; p = p->next)
|
|
990 {
|
|
991 (*fn) (p);
|
|
992 }
|
|
993 }
|
|
994 }
|
|
995
|
|
996 static void
|
|
997 storepd (file)
|
|
998 FILE *file;
|
|
999 {
|
|
1000 Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind;
|
|
1001 int nhindo, key, tkey, stat;
|
|
1002 long kindbit;
|
|
1003 struct dicpack *dicentry;
|
|
1004
|
|
1005 while (p = Fgetws (readbuf, READBUFSIZE, file))
|
|
1006 {
|
|
1007 key = 0;
|
|
1008 yomi = extstr (p, &p, &tkey);
|
|
1009 key += tkey;
|
|
1010 kouho = extstr (p, &p, &tkey);
|
|
1011 key += tkey;
|
|
1012 hinshi = extstr (p, &p, 0);
|
|
1013 hindo = extstr (p, &p, 0);
|
|
1014 nhindo = Watoi (hindo);
|
|
1015
|
|
1016 kind = extstr (p, 0, 0);
|
|
1017 if (*kind)
|
|
1018 {
|
|
1019 kindbit = internkind (kind);
|
|
1020 }
|
|
1021 else
|
|
1022 {
|
|
1023 kindbit = KIHONBIT;
|
|
1024 }
|
|
1025
|
|
1026 dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, &stat, IGNORE_KIND);
|
|
1027 if (dicentry)
|
|
1028 {
|
|
1029 dicentry->kind |= kindbit;
|
|
1030 }
|
|
1031 }
|
|
1032 }
|
|
1033
|
|
1034 static void
|
|
1035 comparepd (file)
|
|
1036 FILE *file;
|
|
1037 {
|
|
1038 Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind;
|
|
1039 int nhindo, key, tkey, stat, *statp = &stat;
|
|
1040 struct dicpack *dicentry;
|
|
1041 long kindbit, flags = 0L;
|
|
1042
|
|
1043 while (p = Fgetws (readbuf, READBUFSIZE, file))
|
|
1044 {
|
|
1045 key = 0;
|
|
1046 yomi = extstr (p, &p, &tkey);
|
|
1047 key += tkey;
|
|
1048 kouho = extstr (p, &p, &tkey);
|
|
1049 key += tkey;
|
|
1050 hinshi = extstr (p, &p, 0);
|
|
1051 if (ignore_hinshi_to_compare)
|
|
1052 {
|
|
1053 flags |= IGNORE_HINSHI;
|
|
1054 }
|
|
1055 hindo = extstr (p, &p, 0);
|
|
1056 nhindo = Watoi (hindo);
|
|
1057
|
|
1058 kind = extstr (p, 0, 0);
|
|
1059 if (*kind)
|
|
1060 {
|
|
1061 kindbit = internkind (kind);
|
|
1062 }
|
|
1063 else
|
|
1064 {
|
|
1065 kindbit = KIHONBIT;
|
|
1066 }
|
|
1067 if (merge_kind || merge_sj3)
|
|
1068 {
|
|
1069 flags |= IGNORE_KIND;
|
|
1070 }
|
|
1071 if (copy_frequency)
|
|
1072 {
|
|
1073 statp = (int *) 0;
|
|
1074 }
|
|
1075
|
|
1076 dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, statp, flags);
|
|
1077
|
|
1078 if (dicentry)
|
|
1079 {
|
|
1080 if (copy_frequency)
|
|
1081 {
|
|
1082 dicentry->hindo = nhindo;
|
|
1083 dicentry->flags &= ~COMMON;
|
|
1084 }
|
|
1085 else if (ignore_hinshi_to_compare && stat == FOUND)
|
|
1086 {
|
|
1087 /* この場合、同じキーのチェーンが返る */
|
|
1088 struct dicpack *pd;
|
|
1089
|
|
1090 for (pd = dicentry; pd; pd = pd->next)
|
|
1091 {
|
|
1092 if (!Wscmp (pd->yomi, yomi) && !Wscmp (pd->tango, kouho))
|
|
1093 {
|
|
1094 pd->flags |= COMMON;
|
|
1095 if (!merge_sj3)
|
|
1096 {
|
|
1097 pd->kind |= kindbit;
|
|
1098 }
|
|
1099
|
|
1100 if (merge_sj3)
|
|
1101 {
|
|
1102 int len = 0;
|
|
1103 Wchar *dat;
|
|
1104
|
|
1105 if (pd->extdata)
|
|
1106 {
|
|
1107 len = Wslen (pd->extdata);
|
|
1108 }
|
|
1109 dat = (Wchar *) malloc ((Wslen (hinshi) + 1 + len) * sizeof (Wchar));
|
|
1110 if (dat)
|
|
1111 {
|
|
1112 if (len)
|
|
1113 {
|
|
1114 (void) Wscpy (dat, pd->extdata);
|
|
1115 (void) free ((char *) pd->extdata);
|
|
1116 }
|
|
1117 (void) Wscpy (dat + len, hinshi);
|
|
1118 pd->extdata = dat;
|
|
1119 }
|
|
1120 }
|
|
1121 }
|
|
1122 }
|
|
1123 }
|
|
1124 else
|
|
1125 {
|
|
1126 dicentry->kind |= kindbit;
|
|
1127 if (stat == FOUND)
|
|
1128 {
|
|
1129 dicentry->flags |= COMMON;
|
|
1130 }
|
|
1131 else
|
|
1132 { /* CREATE */
|
|
1133 dicentry->flags |= NEW;
|
|
1134 }
|
|
1135 }
|
|
1136 }
|
|
1137 }
|
|
1138 }
|
|
1139
|
|
1140 static void
|
|
1141 canna_output (cf, p, h, n)
|
|
1142 FILE *cf;
|
|
1143 struct dicpack *p;
|
|
1144 Wchar *h;
|
|
1145 int n;
|
|
1146 {
|
|
1147 for (; n-- > 0; h += Wslen (h) + 1)
|
|
1148 {
|
|
1149 Fputws (p->yomi, cf);
|
|
1150 (void) putc (' ', cf);
|
|
1151 Fputws (h, cf);
|
|
1152 if (p->hindo)
|
|
1153 {
|
|
1154 (void) fprintf (cf, "*%d", p->hindo);
|
|
1155 }
|
|
1156 (void) putc (' ', cf);
|
|
1157 Fputws (p->tango, cf);
|
|
1158 (void) putc ('\n', cf);
|
|
1159 }
|
|
1160 }
|
|
1161
|
|
1162 static void
|
|
1163 entry_out (cf, p, h, n, ex)
|
|
1164 FILE *cf;
|
|
1165 struct dicpack *p;
|
|
1166 Wchar *h;
|
|
1167 int n;
|
|
1168 Wchar *ex;
|
|
1169 {
|
|
1170 int i, f = 1;
|
|
1171 long b;
|
|
1172
|
|
1173 for (; n-- > 0; h += Wslen (h) + 1)
|
|
1174 {
|
|
1175 Fputws (p->yomi, cf);
|
|
1176 (void) putc (' ', cf);
|
|
1177 Fputws (p->tango, cf);
|
|
1178 (void) putc (' ', cf);
|
|
1179 if (merge_sj3 && ex)
|
|
1180 {
|
|
1181 Fputws (ex, cf);
|
|
1182 (void) putc ('/', cf);
|
|
1183 }
|
|
1184 Fputws (h, cf);
|
|
1185 if (!sj3_type_output)
|
|
1186 {
|
|
1187 (void) fprintf (cf, " %d", p->hindo);
|
|
1188 }
|
|
1189
|
|
1190 if (!wnn_type_output)
|
|
1191 {
|
|
1192 if (bunrui)
|
|
1193 {
|
|
1194 (void) printf (" %s", bunrui);
|
|
1195 }
|
|
1196 else
|
|
1197 {
|
|
1198 if (specific_kind)
|
|
1199 {
|
|
1200 b = (specific_kind & p->kind);
|
|
1201 }
|
|
1202 else
|
|
1203 {
|
|
1204 b = p->kind;
|
|
1205 }
|
|
1206 if (b != KIHONBIT)
|
|
1207 { /* 基本だけだったら何も書かない */
|
|
1208 for (i = 0; i < nkinds; i++)
|
|
1209 {
|
|
1210 if (b & kinds[i].kindbit)
|
|
1211 {
|
|
1212 if (f)
|
|
1213 {
|
|
1214 (void) putc (' ', cf);
|
|
1215 f = 0;
|
|
1216 }
|
|
1217 else
|
|
1218 {
|
|
1219 (void) putc ('/', cf);
|
|
1220 }
|
|
1221 Fputws (kinds[i].kind, cf);
|
|
1222 }
|
|
1223 }
|
|
1224 }
|
|
1225 }
|
|
1226 }
|
|
1227 (void) putc ('\n', cf);
|
|
1228 }
|
|
1229 }
|
|
1230
|
|
1231 /* p で表されるエントリをファイル cf に出力する */
|
|
1232
|
|
1233 static void
|
|
1234 printentry (cf, p)
|
|
1235 FILE *cf;
|
|
1236 struct dicpack *p;
|
|
1237 {
|
|
1238 if (specific_kind && !(p->kind & specific_kind))
|
|
1239 {
|
|
1240 return;
|
|
1241 }
|
|
1242
|
|
1243 if (extract_kana && !all_kana (p->tango))
|
|
1244 {
|
|
1245 return;
|
|
1246 }
|
|
1247
|
|
1248 if (selhinshi && !p->hinshi->hinshi[0])
|
|
1249 {
|
|
1250 return;
|
|
1251 }
|
|
1252
|
|
1253 if (canna_type_output)
|
|
1254 {
|
|
1255 canna_output (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis);
|
|
1256 }
|
|
1257 else
|
|
1258 {
|
|
1259 entry_out (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis, p->extdata);
|
|
1260 }
|
|
1261 }
|
|
1262
|
|
1263 static void
|
|
1264 showdeleted (p)
|
|
1265 struct dicpack *p;
|
|
1266 {
|
|
1267 if (!(p->flags & COMMON))
|
|
1268 {
|
|
1269 (void) printf ("- ");
|
|
1270 printentry (stdout, p);
|
|
1271 }
|
|
1272 }
|
|
1273
|
|
1274 static void
|
|
1275 showentry (pd, n)
|
|
1276 struct dicpack **pd;
|
|
1277 int n;
|
|
1278 {
|
|
1279 FILE *cf = (FILE *) 0, *of = (FILE *) 0, *nf = (FILE *) 0;
|
|
1280 struct dicpack *p;
|
|
1281 int i;
|
|
1282
|
|
1283 if (common_out)
|
|
1284 {
|
|
1285 if (common_out[0] != '-' || common_out[1])
|
|
1286 {
|
|
1287 cf = fopen (common_out, "w");
|
|
1288 if (!cf)
|
|
1289 {
|
|
1290 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, common_out);
|
|
1291 exit (1);
|
|
1292 }
|
|
1293 }
|
|
1294 else
|
|
1295 {
|
|
1296 cf = stdout;
|
|
1297 }
|
|
1298 }
|
|
1299 if (old_out)
|
|
1300 {
|
|
1301 if (old_out[0] != '-' || old_out[1])
|
|
1302 {
|
|
1303 of = fopen (old_out, "w");
|
|
1304 if (!of)
|
|
1305 {
|
|
1306 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, old_out);
|
|
1307 exit (1);
|
|
1308 }
|
|
1309 }
|
|
1310 else
|
|
1311 {
|
|
1312 of = stdout;
|
|
1313 }
|
|
1314 }
|
|
1315 if (new_out)
|
|
1316 {
|
|
1317 if (new_out[0] != '-' || new_out[1])
|
|
1318 {
|
|
1319 nf = fopen (new_out, "w");
|
|
1320 if (!nf)
|
|
1321 {
|
|
1322 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, new_out);
|
|
1323 exit (1);
|
|
1324 }
|
|
1325 }
|
|
1326 else
|
|
1327 {
|
|
1328 nf = stdout;
|
|
1329 }
|
|
1330 }
|
|
1331
|
|
1332 for (i = 0; i < n; i++)
|
|
1333 {
|
|
1334 p = pd[i];
|
|
1335 if (compare)
|
|
1336 {
|
|
1337 if (p->flags & COMMON)
|
|
1338 {
|
|
1339 if (cf)
|
|
1340 {
|
|
1341 printentry (cf, p);
|
|
1342 }
|
|
1343 }
|
|
1344 else if (p->flags & NEW)
|
|
1345 {
|
|
1346 if (nf)
|
|
1347 {
|
|
1348 printentry (nf, p);
|
|
1349 }
|
|
1350 }
|
|
1351 else
|
|
1352 {
|
|
1353 if (of)
|
|
1354 {
|
|
1355 printentry (of, p);
|
|
1356 }
|
|
1357 }
|
|
1358 }
|
|
1359 else
|
|
1360 { /* just print the normalized dictionary */
|
|
1361 printentry (stdout, p);
|
|
1362 }
|
|
1363 }
|
|
1364 }
|
|
1365
|
|
1366 static int
|
|
1367 diccompar (p1, p2)
|
|
1368 struct dicpack **p1, **p2;
|
|
1369 {
|
|
1370 int n;
|
|
1371 if (n = Wscmp ((*p1)->yomi, (*p2)->yomi))
|
|
1372 {
|
|
1373 return n;
|
|
1374 }
|
|
1375 else if (n = Wscmp ((*p1)->tango, (*p2)->tango))
|
|
1376 {
|
|
1377 return n;
|
|
1378 }
|
|
1379 else if (n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi))
|
|
1380 {
|
|
1381 return n;
|
|
1382 }
|
|
1383 else
|
|
1384 { /* impossible */
|
|
1385 return 0;
|
|
1386 }
|
|
1387 }
|
|
1388
|
|
1389 static int
|
|
1390 dichindocompar (p1, p2)
|
|
1391 struct dicpack **p1, **p2;
|
|
1392 {
|
|
1393 int n;
|
|
1394 if (n = Wscmp ((*p1)->yomi, (*p2)->yomi))
|
|
1395 {
|
|
1396 return n;
|
|
1397 }
|
|
1398 else if (n = ((*p2)->hindo - (*p1)->hindo))
|
|
1399 {
|
|
1400 return n;
|
|
1401 }
|
|
1402 else if (n = Wscmp ((*p1)->tango, (*p2)->tango))
|
|
1403 {
|
|
1404 return n;
|
|
1405 }
|
|
1406 else if (n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi))
|
|
1407 {
|
|
1408 return n;
|
|
1409 }
|
|
1410 else
|
|
1411 { /* impossible */
|
|
1412 return 0;
|
|
1413 }
|
|
1414 }
|
|
1415
|
|
/*
 * Remove the first n pointers from the count-element vector argv by
 * moving argv[n] .. argv[count - 1] down to argv[0] .. argv[count - n - 1].
 *
 * The last n slots keep whatever they held before, and nothing at or
 * beyond argv[count] is read or written.  When n >= count nothing is
 * moved.
 */
void
shrinkargs (argv, n, count)
     char **argv;
     int n, count;
{
  int src;

  for (src = n; src < count; src++)
    {
      argv[src - n] = argv[src];
    }
}
|
|
1428
|
|
1429 static void
|
|
1430 parseargs (argc, argv)
|
|
1431 int argc;
|
|
1432 char *argv[];
|
|
1433 {
|
|
1434 int i;
|
|
1435
|
|
1436 for (program = argv[0] + strlen (argv[0]); argv[0] < program; program--)
|
|
1437 {
|
|
1438 if (program[0] == '/')
|
|
1439 {
|
|
1440 program++;
|
|
1441 break;
|
|
1442 }
|
|
1443 }
|
|
1444
|
|
1445 for (i = 1; i < argc;)
|
|
1446 {
|
|
1447 if (argv[i][0] == '-' && argv[i][2] == '\0')
|
|
1448 {
|
|
1449 switch (argv[i][1])
|
|
1450 {
|
|
1451 case '1':
|
|
1452 case '2':
|
|
1453 case '3':
|
|
1454 selhinshi = argv[i][1] - '0';
|
|
1455 shrinkargs (argv + i, 1, argc - i);
|
|
1456 argc -= 1;
|
|
1457 break;
|
|
1458
|
|
1459 case 'b':
|
|
1460 bunrui = argv[i + 1];
|
|
1461 shrinkargs (argv + i, 2, argc - i);
|
|
1462 argc -= 2;
|
|
1463 break;
|
|
1464
|
|
1465 case 'c':
|
|
1466 common_out = argv[i + 1];
|
|
1467 shrinkargs (argv + i, 2, argc - i);
|
|
1468 argc -= 2;
|
|
1469 break;
|
|
1470
|
|
1471 case 'd':
|
|
1472 description_table = argv[i + 1];
|
|
1473 shrinkargs (argv + i, 2, argc - i);
|
|
1474 argc -= 2;
|
|
1475 break;
|
|
1476
|
|
1477 case 'f':
|
|
1478 copy_frequency = 1;
|
|
1479 shrinkargs (argv + i, 1, argc - i);
|
|
1480 argc -= 1;
|
|
1481 break;
|
|
1482
|
|
1483 case 'h':
|
|
1484 ignore_hinshi_to_compare = 1;
|
|
1485 shrinkargs (argv + i, 1, argc - i);
|
|
1486 argc -= 1;
|
|
1487 break;
|
|
1488
|
|
1489 case 'i':
|
|
1490 canna_type_output = 1;
|
|
1491 wnn_type_output = 0;
|
|
1492 shrinkargs (argv + i, 1, argc - i);
|
|
1493 argc -= 1;
|
|
1494 break;
|
|
1495
|
|
1496 case 'j':
|
|
1497 extract_kana = 1;
|
|
1498 shrinkargs (argv + i, 1, argc - i);
|
|
1499 argc -= 1;
|
|
1500 break;
|
|
1501
|
|
1502 case 'k':
|
|
1503 {
|
|
1504 Wchar buf[READBUFSIZE];
|
|
1505
|
|
1506 (void) Mbstowcs (buf, argv[i + 1], READBUFSIZE);
|
|
1507 specific_kind |= internkind (buf);
|
|
1508 }
|
|
1509 shrinkargs (argv + i, 2, argc - i);
|
|
1510 argc -= 2;
|
|
1511 break;
|
|
1512
|
|
1513 case 'l':
|
|
1514 list_kinds = 1;
|
|
1515 shrinkargs (argv + i, 1, argc - i);
|
|
1516 argc -= 1;
|
|
1517 break;
|
|
1518
|
|
1519 case 'm':
|
|
1520 merge_kind = 1;
|
|
1521 shrinkargs (argv + i, 1, argc - 1);
|
|
1522 argc -= 1;
|
|
1523 break;
|
|
1524
|
|
1525 case 'n':
|
|
1526 new_out = argv[i + 1];
|
|
1527 shrinkargs (argv + i, 2, argc - i);
|
|
1528 argc -= 2;
|
|
1529 break;
|
|
1530
|
|
1531 case 'o':
|
|
1532 old_out = argv[i + 1];
|
|
1533 shrinkargs (argv + i, 2, argc - i);
|
|
1534 argc -= 2;
|
|
1535 break;
|
|
1536
|
|
1537 case 'p':
|
|
1538 sort_by_frequency = 1;
|
|
1539 shrinkargs (argv + i, 1, argc - i);
|
|
1540 argc -= 1;
|
|
1541 break;
|
|
1542
|
|
1543 case 'r':
|
|
1544 hinshi_table = argv[i + 1];
|
|
1545 shrinkargs (argv + i, 2, argc - i);
|
|
1546 argc -= 2;
|
|
1547 hinshi_direction = REVERSE;
|
|
1548 break;
|
|
1549
|
|
1550 case 's':
|
|
1551 hinshi_table = argv[i + 1];
|
|
1552 shrinkargs (argv + i, 2, argc - i);
|
|
1553 argc -= 2;
|
|
1554 break;
|
|
1555
|
|
1556 case 'v':
|
|
1557 sj3_type_output = 1;
|
|
1558 wnn_type_output = 1; /* Wnn 形式と似ているので立てる */
|
|
1559 shrinkargs (argv + i, 1, argc - i);
|
|
1560 argc -= 1;
|
|
1561 break;
|
|
1562
|
|
1563 case 'w':
|
|
1564 canna_type_output = 0;
|
|
1565 sj3_type_output = 0;
|
|
1566 wnn_type_output = 1;
|
|
1567 shrinkargs (argv + i, 1, argc - i);
|
|
1568 argc -= 1;
|
|
1569 break;
|
|
1570
|
|
1571 case 'x':
|
|
1572 merge_sj3 = 1;
|
|
1573 ignore_hinshi_to_compare = 1;
|
|
1574 shrinkargs (argv + i, 1, argc - i);
|
|
1575 argc -= 1;
|
|
1576 break;
|
|
1577
|
|
1578 default:
|
|
1579 i++;
|
|
1580 break;
|
|
1581 }
|
|
1582 }
|
|
1583 else
|
|
1584 {
|
|
1585 i++;
|
|
1586 }
|
|
1587 }
|
|
1588
|
|
1589 if (argc < 2)
|
|
1590 {
|
|
1591 (void) fprintf (stderr, "Usage: %s dic1 [dic2] [-c filecommon] ...\n", program);
|
|
1592 exit (1);
|
|
1593 }
|
|
1594
|
|
1595 if (argv[1][0] != '-' || argv[1][1])
|
|
1596 {
|
|
1597 in1 = fopen (argv[1], "r");
|
|
1598 if (!in1)
|
|
1599 {
|
|
1600 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[1]);
|
|
1601 exit (1);
|
|
1602 }
|
|
1603 }
|
|
1604 if (argc == 3)
|
|
1605 {
|
|
1606 if (argv[2][0] != '-' || argv[2][1])
|
|
1607 {
|
|
1608 in2 = fopen (argv[2], "r");
|
|
1609 if (!in2)
|
|
1610 {
|
|
1611 (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[2]);
|
|
1612 exit (1);
|
|
1613 }
|
|
1614 }
|
|
1615 }
|
|
1616 else
|
|
1617 {
|
|
1618 in2 = (FILE *) 0;
|
|
1619 }
|
|
1620 if (description_table)
|
|
1621 {
|
|
1622 store_description ();
|
|
1623 }
|
|
1624 }
|
|
1625
|
|
1626 static Wchar kihonh[] = {
|
|
1627 (Wchar) 'k', (Wchar) 'i', (Wchar) 'h', (Wchar) 'o', (Wchar) 'n', (Wchar) 0,
|
|
1628 };
|
|
1629
|
|
1630 int
|
|
1631 main (argc, argv)
|
|
1632 int argc;
|
|
1633 char *argv[];
|
|
1634 {
|
|
1635 #ifndef POD_WCHAR
|
|
1636 setlocale (LC_ALL, "");
|
|
1637 #endif
|
|
1638
|
|
1639 in1 = in2 = stdin;
|
|
1640 (void) internkind (kihonh); /* 基本辞書用。1L として登録 */
|
|
1641 parseargs (argc, argv);
|
|
1642 storepd (in1);
|
|
1643 (void) fclose (in1);
|
|
1644
|
|
1645 if (in2)
|
|
1646 {
|
|
1647 compare = 1;
|
|
1648 comparepd (in2);
|
|
1649 (void) fclose (in2);
|
|
1650 }
|
|
1651
|
|
1652 if (list_kinds)
|
|
1653 {
|
|
1654 listkinds ();
|
|
1655 exit (0);
|
|
1656 }
|
|
1657
|
|
1658 if (selhinshi)
|
|
1659 {
|
|
1660 select_hinshi (selhinshi);
|
|
1661 }
|
|
1662 else if (hinshi_table)
|
|
1663 {
|
|
1664 replace_hinshi ();
|
|
1665 }
|
|
1666
|
|
1667 pdic = (struct dicpack **) malloc (ndicentries * sizeof (struct dicpack *));
|
|
1668 if (pdic)
|
|
1669 {
|
|
1670 int i, j;
|
|
1671 struct dicpack *p;
|
|
1672
|
|
1673 for (i = 0, j = 0; i < DICBUFSIZE; i++)
|
|
1674 {
|
|
1675 for (p = dic[i]; p; p = p->next)
|
|
1676 {
|
|
1677 pdic[j++] = p;
|
|
1678 }
|
|
1679 }
|
|
1680 if (sort_by_frequency)
|
|
1681 {
|
|
1682 qsort (pdic, ndicentries, sizeof (struct dicpack *), dichindocompar);
|
|
1683 }
|
|
1684 else
|
|
1685 {
|
|
1686 qsort (pdic, ndicentries, sizeof (struct dicpack *), diccompar);
|
|
1687 }
|
|
1688 sortkind ();
|
|
1689 showentry (pdic, ndicentries);
|
|
1690 }
|
|
1691 else
|
|
1692 {
|
|
1693 malloc_failed ();
|
|
1694 }
|
|
1695 exit (0);
|
|
1696 }
|