Mercurial > freewnn
view PubdicPlus/pod.c @ 7:6ab41ec6f895
fix dtoa crash when it encounters malformed entry.
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Tue, 18 Dec 2007 23:25:17 +0900 |
parents | bbc77ca4def5 |
children | 466fe6732d8d |
line wrap: on
line source
/* Copyright 1994 Pubdic Project. * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without * fee, provided that the above copyright notice appear in all copies * and that both that copyright notice and this permission notice * appear in supporting documentation, and that the name of Pubdic * Project not be used in advertising or publicity pertaining to * distribution of the software without specific, written prior * permission. Pubdic Project makes no representations about the * suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * PUBDIC PROJECT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN * NO EVENT SHALL PUBDIC PROJECT BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ #ifndef lint static char rcsid[] = "$Id: pod.c,v 1.7 2005/12/10 18:50:43 aonoto Exp $"; #endif #ifdef HAVE_CONFIG_H # include <config.h> #endif #include <stdio.h> #if STDC_HEADERS # include <stdlib.h> # include <stddef.h> # include <string.h> #else # if HAVE_MALLOC_H # include <malloc.h> # endif # if HAVE_STRINGS_H # include <strings.h> # endif #endif /* STDC_HEADERS */ #define POD_WCHAR #ifdef POD_WCHAR typedef unsigned short Wchar; #else #include <locale.h> #include <widec.h> #define Wchar wchar_t #endif #if !(HAVE_BZERO) && (HAVE_MEMSET) # define bzero(a, c) memset(a, 0, c) #endif static char *program; static int compare, ignore_hinshi_to_compare, sort_by_frequency, merge_sj3; static int merge_kind, wnn_type_output, canna_type_output, sj3_type_output; static int list_kinds; static int copy_frequency, extract_kana = 0; static long specific_kind; static FILE *in1, *in2; static char *common_out, *old_out, *new_out, *hinshi_table, *bunrui; static char *description_table; static int selhinshi = 0; /* hinshi_direction */ #define INORDER 0 #define REVERSE 1 static int hinshi_direction = INORDER; /* see above */ #define READBUFSIZE 128 #define DICBUFSIZE (2 << 13) #define DICBUFINDEXMASK (DICBUFSIZE - 1) #define HINSHIBUFSIZE (2 << 13) #define HINSHIBUFINDEXMASK (HINSHIBUFSIZE - 1) /* status of intern() */ #define FOUND 0 #define CREATE 1 /* 品詞を表す構造体 */ struct hinshipack { int nhinshis; Wchar *hinshi; unsigned flags; /* see below */ struct hinshipack *next; }; /* values of (struct hinshipack.)flags */ #define REPLACED 1 /* 終止形を追加するためのルールファイルの内部表現(だと思う) */ struct descpack { Wchar *hinshi, *tandesc, *yomdesc; struct descpack *next; }; /* エントリの種別を表す構造体その他 */ struct kindpack { Wchar *kind; long kindbit; }; /* 辞書を表す構造体 */ struct dicpack { Wchar *yomi, *tango; struct hinshipack *hinshi; int hindo; long kind; Wchar *extdata; unsigned flags; /* SEE BELOW */ struct dicpack *next; }; /* values of (struct dicpack.)flags */ #define COMMON 001 #define NEW 002 #if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) /* Prototype for C89 (or later) */ #ifdef POD_WCHAR size_t Mbstowcs (Wchar *d, char *ss, int n); size_t Wcstombs (char *d, Wchar *s, int n); int Wscmp (register Wchar *s1, register Wchar *s2); Wchar *Wscpy (Wchar *d, register Wchar *s); int Wslen (Wchar *s); int Watoi (Wchar *s); static void Fputws (Wchar *s, FILE *f); Wchar *Fgetws (Wchar *buf, int siz, FILE *f); #endif /* POD_WCHAR */ static int all_kana (Wchar *s); static Wchar *findslash (Wchar *s); static Wchar *extstr (Wchar *p, Wchar **pp, int *key_return); static void malloc_failed (void); static struct hinshipack *internhinshi (Wchar *str, int flag); static void replace_hinshi (void); static void select_hinshi (int n); static void freedesc (struct descpack *p); static struct descpack *interndesc (Wchar *hin, Wchar *tan, Wchar *yom); static struct descpack *searchdesc (Wchar *hin); static void store_description (void); static long internkind (Wchar *s); static void listkinds (void); static int kindcompar (struct kindpack *k1, struct kindpack *k2); static void sortkind (void); static struct dicpack *intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags); static void storepd (FILE *file); static void comparepd (FILE *file); static void canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n); static void entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex); static void printentry (FILE *cf, struct dicpack *p); static void showentry (struct dicpack **pd, int n); static int diccompar (struct dicpack **p1, struct dicpack **p2); static int dichindocompar (struct dicpack **p1, struct dicpack **p2); void shrinkargs (char **argv, int n, int count); static void parseargs (int argc, char *argv[]); #endif #ifndef POD_WCHAR # define Mbstowcs mbstowcs # define Wcstombs wcstombs # define Wscmp wscmp # define Wscpy wscpy # define Wslen wslen # define Fgetws fgetws # define Fputws fputws #else # define SS2 0x8e # define SS3 0x8f # define MSB 0x80 # define MSK 0x7f # define WCG0 0x0000 # define WCG1 0x8080 # define WCG2 0x0080 # define WCG3 0x8000 # define WCMSK 0x8080 size_t Mbstowcs (d, ss, n) Wchar *d; char *ss; int n; { register Wchar *p = d; register int ch; register unsigned char *s = (unsigned char *) ss; while ((ch = *s++) && (p - d < n)) { if (ch & MSB) { if (ch == SS2) { /* kana */ *p++ = (Wchar) * s++; } else if (ch == SS3) { *p++ = (Wchar) ((*s << 8) | (*(s + 1) & MSK)); s += 2; } else { *p++ = (Wchar) ((ch << 8) | (*s++ & 0xff)); } } else { *p++ = (Wchar) ch; } } *p = (Wchar) 0; return p - d; } size_t Wcstombs (d, s, n) char *d; Wchar *s; int n; { register char *p = d; register Wchar ch; while ((ch = *s++) && (p - d + 2 < n)) { switch (ch & WCMSK) { case WCG0: *p++ = (char) ch; break; case WCG1: *p++ = (char) ((ch >> 8) & 0xff); *p++ = (char) (ch & 0xff); break; case WCG2: *p++ = SS2; *p++ = (char) ch; break; case WCG3: *p++ = SS3; *p++ = (char) ((ch >> 8) & 0xff); *p++ = (char) ((ch & 0xff) | MSB); break; } } *p = '\0'; return p - d; } int Wscmp (s1, s2) register Wchar *s1, *s2; { register int res; /* 以下のコードはいささかトリッキーなので、説明を加えておこう。 以下ではこのコメント内にあるようなことをしたいわけである。 while (*s1 && *s2 && && *s1 == *s2) { s1++; s2++; } return *s1 - *s2; すなわち、s1 も s2 も EOS ('\0') を指していなくて、しかも値が 異なる間はそれぞれのポインタを進める。いずれかが EOS になるか、 値が違ってきた場合には、*s1 - *s2 を返す。 */ while (!(res = *s1 - *s2++) && *s1++) ; return res; } Wchar * Wscpy (d, s) Wchar *d; register Wchar *s; { register Wchar *p = d, ch; while (ch = *s++) { *p++ = ch; } *p = (Wchar) 0; return d; } int Wslen (s) Wchar *s; { register Wchar *p = s; while (*p) p++; return p - s; } int Watoi (s) Wchar *s; { register int res = 0; register Wchar ch; while ((ch = *s++) && ((Wchar) '0' <= ch) && (ch <= (Wchar) '9')) { res *= 10; res += ch - (Wchar) '0'; } return res; } static void Fputws (s, f) Wchar *s; FILE *f; { char buf[READBUFSIZE]; if (Wcstombs (buf, s, READBUFSIZE)) { (void) fputs (buf, f); } } Wchar * Fgetws (buf, siz, f) Wchar *buf; int siz; FILE *f; { char mbuf[READBUFSIZE], *p; p = fgets (mbuf, READBUFSIZE, f); if (p) { if (Mbstowcs (buf, mbuf, siz)) { return buf; } } return (Wchar *) 0; } #endif /* s が全てカタカナから構成されているかどうかを返す関数 */ static int all_kana (s) Wchar *s; { static Wchar xa = 0, xke, aa, *p; if (!xa) { Mbstowcs (&xa, "\045\041", 1); Mbstowcs (&xke, "\045\166", 1); Mbstowcs (&aa, "\041\074", 1); } for (p = s; *p; p++) { if (!(*p == aa || (xa <= *p && *p <= xke))) { return 0; } } return 1; } /* スラッシュを探す */ static Wchar * findslash (s) Wchar *s; { while (*s) { if (*s == (Wchar) '/') { return s; } s++; } return (Wchar *) 0; } /* トークンを一個取り出す */ static Wchar * extstr (p, pp, key_return) Wchar *p, **pp; int *key_return; { Wchar *res; int key = 0; while (*p == (Wchar) ' ' || *p == (Wchar) '\t') p++; res = p; while (*p && *p != (Wchar) ' ' && *p != (Wchar) '\t' && *p != (Wchar) '\n') { key += (int) *p++; } *p++ = (Wchar) '\0'; if (pp) *pp = p; if (key_return) *key_return = key; return res; } static struct hinshipack *partsofspeech[HINSHIBUFSIZE]; static void malloc_failed () { (void) fprintf (stderr, "%s: malloc failed.\n", program); } /* 品詞名を品詞名テーブルに登録する */ static struct hinshipack * internhinshi (str, flag) Wchar *str; int flag; { struct hinshipack *p, **pp; Wchar *s; int key = 0; for (s = str; *s; s++) key += (int) *s; key = ((unsigned) key & HINSHIBUFINDEXMASK); for (pp = partsofspeech + key; p = *pp; pp = &(p->next)) { if (!Wscmp (p->hinshi, str)) { return p; } } if (flag) { p = (struct hinshipack *) malloc (sizeof (struct hinshipack)); if (p) { *pp = p; (void) bzero (p, sizeof (struct hinshipack)); p->hinshi = (Wchar *) malloc ((Wslen (str) + 1) * sizeof (Wchar)); if (p->hinshi) { (void) Wscpy (p->hinshi, str); p->nhinshis = 1; return p; } free (p); } malloc_failed (); } return (struct hinshipack *) 0; } /* 品詞名を置き換える */ static void replace_hinshi () { FILE *f; Wchar readbuf[READBUFSIZE], *to, *from, *s; struct hinshipack *hinshientry, *p; int i, err = 0; f = fopen (hinshi_table, "r"); if (!f) { (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, hinshi_table); exit (1); } while (s = Fgetws (readbuf, READBUFSIZE, f)) { from = extstr (s, &s, 0); to = extstr (s, &s, 0); if (hinshi_direction == REVERSE) { Wchar *xx = from; from = to; to = xx; } hinshientry = internhinshi (from, 0); if (hinshientry) { Wchar *xx; xx = (Wchar *) malloc ((Wslen (to) + 1) * sizeof (Wchar)); if (xx) { Wchar *cp; int n = 1; (void) Wscpy (xx, to); free (hinshientry->hinshi); hinshientry->hinshi = xx; for (cp = xx; *cp; cp++) { if (*cp == (Wchar) '/') { *cp = (Wchar) 0; n++; } } hinshientry->nhinshis = n; hinshientry->flags |= REPLACED; } else { malloc_failed (); } } } (void) fclose (f); for (i = 0; i < HINSHIBUFSIZE; i++) { for (p = partsofspeech[i]; p; p = p->next) { if (!(p->flags & REPLACED)) { (void) fprintf (stderr, "%s: The replacement for \"", program); Fputws (p->hinshi, stderr); (void) fprintf (stderr, "\" is not mentioned in the table.\n"); err = 1; } } } if (err) { exit (1); } } static void select_hinshi (n) int n; { Wchar *s, *t, *xx; struct hinshipack *p; int i; if (!n) return; for (i = 0; i < HINSHIBUFSIZE; i++) { for (p = partsofspeech[i]; p; p = p->next) { switch (n) { case 1: s = findslash (p->hinshi); if (s) { *s = (Wchar) 0; } break; case 2: s = findslash (p->hinshi); if (s) { s++; t = findslash (s); if (t) { xx = (Wchar *) malloc ((t - s + 1) * sizeof (Wchar)); if (xx) { *t = (Wchar) 0; Wscpy (xx, s); t = p->hinshi; p->hinshi = xx; (void) free ((char *) t); } } } break; case 3: s = findslash (p->hinshi); if (s) { t = findslash (s + 1); if (t) { t++; xx = (Wchar *) malloc ((Wslen (t) + 1) * sizeof (Wchar)); if (xx) { Wscpy (xx, t); t = p->hinshi; p->hinshi = xx; (void) free ((char *) t); } } } break; default: break; } } } } static void freedesc (p) struct descpack *p; { free (p->hinshi); free (p->tandesc); free (p->yomdesc); free (p); } static struct descpack *description[HINSHIBUFSIZE]; /* ルールの登録 */ static struct descpack * interndesc (hin, tan, yom) Wchar *hin, *tan, *yom; { struct descpack *p, **pp, *next = (struct descpack *) 0; Wchar *s; int key = 0; for (s = hin; *s; s++) key += (int) *s; key = ((unsigned) key & HINSHIBUFINDEXMASK); for (pp = description + key; p = *pp; pp = &(p->next)) { if (!Wscmp (p->hinshi, hin)) { if (!Wscmp (p->tandesc, tan) && !Wscmp (p->yomdesc, yom)) { return p; } else { *pp = next = p->next; freedesc (p); break; } } } p = (struct descpack *) malloc (sizeof (struct descpack)); if (p) { *pp = p; (void) bzero (p, sizeof (struct descpack)); p->next = next; p->hinshi = (Wchar *) malloc ((Wslen (hin) + 1) * sizeof (Wchar)); if (p->hinshi) { (void) Wscpy (p->hinshi, hin); p->tandesc = (Wchar *) malloc ((Wslen (tan) + 1) * sizeof (Wchar)); if (p->tandesc) { (void) Wscpy (p->tandesc, tan); p->yomdesc = (Wchar *) malloc ((Wslen (yom) + 1) * sizeof (Wchar)); if (p->yomdesc) { (void) Wscpy (p->yomdesc, yom); return p; } free (p->tandesc); } free (p->hinshi); } free (p); } malloc_failed (); return (struct descpack *) 0; } /* ルールの探索 */ static struct descpack * searchdesc (hin) Wchar *hin; { struct descpack *p, **pp; Wchar *s; int key = 0; for (s = hin; *s; s++) key += (int) *s; key = ((unsigned) key & HINSHIBUFINDEXMASK); for (pp = description + key; p = *pp; pp = &(p->next)) { if (!Wscmp (p->hinshi, hin)) { return p; } } return (struct descpack *) 0; } static void store_description () { FILE *f; Wchar readbuf[READBUFSIZE], *hin, *tan, *yom, *s; if (!description_table) { return; } f = fopen (description_table, "r"); if (!f) { (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, description_table); exit (1); } while (s = Fgetws (readbuf, READBUFSIZE, f)) { Wchar nl[1]; nl[0] = (Wchar) 0; hin = tan = yom = nl; hin = extstr (s, &s, 0); if (*hin) { tan = extstr (s, &s, 0); if (*tan) { yom = extstr (s, &s, 0); } } interndesc (hin, tan, yom); } (void) fclose (f); } struct kindpack kinds[sizeof (long) * 8]; static int nkinds; #define KIHONBIT 1L /* 種別の登録 */ static long internkind (s) Wchar *s; { int i; Wchar *p; p = findslash (s); if (p) { long res; *p = (Wchar) '\0'; res = internkind (s); res |= internkind (p + 1); return res; } else { for (i = 0; i < nkinds; i++) { if (!Wscmp (s, kinds[i].kind)) { return kinds[i].kindbit; } } if (nkinds < (sizeof (long) * 8) && (kinds[nkinds].kind = (Wchar *) malloc ((Wslen (s) + 1) * sizeof (Wchar)))) { (void) Wscpy (kinds[nkinds].kind, s); kinds[nkinds].kindbit = 1 << nkinds; return kinds[nkinds++].kindbit; } return 0; } } /* 種別の一覧の出力 */ static void listkinds () { int i; for (i = 0; i < nkinds; i++) { Fputws (kinds[i].kind, stdout); putchar ('\n'); } } static int kindcompar (k1, k2) struct kindpack *k1, *k2; { return Wscmp (k1->kind, k2->kind); } static void sortkind () { qsort (kinds, nkinds, sizeof (struct kindpack), kindcompar); } static struct dicpack *dic[DICBUFSIZE], **pdic; static int ndicentries = 0; /* intern -- 辞書エントリの検索/登録 第6引数の stat としてヌルでないアドレスが指定された場合には、同じエントリ が登録されていない場合には登録を行う。アドレスがヌルの場合には登録しない。 flags によっていろいろと指定をする。(以下を見てね)。 hinshi に 0 を渡してはいけない。kind は 0 を渡しても可だが、-m の時じゃない マッチはしないので注意。 */ /* flags */ #define IGNORE_HINSHI 1L #define IGNORE_KIND 2L static struct dicpack * intern (key, yomi, kouho, hinshi, hindo, kind, stat, flags) int key, hindo, *stat; Wchar *yomi, *kouho, *hinshi; long kind, flags; { struct dicpack *p, **pp; struct descpack *dp; Wchar nl[1], *yomdesc = nl, *tandesc = nl; Wchar *yom = (Wchar *) 0, *tan = (Wchar *) 0, *dhinshi, *dh; nl[0] = (Wchar) '\0'; if (description_table) { dhinshi = dh = hinshi; /* かんなの品詞を探す */ while (*dh) { if (*dh++ == (Wchar) '/') { dhinshi = dh; } } dp = searchdesc (dhinshi); if (dp) { yomdesc = dp->yomdesc; tandesc = dp->tandesc; if (Wslen (yomdesc)) { Wchar *t; t = (Wchar *) malloc ((Wslen (yomi) + Wslen (yomdesc) + 1) * sizeof (Wchar)); if (t) { Wscpy (t, yomi); yom = yomi = t; Wscpy (yomi + Wslen (yomi), yomdesc); } } if (Wslen (tandesc)) { Wchar *t; t = (Wchar *) malloc ((Wslen (kouho) + Wslen (tandesc) + 1) * sizeof (Wchar)); if (t) { Wscpy (t, kouho); tan = kouho = t; Wscpy (kouho + Wslen (kouho), tandesc); } } } else { char foo[64]; fprintf (stderr, "no description rule for "); Wcstombs (foo, dhinshi, 64); fprintf (stderr, "%s.\n", foo); } } key = ((unsigned) key & DICBUFINDEXMASK); for (pp = dic + key; p = *pp; pp = &(p->next)) { if (!Wscmp (p->yomi, yomi) && !Wscmp (p->tango, kouho) && ((flags & IGNORE_HINSHI) || !Wscmp (p->hinshi->hinshi, hinshi)) && ((flags & IGNORE_KIND) || ((p->kind & kind) == kind))) { /* match */ if (stat) *stat = FOUND; if (yom) free (yom); if (tan) free (tan); return p; } } if (stat) { p = (struct dicpack *) malloc (sizeof (struct dicpack)); if (p) { *pp = p; (void) bzero (p, sizeof (struct dicpack)); p->yomi = (Wchar *) malloc ((Wslen (yomi) + 1) * sizeof (Wchar)); if (p->yomi) { (void) Wscpy (p->yomi, yomi); p->tango = (Wchar *) malloc ((Wslen (kouho) + 1) * sizeof (Wchar)); if (p->tango) { (void) Wscpy (p->tango, kouho); p->hinshi = internhinshi (hinshi, 1); if (p->hinshi) { p->hindo = hindo; *stat = CREATE; ndicentries++; p->kind = kind; p->extdata = (Wchar *) 0; if (yom) free (yom); if (tan) free (tan); return p; } free (p->tango); } free (p->yomi); } free (p); } malloc_failed (); } if (yom) free (yom); if (tan) free (tan); return (struct dicpack *) 0; } /* 登録されているエントリに対して fn を実行する */ static void for_all_interned (fn) void (*fn) (); { int i; struct dicpack *p; for (i = 0; i < DICBUFSIZE; i++) { for (p = dic[i]; p; p = p->next) { (*fn) (p); } } } static void storepd (file) FILE *file; { Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind; int nhindo, key, tkey, stat; long kindbit; struct dicpack *dicentry; while (p = Fgetws (readbuf, READBUFSIZE, file)) { key = 0; yomi = extstr (p, &p, &tkey); key += tkey; kouho = extstr (p, &p, &tkey); key += tkey; hinshi = extstr (p, &p, 0); hindo = extstr (p, &p, 0); nhindo = Watoi (hindo); kind = extstr (p, 0, 0); if (*kind) { kindbit = internkind (kind); } else { kindbit = KIHONBIT; } dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, &stat, IGNORE_KIND); if (dicentry) { dicentry->kind |= kindbit; } } } static void comparepd (file) FILE *file; { Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind; int nhindo, key, tkey, stat, *statp = &stat; struct dicpack *dicentry; long kindbit, flags = 0L; while (p = Fgetws (readbuf, READBUFSIZE, file)) { key = 0; yomi = extstr (p, &p, &tkey); key += tkey; kouho = extstr (p, &p, &tkey); key += tkey; hinshi = extstr (p, &p, 0); if (ignore_hinshi_to_compare) { flags |= IGNORE_HINSHI; } hindo = extstr (p, &p, 0); nhindo = Watoi (hindo); kind = extstr (p, 0, 0); if (*kind) { kindbit = internkind (kind); } else { kindbit = KIHONBIT; } if (merge_kind || merge_sj3) { flags |= IGNORE_KIND; } if (copy_frequency) { statp = (int *) 0; } dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, statp, flags); if (dicentry) { if (copy_frequency) { dicentry->hindo = nhindo; dicentry->flags &= ~COMMON; } else if (ignore_hinshi_to_compare && stat == FOUND) { /* この場合、同じキーのチェーンが返る */ struct dicpack *pd; for (pd = dicentry; pd; pd = pd->next) { if (!Wscmp (pd->yomi, yomi) && !Wscmp (pd->tango, kouho)) { pd->flags |= COMMON; if (!merge_sj3) { pd->kind |= kindbit; } if (merge_sj3) { int len = 0; Wchar *dat; if (pd->extdata) { len = Wslen (pd->extdata); } dat = (Wchar *) malloc ((Wslen (hinshi) + 1 + len) * sizeof (Wchar)); if (dat) { if (len) { (void) Wscpy (dat, pd->extdata); (void) free ((char *) pd->extdata); } (void) Wscpy (dat + len, hinshi); pd->extdata = dat; } } } } } else { dicentry->kind |= kindbit; if (stat == FOUND) { dicentry->flags |= COMMON; } else { /* CREATE */ dicentry->flags |= NEW; } } } } } static void canna_output (cf, p, h, n) FILE *cf; struct dicpack *p; Wchar *h; int n; { for (; n-- > 0; h += Wslen (h) + 1) { Fputws (p->yomi, cf); (void) putc (' ', cf); Fputws (h, cf); if (p->hindo) { (void) fprintf (cf, "*%d", p->hindo); } (void) putc (' ', cf); Fputws (p->tango, cf); (void) putc ('\n', cf); } } static void entry_out (cf, p, h, n, ex) FILE *cf; struct dicpack *p; Wchar *h; int n; Wchar *ex; { int i, f = 1; long b; for (; n-- > 0; h += Wslen (h) + 1) { Fputws (p->yomi, cf); (void) putc (' ', cf); Fputws (p->tango, cf); (void) putc (' ', cf); if (merge_sj3 && ex) { Fputws (ex, cf); (void) putc ('/', cf); } Fputws (h, cf); if (!sj3_type_output) { (void) fprintf (cf, " %d", p->hindo); } if (!wnn_type_output) { if (bunrui) { (void) printf (" %s", bunrui); } else { if (specific_kind) { b = (specific_kind & p->kind); } else { b = p->kind; } if (b != KIHONBIT) { /* 基本だけだったら何も書かない */ for (i = 0; i < nkinds; i++) { if (b & kinds[i].kindbit) { if (f) { (void) putc (' ', cf); f = 0; } else { (void) putc ('/', cf); } Fputws (kinds[i].kind, cf); } } } } } (void) putc ('\n', cf); } } /* p で表されるエントリをファイル cf に出力する */ static void printentry (cf, p) FILE *cf; struct dicpack *p; { if (specific_kind && !(p->kind & specific_kind)) { return; } if (extract_kana && !all_kana (p->tango)) { return; } if (selhinshi && !p->hinshi->hinshi[0]) { return; } if (canna_type_output) { canna_output (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis); } else { entry_out (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis, p->extdata); } } static void showdeleted (p) struct dicpack *p; { if (!(p->flags & COMMON)) { (void) printf ("- "); printentry (stdout, p); } } static void showentry (pd, n) struct dicpack **pd; int n; { FILE *cf = (FILE *) 0, *of = (FILE *) 0, *nf = (FILE *) 0; struct dicpack *p; int i; if (common_out) { if (common_out[0] != '-' || common_out[1]) { cf = fopen (common_out, "w"); if (!cf) { (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, common_out); exit (1); } } else { cf = stdout; } } if (old_out) { if (old_out[0] != '-' || old_out[1]) { of = fopen (old_out, "w"); if (!of) { (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, old_out); exit (1); } } else { of = stdout; } } if (new_out) { if (new_out[0] != '-' || new_out[1]) { nf = fopen (new_out, "w"); if (!nf) { (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, new_out); exit (1); } } else { nf = stdout; } } for (i = 0; i < n; i++) { p = pd[i]; if (compare) { if (p->flags & COMMON) { if (cf) { printentry (cf, p); } } else if (p->flags & NEW) { if (nf) { printentry (nf, p); } } else { if (of) { printentry (of, p); } } } else { /* just print the normalized dictionary */ printentry (stdout, p); } } } static int diccompar (p1, p2) struct dicpack **p1, **p2; { int n; if (n = Wscmp ((*p1)->yomi, (*p2)->yomi)) { return n; } else if (n = Wscmp ((*p1)->tango, (*p2)->tango)) { return n; } else if (n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi)) { return n; } else { /* impossible */ return 0; } } static int dichindocompar (p1, p2) struct dicpack **p1, **p2; { int n; if (n = Wscmp ((*p1)->yomi, (*p2)->yomi)) { return n; } else if (n = ((*p2)->hindo - (*p1)->hindo)) { return n; } else if (n = Wscmp ((*p1)->tango, (*p2)->tango)) { return n; } else if (n = Wscmp ((*p1)->hinshi->hinshi, (*p2)->hinshi->hinshi)) { return n; } else { /* impossible */ return 0; } } void shrinkargs (argv, n, count) char **argv; int n, count; { int i; for (i = 0; i + n < count; i++) { argv[i] = argv[i + n]; } } static void parseargs (argc, argv) int argc; char *argv[]; { int i; for (program = argv[0] + strlen (argv[0]); argv[0] < program; program--) { if (program[0] == '/') { program++; break; } } for (i = 1; i < argc;) { if (argv[i][0] == '-' && argv[i][2] == '\0') { switch (argv[i][1]) { case '1': case '2': case '3': selhinshi = argv[i][1] - '0'; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'b': bunrui = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'c': common_out = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'd': description_table = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'f': copy_frequency = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'h': ignore_hinshi_to_compare = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'i': canna_type_output = 1; wnn_type_output = 0; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'j': extract_kana = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'k': { Wchar buf[READBUFSIZE]; (void) Mbstowcs (buf, argv[i + 1], READBUFSIZE); specific_kind |= internkind (buf); } shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'l': list_kinds = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'm': merge_kind = 1; shrinkargs (argv + i, 1, argc - 1); argc -= 1; break; case 'n': new_out = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'o': old_out = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'p': sort_by_frequency = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'r': hinshi_table = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; hinshi_direction = REVERSE; break; case 's': hinshi_table = argv[i + 1]; shrinkargs (argv + i, 2, argc - i); argc -= 2; break; case 'v': sj3_type_output = 1; wnn_type_output = 1; /* Wnn 形式と似ているので立てる */ shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'w': canna_type_output = 0; sj3_type_output = 0; wnn_type_output = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; case 'x': merge_sj3 = 1; ignore_hinshi_to_compare = 1; shrinkargs (argv + i, 1, argc - i); argc -= 1; break; default: i++; break; } } else { i++; } } if (argc < 2) { (void) fprintf (stderr, "Usage: %s dic1 [dic2] [-c filecommon] ...\n", program); exit (1); } if (argv[1][0] != '-' || argv[1][1]) { in1 = fopen (argv[1], "r"); if (!in1) { (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[1]); exit (1); } } if (argc == 3) { if (argv[2][0] != '-' || argv[2][1]) { in2 = fopen (argv[2], "r"); if (!in2) { (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[2]); exit (1); } } } else { in2 = (FILE *) 0; } if (description_table) { store_description (); } } static Wchar kihonh[] = { (Wchar) 'k', (Wchar) 'i', (Wchar) 'h', (Wchar) 'o', (Wchar) 'n', (Wchar) 0, }; int main (argc, argv) int argc; char *argv[]; { #ifndef POD_WCHAR setlocale (LC_ALL, ""); #endif in1 = in2 = stdin; (void) internkind (kihonh); /* 基本辞書用。1L として登録 */ parseargs (argc, argv); storepd (in1); (void) fclose (in1); if (in2) { compare = 1; comparepd (in2); (void) fclose (in2); } if (list_kinds) { listkinds (); exit (0); } if (selhinshi) { select_hinshi (selhinshi); } else if (hinshi_table) { replace_hinshi (); } pdic = (struct dicpack **) malloc (ndicentries * sizeof (struct dicpack *)); if (pdic) { int i, j; struct dicpack *p; for (i = 0, j = 0; i < DICBUFSIZE; i++) { for (p = dic[i]; p; p = p->next) { pdic[j++] = p; } } if (sort_by_frequency) { qsort (pdic, ndicentries, sizeof (struct dicpack *), dichindocompar); } else { qsort (pdic, ndicentries, sizeof (struct dicpack *), diccompar); } sortkind (); showentry (pdic, ndicentries); } else { malloc_failed (); } exit (0); }