diff PubdicPlus/pod.c @ 0:bbc77ca4def5

initial import
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 13 Dec 2007 04:30:14 +0900
parents
children 466fe6732d8d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PubdicPlus/pod.c	Thu Dec 13 04:30:14 2007 +0900
@@ -0,0 +1,1696 @@
+/* Copyright 1994 Pubdic Project.
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without
+ * fee, provided that the above copyright notice appear in all copies
+ * and that both that copyright notice and this permission notice
+ * appear in supporting documentation, and that the name of Pubdic
+ * Project not be used in advertising or publicity pertaining to
+ * distribution of the software without specific, written prior
+ * permission.  Pubdic Project makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * PUBDIC PROJECT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN 
+ * NO EVENT SHALL PUBDIC PROJECT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 
+ * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 
+ * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 
+ * PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+#ifndef lint
+static char rcsid[] = "$Id: pod.c,v 1.7 2005/12/10 18:50:43 aonoto Exp $";
+#endif
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include <stdio.h>
+#if STDC_HEADERS
+#  include <stdlib.h>
+#  include <stddef.h>
+#  include <string.h>
+#else
+#  if HAVE_MALLOC_H
+#    include <malloc.h>
+#  endif
+#  if HAVE_STRINGS_H
+#    include <strings.h>
+#  endif
+#endif /* STDC_HEADERS */
+
+/* Wide-character type used throughout this file.  POD_WCHAR is defined
+   right here, so the private unsigned-short Wchar is the branch that is
+   compiled; the wchar_t branch is only reached if the define is removed. */
+#define POD_WCHAR
+#ifdef POD_WCHAR
+typedef unsigned short Wchar;
+#else
+#include <locale.h>
+#include <widec.h>
+#define Wchar wchar_t
+#endif
+
+/* Provide bzero() in terms of memset() when the former is not available
+   but the latter is. */
+#if !(HAVE_BZERO) && (HAVE_MEMSET)
+#  define bzero(a, c) memset(a, 0, c)
+#endif
+
+/*
+ * File-scope option state.  Notes below describe only what the code in
+ * this file visibly does with each variable:
+ *
+ *   program                  basename of argv[0], used as the prefix of
+ *                            diagnostics
+ *   compare                  showentry() routes entries to the
+ *                            common/new/old outputs instead of stdout
+ *   ignore_hinshi_to_compare comparepd() matches entries without looking
+ *                            at the part of speech
+ *   merge_sj3, merge_kind    comparepd() matches entries without looking
+ *                            at the kind bits; merge_sj3 additionally
+ *                            accumulates part-of-speech text in extdata
+ *   wnn_type_output          entry_out() omits the kind column
+ *   canna_type_output        printentry() uses canna_output()
+ *   sj3_type_output          entry_out() omits the frequency column
+ *   copy_frequency           comparepd() only copies frequencies onto
+ *                            existing entries
+ *   extract_kana             printentry() keeps only all_kana() words
+ *   specific_kind            kind bit mask used as an output filter
+ *   common_out/old_out/new_out  output file names; "-" means stdout
+ *   hinshi_table             file read by replace_hinshi()
+ *   bunrui                   literal text printed as the kind column
+ *   description_table        file read by store_description()
+ *   selhinshi                1..3, set from -1/-2/-3; see select_hinshi()
+ *
+ * sort_by_frequency, list_kinds, in1 and in2 are not referenced in the
+ * part of the file documented here (presumably used by main -- TODO
+ * confirm).
+ */
+static char *program;
+static int compare, ignore_hinshi_to_compare, sort_by_frequency, merge_sj3;
+static int merge_kind, wnn_type_output, canna_type_output, sj3_type_output;
+static int list_kinds;
+static int copy_frequency, extract_kana = 0;
+static long specific_kind;
+static FILE *in1, *in2;
+static char *common_out, *old_out, *new_out, *hinshi_table, *bunrui;
+static char *description_table;
+static int selhinshi = 0;
+
+/* hinshi_direction: with REVERSE, replace_hinshi() swaps the two columns
+   of each table line before applying the replacement. */
+#define INORDER 0
+#define REVERSE 1
+
+static int hinshi_direction = INORDER;  /* see above */
+
+/* READBUFSIZE is the size of every line buffer (both the byte buffer
+   handed to fgets() and the Wchar buffers).  The two table sizes are
+   powers of two (2 << 13 == 16384) so that "size - 1" can be used as a
+   bit mask to reduce a hash key to a bucket index. */
+#define READBUFSIZE 128
+#define DICBUFSIZE (2 << 13)
+#define DICBUFINDEXMASK (DICBUFSIZE - 1)
+#define HINSHIBUFSIZE (2 << 13)
+#define HINSHIBUFINDEXMASK (HINSHIBUFSIZE - 1)
+
+/* status of intern(): stored through its `stat' argument to tell whether
+   the returned entry already existed (FOUND) or was just added (CREATE) */
+#define FOUND 0
+#define CREATE 1
+
+/* Structure representing a part of speech (hinshi).  Nodes are chained
+   from the partsofspeech[] hash table and shared by dictionary entries. */
+
+struct hinshipack
+{
+  int nhinshis;                 /* number of NUL-separated names stored in
+                                   `hinshi' (1 unless replace_hinshi()
+                                   split a "a/b" replacement) */
+  Wchar *hinshi;                /* malloc'ed name(s) */
+  unsigned flags;               /* see below */
+  struct hinshipack *next;      /* next node in the same hash bucket */
+};
+
+/* values of (struct hinshipack.)flags */
+#define REPLACED 1              /* set by replace_hinshi() once renamed */
+
+/* Internal representation of the rule file used to append the terminal
+   (dictionary) form -- "I think", as the original author put it.
+   One node per part of speech, chained from the description[] table:
+   tandesc is appended to the word and yomdesc to the reading by
+   intern(). */
+
+struct descpack
+{
+  Wchar *hinshi, *tandesc, *yomdesc;
+  struct descpack *next;
+};
+
+/* Structure (and related items) representing the kind of an entry:
+   a kind name together with the single bit assigned to it by
+   internkind(). */
+
+struct kindpack
+{
+  Wchar *kind;
+  long kindbit;
+};
+
+/* Structure representing one dictionary entry.  Entries are chained from
+   the dic[] hash table. */
+
+struct dicpack
+{
+  Wchar *yomi, *tango;          /* reading and word, both malloc'ed */
+  struct hinshipack *hinshi;    /* part of speech, from internhinshi() */
+  int hindo;                    /* frequency, parsed from the input line */
+  long kind;                    /* OR of kindpack.kindbit values */
+  Wchar *extdata;               /* part-of-speech text accumulated by
+                                   comparepd() when merge_sj3 is set */
+  unsigned flags;               /* SEE BELOW */
+  struct dicpack *next;         /* next entry in the same hash bucket */
+};
+
+/* values of (struct dicpack.)flags */
+#define COMMON 001              /* entry was also seen by comparepd() */
+#define NEW    002              /* entry was first created by comparepd() */
+
+/* ANSI prototypes for the functions of this file.  They are only
+   declared for compilers known to accept prototypes; the definitions
+   themselves are written in old (K&R) style. */
+#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32)
+/* Prototype for C89 (or later) */
+#ifdef POD_WCHAR
+size_t Mbstowcs (Wchar *d, char *ss, int n);
+size_t Wcstombs (char *d, Wchar *s, int n);
+int    Wscmp (register Wchar *s1, register Wchar *s2);
+Wchar  *Wscpy (Wchar *d, register Wchar *s);
+int    Wslen (Wchar *s);
+int    Watoi (Wchar *s);
+static void Fputws (Wchar *s, FILE *f);
+Wchar  *Fgetws (Wchar *buf, int siz, FILE *f);
+#endif /* POD_WCHAR */
+
+static int all_kana (Wchar *s);
+static Wchar *findslash (Wchar *s);
+static Wchar *extstr (Wchar *p, Wchar **pp, int *key_return);
+static void malloc_failed (void);
+static struct hinshipack *internhinshi (Wchar *str, int flag);
+static void replace_hinshi (void);
+static void select_hinshi (int n);
+static void freedesc (struct descpack *p);
+static struct descpack *interndesc (Wchar *hin, Wchar *tan, Wchar *yom);
+static struct descpack *searchdesc (Wchar *hin);
+static void store_description (void);
+static long internkind (Wchar *s);
+static void listkinds (void);
+static int kindcompar (struct kindpack *k1, struct kindpack *k2);
+static void sortkind (void);
+static struct dicpack *intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags);
+static void storepd (FILE *file);
+static void comparepd (FILE *file);
+static void canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n);
+static void entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex);
+static void printentry (FILE *cf, struct dicpack *p);
+static void showentry (struct dicpack **pd, int n);
+static int diccompar (struct dicpack **p1, struct dicpack **p2);
+static int dichindocompar (struct dicpack **p1, struct dicpack **p2);
+void shrinkargs (char **argv, int n, int count);
+static void parseargs (int argc, char *argv[]);
+#endif
+
+/* Without POD_WCHAR the W*/F* names simply map onto the system
+   wide-character routines.  With POD_WCHAR the file supplies its own
+   versions below, which use the following constants:
+     SS2, SS3      single-shift lead bytes recognised in the byte stream
+     MSB, MSK      high bit of a byte and the mask that clears it
+     WCG0..WCG3    the four values of (wide char & WCMSK); each selects
+                   one of the four byte layouts written by Wcstombs() */
+#ifndef POD_WCHAR
+# define Mbstowcs mbstowcs
+# define Wcstombs wcstombs
+# define Wscmp wscmp
+# define Wscpy wscpy
+# define Wslen wslen
+# define Fgetws fgetws
+# define Fputws fputws
+#else
+# define SS2 0x8e
+# define SS3 0x8f
+# define MSB 0x80
+# define MSK 0x7f
+
+# define WCG0 0x0000
+# define WCG1 0x8080
+# define WCG2 0x0080
+# define WCG3 0x8000
+# define WCMSK 0x8080
+
+/*
+ * Mbstowcs -- convert the multibyte string SS into Wchar's stored at D.
+ *
+ * Byte layouts recognised:
+ *   byte without MSB        -> the byte itself
+ *   SS2 b                   -> b
+ *   SS3 b1 b2               -> (b1 << 8) | (b2 & MSK)
+ *   b1 b2 (b1 has MSB set)  -> (b1 << 8) | b2
+ *
+ * At most N characters are converted and a terminating 0 is always
+ * stored after them, so D must have room for N + 1 elements.  Returns
+ * the number of characters stored, not counting the terminator.
+ *
+ * A multibyte sequence that is cut short by the end of the string (for
+ * example a line that fgets() split in the middle of a character) ends
+ * the conversion; the scanner never steps over the terminating NUL.
+ */
+size_t
+Mbstowcs (d, ss, n)
+     Wchar *d;
+     char *ss;
+     int n;
+{
+  register Wchar *p = d;
+  register int ch;
+  register unsigned char *s = (unsigned char *) ss;
+
+  while ((ch = *s++) && (p - d < n))
+    {
+      if (!(ch & MSB))
+        {
+          *p++ = (Wchar) ch;
+          continue;
+        }
+
+      if (ch == SS3)
+        {
+          /* needs two trailing bytes */
+          if (!s[0] || !s[1])
+            {
+              break;
+            }
+          *p++ = (Wchar) ((s[0] << 8) | (s[1] & MSK));
+          s += 2;
+        }
+      else
+        {
+          /* SS2 and ordinary two-byte characters need one trailing byte */
+          if (!s[0])
+            {
+              break;
+            }
+          if (ch == SS2)
+            {                   /* kana */
+              *p++ = (Wchar) * s++;
+            }
+          else
+            {
+              *p++ = (Wchar) ((ch << 8) | (*s++ & 0xff));
+            }
+        }
+    }
+  *p = (Wchar) 0;
+  return p - d;
+}
+
+/*
+ * Wcstombs -- convert the Wchar string S into multibyte form at D.
+ *
+ * N is the size of D in bytes.  A character is written only when both
+ * it and the terminating NUL still fit, so the result is always
+ * NUL-terminated inside the buffer (the former fixed "+ 2" bound was one
+ * byte short for the three-byte SS3 form).  Conversion stops at the
+ * first character that does not fit.  Returns the number of bytes
+ * stored, not counting the terminator; returns 0 without touching D when
+ * N is not positive.
+ */
+size_t
+Wcstombs (d, s, n)
+     char *d;
+     Wchar *s;
+     int n;
+{
+  register char *p = d;
+  register Wchar ch;
+  register int need;
+
+  if (n <= 0)
+    {
+      return 0;
+    }
+
+  while ((ch = *s++) != 0)
+    {
+      /* number of bytes this character expands to */
+      switch (ch & WCMSK)
+        {
+        case WCG0:
+          need = 1;
+          break;
+        case WCG3:
+          need = 3;
+          break;
+        default:                /* WCG1, WCG2 */
+          need = 2;
+          break;
+        }
+      if (p - d + need >= n)
+        {
+          break;                /* no room for character + terminator */
+        }
+
+      switch (ch & WCMSK)
+        {
+        case WCG0:
+          *p++ = (char) ch;
+          break;
+
+        case WCG1:
+          *p++ = (char) ((ch >> 8) & 0xff);
+          *p++ = (char) (ch & 0xff);
+          break;
+
+        case WCG2:
+          *p++ = SS2;
+          *p++ = (char) ch;
+          break;
+
+        case WCG3:
+          *p++ = SS3;
+          *p++ = (char) ((ch >> 8) & 0xff);
+          *p++ = (char) ((ch & 0xff) | MSB);
+          break;
+        }
+    }
+  *p = '\0';
+  return p - d;
+}
+
+/*
+ * Wscmp -- compare two Wchar strings.
+ *
+ * Both pointers advance together for as long as the elements are equal
+ * and the end of the strings has not been reached.  The value returned
+ * is the difference *s1 - *s2 at the position where the scan stopped:
+ * zero when the strings are identical, otherwise negative or positive
+ * according to the first differing element.
+ */
+int
+Wscmp (s1, s2)
+     register Wchar *s1, *s2;
+{
+  register int diff;
+
+  for (;; s1++, s2++)
+    {
+      diff = *s1 - *s2;
+      if (diff || !*s1)
+        {
+          return diff;
+        }
+    }
+}
+
+/* Wscpy -- copy the Wchar string S, terminator included, to D and
+   return D. */
+Wchar *
+Wscpy (d, s)
+     Wchar *d;
+     register Wchar *s;
+{
+  register Wchar *out = d;
+
+  for (; *s; s++)
+    {
+      *out++ = *s;
+    }
+  *out = (Wchar) 0;
+  return d;
+}
+
+/* Wslen -- number of elements in S before the terminating 0. */
+int
+Wslen (s)
+     Wchar *s;
+{
+  register int len = 0;
+
+  while (s[len])
+    {
+      len++;
+    }
+  return len;
+}
+
+/* Watoi -- value of the run of decimal digits at the start of S.
+   There is no sign or white-space handling; the result is 0 when S does
+   not start with a digit. */
+int
+Watoi (s)
+     Wchar *s;
+{
+  register int value = 0;
+
+  for (; (Wchar) '0' <= *s && *s <= (Wchar) '9'; s++)
+    {
+      value = value * 10 + (*s - (Wchar) '0');
+    }
+  return value;
+}
+
+/*
+ * Fputws -- write the Wchar string S to F in multibyte form.
+ *
+ * The string is converted piecewise: each piece holds READBUFSIZE / 4
+ * characters, which always fits the byte buffer because one character
+ * expands to at most three bytes in Wcstombs() above.  Strings of any
+ * length are therefore written completely instead of being cut off at
+ * one buffer's worth.  Nothing is written for an empty string.
+ */
+static void
+Fputws (s, f)
+     Wchar *s;
+     FILE *f;
+{
+  Wchar piece[READBUFSIZE / 4 + 1];
+  char buf[READBUFSIZE];
+  int i;
+
+  while (*s)
+    {
+      for (i = 0; i < READBUFSIZE / 4 && *s; i++)
+        {
+          piece[i] = *s++;
+        }
+      piece[i] = (Wchar) 0;
+
+      if (Wcstombs (buf, piece, READBUFSIZE))
+        {
+          (void) fputs (buf, f);
+        }
+    }
+}
+
+/*
+ * Fgetws -- read one line from F and store it, converted to Wchar, in
+ * BUF, which has room for SIZ elements.
+ *
+ * At most READBUFSIZE - 1 bytes are read per call (longer lines are
+ * delivered in several pieces).  At most SIZ - 1 characters are
+ * converted so that the terminating 0 always lies inside BUF.  Returns
+ * BUF, or a null pointer at end of file, on a read error, when nothing
+ * could be converted, or when SIZ is not positive.
+ */
+Wchar *
+Fgetws (buf, siz, f)
+     Wchar *buf;
+     int siz;
+     FILE *f;
+{
+  char mbuf[READBUFSIZE];
+
+  if (siz <= 0)
+    {
+      return (Wchar *) 0;
+    }
+  if (!fgets (mbuf, READBUFSIZE, f))
+    {
+      return (Wchar *) 0;
+    }
+  if (!Mbstowcs (buf, mbuf, siz - 1))
+    {
+      return (Wchar *) 0;
+    }
+  /* guarantee termination even if the converter filled all SIZ - 1
+     elements without storing a terminator itself */
+  buf[siz - 1] = (Wchar) 0;
+  return buf;
+}
+#endif
+
+/*
+ * Return 1 when S consists entirely of katakana, 0 otherwise.
+ *
+ * "Katakana" here means the prolonged sound mark, or any character in
+ * the range small-a .. small-ke.  The three reference characters are
+ * given as EUC byte pairs (high bit set) because that is the form
+ * Mbstowcs() decodes as a two-byte character; the 7-bit forms used
+ * before were decoded as the ASCII characters '%' and '!', so no
+ * katakana word was ever recognised.  An empty string yields 1.
+ */
+
+static int
+all_kana (s)
+     Wchar *s;
+{
+  static Wchar xa = 0, xke, aa;
+  Wchar tmp[2], *p;
+
+  if (!xa)
+    {
+      /* tmp has two elements: the converter stores a terminator after
+         the single converted character */
+      Mbstowcs (tmp, "\245\241", 1);    /* small a */
+      xa = tmp[0];
+      Mbstowcs (tmp, "\245\366", 1);    /* small ke */
+      xke = tmp[0];
+      Mbstowcs (tmp, "\241\274", 1);    /* prolonged sound mark */
+      aa = tmp[0];
+    }
+
+  for (p = s; *p; p++)
+    {
+      if (!(*p == aa || (xa <= *p && *p <= xke)))
+        {
+          return 0;
+        }
+    }
+
+  return 1;
+}
+
+/* Find a slash: return a pointer to the first '/' in S, or a null
+   pointer when S contains none. */
+
+static Wchar *
+findslash (s)
+     Wchar *s;
+{
+  for (; *s; s++)
+    {
+      if (*s == (Wchar) '/')
+        {
+          return s;
+        }
+    }
+  return (Wchar *) 0;
+}
+
+/*
+ * Extract one token.
+ *
+ * Skips leading blanks and tabs at P, then takes everything up to the
+ * next blank, tab, newline or end of string as the token.  The token is
+ * terminated in place (the delimiter is overwritten with 0) and a
+ * pointer to it is returned.
+ *
+ *   pp          if non-null, receives the position where scanning should
+ *               resume.  When the token ended at the end of the string
+ *               the resume position is the string's terminator itself,
+ *               never the element after it, so further calls keep
+ *               yielding empty tokens instead of reading whatever lies
+ *               beyond the string.
+ *   key_return  if non-null, receives the sum of the token's character
+ *               values (used by callers as a hash key).
+ */
+
+static Wchar *
+extstr (p, pp, key_return)
+     Wchar *p, **pp;
+     int *key_return;
+{
+  Wchar *res;
+  int key = 0;
+
+  while (*p == (Wchar) ' ' || *p == (Wchar) '\t')
+    p++;
+  res = p;
+  while (*p && *p != (Wchar) ' ' && *p != (Wchar) '\t' && *p != (Wchar) '\n')
+    {
+      key += (int) *p++;
+    }
+  if (*p)
+    {
+      /* overwrite the delimiter and step past it */
+      *p++ = (Wchar) '\0';
+    }
+  if (pp)
+    *pp = p;
+  if (key_return)
+    *key_return = key;
+  return res;
+}
+
+/* Hash table of part-of-speech nodes; the bucket index is the sum of the
+   name's character values masked with HINSHIBUFINDEXMASK. */
+static struct hinshipack *partsofspeech[HINSHIBUFSIZE];
+
+/* Report an allocation failure on stderr.  The function only prints the
+   message; it returns normally to its caller. */
+static void
+malloc_failed ()
+{
+  static char message[] = "%s: malloc failed.\n";
+
+  (void) fprintf (stderr, message, program);
+}
+
+/*
+ * Register a part-of-speech name in the part-of-speech table.
+ *
+ * Looks STR up in partsofspeech[] and returns the existing node when
+ * there is one.  Otherwise, if FLAG is non-zero, a new node holding a
+ * copy of STR is appended to the bucket and returned; if FLAG is zero a
+ * null pointer is returned.  A null pointer is also returned (after
+ * malloc_failed() has reported it) when memory runs out.
+ *
+ * The new node is linked into the table only after all of its storage
+ * has been obtained, so a failed allocation never leaves a pointer to
+ * freed memory in the chain.
+ */
+
+static struct hinshipack *
+internhinshi (str, flag)
+     Wchar *str;
+     int flag;
+{
+  struct hinshipack *p, **pp;
+  Wchar *s;
+  int key = 0;
+
+  for (s = str; *s; s++)
+    key += (int) *s;
+  key = ((unsigned) key & HINSHIBUFINDEXMASK);
+  for (pp = partsofspeech + key; (p = *pp) != (struct hinshipack *) 0; pp = &(p->next))
+    {
+      if (!Wscmp (p->hinshi, str))
+        {
+          return p;
+        }
+    }
+  if (!flag)
+    {
+      return (struct hinshipack *) 0;
+    }
+
+  p = (struct hinshipack *) malloc (sizeof (struct hinshipack));
+  if (p)
+    {
+      (void) bzero (p, sizeof (struct hinshipack));
+      p->hinshi = (Wchar *) malloc ((Wslen (str) + 1) * sizeof (Wchar));
+      if (p->hinshi)
+        {
+          (void) Wscpy (p->hinshi, str);
+          p->nhinshis = 1;
+          *pp = p;              /* publish only the fully built node */
+          return p;
+        }
+      free (p);
+    }
+  malloc_failed ();
+  return (struct hinshipack *) 0;
+}
+
+/*
+ * Replace part-of-speech names.
+ *
+ * Reads the file named by hinshi_table.  Each line supplies two tokens,
+ * "from" and "to" (swapped when hinshi_direction is REVERSE).  When
+ * "from" is a registered part of speech its name is replaced by a copy
+ * of "to"; every '/' in the copy is turned into a 0 so that the node
+ * holds nhinshis consecutive names, and the node is marked REPLACED.
+ *
+ * Afterwards every node that was not replaced is reported on stderr and,
+ * if there was any, the program exits with status 1.  It also exits
+ * with status 1 when the table cannot be opened.
+ */
+
+static void
+replace_hinshi ()
+{
+  FILE *table;
+  Wchar line[READBUFSIZE], *cursor, *src, *dst;
+  struct hinshipack *entry;
+  int bucket, missing = 0;
+
+  table = fopen (hinshi_table, "r");
+  if (table == (FILE *) 0)
+    {
+      (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, hinshi_table);
+      exit (1);
+    }
+
+  while ((cursor = Fgetws (line, READBUFSIZE, table)) != (Wchar *) 0)
+    {
+      Wchar *replacement, *cp;
+      int count = 1;
+
+      src = extstr (cursor, &cursor, 0);
+      dst = extstr (cursor, &cursor, 0);
+      if (hinshi_direction == REVERSE)
+        {
+          Wchar *swap = src;
+          src = dst;
+          dst = swap;
+        }
+
+      entry = internhinshi (src, 0);
+      if (!entry)
+        {
+          continue;             /* not a part of speech in use */
+        }
+
+      replacement = (Wchar *) malloc ((Wslen (dst) + 1) * sizeof (Wchar));
+      if (!replacement)
+        {
+          malloc_failed ();
+          continue;
+        }
+
+      (void) Wscpy (replacement, dst);
+      free (entry->hinshi);
+      entry->hinshi = replacement;
+      /* split "a/b/c" into consecutive strings, counting them */
+      for (cp = replacement; *cp; cp++)
+        {
+          if (*cp == (Wchar) '/')
+            {
+              *cp = (Wchar) 0;
+              count++;
+            }
+        }
+      entry->nhinshis = count;
+      entry->flags |= REPLACED;
+    }
+  (void) fclose (table);
+
+  for (bucket = 0; bucket < HINSHIBUFSIZE; bucket++)
+    {
+      for (entry = partsofspeech[bucket]; entry; entry = entry->next)
+        {
+          if (entry->flags & REPLACED)
+            {
+              continue;
+            }
+          (void) fprintf (stderr, "%s: The replacement for \"", program);
+          Fputws (entry->hinshi, stderr);
+          (void) fprintf (stderr, "\" is not mentioned in the table.\n");
+          missing = 1;
+        }
+    }
+  if (missing)
+    {
+      exit (1);
+    }
+}
+
+/*
+ * select_hinshi -- reduce every registered part-of-speech name of the
+ * form "a/b/c" to one of its components.
+ *
+ *   n == 1   cut the name at its first slash (in place)
+ *   n == 2   keep the text between the first and the second slash;
+ *            names without two slashes are left alone
+ *   n == 3   keep the text after the second slash; names without two
+ *            slashes are left alone
+ *   other    nothing is changed
+ *
+ * For n == 2 and n == 3 the name is replaced by a freshly allocated
+ * copy; when that allocation fails the name is silently left as it was.
+ */
+static void
+select_hinshi (n)
+     int n;
+{
+  struct hinshipack *p;
+  Wchar *first, *second, *copy, *old;
+  int i;
+
+  if (!n)
+    return;
+
+  for (i = 0; i < HINSHIBUFSIZE; i++)
+    {
+      for (p = partsofspeech[i]; p; p = p->next)
+        {
+          if (n < 1 || n > 3)
+            {
+              continue;
+            }
+
+          first = findslash (p->hinshi);
+          if (!first)
+            {
+              continue;
+            }
+
+          if (n == 1)
+            {
+              *first = (Wchar) 0;
+              continue;
+            }
+
+          /* n is 2 or 3: both need the second slash */
+          first++;
+          second = findslash (first);
+          if (!second)
+            {
+              continue;
+            }
+
+          if (n == 2)
+            {
+              copy = (Wchar *) malloc ((second - first + 1) * sizeof (Wchar));
+              if (!copy)
+                {
+                  continue;
+                }
+              *second = (Wchar) 0;
+              Wscpy (copy, first);
+            }
+          else
+            {
+              second++;
+              copy = (Wchar *) malloc ((Wslen (second) + 1) * sizeof (Wchar));
+              if (!copy)
+                {
+                  continue;
+                }
+              Wscpy (copy, second);
+            }
+
+          old = p->hinshi;
+          p->hinshi = copy;
+          (void) free ((char *) old);
+        }
+    }
+}
+
+/* freedesc -- release a rule node: its three strings first, then the
+   node itself.  The node is not unlinked from any chain here. */
+static void
+freedesc (p)
+     struct descpack *p;
+{
+  free (p->yomdesc);
+  free (p->tandesc);
+  free (p->hinshi);
+  free (p);
+}
+
+/* Hash table of description rules, keyed by part-of-speech name in the
+   same way as partsofspeech[]. */
+static struct descpack *description[HINSHIBUFSIZE];
+
+/*
+ * Register a rule.
+ *
+ * Stores the pair (TAN, YOM) as the rule for part of speech HIN in
+ * description[].  If an identical rule is already present it is
+ * returned unchanged.  If a rule for HIN exists with different contents
+ * it is replaced: the new node takes the old node's place in the chain
+ * and the old node is freed.
+ *
+ * The new node is built completely before the table is touched, so when
+ * memory runs out the table (including any rule that was about to be
+ * replaced) is left exactly as it was; malloc_failed() is called and a
+ * null pointer returned.
+ */
+
+static struct descpack *
+interndesc (hin, tan, yom)
+     Wchar *hin, *tan, *yom;
+{
+  struct descpack *p, **pp, *fresh;
+  Wchar *s;
+  int key = 0;
+
+  for (s = hin; *s; s++)
+    key += (int) *s;
+  key = ((unsigned) key & HINSHIBUFINDEXMASK);
+  for (pp = description + key; (p = *pp) != (struct descpack *) 0; pp = &(p->next))
+    {
+      if (!Wscmp (p->hinshi, hin))
+        {
+          if (!Wscmp (p->tandesc, tan) && !Wscmp (p->yomdesc, yom))
+            {
+              return p;
+            }
+          break;                /* p is the outdated rule to replace */
+        }
+    }
+  /* here: p is the node to replace, or null with pp at the chain end */
+
+  fresh = (struct descpack *) malloc (sizeof (struct descpack));
+  if (fresh)
+    {
+      (void) bzero (fresh, sizeof (struct descpack));
+      fresh->hinshi = (Wchar *) malloc ((Wslen (hin) + 1) * sizeof (Wchar));
+      if (fresh->hinshi)
+        {
+          (void) Wscpy (fresh->hinshi, hin);
+          fresh->tandesc = (Wchar *) malloc ((Wslen (tan) + 1) * sizeof (Wchar));
+          if (fresh->tandesc)
+            {
+              (void) Wscpy (fresh->tandesc, tan);
+              fresh->yomdesc = (Wchar *) malloc ((Wslen (yom) + 1) * sizeof (Wchar));
+              if (fresh->yomdesc)
+                {
+                  (void) Wscpy (fresh->yomdesc, yom);
+                  if (p)
+                    {
+                      fresh->next = p->next;
+                      freedesc (p);
+                    }
+                  *pp = fresh;
+                  return fresh;
+                }
+              free (fresh->tandesc);
+            }
+          free (fresh->hinshi);
+        }
+      free (fresh);
+    }
+  malloc_failed ();
+  return (struct descpack *) 0;
+}
+
+/* Search for a rule: return the description[] node whose part of speech
+   equals HIN, or a null pointer when there is none. */
+
+static struct descpack *
+searchdesc (hin)
+     Wchar *hin;
+{
+  struct descpack *node;
+  Wchar *s;
+  int sum = 0;
+
+  for (s = hin; *s; s++)
+    {
+      sum += (int) *s;
+    }
+
+  node = description[(unsigned) sum & HINSHIBUFINDEXMASK];
+  while (node && Wscmp (node->hinshi, hin))
+    {
+      node = node->next;
+    }
+  return node;
+}
+
+/*
+ * store_description -- load the rule file named by description_table.
+ *
+ * Does nothing when no file name was given; exits with status 1 when
+ * the file cannot be opened.  Each line supplies up to three tokens
+ * (part of speech, word suffix, reading suffix); missing tokens are
+ * taken as empty strings.  Every line, even an empty one, is passed on
+ * to interndesc().
+ */
+static void
+store_description ()
+{
+  FILE *table;
+  Wchar line[READBUFSIZE], *cursor;
+
+  if (description_table == (char *) 0)
+    {
+      return;
+    }
+
+  table = fopen (description_table, "r");
+  if (table == (FILE *) 0)
+    {
+      (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, description_table);
+      exit (1);
+    }
+
+  while ((cursor = Fgetws (line, READBUFSIZE, table)) != (Wchar *) 0)
+    {
+      Wchar empty[1], *hin, *tan, *yom;
+
+      empty[0] = (Wchar) 0;
+      hin = extstr (cursor, &cursor, 0);
+      /* a later token is only looked for when the previous one exists */
+      tan = *hin ? extstr (cursor, &cursor, 0) : empty;
+      yom = *tan ? extstr (cursor, &cursor, 0) : empty;
+
+      interndesc (hin, tan, yom);
+    }
+  (void) fclose (table);
+}
+
+/* Table of known kinds.  One slot per bit of a long, because every kind
+   is identified by a single bit; nkinds is the number of slots in use. */
+struct kindpack kinds[sizeof (long) * 8];
+static int nkinds;
+
+/* Kind bits given to an entry whose input line names no kind. */
+#define KIHONBIT 1L
+
+/*
+ * Register a kind.
+ *
+ * S is a kind name, or several names separated by '/'.  Each name is
+ * looked up in kinds[] and registered with the next free bit when it is
+ * new.  Returns the OR of the bits of all names in S; a name that
+ * cannot be registered (table full or out of memory) contributes 0.
+ *
+ * S is modified: every '/' in it is overwritten with 0.
+ *
+ * The bit is computed in unsigned long so that every one of the
+ * sizeof (long) * 8 slots gets a distinct, well-defined bit (an int
+ * shift is undefined once the slot number reaches the width of int).
+ */
+
+static long
+internkind (s)
+     Wchar *s;
+{
+  int i;
+  Wchar *p;
+
+  p = findslash (s);
+  if (p)
+    {
+      long res;
+
+      *p = (Wchar) '\0';
+      res = internkind (s);
+      res |= internkind (p + 1);
+      return res;
+    }
+
+  for (i = 0; i < nkinds; i++)
+    {
+      if (!Wscmp (s, kinds[i].kind))
+        {
+          return kinds[i].kindbit;
+        }
+    }
+  if (nkinds < (sizeof (long) * 8) && (kinds[nkinds].kind = (Wchar *) malloc ((Wslen (s) + 1) * sizeof (Wchar))))
+    {
+      (void) Wscpy (kinds[nkinds].kind, s);
+      kinds[nkinds].kindbit = (long) ((unsigned long) 1 << nkinds);
+      return kinds[nkinds++].kindbit;
+    }
+  return 0;
+}
+
+/* Print the list of kinds: every registered kind name on stdout, one
+   per line, in table order. */
+
+static void
+listkinds ()
+{
+  struct kindpack *k, *end = kinds + nkinds;
+
+  for (k = kinds; k < end; k++)
+    {
+      Fputws (k->kind, stdout);
+      putchar ('\n');
+    }
+}
+
+/* kindcompar -- order two kind table slots by kind name (Wscmp order). */
+static int
+kindcompar (k1, k2)
+     struct kindpack *k1, *k2;
+{
+  Wchar *left = k1->kind;
+  Wchar *right = k2->kind;
+
+  return Wscmp (left, right);
+}
+
+/* Comparison function in the form qsort() expects; it forwards to
+   kindcompar().  kindcompar() itself is declared with struct kindpack
+   pointers, which makes it an incompatible argument for qsort(). */
+static int
+kindcompar_qsort (a, b)
+     const void *a, *b;
+{
+  return kindcompar ((struct kindpack *) a, (struct kindpack *) b);
+}
+
+/* sortkind -- sort the kinds[] table by kind name. */
+static void
+sortkind ()
+{
+  qsort (kinds, nkinds, sizeof (struct kindpack), kindcompar_qsort);
+}
+
+/* dic[] is the hash table of dictionary entries; ndicentries counts the
+   entries created by intern().  pdic is not used in the part of the file
+   documented here (presumably a flat array of entries built for sorting
+   -- TODO confirm). */
+static struct dicpack *dic[DICBUFSIZE], **pdic;
+static int ndicentries = 0;
+
+/*
+
+ intern -- look up / register a dictionary entry
+
+ When a non-null address is given as `stat' (the seventh argument) and no
+ matching entry is registered yet, the entry is registered.  When the
+ address is null nothing is registered.  Through `stat' the caller is
+ told whether the returned entry was FOUND or newly CREATEd.
+
+ `flags' selects what is ignored while matching (see below).
+
+ `hinshi' must not be 0.  `kind' may be 0, but note that such an entry
+ only matches when kinds are being ignored (-m).
+
+ When a description table is loaded, the suffixes of the rule for the
+ last '/'-separated component of `hinshi' are appended to the reading
+ and the word before matching; a missing rule is reported on stderr.
+
+ `key' is reduced to a bucket index of dic[].  Returns the entry, or a
+ null pointer when nothing matched and nothing was registered.  A new
+ entry is linked into dic[] only after all of its storage has been
+ obtained, so an allocation failure leaves the table untouched.
+
+ */
+
+/* flags */
+#define IGNORE_HINSHI 1L
+#define IGNORE_KIND   2L
+
+static struct dicpack *
+intern (key, yomi, kouho, hinshi, hindo, kind, stat, flags)
+     int key, hindo, *stat;
+     Wchar *yomi, *kouho, *hinshi;
+     long kind, flags;
+{
+  struct dicpack *p, **pp, *result = (struct dicpack *) 0;
+  struct descpack *dp;
+  Wchar *yom = (Wchar *) 0, *tan = (Wchar *) 0, *dhinshi, *dh;
+
+  if (description_table)
+    {
+      dhinshi = dh = hinshi;    /* find the Canna part of speech */
+      while (*dh)
+        {
+          if (*dh++ == (Wchar) '/')
+            {
+              dhinshi = dh;
+            }
+        }
+      dp = searchdesc (dhinshi);
+      if (dp)
+        {
+          if (Wslen (dp->yomdesc))
+            {
+              Wchar *t;
+              t = (Wchar *) malloc ((Wslen (yomi) + Wslen (dp->yomdesc) + 1) * sizeof (Wchar));
+              if (t)
+                {
+                  Wscpy (t, yomi);
+                  yom = yomi = t;
+                  Wscpy (yomi + Wslen (yomi), dp->yomdesc);
+                }
+            }
+          if (Wslen (dp->tandesc))
+            {
+              Wchar *t;
+              t = (Wchar *) malloc ((Wslen (kouho) + Wslen (dp->tandesc) + 1) * sizeof (Wchar));
+              if (t)
+                {
+                  Wscpy (t, kouho);
+                  tan = kouho = t;
+                  Wscpy (kouho + Wslen (kouho), dp->tandesc);
+                }
+            }
+        }
+      else
+        {
+          char foo[64];
+
+          fprintf (stderr, "no description rule for ");
+          Wcstombs (foo, dhinshi, 64);
+          fprintf (stderr, "%s.\n", foo);
+        }
+    }
+
+  key = ((unsigned) key & DICBUFINDEXMASK);
+  for (pp = dic + key; (p = *pp) != (struct dicpack *) 0; pp = &(p->next))
+    {
+      if (Wscmp (p->yomi, yomi) || Wscmp (p->tango, kouho))
+        {
+          continue;
+        }
+      if (!(flags & IGNORE_HINSHI) && Wscmp (p->hinshi->hinshi, hinshi))
+        {
+          continue;
+        }
+      if (!(flags & IGNORE_KIND) && (p->kind & kind) != kind)
+        {
+          continue;
+        }
+      /* match */
+      if (stat)
+        *stat = FOUND;
+      result = p;
+      goto done;
+    }
+
+  if (stat)
+    {
+      p = (struct dicpack *) malloc (sizeof (struct dicpack));
+      if (p)
+        {
+          (void) bzero (p, sizeof (struct dicpack));
+          p->yomi = (Wchar *) malloc ((Wslen (yomi) + 1) * sizeof (Wchar));
+          if (p->yomi)
+            {
+              (void) Wscpy (p->yomi, yomi);
+              p->tango = (Wchar *) malloc ((Wslen (kouho) + 1) * sizeof (Wchar));
+              if (p->tango)
+                {
+                  (void) Wscpy (p->tango, kouho);
+                  p->hinshi = internhinshi (hinshi, 1);
+                  if (p->hinshi)
+                    {
+                      p->hindo = hindo;
+                      p->kind = kind;
+                      p->extdata = (Wchar *) 0;
+                      *pp = p;  /* publish only the fully built entry */
+                      *stat = CREATE;
+                      ndicentries++;
+                      result = p;
+                      goto done;
+                    }
+                  free (p->tango);
+                }
+              free (p->yomi);
+            }
+          free (p);
+        }
+      malloc_failed ();
+    }
+
+done:
+  /* the extended reading/word were only needed for matching/copying */
+  if (yom)
+    free (yom);
+  if (tan)
+    free (tan);
+  return result;
+}
+
+/* Run FN on every registered entry: buckets of dic[] in index order,
+   entries within a bucket in chain order. */
+
+static void
+for_all_interned (fn)
+     void (*fn) ();
+{
+  struct dicpack **bucket, *entry;
+
+  for (bucket = dic; bucket < dic + DICBUFSIZE; bucket++)
+    {
+      for (entry = *bucket; entry; entry = entry->next)
+        {
+          (*fn) (entry);
+        }
+    }
+}
+
+/*
+ * storepd -- read dictionary lines from FILE and register them.
+ *
+ * A line consists of the tokens: reading, word, part of speech,
+ * frequency and, optionally, kind.  The hash key is the sum of the keys
+ * of the reading and the word.  Lines without a kind get KIHONBIT.
+ * Entries are matched regardless of kind (IGNORE_KIND), and the kind
+ * bits of the line are OR-ed into the entry that was found or created.
+ */
+static void
+storepd (file)
+     FILE *file;
+{
+  Wchar line[READBUFSIZE], *cursor;
+
+  while ((cursor = Fgetws (line, READBUFSIZE, file)) != (Wchar *) 0)
+    {
+      Wchar *yomi, *kouho, *hinshi, *kind;
+      int yomikey, kouhokey, freq, stat;
+      long kindbit;
+      struct dicpack *entry;
+
+      yomi = extstr (cursor, &cursor, &yomikey);
+      kouho = extstr (cursor, &cursor, &kouhokey);
+      hinshi = extstr (cursor, &cursor, 0);
+      freq = Watoi (extstr (cursor, &cursor, 0));
+
+      kind = extstr (cursor, 0, 0);
+      kindbit = *kind ? internkind (kind) : KIHONBIT;
+
+      entry = intern (yomikey + kouhokey, yomi, kouho, hinshi, freq, kindbit, &stat, IGNORE_KIND);
+      if (entry)
+        {
+          entry->kind |= kindbit;
+        }
+    }
+}
+
+/*
+ * comparepd -- read a second dictionary from FILE and compare it with
+ * the entries already registered.
+ *
+ * Lines are parsed as in storepd().  Depending on the options:
+ *
+ *   copy_frequency            nothing is registered; a matching entry
+ *                             gets the line's frequency and loses its
+ *                             COMMON flag
+ *   ignore_hinshi_to_compare  (and the entry was FOUND) every entry of
+ *                             the chain from the match onwards with the
+ *                             same reading and word is marked COMMON;
+ *                             with merge_sj3 the line's part of speech
+ *                             is appended to the entry's extdata,
+ *                             otherwise the kind bits are OR-ed in
+ *   otherwise                 the kind bits are OR-ed in and the entry
+ *                             is marked COMMON (found) or NEW (created)
+ *
+ * The old extdata string is released whenever it is replaced, including
+ * the case where it was an empty string.
+ */
+static void
+comparepd (file)
+     FILE *file;
+{
+  Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind;
+  int nhindo, key, tkey, stat, *statp = &stat;
+  struct dicpack *dicentry;
+  long kindbit, flags = 0L;
+
+  /* these depend on the options only, not on the line being read */
+  if (ignore_hinshi_to_compare)
+    {
+      flags |= IGNORE_HINSHI;
+    }
+  if (merge_kind || merge_sj3)
+    {
+      flags |= IGNORE_KIND;
+    }
+  if (copy_frequency)
+    {
+      statp = (int *) 0;        /* look up only, never register */
+    }
+
+  while ((p = Fgetws (readbuf, READBUFSIZE, file)) != (Wchar *) 0)
+    {
+      key = 0;
+      yomi = extstr (p, &p, &tkey);
+      key += tkey;
+      kouho = extstr (p, &p, &tkey);
+      key += tkey;
+      hinshi = extstr (p, &p, 0);
+      hindo = extstr (p, &p, 0);
+      nhindo = Watoi (hindo);
+
+      kind = extstr (p, 0, 0);
+      if (*kind)
+        {
+          kindbit = internkind (kind);
+        }
+      else
+        {
+          kindbit = KIHONBIT;
+        }
+
+      dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, statp, flags);
+      if (!dicentry)
+        {
+          continue;
+        }
+
+      if (copy_frequency)
+        {
+          dicentry->hindo = nhindo;
+          dicentry->flags &= ~COMMON;
+        }
+      else if (ignore_hinshi_to_compare && stat == FOUND)
+        {
+          /* in this case the chain of entries with the same key is
+             returned */
+          struct dicpack *pd;
+
+          for (pd = dicentry; pd; pd = pd->next)
+            {
+              if (Wscmp (pd->yomi, yomi) || Wscmp (pd->tango, kouho))
+                {
+                  continue;
+                }
+
+              pd->flags |= COMMON;
+              if (!merge_sj3)
+                {
+                  pd->kind |= kindbit;
+                }
+              else
+                {
+                  int len = 0;
+                  Wchar *dat;
+
+                  if (pd->extdata)
+                    {
+                      len = Wslen (pd->extdata);
+                    }
+                  dat = (Wchar *) malloc ((Wslen (hinshi) + 1 + len) * sizeof (Wchar));
+                  if (dat)
+                    {
+                      if (pd->extdata)
+                        {
+                          (void) Wscpy (dat, pd->extdata);
+                          (void) free ((char *) pd->extdata);
+                        }
+                      (void) Wscpy (dat + len, hinshi);
+                      pd->extdata = dat;
+                    }
+                }
+            }
+        }
+      else
+        {
+          dicentry->kind |= kindbit;
+          if (stat == FOUND)
+            {
+              dicentry->flags |= COMMON;
+            }
+          else
+            {                   /* CREATE */
+              dicentry->flags |= NEW;
+            }
+        }
+    }
+}
+
+/*
+ * canna_output -- write entry P to CF, one line per part of speech:
+ *
+ *     reading SP part-of-speech [*frequency] SP word NL
+ *
+ * H points to N consecutive 0-terminated part-of-speech names.  The
+ * "*frequency" part is left out when the frequency is 0.
+ */
+static void
+canna_output (cf, p, h, n)
+     FILE *cf;
+     struct dicpack *p;
+     Wchar *h;
+     int n;
+{
+  int done;
+
+  for (done = 0; done < n; done++)
+    {
+      Fputws (p->yomi, cf);
+      (void) putc (' ', cf);
+      Fputws (h, cf);
+      if (p->hindo != 0)
+        {
+          (void) fprintf (cf, "*%d", p->hindo);
+        }
+      (void) putc (' ', cf);
+      Fputws (p->tango, cf);
+      (void) putc ('\n', cf);
+
+      h += Wslen (h) + 1;       /* on to the next name */
+    }
+}
+
+/*
+ * entry_out -- write entry P to CF, one line per part of speech:
+ *
+ *     reading SP word SP [ex/]part-of-speech [SP frequency] [SP kinds] NL
+ *
+ * H points to N consecutive 0-terminated part-of-speech names.  EX, when
+ * non-null and merge_sj3 is set, is written in front of the part of
+ * speech followed by '/'.  The frequency is omitted for sj3_type_output.
+ * The kind column is omitted for wnn_type_output; otherwise it is either
+ * the fixed text `bunrui' or the '/'-joined names of the entry's kinds
+ * (restricted to specific_kind when that is set), and nothing at all
+ * when the kind bits equal KIHONBIT.
+ *
+ * Everything is written to CF, the fixed `bunrui' text included, and
+ * the leading blank of the kind column is produced on every line, not
+ * only on the first one.
+ */
+static void
+entry_out (cf, p, h, n, ex)
+     FILE *cf;
+     struct dicpack *p;
+     Wchar *h;
+     int n;
+     Wchar *ex;
+{
+  int i, f;
+  long b;
+
+  for (; n-- > 0; h += Wslen (h) + 1)
+    {
+      Fputws (p->yomi, cf);
+      (void) putc (' ', cf);
+      Fputws (p->tango, cf);
+      (void) putc (' ', cf);
+      if (merge_sj3 && ex)
+        {
+          Fputws (ex, cf);
+          (void) putc ('/', cf);
+        }
+      Fputws (h, cf);
+      if (!sj3_type_output)
+        {
+          (void) fprintf (cf, " %d", p->hindo);
+        }
+
+      if (!wnn_type_output)
+        {
+          if (bunrui)
+            {
+              (void) fprintf (cf, " %s", bunrui);
+            }
+          else
+            {
+              b = specific_kind ? (specific_kind & p->kind) : p->kind;
+              if (b != KIHONBIT)
+                {               /* write nothing if it is the basic kind only */
+                  f = 1;        /* first kind name of this line */
+                  for (i = 0; i < nkinds; i++)
+                    {
+                      if (b & kinds[i].kindbit)
+                        {
+                          (void) putc (f ? ' ' : '/', cf);
+                          f = 0;
+                          Fputws (kinds[i].kind, cf);
+                        }
+                    }
+                }
+            }
+        }
+      (void) putc ('\n', cf);
+    }
+}
+
+/*
+ * Write the entry P to the file CF.
+ *
+ * The entry is skipped when specific_kind is set and the entry has none
+ * of those kind bits, when extract_kana is set and the word is not all
+ * katakana, or when selhinshi is set and the part-of-speech name is
+ * empty.  The format is chosen by canna_type_output.
+ */
+
+static void
+printentry (cf, p)
+     FILE *cf;
+     struct dicpack *p;
+{
+  struct hinshipack *pos = p->hinshi;
+
+  if ((specific_kind && !(p->kind & specific_kind))
+      || (extract_kana && !all_kana (p->tango))
+      || (selhinshi && !pos->hinshi[0]))
+    {
+      return;
+    }
+
+  if (!canna_type_output)
+    {
+      entry_out (cf, p, pos->hinshi, pos->nhinshis, p->extdata);
+      return;
+    }
+  canna_output (cf, p, pos->hinshi, pos->nhinshis);
+}
+
+/* showdeleted -- print an entry that is not marked COMMON on stdout,
+   prefixed with "- ".  Entries marked COMMON produce no output. */
+static void
+showdeleted (p)
+     struct dicpack *p;
+{
+  if (p->flags & COMMON)
+    {
+      return;
+    }
+
+  (void) printf ("- ");
+  printentry (stdout, p);
+}
+
+/* Open the output named NAME for writing.  A null NAME yields a null
+   stream, the name "-" yields stdout.  Exits with status 1 when the
+   file cannot be opened. */
+static FILE *
+open_output (name)
+     char *name;
+{
+  FILE *fp;
+
+  if (!name)
+    {
+      return (FILE *) 0;
+    }
+  if (name[0] == '-' && !name[1])
+    {
+      return stdout;
+    }
+  fp = fopen (name, "w");
+  if (!fp)
+    {
+      (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, name);
+      exit (1);
+    }
+  return fp;
+}
+
+/* Close a stream obtained from open_output(); stdout and null streams
+   are left alone. */
+static void
+close_output (fp)
+     FILE *fp;
+{
+  if (fp && fp != stdout)
+    {
+      (void) fclose (fp);
+    }
+}
+
+/*
+ * showentry -- write the N entries of PD.
+ *
+ * Without `compare' every entry goes to stdout.  With `compare' an
+ * entry goes to the common_out, new_out or old_out file according to
+ * its COMMON / NEW flags (neither flag means "old"); entries whose
+ * output was not requested are dropped.  The output files are opened
+ * on entry, in the order common, old, new, and closed again before
+ * returning.
+ */
+static void
+showentry (pd, n)
+     struct dicpack **pd;
+     int n;
+{
+  FILE *cf, *of, *nf, *dest;
+  struct dicpack *p;
+  int i;
+
+  cf = open_output (common_out);
+  of = open_output (old_out);
+  nf = open_output (new_out);
+
+  for (i = 0; i < n; i++)
+    {
+      p = pd[i];
+      if (!compare)
+        {                       /* just print the normalized dictionary */
+          printentry (stdout, p);
+          continue;
+        }
+
+      if (p->flags & COMMON)
+        {
+          dest = cf;
+        }
+      else if (p->flags & NEW)
+        {
+          dest = nf;
+        }
+      else
+        {
+          dest = of;
+        }
+      if (dest)
+        {
+          printentry (dest, p);
+        }
+    }
+
+  close_output (cf);
+  close_output (of);
+  close_output (nf);
+}
+
+/* diccompar -- order two entries (given as pointers to entry pointers)
+   by reading, then word, then part-of-speech name.  Returns the first
+   non-zero Wscmp() result, or 0 when all three are equal. */
+static int
+diccompar (p1, p2)
+     struct dicpack **p1, **p2;
+{
+  struct dicpack *a = *p1, *b = *p2;
+  int n;
+
+  n = Wscmp (a->yomi, b->yomi);
+  if (n == 0)
+    {
+      n = Wscmp (a->tango, b->tango);
+    }
+  if (n == 0)
+    {
+      n = Wscmp (a->hinshi->hinshi, b->hinshi->hinshi);
+    }
+  return n;
+}
+
+/*
+ * dichindocompar -- qsort() comparator used when sorting by frequency.
+ *
+ * Orders two entries by yomi (Wscmp), then by hindo with the larger
+ * value first, then by tango and finally by hinshi->hinshi (both via
+ * Wscmp).  Returns the first non-zero comparison result, or 0 when
+ * every key compares equal (annotated "impossible" by the original
+ * author).
+ */
+static int
+dichindocompar (p1, p2)
+     struct dicpack **p1, **p2;
+{
+  struct dicpack *lhs = *p1;
+  struct dicpack *rhs = *p2;
+  int order;
+
+  order = Wscmp (lhs->yomi, rhs->yomi);
+  if (order != 0)
+    {
+      return order;
+    }
+
+  /* Operands are deliberately swapped: a higher hindo sorts earlier. */
+  order = (rhs->hindo - lhs->hindo);
+  if (order != 0)
+    {
+      return order;
+    }
+
+  order = Wscmp (lhs->tango, rhs->tango);
+  if (order != 0)
+    {
+      return order;
+    }
+
+  /* Last key: its result is the answer whether or not it is zero. */
+  order = Wscmp (lhs->hinshi->hinshi, rhs->hinshi->hinshi);
+  return order;
+}
+
+/*
+ * shrinkargs -- drop the first `n' pointers of a `count'-element vector.
+ *
+ * Elements argv[n] .. argv[count - 1] are moved down to argv[0] ..
+ * argv[count - n - 1].  The trailing `n' slots are left untouched, so
+ * they still hold their old pointers; the caller is expected to reduce
+ * its own element count.  Nothing is moved when n >= count.
+ */
+void
+shrinkargs (argv, n, count)
+     char **argv;
+     int n, count;
+{
+  int src;
+
+  /* Walk the source index; the destination always trails it by n. */
+  src = n;
+  while (src < count)
+    {
+      argv[src - n] = argv[src];
+      src++;
+    }
+}
+
+/*
+ * parseargs -- interpret the command line and open the input dictionaries.
+ *
+ * Sets the global `program' to the part of argv[0] after its last '/'.
+ * Every recognized single-letter option ("-x"), together with its value
+ * when it takes one, is removed from argv with shrinkargs() so that only
+ * the remaining arguments are left; anything unrecognized stays in place.
+ *
+ *   options taking a value: -b -c -d -k -n -o -r -s
+ *   flag options:           -1 -2 -3 -f -h -i -j -l -m -p -v -w -x
+ *
+ * Afterwards argv[1] is opened as in1 and, when exactly three arguments
+ * remain, argv[2] as in2.  An argument that is exactly "-" leaves the
+ * corresponding stream untouched (main() presets both to stdin).  in2
+ * is set to a null pointer when the remaining argc is not 3.  Finally
+ * store_description() is called if -d was given.
+ *
+ * Exits with status 1 when no input is named, when a value option has
+ * no value, or when an input file cannot be opened.
+ */
+static void
+parseargs (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int i;
+
+  /* Point `program' just past the last '/' of argv[0], or at its start.
+     Looking at program[-1] also catches a '/' in the very first byte. */
+  program = argv[0] + strlen (argv[0]);
+  while (argv[0] < program && program[-1] != '/')
+    {
+      program--;
+    }
+
+  for (i = 1; i < argc;)
+    {
+      char *arg = argv[i];
+      /* Only slots below argc are live: earlier shrinks leave stale
+         pointers behind, so argv[argc] is not a reliable terminator. */
+      char *value = (i + 1 < argc) ? argv[i + 1] : (char *) 0;
+      int width = 0;            /* argv slots consumed; 0 = not an option */
+
+      /* arg[1] is tested before arg[2] so that a lone "-" is never read
+         past its terminating NUL. */
+      if (arg[0] != '-' || arg[1] == '\0' || arg[2] != '\0')
+        {
+          i++;
+          continue;
+        }
+
+      switch (arg[1])
+        {
+        case '1':
+        case '2':
+        case '3':
+          selhinshi = arg[1] - '0';
+          width = 1;
+          break;
+
+        case 'b':
+          bunrui = value;
+          width = 2;
+          break;
+
+        case 'c':
+          common_out = value;
+          width = 2;
+          break;
+
+        case 'd':
+          description_table = value;
+          width = 2;
+          break;
+
+        case 'f':
+          copy_frequency = 1;
+          width = 1;
+          break;
+
+        case 'h':
+          ignore_hinshi_to_compare = 1;
+          width = 1;
+          break;
+
+        case 'i':
+          canna_type_output = 1;
+          wnn_type_output = 0;
+          width = 1;
+          break;
+
+        case 'j':
+          extract_kana = 1;
+          width = 1;
+          break;
+
+        case 'k':
+          /* A missing value is rejected below; do not convert a null
+             pointer in the meantime. */
+          if (value)
+            {
+              Wchar buf[READBUFSIZE];
+
+              (void) Mbstowcs (buf, value, READBUFSIZE);
+              specific_kind |= internkind (buf);
+            }
+          width = 2;
+          break;
+
+        case 'l':
+          list_kinds = 1;
+          width = 1;
+          break;
+
+        case 'm':
+          merge_kind = 1;
+          width = 1;
+          break;
+
+        case 'n':
+          new_out = value;
+          width = 2;
+          break;
+
+        case 'o':
+          old_out = value;
+          width = 2;
+          break;
+
+        case 'p':
+          sort_by_frequency = 1;
+          width = 1;
+          break;
+
+        case 'r':
+          hinshi_table = value;
+          hinshi_direction = REVERSE;
+          width = 2;
+          break;
+
+        case 's':
+          hinshi_table = value;
+          width = 2;
+          break;
+
+        case 'v':
+          sj3_type_output = 1;
+          wnn_type_output = 1;  /* set as well because the format resembles Wnn's */
+          width = 1;
+          break;
+
+        case 'w':
+          canna_type_output = 0;
+          sj3_type_output = 0;
+          wnn_type_output = 1;
+          width = 1;
+          break;
+
+        case 'x':
+          merge_sj3 = 1;
+          ignore_hinshi_to_compare = 1;
+          width = 1;
+          break;
+
+        default:
+          break;
+        }
+
+      if (width == 0)
+        {
+          i++;                  /* unknown option letter: leave it in argv */
+          continue;
+        }
+      if (width == 2 && !value)
+        {
+          (void) fprintf (stderr, "%s: option \"%s\" requires an argument.\n", program, arg);
+          exit (1);
+        }
+
+      /* Remove the option (and its value) and re-examine slot i. */
+      shrinkargs (argv + i, width, argc - i);
+      argc -= width;
+    }
+
+  if (argc < 2)
+    {
+      (void) fprintf (stderr, "Usage: %s dic1 [dic2] [-c filecommon] ...\n", program);
+      exit (1);
+    }
+
+  if (argv[1][0] != '-' || argv[1][1])
+    {
+      in1 = fopen (argv[1], "r");
+      if (!in1)
+        {
+          (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[1]);
+          exit (1);
+        }
+    }
+  if (argc == 3)
+    {
+      if (argv[2][0] != '-' || argv[2][1])
+        {
+          in2 = fopen (argv[2], "r");
+          if (!in2)
+            {
+              (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[2]);
+              exit (1);
+            }
+        }
+    }
+  else
+    {
+      in2 = (FILE *) 0;
+    }
+  if (description_table)
+    {
+      store_description ();
+    }
+}
+
+/* The name "kihon" as a zero-terminated Wchar string; main() hands it
+   to internkind() before any arguments are parsed. */
+static Wchar kihonh[] = {
+  'k', 'i', 'h', 'o', 'n', 0
+};
+
+/*
+ * main -- program entry point.
+ *
+ * Presets both inputs to stdin, registers the "kihon" kind, lets
+ * parseargs() process the command line, then loads the first
+ * dictionary with storepd().  When a second input exists, `compare' is
+ * set and comparepd() is run on it.  With list_kinds set, listkinds()
+ * is called and the program exits.  Otherwise select_hinshi() or
+ * replace_hinshi() is applied as requested, the entries chained in the
+ * dic[] buckets are gathered into the pdic array, sorted with qsort()
+ * (dichindocompar when sort_by_frequency is set, diccompar otherwise)
+ * and passed to showentry().  Always terminates through exit().
+ */
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+#ifndef POD_WCHAR
+  setlocale (LC_ALL, "");
+#endif
+
+  in1 = in2 = stdin;
+  (void) internkind (kihonh);   /* for the basic dictionary; registered as 1L */
+  parseargs (argc, argv);
+  storepd (in1);
+  /* When both inputs are "-" they are the same stream; keep it open so
+     that comparepd() does not read from a closed FILE. */
+  if (in1 != in2)
+    {
+      (void) fclose (in1);
+    }
+
+  if (in2)
+    {
+      compare = 1;
+      comparepd (in2);
+      (void) fclose (in2);
+    }
+
+  if (list_kinds)
+    {
+      listkinds ();
+      exit (0);
+    }
+
+  if (selhinshi)
+    {
+      select_hinshi (selhinshi);
+    }
+  else if (hinshi_table)
+    {
+      replace_hinshi ();
+    }
+
+  /* Ask for at least one slot: malloc (0) is allowed to return a null
+     pointer, which would be misreported as an allocation failure when
+     the dictionary is empty. */
+  pdic = (struct dicpack **) malloc ((ndicentries ? ndicentries : 1) * sizeof (struct dicpack *));
+  if (pdic)
+    {
+      int i, j;
+      struct dicpack *p;
+
+      /* Flatten every bucket chain into pdic.
+         NOTE(review): assumes ndicentries equals the number of chained
+         entries -- confirm against storepd()/comparepd(). */
+      for (i = 0, j = 0; i < DICBUFSIZE; i++)
+        {
+          for (p = dic[i]; p; p = p->next)
+            {
+              pdic[j++] = p;
+            }
+        }
+      if (sort_by_frequency)
+        {
+          qsort (pdic, ndicentries, sizeof (struct dicpack *), dichindocompar);
+        }
+      else
+        {
+          qsort (pdic, ndicentries, sizeof (struct dicpack *), diccompar);
+        }
+      sortkind ();
+      showentry (pdic, ndicentries);
+    }
+  else
+    {
+      malloc_failed ();
+    }
+  exit (0);
+}