view PubdicPlus/pod.c @ 25:466fe6732d8d

- fixed more NULL pointer related errata - suppress warnings
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Sat, 06 Mar 2010 04:37:31 +0900
parents bbc77ca4def5
children a7ccf412ba02
line wrap: on
line source

/* Copyright 1994 Pubdic Project.
 *
 * Permission to use, copy, modify, distribute and sell this software
 * and its documentation for any purpose is hereby granted without
 * fee, provided that the above copyright notice appear in all copies
 * and that both that copyright notice and this permission notice
 * appear in supporting documentation, and that the name of Pubdic
 * Project not be used in advertising or publicity pertaining to
 * distribution of the software without specific, written prior
 * permission.  Pubdic Project makes no representations about the
 * suitability of this software for any purpose.  It is provided "as
 * is" without express or implied warranty.
 *
 * PUBDIC PROJECT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN 
 * NO EVENT SHALL PUBDIC PROJECT BE LIABLE FOR ANY SPECIAL, INDIRECT OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 
 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 
 * OTHER TORTUOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 
 * PERFORMANCE OF THIS SOFTWARE. 
 */

#ifndef lint
static char rcsid[] = "$Id: pod.c,v 1.7 2005/12/10 18:50:43 aonoto Exp $";
#endif

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#include <stdio.h>
#if STDC_HEADERS
#  include <stdlib.h>
#  include <stddef.h>
#  include <string.h>
#else
#  if HAVE_MALLOC_H
#    include <malloc.h>
#  endif
#  if HAVE_STRINGS_H
#    include <strings.h>
#  endif
#endif /* STDC_HEADERS */

/* Wide-character type selection.  POD_WCHAR is defined unconditionally
   right here, so the private unsigned-short Wchar is always chosen and
   the <widec.h>/wchar_t alternative below is not compiled. */
#define POD_WCHAR
#ifdef POD_WCHAR
typedef unsigned short Wchar;
#else
#include <locale.h>
#include <widec.h>
#define Wchar wchar_t
#endif

/* Fallback: express bzero() through memset() when only the latter is
   reported as available by the configuration macros. */
#if !(HAVE_BZERO) && (HAVE_MEMSET)
#  define bzero(a, c) memset(a, 0, c)
#endif

/* Program name; used as the prefix of diagnostics written to stderr. */
static char *program;
/* Run-time option state.  Several of these (selhinshi, bunrui,
   common_out, description_table, copy_frequency,
   ignore_hinshi_to_compare, canna_type_output, extract_kana,
   specific_kind) are assigned from command-line switches in
   parseargs(); the others are presumably set the same way in code not
   shown here -- TODO confirm. */
static int compare, ignore_hinshi_to_compare, sort_by_frequency, merge_sj3;
static int merge_kind, wnn_type_output, canna_type_output, sj3_type_output;
static int list_kinds;
static int copy_frequency, extract_kana = 0;
/* Bit mask of kinds; when non-zero, printentry() skips entries whose
   kind has none of these bits. */
static long specific_kind;
static FILE *in1, *in2;
/* Output file names used by showentry(); "-" selects stdout.
   hinshi_table is read by replace_hinshi(), bunrui is printed by
   entry_out(). */
static char *common_out, *old_out, *new_out, *hinshi_table, *bunrui;
/* File name of the rule table read by store_description(). */
static char *description_table;
/* 0, or which '/'-separated field of a part-of-speech name to keep
   (see select_hinshi()). */
static int selhinshi = 0;

/* hinshi_direction: whether replace_hinshi() maps the first column of
   the table to the second (INORDER) or the other way round (REVERSE). */
#define INORDER 0
#define REVERSE 1

static int hinshi_direction = INORDER;  /* see above */

/* READBUFSIZE: size of the line buffers (in chars and in Wchars).
   The hash-table sizes are powers of two so that "& ...INDEXMASK"
   reduces a key to a valid bucket index. */
#define READBUFSIZE 128
#define DICBUFSIZE (2 << 13)
#define DICBUFINDEXMASK (DICBUFSIZE - 1)
#define HINSHIBUFSIZE (2 << 13)
#define HINSHIBUFINDEXMASK (HINSHIBUFSIZE - 1)

/* status of intern(): stored through its stat argument */
#define FOUND 0
#define CREATE 1

/* Structure representing a part of speech (hinshi).  Instances live in
   the partsofspeech[] hash table and are chained through next. */

struct hinshipack
{
  int nhinshis;                 /* number of NUL-separated names stored in hinshi */
  Wchar *hinshi;                /* malloc'ed name(s); several names are laid out back to back */
  unsigned flags;               /* see below */
  struct hinshipack *next;      /* next entry in the same hash bucket */
};

/* values of (struct hinshipack.)flags */
#define REPLACED 1              /* name was rewritten by replace_hinshi() */

/* Internal representation of the rule file used for appending the
   terminal (dictionary) form -- "I think", per the original author.
   One node maps a part-of-speech name to a suffix for the word
   (tandesc) and a suffix for the reading (yomdesc); intern() appends
   them to kouho and yomi respectively. */

struct descpack
{
  Wchar *hinshi, *tandesc, *yomdesc;
  struct descpack *next;        /* next rule in the same hash bucket */
};

/* Structure representing the kind (category) of an entry, and related
   definitions.  Each distinct kind name is paired with one bit. */

struct kindpack
{
  Wchar *kind;                  /* malloc'ed kind name */
  long kindbit;                 /* the single bit assigned to this kind by internkind() */
};

/* Structure representing a dictionary entry.  Entries live in the
   dic[] hash table and are chained through next. */

struct dicpack
{
  Wchar *yomi, *tango;          /* reading and word, both malloc'ed copies */
  struct hinshipack *hinshi;    /* interned part of speech */
  int hindo;                    /* frequency value parsed from the input line */
  long kind;                    /* OR of kindpack.kindbit values */
  Wchar *extdata;               /* extra text accumulated by comparepd() when merge_sj3 is set, else null */
  unsigned flags;               /* SEE BELOW */
  struct dicpack *next;         /* next entry in the same hash bucket */
};

/* values of (struct dicpack.)flags */
#define COMMON 001              /* entry was matched again by comparepd() */
#define NEW    002              /* entry was created by comparepd() */

/* Forward declarations, compiled only where the compiler is known to
   accept prototypes.  for_all_interned() and showdeleted() are defined
   later in the file but have no prototype in this list. */
#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32)
/* Prototype for C89 (or later) */
#ifdef POD_WCHAR
size_t Mbstowcs (Wchar *d, char *ss, int n);
size_t Wcstombs (char *d, Wchar *s, int n);
int    Wscmp (register Wchar *s1, register Wchar *s2);
Wchar  *Wscpy (Wchar *d, register Wchar *s);
int    Wslen (Wchar *s);
int    Watoi (Wchar *s);
static void Fputws (Wchar *s, FILE *f);
Wchar  *Fgetws (Wchar *buf, int siz, FILE *f);
#endif /* POD_WCHAR */

static int all_kana (Wchar *s);
static Wchar *findslash (Wchar *s);
static Wchar *extstr (Wchar *p, Wchar **pp, int *key_return);
static void malloc_failed (void);
static struct hinshipack *internhinshi (Wchar *str, int flag);
static void replace_hinshi (void);
static void select_hinshi (int n);
static void freedesc (struct descpack *p);
static struct descpack *interndesc (Wchar *hin, Wchar *tan, Wchar *yom);
static struct descpack *searchdesc (Wchar *hin);
static void store_description (void);
static long internkind (Wchar *s);
static void listkinds (void);
static int kindcompar (const void *p1, const void *p2);
static void sortkind (void);
static struct dicpack *intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags);
static void storepd (FILE *file);
static void comparepd (FILE *file);
static void canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n);
static void entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex);
static void printentry (FILE *cf, struct dicpack *p);
static void showentry (struct dicpack **pd, int n);
static int diccompar (const void *pp1, const void *pp2);
static int dichindocompar (const void *pp1, const void *pp2);
void shrinkargs (char **argv, int n, int count);
static void parseargs (int argc, char *argv[]);
#endif

/* Without POD_WCHAR the wide-string helpers are simply aliases of the
   system routines; with it, the private implementations that follow
   are used together with the byte/mask constants below. */
#ifndef POD_WCHAR
# define Mbstowcs mbstowcs
# define Wcstombs wcstombs
# define Wscmp wscmp
# define Wscpy wscpy
# define Wslen wslen
# define Fgetws fgetws
# define Fputws fputws
#else
/* Lead bytes that Mbstowcs() treats specially and Wcstombs() emits. */
# define SS2 0x8e
# define SS3 0x8f
/* High bit of a byte, and the mask that clears it. */
# define MSB 0x80
# define MSK 0x7f

/* Wcstombs() classifies a Wchar by (ch & WCMSK): WCG0 is written as
   one byte, WCG1 as two bytes, WCG2 as SS2 plus one byte and WCG3 as
   SS3 plus two bytes. */
# define WCG0 0x0000
# define WCG1 0x8080
# define WCG2 0x0080
# define WCG3 0x8000
# define WCMSK 0x8080

/*
 * Mbstowcs -- convert a multibyte string to Wchar.
 *
 *   d   destination; a terminating 0 is always stored after the
 *       converted characters, so room for n + 1 elements is needed.
 *   ss  NUL-terminated source string.
 *   n   maximum number of Wchars to convert (terminator not counted).
 *
 * Returns the number of Wchars stored, not counting the terminator.
 *
 * Bytes without the high bit map to themselves.  SS2 is followed by
 * one byte, SS3 by two bytes, and any other high-bit byte by one byte.
 * A sequence that is cut short by the end of the string (this happens
 * when fgets() splits a long line in the middle of a character) ends
 * the conversion; the trailing bytes are never read past the NUL.
 */
size_t
Mbstowcs (Wchar *d, char *ss, int n)
{
  Wchar *p = d;
  unsigned char *s = (unsigned char *) ss;
  int ch;

  while ((p - d < n) && (ch = *s++) != 0)
    {
      if (!(ch & MSB))
        {
          *p++ = (Wchar) ch;
        }
      else if (ch == SS2)
        {                       /* kana */
          if (!s[0])
            break;              /* truncated sequence */
          *p++ = (Wchar) *s++;
        }
      else if (ch == SS3)
        {
          if (!s[0] || !s[1])
            break;              /* truncated sequence */
          *p++ = (Wchar) ((s[0] << 8) | (s[1] & MSK));
          s += 2;
        }
      else
        {
          if (!s[0])
            break;              /* truncated sequence */
          *p++ = (Wchar) ((ch << 8) | (*s++ & 0xff));
        }
    }
  *p = (Wchar) 0;
  return (size_t) (p - d);
}

/*
 * Wcstombs -- convert a Wchar string to its multibyte form.
 *
 *   d   destination buffer of n bytes.
 *   s   0-terminated Wchar string.
 *   n   size of d in bytes, including room for the terminating NUL.
 *
 * Returns the number of bytes stored, not counting the NUL.  When n is
 * not positive nothing is written and 0 is returned.
 *
 * A character is only emitted when all of its bytes plus the NUL still
 * fit, so the output is never cut in the middle of a character and the
 * terminator is always written inside the buffer.
 */
size_t
Wcstombs (char *d, Wchar *s, int n)
{
  char *p = d;
  Wchar ch;

  if (n <= 0)
    return 0;

  while ((ch = *s++) != 0)
    {
      int need;

      switch (ch & WCMSK)
        {
        case WCG0:
          need = 1;
          break;
        case WCG3:
          need = 3;
          break;
        default:                /* WCG1 and WCG2 */
          need = 2;
          break;
        }
      if ((p - d) + need + 1 > n)
        break;                  /* no room for this character and the NUL */

      switch (ch & WCMSK)
        {
        case WCG0:
          *p++ = (char) ch;
          break;

        case WCG1:
          *p++ = (char) ((ch >> 8) & 0xff);
          *p++ = (char) (ch & 0xff);
          break;

        case WCG2:
          *p++ = (char) SS2;
          *p++ = (char) ch;
          break;

        case WCG3:
          *p++ = (char) SS3;
          *p++ = (char) ((ch >> 8) & 0xff);
          *p++ = (char) ((ch & 0xff) | MSB);
          break;
        }
    }
  *p = '\0';
  return (size_t) (p - d);
}

int
Wscmp (s1, s2)
     register Wchar *s1, *s2;
{
  register int res;

  /* 以下のコードはいささかトリッキーなので、説明を加えておこう。
     以下ではこのコメント内にあるようなことをしたいわけである。

     while (*s1 && *s2 && && *s1 == *s2) {
     s1++; s2++;
     }
     return *s1 - *s2;

     すなわち、s1 も s2 も EOS ('\0') を指していなくて、しかも値が
     異なる間はそれぞれのポインタを進める。いずれかが EOS になるか、
     値が違ってきた場合には、*s1 - *s2 を返す。
   */

  while (!(res = *s1 - *s2++) && *s1++)
    ;
  return res;
}

/*
 * Wscpy -- copy the 0-terminated Wchar string s, terminator included,
 * to d.  Elements are copied front to back.  Returns d.
 */
Wchar *
Wscpy (Wchar *d, Wchar *s)
{
  size_t i;

  for (i = 0; s[i] != 0; i++)
    {
      d[i] = s[i];
    }
  d[i] = (Wchar) 0;
  return d;
}

/* Wslen -- number of elements in s before its terminating 0. */
int
Wslen (Wchar *s)
{
  int len = 0;

  while (s[len] != 0)
    {
      len++;
    }
  return len;
}

/*
 * Watoi -- value of the leading run of ASCII decimal digits in s.
 * Conversion stops at the first element that is not '0'..'9'; a string
 * that does not start with a digit yields 0.  No sign and no leading
 * white space are recognised.
 */
int
Watoi (Wchar *s)
{
  int value = 0;

  for (; *s >= (Wchar) '0' && *s <= (Wchar) '9'; s++)
    {
      value = value * 10 + (*s - (Wchar) '0');
    }
  return value;
}

/*
 * Fputws -- write the Wchar string s to f in multibyte form.
 *
 * Every Wchar converts to at most three bytes, so the required size is
 * known in advance.  Short strings use a stack buffer; longer ones get
 * a temporary heap buffer so that they are no longer silently cut off
 * at READBUFSIZE bytes.  If that allocation fails the string is
 * written truncated, which is what used to happen for every long
 * string.  Nothing is written for an empty string.
 */
static void
Fputws (Wchar *s, FILE *f)
{
  char local[READBUFSIZE];
  char *buf = local;
  int limit = READBUFSIZE - 1;  /* one byte of the buffer is kept spare */
  int need = Wslen (s) * 3 + 1; /* worst case: 3 bytes per Wchar + NUL */

  if (need > limit)
    {
      char *big = (char *) malloc ((size_t) need + 1);

      if (big)
        {
          buf = big;
          limit = need;
        }
    }

  if (Wcstombs (buf, s, limit))
    {
      (void) fputs (buf, f);
    }

  if (buf != local)
    {
      free (buf);
    }
}

/*
 * Fgetws -- read one line from f and convert it to Wchar.
 *
 *   buf  destination of siz Wchars.
 *   siz  capacity of buf, terminator included.
 *   f    input stream.
 *
 * Returns buf, or a null pointer at end of file, on a read error, when
 * siz is not positive, or when nothing could be converted.  At most
 * READBUFSIZE - 1 bytes are read per call, so a longer line is
 * delivered in several pieces.
 */
Wchar *
Fgetws (Wchar *buf, int siz, FILE *f)
{
  char mbuf[READBUFSIZE];

  if (siz <= 0)
    {
      return (Wchar *) 0;
    }
  if (!fgets (mbuf, READBUFSIZE, f))
    {
      return (Wchar *) 0;
    }
  /* Mbstowcs() stores a terminator after the characters it converts,
     so let it convert at most siz - 1 of them to stay inside buf. */
  if (!Mbstowcs (buf, mbuf, siz - 1))
    {
      return (Wchar *) 0;
    }
  return buf;
}
#endif

/*
 * all_kana -- return 1 when s consists entirely of katakana, else 0.
 *
 * A character counts as katakana when it lies between small A and
 * small KE inclusive, or is the prolonged sound mark.  An empty string
 * yields 1.
 *
 * The three reference characters are converted once from their EUC
 * byte sequences.  The former literals were the 7-bit JIS codes
 * ("\045\041" and so on), which Mbstowcs() converts as plain ASCII, so
 * the range test could never match a kana; in addition the conversion
 * wrote its terminator past the single Wchar it was handed.  A
 * two-element scratch buffer avoids that overrun.
 */

static int
all_kana (Wchar *s)
{
  static Wchar xa = 0, xke, aa;
  Wchar *p;

  if (!xa)
    {
      Wchar tmp[2];

      Mbstowcs (tmp, "\245\241", 1);    /* small A */
      xa = tmp[0];
      Mbstowcs (tmp, "\245\366", 1);    /* small KE */
      xke = tmp[0];
      Mbstowcs (tmp, "\241\274", 1);    /* prolonged sound mark */
      aa = tmp[0];
    }

  for (p = s; *p; p++)
    {
      if (!(*p == aa || (xa <= *p && *p <= xke)))
        {
          return 0;
        }
    }

  return 1;
}

/* findslash -- return a pointer to the first '/' in s, or a null
   pointer when s contains none. */

static Wchar *
findslash (Wchar *s)
{
  Wchar *cur;

  for (cur = s; *cur != 0; cur++)
    {
      if (*cur == (Wchar) '/')
        return cur;
    }
  return (Wchar *) 0;
}

/*
 * extstr -- extract one token from p, modifying the buffer in place.
 *
 *   p           where to start; leading blanks and tabs are skipped.
 *   pp          if non-null, receives the position at which the next
 *               call should continue.
 *   key_return  if non-null, receives the sum of the token's character
 *               values (callers use it as a hash key).
 *
 * The token ends at the first blank, tab, newline or end of string.
 * A delimiter is overwritten with 0 and the continue position is the
 * element after it.  When the token ends at the end of the string the
 * continue position stays ON the terminator, so further calls return
 * empty tokens instead of walking into whatever lies behind the string
 * in the buffer.
 *
 * Returns a pointer to the (now 0-terminated) token.
 */

static Wchar *
extstr (Wchar *p, Wchar **pp, int *key_return)
{
  Wchar *res;
  int key = 0;

  while (*p == (Wchar) ' ' || *p == (Wchar) '\t')
    p++;
  res = p;
  while (*p && *p != (Wchar) ' ' && *p != (Wchar) '\t' && *p != (Wchar) '\n')
    {
      key += (int) *p++;
    }
  if (*p)
    {
      /* stopped at a delimiter: cut the token here and step over it */
      *p++ = (Wchar) '\0';
    }
  if (pp)
    *pp = p;
  if (key_return)
    *key_return = key;
  return res;
}

/* Hash table of part-of-speech names.  internhinshi() picks a bucket
   from the sum of the name's characters masked with
   HINSHIBUFINDEXMASK; collisions are chained through hinshipack.next. */
static struct hinshipack *partsofspeech[HINSHIBUFSIZE];

/* malloc_failed -- report an allocation failure on stderr, prefixed
   with the program name.  The caller decides how to carry on. */
static void
malloc_failed (void)
{
  FILE *err = stderr;

  (void) fprintf (err, "%s: malloc failed.\n", program);
}

/*
 * internhinshi -- look up a part-of-speech name in the part-of-speech
 * table, optionally registering it.
 *
 *   str   the name to look up.
 *   flag  non-zero: create a new entry when the name is not present;
 *         zero: only search.
 *
 * Returns the entry, or a null pointer when the name is absent and
 * flag is zero, or when an allocation failed (malloc_failed() has been
 * called in that case).
 *
 * A new node is linked into its hash chain only after every allocation
 * has succeeded, so a failure never leaves a freed node reachable from
 * the table.
 */

static struct hinshipack *
internhinshi (Wchar *str, int flag)
{
  struct hinshipack *p, **pp;
  Wchar *s;
  int key = 0;

  for (s = str; *s; s++)
    key += (int) *s;
  key = ((unsigned) key & HINSHIBUFINDEXMASK);

  /* On exit without a match, pp addresses the null link at the end of
     the chain, which is where a new node goes. */
  for (pp = partsofspeech + key; (p = *pp) != 0; pp = &(p->next))
    {
      if (!Wscmp (p->hinshi, str))
        {
          return p;
        }
    }
  if (!flag)
    {
      return (struct hinshipack *) 0;
    }

  p = (struct hinshipack *) malloc (sizeof (struct hinshipack));
  if (p)
    {
      (void) bzero (p, sizeof (struct hinshipack));
      p->hinshi = (Wchar *) malloc ((Wslen (str) + 1) * sizeof (Wchar));
      if (p->hinshi)
        {
          (void) Wscpy (p->hinshi, str);
          p->nhinshis = 1;
          *pp = p;              /* publish only the fully built node */
          return p;
        }
      free (p);
    }
  malloc_failed ();
  return (struct hinshipack *) 0;
}

/*
 * replace_hinshi -- replace part-of-speech names.
 *
 * Reads the file named by hinshi_table.  Each line supplies two
 * tokens; the first is the name to replace and the second its
 * replacement, or the other way round when hinshi_direction is
 * REVERSE.  Names that are not already in the part-of-speech table are
 * ignored.  In the replacement every '/' becomes a 0 so that one entry
 * can hold several names; nhinshis records how many.
 *
 * Afterwards every entry that was not replaced is reported on stderr,
 * and the program exits with status 1 if there was any.  It also exits
 * with status 1 when the table file cannot be opened.
 */

static void
replace_hinshi ()
{
  FILE *f;
  Wchar readbuf[READBUFSIZE], *to, *from, *s;
  struct hinshipack *hinshientry, *p;
  int i, err = 0;

  f = fopen (hinshi_table, "r");
  if (!f)
    {
      (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, hinshi_table);
      exit (1);
    }
  while (s = Fgetws (readbuf, READBUFSIZE, f))
    {
      from = extstr (s, &s, 0);
      to = extstr (s, &s, 0);
      if (hinshi_direction == REVERSE)
        {
          /* swap the two columns */
          Wchar *xx = from;
          from = to;
          to = xx;
        }

      /* flag 0: look up only, never create */
      hinshientry = internhinshi (from, 0);
      if (hinshientry)
        {
          Wchar *xx;

          xx = (Wchar *) malloc ((Wslen (to) + 1) * sizeof (Wchar));
          if (xx)
            {
              Wchar *cp;
              int n = 1;

              (void) Wscpy (xx, to);
              free (hinshientry->hinshi);
              hinshientry->hinshi = xx;
              /* split "a/b/c" into "a\0b\0c" and count the parts */
              for (cp = xx; *cp; cp++)
                {
                  if (*cp == (Wchar) '/')
                    {
                      *cp = (Wchar) 0;
                      n++;
                    }
                }
              hinshientry->nhinshis = n;
              hinshientry->flags |= REPLACED;
            }
          else
            {
              malloc_failed ();
            }
        }
    }
  (void) fclose (f);

  /* every known part of speech must have received a replacement */
  for (i = 0; i < HINSHIBUFSIZE; i++)
    {
      for (p = partsofspeech[i]; p; p = p->next)
        {
          if (!(p->flags & REPLACED))
            {
              (void) fprintf (stderr, "%s: The replacement for \"", program);
              Fputws (p->hinshi, stderr);
              (void) fprintf (stderr, "\" is not mentioned in the table.\n");
              err = 1;
            }
        }
    }
  if (err)
    {
      exit (1);
    }
}

/*
 * select_hinshi -- reduce every part-of-speech name of the form
 * "a/b/c" to one of its fields.
 *
 *   n = 1  keep the text before the first '/'.
 *   n = 2  keep the text between the first and the second '/'.
 *   n = 3  keep the text after the second '/'.
 *   other  (including 0) leave all names untouched.
 *
 * A name that does not contain the slashes needed for the requested
 * field is left unchanged.  When the copy for field 2 or 3 cannot be
 * allocated the name is also left unchanged, and the failure is now
 * reported through malloc_failed() instead of being ignored.
 */
static void
select_hinshi (int n)
{
  struct hinshipack *p;
  int i;

  if (!n)
    return;

  for (i = 0; i < HINSHIBUFSIZE; i++)
    {
      for (p = partsofspeech[i]; p; p = p->next)
        {
          Wchar *first, *second, *field, *copy;

          first = findslash (p->hinshi);
          if (!first)
            continue;

          switch (n)
            {
            case 1:
              *first = (Wchar) 0;
              break;

            case 2:
              field = first + 1;
              second = findslash (field);
              if (!second)
                break;
              copy = (Wchar *) malloc ((second - field + 1) * sizeof (Wchar));
              if (!copy)
                {
                  malloc_failed ();
                  break;
                }
              *second = (Wchar) 0;      /* end the middle field */
              Wscpy (copy, field);
              free (p->hinshi);
              p->hinshi = copy;
              break;

            case 3:
              second = findslash (first + 1);
              if (!second)
                break;
              field = second + 1;
              copy = (Wchar *) malloc ((Wslen (field) + 1) * sizeof (Wchar));
              if (!copy)
                {
                  malloc_failed ();
                  break;
                }
              Wscpy (copy, field);
              free (p->hinshi);
              p->hinshi = copy;
              break;

            default:
              break;
            }
        }
    }
}

static void
freedesc (p)
     struct descpack *p;
{
  free (p->hinshi);
  free (p->tandesc);
  free (p->yomdesc);
  free (p);
}

/* Hash table of rules, keyed by part-of-speech name in the same way
   as partsofspeech[] (character sum masked with HINSHIBUFINDEXMASK). */
static struct descpack *description[HINSHIBUFSIZE];

/*
 * interndesc -- register a rule.
 *
 *   hin  part-of-speech name the rule applies to.
 *   tan  suffix for the word.
 *   yom  suffix for the reading.
 *
 * If an identical rule exists it is returned.  If a rule for the same
 * part of speech with different suffixes exists, it is removed and the
 * new rule takes its place in the chain.  Otherwise the rule is added
 * at the end of its hash chain.  All three strings are copied.
 *
 * Returns the rule, or a null pointer after calling malloc_failed()
 * when memory ran out.  The new node is linked into the table only
 * once it is complete, so a failed allocation never leaves a freed
 * node reachable from the chain.
 */

static struct descpack *
interndesc (Wchar *hin, Wchar *tan, Wchar *yom)
{
  struct descpack *p, **pp, *next = (struct descpack *) 0;
  Wchar *s;
  int key = 0;

  for (s = hin; *s; s++)
    key += (int) *s;
  key = ((unsigned) key & HINSHIBUFINDEXMASK);

  for (pp = description + key; (p = *pp) != 0; pp = &(p->next))
    {
      if (Wscmp (p->hinshi, hin))
        continue;
      if (!Wscmp (p->tandesc, tan) && !Wscmp (p->yomdesc, yom))
        {
          return p;
        }
      /* same part of speech, different suffixes: unlink the old rule;
         pp keeps addressing the slot it occupied */
      *pp = next = p->next;
      freedesc (p);
      break;
    }

  p = (struct descpack *) malloc (sizeof (struct descpack));
  if (p)
    {
      (void) bzero (p, sizeof (struct descpack));
      p->next = next;
      p->hinshi = (Wchar *) malloc ((Wslen (hin) + 1) * sizeof (Wchar));
      if (p->hinshi)
        {
          (void) Wscpy (p->hinshi, hin);
          p->tandesc = (Wchar *) malloc ((Wslen (tan) + 1) * sizeof (Wchar));
          if (p->tandesc)
            {
              (void) Wscpy (p->tandesc, tan);
              p->yomdesc = (Wchar *) malloc ((Wslen (yom) + 1) * sizeof (Wchar));
              if (p->yomdesc)
                {
                  (void) Wscpy (p->yomdesc, yom);
                  *pp = p;      /* publish only the fully built node */
                  return p;
                }
              free (p->tandesc);
            }
          free (p->hinshi);
        }
      free (p);
    }
  malloc_failed ();
  return (struct descpack *) 0;
}

/* searchdesc -- find the rule registered for the part-of-speech name
   hin.  Returns the rule, or a null pointer when there is none. */

static struct descpack *
searchdesc (Wchar *hin)
{
  struct descpack *rule;
  Wchar *c;
  int sum = 0;

  /* same hash as interndesc(): character sum reduced to a bucket */
  c = hin;
  while (*c)
    {
      sum += (int) *c;
      c++;
    }
  sum = ((unsigned) sum & HINSHIBUFINDEXMASK);

  for (rule = description[sum]; rule; rule = rule->next)
    {
      if (Wscmp (rule->hinshi, hin) == 0)
        return rule;
    }
  return (struct descpack *) 0;
}

static void
store_description ()
{
  FILE *f;
  Wchar readbuf[READBUFSIZE], *hin, *tan, *yom, *s;

  if (!description_table)
    {
      return;
    }

  f = fopen (description_table, "r");
  if (!f)
    {
      (void) fprintf (stderr, "%s: can not open the table file of parts of speech \"%s\".\n", program, description_table);
      exit (1);
    }
  while (s = Fgetws (readbuf, READBUFSIZE, f))
    {
      Wchar nl[1];

      nl[0] = (Wchar) 0;
      hin = tan = yom = nl;
      hin = extstr (s, &s, 0);
      if (*hin)
        {
          tan = extstr (s, &s, 0);
          if (*tan)
            {
              yom = extstr (s, &s, 0);
            }
        }

      interndesc (hin, tan, yom);
    }
  (void) fclose (f);
}

/* Table of known kinds: one slot per bit of a long, because every kind
   is identified by a single bit in dicpack.kind.  nkinds is the number
   of slots in use. */
struct kindpack kinds[sizeof (long) * 8];
static int nkinds;

/* Kind mask given to a line that names no kind (see storepd() and
   comparepd()); entry_out() prints no kind list for exactly this mask. */
#define KIHONBIT 1L

/*
 * internkind -- register kind names and return their bit mask.
 *
 *   s  one kind name, or several separated by '/'.  The buffer is
 *      modified: every '/' is overwritten with 0.
 *
 * Each name that is not yet known is copied into the next free slot of
 * kinds[] and is given the bit whose number equals the slot index.
 * Returns the OR of the bits of all names in s.  A name contributes 0
 * when the table is full or its copy could not be allocated.
 *
 * The bit is computed in unsigned long arithmetic: the table has one
 * slot per bit of a long, and shifting the int constant 1 is not
 * defined for the upper slots.
 */

static long
internkind (Wchar *s)
{
  Wchar *slash;
  int i;

  slash = findslash (s);
  if (slash)
    {
      long res;

      /* split at the first slash and handle both halves */
      *slash = (Wchar) '\0';
      res = internkind (s);
      res |= internkind (slash + 1);
      return res;
    }

  for (i = 0; i < nkinds; i++)
    {
      if (!Wscmp (s, kinds[i].kind))
        {
          return kinds[i].kindbit;
        }
    }

  if (nkinds >= (int) (sizeof (long) * 8))
    {
      return 0;                 /* no bit left */
    }
  kinds[nkinds].kind = (Wchar *) malloc ((Wslen (s) + 1) * sizeof (Wchar));
  if (!kinds[nkinds].kind)
    {
      return 0;
    }
  (void) Wscpy (kinds[nkinds].kind, s);
  kinds[nkinds].kindbit = (long) (1UL << nkinds);
  return kinds[nkinds++].kindbit;
}

/* 種別の一覧の出力 */

static void
listkinds ()
{
  int i;

  for (i = 0; i < nkinds; i++)
    {
      Fputws (kinds[i].kind, stdout);
      putchar ('\n');
    }
}

static int
kindcompar (const void *p1, const void *p2)
{
  struct kindpack *k1, *k2;
  k1 = (struct kindpack *)p1;
  k2 = (struct kindpack *)p2;

  return Wscmp (k1->kind, k2->kind);
}

/* sortkind -- sort the used part of kinds[] by name.  Each element
   keeps its own kindbit, so bit assignments are not affected. */
static void
sortkind (void)
{
  size_t count = (size_t) nkinds;

  qsort (kinds, count, sizeof kinds[0], kindcompar);
}

/* Hash table of dictionary entries; intern() reduces its key argument
   with DICBUFINDEXMASK to pick a bucket.  ndicentries counts the
   entries created by intern().  pdic is not used in the part of the
   file shown here. */
static struct dicpack *dic[DICBUFSIZE], **pdic;
static int ndicentries = 0;

/*

 intern -- look up / register a dictionary entry

 When a non-null address is passed as stat (the seventh argument) and
 no matching entry exists yet, a new entry is registered.  When stat is
 null nothing is ever registered.  On success *stat is set to FOUND or
 CREATE.

 flags adjusts the matching (see below).

 hinshi must never be a null pointer.  Unless IGNORE_KIND is given, an
 entry matches only if it has every bit of kind set.  (The original
 note added that a kind of 0 is allowed but will not match outside -m
 mode -- NOTE(review): confirm against the callers.)

 If description_table is set, the rule for the last '/'-separated field
 of hinshi is looked up and its suffixes are appended to yomi and kouho
 before matching; a missing rule is reported on stderr.

 Returns the entry, or a null pointer when nothing matched and nothing
 was created.  A new entry is linked into its hash chain only after
 all of its allocations have succeeded, so a failure never leaves a
 freed node reachable from dic[].

 */

/* flags */
#define IGNORE_HINSHI 1L
#define IGNORE_KIND   2L

static struct dicpack *
intern (int key, Wchar *yomi, Wchar *kouho, Wchar *hinshi, int hindo, long kind, int *stat, long flags)
{
  struct dicpack *p, **pp;
  struct dicpack *result = (struct dicpack *) 0;
  Wchar *yom = (Wchar *) 0, *tan = (Wchar *) 0; /* suffixed copies, freed on exit */

  if (description_table)
    {
      struct descpack *dp;
      Wchar *dhinshi, *dh;

      dhinshi = dh = hinshi;    /* find the Canna part of speech */
      while (*dh)
        {
          if (*dh++ == (Wchar) '/')
            {
              dhinshi = dh;
            }
        }
      dp = searchdesc (dhinshi);
      if (dp)
        {
          if (Wslen (dp->yomdesc))
            {
              yom = (Wchar *) malloc ((Wslen (yomi) + Wslen (dp->yomdesc) + 1) * sizeof (Wchar));
              if (yom)
                {
                  Wscpy (yom, yomi);
                  Wscpy (yom + Wslen (yom), dp->yomdesc);
                  yomi = yom;
                }
            }
          if (Wslen (dp->tandesc))
            {
              tan = (Wchar *) malloc ((Wslen (kouho) + Wslen (dp->tandesc) + 1) * sizeof (Wchar));
              if (tan)
                {
                  Wscpy (tan, kouho);
                  Wscpy (tan + Wslen (tan), dp->tandesc);
                  kouho = tan;
                }
            }
        }
      else
        {
          char foo[64];

          fprintf (stderr, "no description rule for ");
          Wcstombs (foo, dhinshi, 64);
          fprintf (stderr, "%s.\n", foo);
        }
    }

  key = ((unsigned) key & DICBUFINDEXMASK);
  for (pp = dic + key; (p = *pp) != 0; pp = &(p->next))
    {
      if (!Wscmp (p->yomi, yomi) && !Wscmp (p->tango, kouho) && ((flags & IGNORE_HINSHI) || !Wscmp (p->hinshi->hinshi, hinshi)) && ((flags & IGNORE_KIND) || ((p->kind & kind) == kind)))
        {
          /* match */
          if (stat)
            *stat = FOUND;
          result = p;
          goto done;
        }
    }

  if (stat)
    {
      /* pp now addresses the null link at the end of the chain */
      p = (struct dicpack *) malloc (sizeof (struct dicpack));
      if (p)
        {
          (void) bzero (p, sizeof (struct dicpack));
          p->yomi = (Wchar *) malloc ((Wslen (yomi) + 1) * sizeof (Wchar));
          if (p->yomi)
            {
              (void) Wscpy (p->yomi, yomi);
              p->tango = (Wchar *) malloc ((Wslen (kouho) + 1) * sizeof (Wchar));
              if (p->tango)
                {
                  (void) Wscpy (p->tango, kouho);
                  p->hinshi = internhinshi (hinshi, 1);
                  if (p->hinshi)
                    {
                      p->hindo = hindo;
                      p->kind = kind;
                      p->extdata = (Wchar *) 0;
                      *pp = p;  /* publish only the fully built node */
                      *stat = CREATE;
                      ndicentries++;
                      result = p;
                      goto done;
                    }
                  free (p->tango);
                }
              free (p->yomi);
            }
          free (p);
        }
      malloc_failed ();
    }

done:
  free (yom);
  free (tan);
  return result;
}

/* for_all_interned -- call fn once for every registered dictionary
   entry, bucket by bucket and in chain order within a bucket.  The
   link to the next entry is read after fn returns. */

static void
for_all_interned (void (*fn) ())
{
  struct dicpack **bucket;
  struct dicpack **end = dic + DICBUFSIZE;

  for (bucket = dic; bucket < end; bucket++)
    {
      struct dicpack *entry = *bucket;

      while (entry)
        {
          (*fn) (entry);
          entry = entry->next;
        }
    }
}

static void
storepd (file)
     FILE *file;
{
  Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind;
  int nhindo, key, tkey, stat;
  long kindbit;
  struct dicpack *dicentry;

  while (p = Fgetws (readbuf, READBUFSIZE, file))
    {
      key = 0;
      yomi = extstr (p, &p, &tkey);
      key += tkey;
      kouho = extstr (p, &p, &tkey);
      key += tkey;
      hinshi = extstr (p, &p, 0);
      hindo = extstr (p, &p, 0);
      nhindo = Watoi (hindo);

      kind = extstr (p, 0, 0);
      if (*kind)
        {
          kindbit = internkind (kind);
        }
      else
        {
          kindbit = KIHONBIT;
        }

      dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, &stat, IGNORE_KIND);
      if (dicentry)
        {
          dicentry->kind |= kindbit;
        }
    }
}

/*
 * comparepd -- read a second dictionary from file and compare it with
 * the entries already registered.
 *
 * Lines are parsed as in storepd().  Depending on the options:
 *   - copy_frequency: only existing entries are looked up (intern() is
 *     given a null stat, so nothing is created); a match gets the
 *     line's frequency and loses its COMMON flag.
 *   - ignore_hinshi_to_compare and the entry was found: every entry
 *     from the returned one to the end of its chain that has the same
 *     reading and word is flagged COMMON; with merge_sj3 the line's
 *     part of speech is appended to extdata, otherwise the kind bits
 *     are OR-ed in.
 *   - otherwise: the kind bits are OR-ed in and the entry is flagged
 *     COMMON when it already existed or NEW when it was just created.
 *
 * flags and statp are set up outside the loop and only ever gain
 * options inside it; they are never reset between lines.
 */
static void
comparepd (file)
     FILE *file;
{
  Wchar readbuf[READBUFSIZE], *p, *yomi, *hinshi, *kouho, *hindo, *kind;
  int nhindo, key, tkey, stat, *statp = &stat;
  struct dicpack *dicentry;
  long kindbit, flags = 0L;

  while (p = Fgetws (readbuf, READBUFSIZE, file))
    {
      /* key = character sum of reading + character sum of word */
      key = 0;
      yomi = extstr (p, &p, &tkey);
      key += tkey;
      kouho = extstr (p, &p, &tkey);
      key += tkey;
      hinshi = extstr (p, &p, 0);
      if (ignore_hinshi_to_compare)
        {
          flags |= IGNORE_HINSHI;
        }
      hindo = extstr (p, &p, 0);
      nhindo = Watoi (hindo);

      kind = extstr (p, 0, 0);
      if (*kind)
        {
          kindbit = internkind (kind);
        }
      else
        {
          kindbit = KIHONBIT;
        }
      if (merge_kind || merge_sj3)
        {
          flags |= IGNORE_KIND;
        }
      if (copy_frequency)
        {
          /* null stat: intern() must not create entries */
          statp = (int *) 0;
        }

      dicentry = intern (key, yomi, kouho, hinshi, nhindo, kindbit, statp, flags);

      if (dicentry)
        {
          if (copy_frequency)
            {
              dicentry->hindo = nhindo;
              dicentry->flags &= ~COMMON;
            }
          else if (ignore_hinshi_to_compare && stat == FOUND)
            {
              /* In this case the chain for the same key is returned */
              struct dicpack *pd;

              for (pd = dicentry; pd; pd = pd->next)
                {
                  if (!Wscmp (pd->yomi, yomi) && !Wscmp (pd->tango, kouho))
                    {
                      pd->flags |= COMMON;
                      if (!merge_sj3)
                        {
                          pd->kind |= kindbit;
                        }

                      if (merge_sj3)
                        {
                          /* extdata := old extdata followed directly by hinshi */
                          int len = 0;
                          Wchar *dat;

                          if (pd->extdata)
                            {
                              len = Wslen (pd->extdata);
                            }
                          dat = (Wchar *) malloc ((Wslen (hinshi) + 1 + len) * sizeof (Wchar));
                          if (dat)
                            {
                              if (len)
                                {
                                  (void) Wscpy (dat, pd->extdata);
                                  (void) free ((char *) pd->extdata);
                                }
                              (void) Wscpy (dat + len, hinshi);
                              pd->extdata = dat;
                            }
                        }
                    }
                }
            }
          else
            {
              dicentry->kind |= kindbit;
              if (stat == FOUND)
                {
                  dicentry->flags |= COMMON;
                }
              else
                {               /* CREATE */
                  dicentry->flags |= NEW;
                }
            }
        }
    }
}

/*
 * canna_output -- write entry p to cf, one line per part-of-speech
 * name, as "reading hinshi[*frequency] word".
 *
 *   h  the first of n names stored back to back, each 0-terminated.
 *   n  number of names; nothing is written when n is not positive.
 *
 * "*frequency" is omitted when the frequency is 0.
 */
static void
canna_output (FILE *cf, struct dicpack *p, Wchar *h, int n)
{
  int left;

  for (left = n; left > 0; left--)
    {
      Fputws (p->yomi, cf);
      (void) putc (' ', cf);
      Fputws (h, cf);
      if (p->hindo != 0)
        (void) fprintf (cf, "*%d", p->hindo);
      (void) putc (' ', cf);
      Fputws (p->tango, cf);
      (void) putc ('\n', cf);

      h += Wslen (h) + 1;       /* step to the next name */
    }
}

/*
 * entry_out -- write entry p to cf, one line per part-of-speech name,
 * as "reading word [ex/]hinshi [frequency] [kinds]".
 *
 *   h   the first of n names stored back to back, each 0-terminated.
 *   n   number of names; nothing is written when n is not positive.
 *   ex  extra part-of-speech text; printed in front of the name,
 *       followed by '/', only when merge_sj3 is set and ex is non-null.
 *
 * The frequency is omitted when sj3_type_output is set.  Unless
 * wnn_type_output is set, the line ends with either the fixed bunrui
 * string or the '/'-separated names of the entry's kinds (restricted
 * to specific_kind when that is set).
 *
 * Everything goes to cf; the bunrui string used to be written to
 * stdout regardless of cf.  The blank-versus-slash separator state is
 * kept per line, so every line's kind list starts with a blank rather
 * than only the first line's.
 */
static void
entry_out (FILE *cf, struct dicpack *p, Wchar *h, int n, Wchar *ex)
{
  for (; n-- > 0; h += Wslen (h) + 1)
    {
      Fputws (p->yomi, cf);
      (void) putc (' ', cf);
      Fputws (p->tango, cf);
      (void) putc (' ', cf);
      if (merge_sj3 && ex)
        {
          Fputws (ex, cf);
          (void) putc ('/', cf);
        }
      Fputws (h, cf);
      if (!sj3_type_output)
        {
          (void) fprintf (cf, " %d", p->hindo);
        }

      if (!wnn_type_output)
        {
          if (bunrui)
            {
              (void) fprintf (cf, " %s", bunrui);
            }
          else
            {
              long b = specific_kind ? (specific_kind & p->kind) : p->kind;

              if (b != KIHONBIT)
                {               /* write nothing if it is only the basic kind */
                  int i;
                  int first = 1;

                  for (i = 0; i < nkinds; i++)
                    {
                      if (b & kinds[i].kindbit)
                        {
                          (void) putc (first ? ' ' : '/', cf);
                          first = 0;
                          Fputws (kinds[i].kind, cf);
                        }
                    }
                }
            }
        }
      (void) putc ('\n', cf);
    }
}

/* p で表されるエントリをファイル cf に出力する */

static void
printentry (cf, p)
     FILE *cf;
     struct dicpack *p;
{
  if (specific_kind && !(p->kind & specific_kind))
    {
      return;
    }

  if (extract_kana && !all_kana (p->tango))
    {
      return;
    }

  if (selhinshi && !p->hinshi->hinshi[0])
    {
      return;
    }

  if (canna_type_output)
    {
      canna_output (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis);
    }
  else
    {
      entry_out (cf, p, p->hinshi->hinshi, p->hinshi->nhinshis, p->extdata);
    }
}

/* showdeleted -- for an entry without the COMMON flag, print "- " on
   stdout followed by the entry itself; entries with the flag are
   ignored. */
static void
showdeleted (struct dicpack *p)
{
  if (p->flags & COMMON)
    return;

  (void) printf ("- ");
  printentry (stdout, p);
}

/* showentry_open -- resolve one output name of showentry(): a null
   name yields a null stream, "-" yields stdout, anything else is
   opened for writing.  Exits with status 1 when the file cannot be
   opened. */
static FILE *
showentry_open (char *path)
{
  FILE *fp;

  if (!path)
    return (FILE *) 0;
  if (path[0] == '-' && !path[1])
    return stdout;

  fp = fopen (path, "w");
  if (!fp)
    {
      (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, path);
      exit (1);
    }
  return fp;
}

/*
 * showentry -- print the n entries of pd.
 *
 * In compare mode an entry flagged COMMON goes to the common_out
 * stream, an entry flagged NEW to the new_out stream and any other
 * entry to the old_out stream; an entry whose stream was not requested
 * is dropped.  Outside compare mode every entry goes to stdout.  The
 * three streams are opened first, in the order common, old, new, and
 * whether or not compare mode is on.
 */
static void
showentry (struct dicpack **pd, int n)
{
  FILE *cf, *of, *nf;
  int i;

  cf = showentry_open (common_out);
  of = showentry_open (old_out);
  nf = showentry_open (new_out);

  for (i = 0; i < n; i++)
    {
      struct dicpack *p = pd[i];
      FILE *target;

      if (!compare)
        {                       /* just print the normalized dictionary */
          printentry (stdout, p);
          continue;
        }

      if (p->flags & COMMON)
        target = cf;
      else if (p->flags & NEW)
        target = nf;
      else
        target = of;

      if (target)
        printentry (target, p);
    }
}

/* diccompar -- qsort() callback for an array of struct dicpack
   pointers: order by reading, then by word, then by part-of-speech
   name, each compared with Wscmp(). */
static int
diccompar (const void *pp1, const void *pp2)
{
  struct dicpack *a = *(struct dicpack **) pp1;
  struct dicpack *b = *(struct dicpack **) pp2;
  int n;

  n = Wscmp (a->yomi, b->yomi);
  if (n != 0)
    return n;

  n = Wscmp (a->tango, b->tango);
  if (n != 0)
    return n;

  /* a zero here means the entries are equal in all three fields */
  return Wscmp (a->hinshi->hinshi, b->hinshi->hinshi);
}

/* dichindocompar -- qsort() callback for an array of struct dicpack
   pointers: order by reading, then by descending frequency, then by
   word, then by part-of-speech name. */
static int
dichindocompar (const void *pp1, const void *pp2)
{
  struct dicpack *a = *(struct dicpack **) pp1;
  struct dicpack *b = *(struct dicpack **) pp2;
  int n;

  n = Wscmp (a->yomi, b->yomi);
  if (n != 0)
    return n;

  n = b->hindo - a->hindo;      /* higher frequency sorts first */
  if (n != 0)
    return n;

  n = Wscmp (a->tango, b->tango);
  if (n != 0)
    return n;

  return Wscmp (a->hinshi->hinshi, b->hinshi->hinshi);
}

/*
 * shrinkargs -- drop the first n elements of an argument vector by
 * moving the elements behind them to the front.
 *
 *   argv   first element to overwrite.
 *   n      number of elements to drop.
 *   count  number of elements in argv, counted from argv[0].
 *
 * Elements argv[n] .. argv[count - 1] are copied to argv[0] onwards;
 * the last n slots keep their old contents.  Nothing is moved when
 * count does not exceed n.
 */
void
shrinkargs (char **argv, int n, int count)
{
  int moved = count - n;
  int i;

  for (i = 0; i < moved; i++)
    {
      argv[i] = argv[i + n];
    }
}

/*
 * parseargs -- interpret the command line.
 *
 * Sets `program' to the part of argv[0] after its last '/', then scans argv
 * for two-character options ("-x").  Each recognized option, together with
 * its value if it takes one, is removed from argv with shrinkargs() so that
 * only the positional arguments remain.  Unrecognized "-x" arguments are
 * left in place and are therefore treated as positional arguments.
 *
 * After the options are removed, argv[1] is opened for reading as in1 and,
 * when exactly two positional arguments remain, argv[2] as in2.  A name of
 * "-" leaves the corresponding stream as the caller set it.  When the number
 * of positional arguments is not two, in2 is set to a null pointer.  If a
 * description table was given with -d, store_description() is called last.
 *
 * Exits with status 1 when no positional argument remains, when an option
 * that needs a value is the last argument, or when a file cannot be opened.
 */
static void
parseargs (argc, argv)
     int argc;
     char *argv[];
{
  int i;

  /* Walk backwards from the end of argv[0] to just past the last '/'. */
  for (program = argv[0] + strlen (argv[0]); argv[0] < program; program--)
    {
      if (program[0] == '/')
        {
          program++;
          break;
        }
    }

  for (i = 1; i < argc;)
    {
      /* Number of argv slots used by the option at argv[i]; 0 means that
         argv[i] is not a recognized option and stays where it is. */
      int consumed = 0;

      /* Only arguments of exactly the form "-x" are options.  argv[i][1] is
         tested before argv[i][2] so that a lone "-" is never indexed past
         its terminating NUL. */
      if (argv[i][0] == '-' && argv[i][1] != '\0' && argv[i][2] == '\0')
        {
          /* Options that take a value need a following argument. */
          switch (argv[i][1])
            {
            case 'b':
            case 'c':
            case 'd':
            case 'k':
            case 'n':
            case 'o':
            case 'r':
            case 's':
              if (i + 1 >= argc)
                {
                  (void) fprintf (stderr,
                                  "%s: option -%c requires an argument.\n",
                                  program, argv[i][1]);
                  exit (1);
                }
              break;

            default:
              break;
            }

          switch (argv[i][1])
            {
            case '1':
            case '2':
            case '3':
              selhinshi = argv[i][1] - '0';
              consumed = 1;
              break;

            case 'b':
              bunrui = argv[i + 1];
              consumed = 2;
              break;

            case 'c':
              common_out = argv[i + 1];
              consumed = 2;
              break;

            case 'd':
              description_table = argv[i + 1];
              consumed = 2;
              break;

            case 'f':
              copy_frequency = 1;
              consumed = 1;
              break;

            case 'h':
              ignore_hinshi_to_compare = 1;
              consumed = 1;
              break;

            case 'i':
              canna_type_output = 1;
              wnn_type_output = 0;
              consumed = 1;
              break;

            case 'j':
              extract_kana = 1;
              consumed = 1;
              break;

            case 'k':
              {
                Wchar buf[READBUFSIZE];

                (void) Mbstowcs (buf, argv[i + 1], READBUFSIZE);
                specific_kind |= internkind (buf);
              }
              consumed = 2;
              break;

            case 'l':
              list_kinds = 1;
              consumed = 1;
              break;

            case 'm':
              merge_kind = 1;
              consumed = 1;
              break;

            case 'n':
              new_out = argv[i + 1];
              consumed = 2;
              break;

            case 'o':
              old_out = argv[i + 1];
              consumed = 2;
              break;

            case 'p':
              sort_by_frequency = 1;
              consumed = 1;
              break;

            case 'r':
              hinshi_table = argv[i + 1];
              hinshi_direction = REVERSE;
              consumed = 2;
              break;

            case 's':
              hinshi_table = argv[i + 1];
              consumed = 2;
              break;

            case 'v':
              sj3_type_output = 1;
              wnn_type_output = 1;      /* also set: the format resembles Wnn's */
              consumed = 1;
              break;

            case 'w':
              canna_type_output = 0;
              sj3_type_output = 0;
              wnn_type_output = 1;
              consumed = 1;
              break;

            case 'x':
              merge_sj3 = 1;
              ignore_hinshi_to_compare = 1;
              consumed = 1;
              break;

            default:
              break;
            }
        }

      if (consumed)
        {
          /* Remove the option in place; argv[i] now holds the next
             unprocessed argument, so i is not advanced. */
          shrinkargs (argv + i, consumed, argc - i);
          argc -= consumed;
        }
      else
        {
          i++;
        }
    }

  if (argc < 2)
    {
      (void) fprintf (stderr, "Usage: %s dic1 [dic2] [-c filecommon] ...\n", program);
      exit (1);
    }

  /* A file name of exactly "-" keeps the stream chosen by the caller. */
  if (argv[1][0] != '-' || argv[1][1])
    {
      in1 = fopen (argv[1], "r");
      if (!in1)
        {
          (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[1]);
          exit (1);
        }
    }
  if (argc == 3)
    {
      if (argv[2][0] != '-' || argv[2][1])
        {
          in2 = fopen (argv[2], "r");
          if (!in2)
            {
              (void) fprintf (stderr, "%s: can not open file \"%s\".\n", program, argv[2]);
              exit (1);
            }
        }
    }
  else
    {
      in2 = (FILE *) 0;
    }
  if (description_table)
    {
      store_description ();
    }
}

/* The Wchar string "kihon", terminated by a 0 element.  main() passes it to
   internkind() before the command line is parsed. */
static Wchar kihonh[] = {
  (Wchar) 'k', (Wchar) 'i', (Wchar) 'h', (Wchar) 'o', (Wchar) 'n', (Wchar) 0,
};

/*
 * Program entry point.
 *
 * 1. Default both input streams to stdin, register the "kihon" kind with
 *    internkind(), and let parseargs() process the command line.
 * 2. Load the first dictionary with storepd(); if a second input stream is
 *    set, turn on `compare' and process it with comparepd().
 * 3. With list_kinds set, call listkinds() and exit.
 * 4. Apply select_hinshi() when selhinshi is set, otherwise
 *    replace_hinshi() when a hinshi table was given.
 * 5. Collect every entry chained from dic[0 .. DICBUFSIZE-1] into the
 *    pointer array pdic, sort it (dichindocompar when sort_by_frequency is
 *    set, diccompar otherwise), call sortkind() and print the result with
 *    showentry().
 *
 * Exits with status 0; malloc_failed() is called if pdic cannot be
 * allocated.
 */
int
main (argc, argv)
     int argc;
     char *argv[];
{
#ifndef POD_WCHAR
  setlocale (LC_ALL, "");
#endif

  in1 = in2 = stdin;
  (void) internkind (kihonh);   /* for the basic dictionary; registered as 1L */
  parseargs (argc, argv);
  storepd (in1);
  (void) fclose (in1);

  if (in2)
    {
      compare = 1;
      comparepd (in2);
      (void) fclose (in2);
    }

  if (list_kinds)
    {
      listkinds ();
      exit (0);
    }

  if (selhinshi)
    {
      select_hinshi (selhinshi);
    }
  else if (hinshi_table)
    {
      replace_hinshi ();
    }

  /* Always request at least one slot: malloc (0) may return a null pointer
     even on success, which would make an empty dictionary look like an
     allocation failure. */
  pdic = (struct dicpack **) malloc ((ndicentries > 0 ? ndicentries : 1)
                                     * sizeof (struct dicpack *));
  if (pdic)
    {
      int i, j;
      struct dicpack *p;

      /* Flatten the per-slot chains of dic[] into one array for qsort(). */
      for (i = 0, j = 0; i < DICBUFSIZE; i++)
        {
          for (p = dic[i]; p; p = p->next)
            {
              pdic[j++] = p;
            }
        }
      if (sort_by_frequency)
        {
          qsort (pdic, ndicentries, sizeof (struct dicpack *), dichindocompar);
        }
      else
        {
          qsort (pdic, ndicentries, sizeof (struct dicpack *), diccompar);
        }
      sortkind ();
      showentry (pdic, ndicentries);
    }
  else
    {
      malloc_failed ();
    }
  exit (0);
}