view Wnn/jutil/atof.c @ 2:b605a0e60f5b

- reverted jdata.h - fixed the bug which occurred on changing length of bunsetsu.
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 13 Dec 2007 17:42:01 +0900
parents bbc77ca4def5
children a7ccf412ba02
line wrap: on
line source

/*
 *  $Id: atof.c,v 1.7 2002/07/14 04:26:57 hiroo Exp $
 */

/*
 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
 * This file is part of FreeWnn.
 * 
 * Copyright Kyoto University Research Institute for Mathematical Sciences
 *                 1987, 1988, 1989, 1990, 1991, 1992
 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
 * Copyright FreeWnn Project 1999, 2000, 2002
 *
 * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
  Fuzokugo file convert command.
  Standard input UJIS format.
  Standard output jserver format.
  */

#ifndef lint
static char *rcs_id = "$Id: atof.c,v 1.7 2002/07/14 04:26:57 hiroo Exp $";
#endif /* lint */

/*
  UJIS FORMAT

\attribute
{属性名}
{属性名}
..........
..........
\jiritugo-attr
{自立語品詞の属性定義}
{自立語品詞の属性定義}
..........
..........
\fuzokugo-id
{品詞の定義}
{品詞の定義}
..........
..........
\jiritugo
{自立語の品詞名}/{接続ベクトル}
{自立語の品詞名}/{接続ベクトル}
..........
..........
\fuzokugo
{同じ品詞の付属語の定義}
{同じ品詞の付属語の定義}
..........
..........
\syuutenv
\svkanren/{接続ベクトル}
\svkantan/{接続ベクトル}
\svbunsetsu/{接続ベクトル}
\bye


品詞の定義 :==
{品詞名} | 
{品詞名}@{属性名},{属性名},.....

自立語品詞の属性定義 :==
{自立語品詞名} | 
{自立語品詞名}@{属性名},{属性名},.....

注意)「自立語品詞の名前」は、品詞ファイルの中で定義されたものでないと
いけません。それに対し、「属性名」「品詞の名前」は、自分で勝手に名前を
定義してかまいません。


同じ品詞の付属語の定義 :==
\{付属語の品詞名}
{付属語の読み}/{コメント}/{接続ベクトル}
{付属語の読み}/{コメント}/{接続ベクトル}
..........
..........


接続ベクトル :==
{品詞名}:{品詞名}:{品詞名}:......

品詞名 :==
{品詞ファイルで定義された幹語品詞} |
{品詞の定義で定義された品詞名} |
@{品詞名}.{その品詞に属する付属語の読み} |
@{属性の定義で定義された属性} | 
@{属性の定義で定義された属性}&{属性の定義で定義された属性}&... |
@svkanren. | @svkantan. | @svbunsetsu.

*/

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#include <stdio.h>
#if STDC_HEADERS
#  include <stdlib.h>
#  include <string.h>
#else
#  if HAVE_STRINGS_H
#    include <strings.h>
#  endif
#endif /* STDC_HEADERS */
#if HAVE_UNISTD_H
#  include <unistd.h>
#endif

#include "commonhd.h"
#include "hinsi.h"
#include "jslib.h"
#include "wnn_os.h"
#include "wnn_string.h"

#define FUZOKUGO_LEN 8
#define OUT_INT_KOSUU 20
#ifndef NO_FZK
#define FUZOKUGO_KOSUU 600      /* fuzokugo kosuu */
#endif /* NO_FZK */
#define ATTR_KOSUU 20
#define ID_HEIKIN_LEN 15
#define ID_MAX_LEN 128
#define SYUUTANV_KOSUU 3
#define F_COMMENT_LEN 80
#define ID_KOSUU (OUT_INT_KOSUU * sizeof(int) * 8)
/* fuzokugo hinsi kosuu */
#define HEAP_LEN (ID_KOSUU * ID_HEIKIN_LEN)

#define ATTR "\\attribute"
#ifndef NO_FZK
#define FUZOKUGO_ID "\\fuzokugo-id"
#endif /* NO_FZK */
#define JIRITUGO_ATTR "\\jiritugo-attr"
#define JIRITUGO "\\jiritugo"
#ifndef NO_FZK
#define FUZOKUGO  "\\fuzokugo"
#endif /* NO_FZK */
#define END_ID "\\bye"
#define SYUUTANV "\\syuutanv"
#define SV0 "\\svkanren"
#define SV1 "\\svkantan"
#define SV2 "\\svbunsetsu"

char *SV[] = { "svkanren", "svkantan", "svbunsetsu" };

int wnnerror;

#define TCSEP '.'
#define ATTR_CHAR '@'
#define ATTR_START '@'
#define ATTR_NEXT ','
#define ATTR_AND '&'

#define ESCAPE_CHAR '\\'
#define COMMENT_CHAR ';'
#define DEVIDE_CHAR '/'
#define IGNORE_CHAR1 ' '
#define IGNORE_CHAR2 '\t'

#define UJIS_TEN 0xa1a2
#define UJIS_MARU 0xa1a3

#define LINE_SIZE 1024

extern int mhinsi;

int line_no = 0;

char outfile[LINE_SIZE];
char *com_name;

char *hinsi_file_name = NULL;

struct attr_struct
{
  int id_set[OUT_INT_KOSUU];
  char *name;
}
attr[ATTR_KOSUU];
int attr_num = 0;


struct id_struct
{
  int bit;
  char *str;
}
id[ID_KOSUU];
int id_num = 0;
int idn = 0;                    /* 品詞の数 */

char heap[HEAP_LEN];
char *hp = heap;

struct vector
{
  int sv[OUT_INT_KOSUU];
  struct vector *tc;            /* for including other fuzokugo entries */
  char *tc_name;
};

#ifndef NO_FZK
struct fuzokugo
{
  w_char y[FUZOKUGO_LEN + 1];
  char cy[FUZOKUGO_LEN * 2 + 1];
  int hinsi;
  char comment[F_COMMENT_LEN];
  struct vector vect;
}
fz[FUZOKUGO_KOSUU];
int fz_num;
#endif /* NO_FZK */

struct jiritugov
{
  struct vector v;
  int vnum;
}
jiritugov[MAXHINSI];

int actjv[MAXHINSI];

int mactjv;

int out_int_kosuu = 0;

struct vector syuutanv[SYUUTANV_KOSUU];

#define SIZE 1024
char buf[SIZE];

static int find_id (), bsch (), tcp (),
#ifndef NO_FZK
  count_yomi (), count_fz_num (), count_same_yomi (),
#endif
  find_id_name (), search_attr (), is_same_jiritugov ();
static void init (), read_attr (), read_id (), read_jiritugov (),
#ifndef NO_FZK
  make_fuzokugo_table (), check_fuzokugo (), sort (),
#endif
  read_syuutanv (), set_tc_from_name (), manipulate_tc (),
print_out (), set_heap (), get_id_part (), check_attrs (), read_a_vector (), set_attribute (), bit_or (), set_id (), pre_clear_jiritugo_v (), usage (), sort_id ();
extern int wnn_find_hinsi_by_name (), wnn_loadhinsi (), create_file_header ();

static void
error_format (s, d1, d2, d3, d4, d5)
     char *s;
     int d1, d2, d3, d4, d5;
{
  fprintf (stderr, "Bad format near line %d \"%s\".\n", line_no, buf);
  fprintf (stderr, s, d1, d2, d3, d4, d5);
  exit (1);
}

static void
error_exit_long ()
{
  error_format ("Too Long Line.\n");
  exit (1);
}

static void
error_eof ()
{
  fprintf (stderr, "Error. File must end with %s\n", END_ID);
  exit (1);
}

static void
error_no_heap ()
{
  fprintf (stderr, "Error. Heap area is exhausted\n");
  fprintf (stderr, "Please compile it again.");
  exit (1);
}

static void
error_tc (c)
     char *c;
{
  fprintf (stderr, "No fuzokugo entry %s.\n", c);
  exit (1);
}

int
main (argc, argv)
     int argc;
     char **argv;
{
  com_name = argv[0];
  init (argc, argv);
  read_attr ();
  read_id ();
  sort_id ();
  out_int_kosuu = (idn - 1) / 32 + 1;
  read_jiritugov ();
#ifndef NO_FZK
  make_fuzokugo_table ();
#endif /* NO_FZK */
  read_syuutanv ();
  set_tc_from_name ();
  manipulate_tc ();
#ifndef NO_FZK
  sort ();
  check_fuzokugo ();
#endif /* NO_FZK */
  print_out ();
  exit (0);
}


static int
get_char0 ()
{
  int c;

  for (; (c = getchar ()) == COMMENT_CHAR || c == IGNORE_CHAR1 || c == IGNORE_CHAR2;)
    {
      if (c == COMMENT_CHAR)
        {
          for (;;)
            {
              if ((c = getchar ()) == EOF)
                {
                  return (EOF);
                }
              if (c == '\n')
                {
                  ungetc (c, stdin);
                  break;
                }
            }
        }
    }
  if (c == '\n')
    line_no += 1;
  return (c);
}

static int
get_char1 ()                    /* remove null lines "\\n" */
{
  static int c = -1;
  int d;
  static int fufufu = 0;


  if (c != -1)
    {
      d = c;
      c = -1;
      return (d);
    }
  else
    {
      if (fufufu == 0)
        {                       /* remove all new lines in the head of the file */
          for (; (d = get_char0 ()) == '\n';);
          fufufu = 1;
        }
      else
        {
          d = get_char0 ();
        }
      if (d == '\n')
        {
          while ((c = get_char0 ()) == '\n');
        }
      return (d);
    }
}

static int
get_char ()
{

  static int hold = -1;
  int c, d;

  if (hold != -1)
    {
      c = hold;
      hold = -1;
      return (c);
    }
  else
    {
    start:
      c = get_char1 ();
      if (c == '\\')
        {
          if ((d = get_char1 ()) == EOF)
            {
              return (EOF);
            }
          if (d == '\n')
            {
              goto start;
            }
          else
            {
              hold = d;
              return (c);
            }
        }
      return (c);
    }
}


/* get one phrase and return the separater */
static int
get_phrase (s0, size)
     char *s0;
     int size;
{
  char *s = s0;
  int c;
  static int eof = 0;

  if (eof)
    error_eof ();

  while ((c = get_char ()) != '\n' && c != '/' && c != ':' && c != EOF)
    {
      if (s - s0 >= size)
        {
          error_exit_long ();
        }
      *s++ = c;
    }
  if (c == EOF)
    eof = 1;
  if (s - s0 >= size - 1)
    {
      error_exit_long ();
    }
  *s++ = '\0';
  return (c);
}

static void
read_attr ()
{
  int k, j;

  if (get_phrase (buf, SIZE) != '\n' || strcmp (ATTR, buf) != 0)
    {
      error_format ("File must start with %s\n", ATTR);
    }

  for (attr_num = 0;;)
    {
      if (get_phrase (buf, SIZE) != '\n')
        {
          error_format ("Newline is expected\n");
        }
      if (strcmp (JIRITUGO_ATTR, buf) == 0)
        break;
      set_heap (&attr[attr_num].name, buf);
      if ((++attr_num) >= ATTR_KOSUU)
        {
          error_format ("Too many ATTR's\n");
        }
    }

  for (k = 0; k < attr_num; k++)
    {
      for (j = 0; j < OUT_INT_KOSUU; j++)
        {
          attr[k].id_set[j] = 0;
        }
    }
}

extern char *wnn_get_hinsi_name ();

static void
read_id ()
{
  char *d;
  char buf1[SIZE];
  int n;

/*
 *  jiritugo
 */
  if (wnn_loadhinsi (hinsi_file_name) < 0)
    error_format ("Cannot open hinsi.data\n");
  for (idn = 0; idn < mhinsi; idn++)
    {
      if (d = wnn_get_hinsi_name (idn))
        {
          set_heap (&id[id_num].str, d);
          id[id_num++].bit = idn;
        }
      if (id_num >= ID_KOSUU)
        {
          error_format ("Too many ID's\n");
        }
    }
  idn = (idn + 31) & ~31;
/*
 *  set attribute to jiritugo
 */

  for (;;)
    {
      get_phrase (buf, SIZE);
#ifdef  NO_FZK
      if (strcmp (JIRITUGO, buf) == 0)
        break;
#else
      if (strcmp (FUZOKUGO_ID, buf) == 0)
        break;
#endif /* NO_FZK */
      get_id_part (buf1, buf);
      n = wnn_find_hinsi_by_name (buf1);
      if (n == -1)
        error_format ("Hinsi which is not defined in Hinsifile.\n");
      strcpy (buf1, buf);       /* in order to use it in check_attrs */
      check_attrs (n, buf1);
    }

#ifndef NO_FZK
/*
 *  fuzokugo
 */
  for (;; idn++)
    {
      get_phrase (buf, SIZE);
      if (strcmp (JIRITUGO, buf) == 0)
        break;
      get_id_part (buf1, buf);
      set_heap (&(id[id_num].str), buf1);
      strcpy (buf1, buf);       /* in order to use it in check_attrs */
      id[id_num].bit = idn;
      check_attrs (idn, buf1);
      if ((++id_num) >= ID_KOSUU)
        {
          error_format ("Too many ID's\n");
        }
    }
#endif /* NO_FZK */
}


static int
sort_func_id (a, b)
     char *a, *b;
{
  return (strcmp (((struct id_struct *) a)->str, ((struct id_struct *) b)->str));
}

static void
sort_id ()
{
  qsort ((char *) &id[0], id_num, sizeof (struct id_struct), sort_func_id);
}

#ifdef nodef
/* s is subseq of t */
static int
subseq (s, t)
     char *s, *t;
{
  while (*s)
    {
      if (*s++ != *t++)
        {
          return (-1);          /* false */
        }
    }
  return (0);                   /* true */
}
#endif

#ifndef NO_FZK
static void
make_fuzokugo_table ()
{
  int id_num;
  char d;
  int i;
  w_char tmp[FUZOKUGO_LEN + 1];


  fz_num = 0;
  d = get_phrase (buf, SIZE);
  if (strcmp (SYUUTANV, buf) == 0)
    return;
  if (*buf != '\\')
    {
      error_format ("Fuzokugo_id is expected\n");
    }
  if ((id_num = find_id (buf + 1)) < 0)
    {
      error_format ("Unknown identifier %s\n", buf + 1);
    }
  for (; fz_num < FUZOKUGO_KOSUU;)
    {
      d = get_phrase (buf, SIZE);
      if (strcmp (SYUUTANV, buf) == 0)
        return;
      if (*buf == '\\')
        {
          if ((id_num = find_id (buf + 1)) < 0)
            {
              error_format ("Unknown identifier %s\n", buf + 1);
            }
          continue;
        }
      if (d != '/')
        {
          error_format ("'/' and definition of fuzokugo is expected.\n");
        }

      if (strlen (buf) >= FUZOKUGO_LEN * 2)
        {
          error_format ("Too long fuzokugo \"%s\"\n", buf);
        }

      strcpy (fz[fz_num].cy, buf);
      wnn_Sstrcpy (tmp, buf);
      wnn_Sreverse (fz[fz_num].y, tmp);
      fz[fz_num].hinsi = id_num;

      d = get_phrase (buf, SIZE);
      if (strlen (buf) >= F_COMMENT_LEN)
        {
          error_format ("Too long comment\n");
        }
      strcpy (fz[fz_num].comment, buf);

      if (d != '/')
        {
          error_format ("'/' is expected.\n");
        }

      for (i = 0; i < OUT_INT_KOSUU; i++)
        {
          fz[fz_num].vect.sv[i] = 0;
        }
      fz[fz_num].vect.tc = NULL;
      fz[fz_num].vect.tc_name = NULL;
      read_a_vector (&fz[fz_num].vect);
      fz_num += 1;
    }
}
#endif /* NO_FZK */

static int
find_id (c)
     char *c;
{
  int k;
  if ((k = bsch (c, 0, id_num - 1)) < 0)
    {
      return (-1);
    }
  return (id[k].bit);
}

static int
bsch (c, st, end)
     char *c;
     int st;
     int end;
{
  int tmp;
  int m = (st + end) / 2;
  if (st >= end)
    {
      if (strcmp (c, id[m].str) == 0)
        {
          return (st);
        }
      else
        {
          return (-1);
        }
    }

  tmp = strcmp (c, id[m].str);
  if (tmp == 0)
    return (m);
  if (tmp < 0)
    return (bsch (c, st, m - 1));
  return (bsch (c, m + 1, end));
}

#ifndef NO_FZK
static int
sort_func_fz (a, b)
     char *a, *b;
{
  int c;

  if ((c = wnn_Strcmp (((struct fuzokugo *) a)->y, ((struct fuzokugo *) b)->y)) == 0)
    {
      if (((struct fuzokugo *) a)->hinsi > ((struct fuzokugo *) b)->hinsi)
        {
          return (1);
        }
      else if (((struct fuzokugo *) a)->hinsi < ((struct fuzokugo *) b)->hinsi)
        {
          return (-1);
        }
      return (0);
    }
  return (c);
}

static void
sort ()
{
  qsort ((char *) &fz[0], fz_num, sizeof (struct fuzokugo), sort_func_fz);
}
#endif /* NO_FZK */

static void
read_a_vector (vect)
     struct vector *vect;
{
  int *v = vect->sv;
  int d;
  int id_num;
  int finish = 0;

  for (; finish == 0 && ((d = get_phrase (buf, SIZE)) == ':' || d == '\n');)
    {
      if (d == '\n')
        {
          finish = 1;
        }
      if (*buf == '\0')
        continue;
      if (*buf == ATTR_CHAR)
        {
          if (tcp (buf + 1))
            {
              set_heap (&vect->tc_name, buf + 1);
            }
          else
            {
              set_attribute (buf + 1, vect);
            }
        }
      else
        {
          if ((id_num = find_id (buf)) < 0)
            {
              error_format ("Unknown identifier in vector %s\n", buf);
            }
          v[id_num / (sizeof (int) * 8)] |= 1 << id_num % (sizeof (int) * 8);
/*      1 << (((sizeof(int) * 8 - 1) - id_num % (sizeof(int) * 8))); */
        }
    }
  if (finish == 0)
    {
      error_format ("%c is not permitted here.\n", d);
    }
}

static void
read_syuutanv ()
{
  int d;
/*
  int k,j;
  for(k = 0 ; k < SYUUTANV_KOSUU;k++){
    for(j = 0 ; j < OUT_INT_KOSUU ;j++){
      syuutanv[k].sv[j] = 0;
    }
    syuutanv[k].tc = NULL;
  }
*/
  for (;;)
    {
      d = get_phrase (buf, SIZE);
      if (strcmp (buf, END_ID) == 0)
        {
          break;
        }
      if (d != '/')
        {
          error_format ("");
        }
      if (strcmp (buf, SV0) == 0)
        {
          read_a_vector (&syuutanv[0]);
        }
      if (strcmp (buf, SV1) == 0)
        {
          read_a_vector (&syuutanv[1]);
        }
      if (strcmp (buf, SV2) == 0)
        {
          read_a_vector (&syuutanv[2]);
        }
    }
}

static void
print_out ()
{
  int k, j, i;
  int count = 0;
  char *c;
  FILE *ofpter;

  if ((ofpter = fopen (outfile, "w")) == NULL)
    {
      fprintf (stderr, "Can't open the output file %s.\n", outfile);
      perror ("");
      exit (1);
    }

  create_file_header (ofpter, WNN_FT_FUZOKUGO_FILE, NULL);

  for (k = 0; k < idn; k++)
    {
      if ((k % 32) == 0)
        {
          fprintf (ofpter, "\n");
        }
      for (j = 0; j < id_num; j++)
        {
          if (id[j].bit == k)
            {
              fprintf (ofpter, ";%d\t%s\n", id[j].bit, id[j].str);
              break;
            }
        }
    }

#ifndef NO_FZK
  fprintf (ofpter, "\n%d ;付属語の個数\n", count_yomi ());
#endif /* NO_FZK */
  fprintf (ofpter, "%d ;接続ベクタの長さ\n", out_int_kosuu);
#ifndef NO_FZK
  fprintf (ofpter, "%d ;付属語ベクタの個数\n", count_fz_num ());
#endif /* NO_FZK */
  fprintf (ofpter, "%d ;幹語品詞のワード数\n", ((mhinsi + 31) >> 5));
  fprintf (ofpter, "%d ;幹語ベクトルの数\n", mactjv);
  fprintf (ofpter, "%d ;幹語の品詞数\n", mhinsi);
#ifndef NO_FZK
  for (k = 0; (count = count_same_yomi (k)) > 0;)
    {
      char tmp[FUZOKUGO_LEN * 2 + 1];
      if (fz[k].y[0] == 0)
        {
          fprintf (ofpter, ";%s\n", fz[k].comment);
          k++;
          continue;
        }
      wnn_sStrcpy (tmp, fz[k].y);
      fprintf (ofpter, "%s %d\n", tmp, count);
      for (; count > 0; k++)
        {
          if (fz[k].y[0])
            {
              fprintf (ofpter, "%4d ", fz[k].hinsi);
              for (i = 0; i < out_int_kosuu; i++)
                {
                  fprintf (ofpter, "%8x ", fz[k].vect.sv[i]);
                }
              fprintf (ofpter, ";%s\n", fz[k].comment);
              count--;
            }
          else
            {
              fprintf (ofpter, ";%s\n", fz[k].comment);
            }
        }
    }
#endif /* NO_FZK */
  fprintf (ofpter, "\n\n");
  fprintf (ofpter, ";\n");
  fprintf (ofpter, ";syuutan vectors\n");
  fprintf (ofpter, ";\n");
  for (k = 0; k < SYUUTANV_KOSUU; k++)
    {
      for (i = 0; i < out_int_kosuu; i++)
        {
          fprintf (ofpter, "%8x ", syuutanv[k].sv[i]);
        }
      fprintf (ofpter, "\n");
    }
  fprintf (ofpter, "; 幹語接続ベクタNo. 幹語接続ベクタ\n");
  for (k = 0; k < mactjv; k++)
    {
      fprintf (ofpter, "\n%4d ", k);
      for (i = 0; i < out_int_kosuu; i++)
        {
          fprintf (ofpter, "%8x ", jiritugov[k].v.sv[i]);
        }
    }
  fprintf (ofpter, "\n\n; 幹語接続ベクタ\n");
  for (k = 0; k < mhinsi; k++)
    {
      c = wnn_get_hinsi_name (k);
      if (c)
        {
          fprintf (ofpter, "\t%d\t%d\t;%s\n", k, actjv[k], c);
        }
      else
        {
#ifdef CHINESE
          fprintf (ofpter, "\t%d\t-1\t;隆協吶\n", k);
#else
# ifdef KOREAN
          fprintf (ofpter, "\t%d\t-1\t;擶鑷譎\n", k);
# else
          fprintf (ofpter, "\t%d\t-1\t;未定義\n", k);
# endif
#endif
        }
    }
}

#ifndef NO_FZK
static int
count_same_yomi (start)
     int start;
{
  int count = 1;
  w_char yomi[FUZOKUGO_LEN + 1];
  int i;

  if (start >= fz_num)
    return (0);
  wnn_Strcpy (yomi, fz[start].y);
  for (i = start + 1; i < fz_num; i++)
    {
      if (fz[i].y[0])
        {
          if (wnn_Strcmp (yomi, fz[i].y) == 0)
            {
              count += 1;
            }
          else
            {
              return (count);
            }
        }
    }
  return (count);
}

static int
count_yomi ()
{
  int k;
  w_char yomi[FUZOKUGO_LEN + 1];
  int count = 0;

  yomi[0] = 0;
  for (k = 0; k < fz_num; k++)
    {
      if (fz[k].y[0])
        {
          if (wnn_Strcmp (yomi, fz[k].y) != 0)
            {
              count += 1;
              wnn_Strcpy (yomi, fz[k].y);
            }
        }
    }
  return (count);
}
#endif /* NO_FZK */

/* reverse for char as w_char */
#ifdef nodef
static void
reverse (d, s)
     char *d, *s;
{
  int k;
  int len = strlen (s);
  if (len % 2)
    {
      error_format ("length of yomi string is odd\n");
    }
  len /= 2;
  for (k = 0; k < len; k++)
    {
      d[k * 2] = s[(len - 1 - k) * 2];
      d[k * 2 + 1] = s[(len - 1 - k) * 2 + 1];
    }
  d[len * 2] = 0;
  d[len * 2 + 1] = 0;
}


static int
w_str_cmp (a, b)
     w_char *a, *b;
{
  w_char wa, wb;

  for (; *a != 0 && *b != 0;)
    {
      wa = *a;
      wb = *b;
      if (wa != wb)
        {
          return (w_char_cmp (wa, wb));
        }
    }
  if (*a == 0 && *b == 0)
    return (0);
  if (*a == 0)
    return (-1);
  return (1);

}

static int
w_char_cmp (wa, wb)
     unsigned short wa, wb;
{
  if (wa == wb)
    return (0);
  if (wa == UJIS_TEN)
    {
      if (wb == UJIS_MARU)
        return (-1);
      return (1);
    }
  if (wa == UJIS_MARU)
    return (1);
  if (wb == UJIS_MARU || wb == UJIS_TEN)
    return (-1);

  if (wb > wa)
    return (-1);
  return (1);
}
#endif

#ifndef NO_FZK
static void
check_fuzokugo ()
{
  int k;
  int point = 0;

  for (k = 1; k < fz_num; k++)
    {
      if (wnn_Strcmp (fz[point].y, fz[k].y) == 0 && fz[point].hinsi == fz[k].hinsi)
        {
          fprintf (stderr, "Entries with same yomi \"%s\"and same hinsi \"%s\" are merged.\n", fz[k].cy, id[find_id_name (fz[k].hinsi)].str);
          bit_or (fz[point].vect.sv, fz[k].vect.sv);
          fz[k].y[0] = 0;
        }
      else
        {
          point = k;
        }
    }
}
#endif /* NO_FZK */

static void
bit_or (bit1, bit2)
     int bit1[];
     int bit2[];
{
  int i;
  for (i = 0; i < out_int_kosuu; i++)
    {
      bit1[i] |= bit2[i];
    }
}

static int
find_id_name (i)
     int i;
{
  int k;
  for (k = 0; k < id_num; k++)
    {
      if (id[k].bit == i)
        {
          return (k);
        }
    }
  return -1;
}

#ifndef NO_FZK
static int
count_fz_num ()
{
  int k;
  int count = 0;
  for (k = 0; k < fz_num; k++)
    {
      if (fz[k].y[0])
        {
          count += 1;
        }
    }
  return (count);
}
#endif /* NO_FZK */

static void
get_id_part (buf1, buf)
     char *buf1;
     char *buf;
{
  char *c = buf1;

  strcpy (buf1, buf);
  for (; *c; c++)
    {
      if (*c == ATTR_START)
        {
          *c = '\0';
          return;
        }
    }
  return;
}

static char *
attr_start (c)
     char *c;
{
  for (; *c; c++)
    {
      if (*c == ATTR_START)
        {
          return (c + 1);
        }
    }
  return (NULL);
}

static char *
get_attr_part (c, intp)
     char *c;
     int *intp;
{
  char tmp[ID_MAX_LEN];
  char *c1 = tmp;

  strcpy (tmp, c);
  for (; *c1; c1++)
    {
      if (*c1 == ATTR_NEXT)
        {
          *c1 = '\0';
          *intp = search_attr (tmp);
          return (c + 1 + (c1 - tmp));
        }
    }
  *intp = search_attr (tmp);
  return (NULL);
}

static int
search_attr (c)
     char *c;
{
  int k;

  for (k = 0; k < attr_num; k++)
    {
      if (strcmp (attr[k].name, c) == 0)
        {
          return (k);
        }
    }
  error_format ("Not defined attribute %s.\n", c);
  return -1;
}

static void
check_attrs (id_n, c)
     int id_n;
     char *c;
{
  int attr_n;

  for (c = attr_start (c); c;)
    {
      c = get_attr_part (c, &attr_n);
      set_id (attr_n, id_n);
    }
}

static void
set_id (attr_n, id_n)
     int attr_n, id_n;
{
  attr[attr_n].id_set[id_n / (sizeof (int) * 8)] |= (1 << id_n % (sizeof (int) * 8));
}

static void
vector_or (sv, attrv)
     int sv[];
     int attrv[];
{
  int k;

  for (k = 0; k < OUT_INT_KOSUU; k++)
    {
      sv[k] |= attrv[k];
    }
}

static void
vector_and (sv, attrv)
     int sv[];
     int attrv[];
{
  int k;

  for (k = 0; k < OUT_INT_KOSUU; k++)
    {
      sv[k] &= attrv[k];
    }
}

static void
manipulate_tc_vector (vectp)
     struct vector *vectp;
{
  if (vectp->tc)
    {
      manipulate_tc_vector (vectp->tc);
      vector_or (vectp->sv, (vectp->tc)->sv);
      vectp->tc = NULL;
    }
}


static void
manipulate_tc ()
{
  int k;

#ifndef NO_FZK
  for (k = 0; k < fz_num; k++)
    {
      manipulate_tc_vector (&fz[k].vect);
    }
#endif /* NO_FZK */
  for (k = 0; k < SYUUTANV_KOSUU; k++)
    {
      manipulate_tc_vector (&syuutanv[k]);
    }
}

static void
set_heap (cp, str)
     char **cp;
     char *str;
{
  int len = strlen (str);

  if (hp + len + 1 >= heap + HEAP_LEN)
    {
      error_no_heap ();
    }
  *cp = hp;
  strcpy (hp, str);
  hp += len + 1;
}

char svstr[] = "sv*";

static struct vector *
find_tc_vect (c)
     char *c;
{
  int k;
  char tmp[ID_MAX_LEN + FUZOKUGO_LEN + 1];
#ifndef NO_FZK
  w_char tmp1[FUZOKUGO_LEN + 1];
  w_char tmp2[FUZOKUGO_LEN + 1];
  int id_num;
#endif
  char *cc;

  strcpy (tmp, c);
  for (cc = tmp; *cc; cc++)
    {
      if (*cc == TCSEP)
        {
          *cc = 0;
          cc += 1;
          break;
        }
    }


  if (strlen (tmp) == strlen (c))
    {
      error_tc (c);
    }

  for (k = 0; k < SYUUTANV_KOSUU; k++)
    {
      if (strcmp (tmp, SV[k]) == 0)
        {
          return (&syuutanv[k]);
        }
    }

#ifndef NO_FZK
  if ((id_num = find_id (tmp)) < 0)
    {
      error_tc (c);
    }

  wnn_Sstrcpy (tmp1, cc);
  wnn_Sreverse (tmp2, tmp1);
  for (k = 0; k < fz_num; k++)
    {
      if ((wnn_Strcmp (fz[k].y, tmp2) == 0) && (fz[k].hinsi == id_num))
        {
          return (&fz[k].vect);
        }
    }
#endif /* NO_FZK */
  error_tc (c);
  return (NULL);
}

static int
tcp (c)
     char *c;
{
  for (; *c; c++)
    {
      if (*c == TCSEP)
        return (1);
    }
  return (0);
}

static void
set_tc_from_name ()
{
  int k;
#ifndef NO_FZK
  for (k = 0; k < fz_num; k++)
    {
      if (fz[k].vect.tc_name)
        {
          fz[k].vect.tc = find_tc_vect (fz[k].vect.tc_name);
        }
    }
#endif /* NO_FZK */
  for (k = 0; k < SYUUTANV_KOSUU; k++)
    {
      if (syuutanv[k].tc_name)
        {
          syuutanv[k].tc = find_tc_vect (syuutanv[k].tc_name);
        }
    }
}


static void
set_attribute (c, vect)
     char *c;
     struct vector *vect;
{

  int tmp_v[OUT_INT_KOSUU];
  char tmp[ID_MAX_LEN];
  char *d = tmp;
  int k;

  for (k = 0; k < OUT_INT_KOSUU; k++)
    {
      tmp_v[k] = 0xffffffff;
    }

  for (; *c; c++)
    {
      if (*c == ATTR_AND)
        {
          *d = 0;
          vector_and (tmp_v, attr[search_attr (tmp)].id_set);
          d = tmp;
        }
      else
        {
          *d++ = *c;
        }
    }
  *d++ = 0;
  vector_and (tmp_v, attr[search_attr (tmp)].id_set);

  vector_or (vect->sv, tmp_v);
}

/* 自立語ベクタの読み込み */
static void
read_jiritugov ()
{
  char d;
  register int i, n;
  int vno;

  for (mactjv = 0, i = 0; i < MAXHINSI; i++)
    {
      d = get_phrase (buf, SIZE);
#ifdef  NO_FZK
      if (strcmp (SYUUTANV, buf) == 0)
        return;
#else
      if (strcmp (FUZOKUGO, buf) == 0)
        return;
#endif /* NO_FZK */
      n = wnn_find_hinsi_by_name (buf);
      if (n == -1)
        error_format ("Hinsi which is not defined in Hinsifile.\n");
      if (d != '/')
        {
          error_format ("'/' and setuzoku vector is expected.\n");
        }
      pre_clear_jiritugo_v (mactjv);
      read_a_vector (&jiritugov[mactjv].v);
      if ((vno = is_same_jiritugov (mactjv)) == -1)
        {
          vno = mactjv;
          mactjv++;
        }
      actjv[n] = vno;
    }
}

/* 自立語ベクタ領域をクリアーする */
static void
pre_clear_jiritugo_v (n)
     int n;
{
  int i;

  for (i = 0; i < OUT_INT_KOSUU; i++)
    jiritugov[n].v.sv[i] = 0;
}

/* 同じ自立語ベクタあったか */
static int
is_same_jiritugov (n)
     int n;
{
  register int i, j;

  for (i = 0; i < n; i++)
    {
      for (j = 0; j < OUT_INT_KOSUU; j++)
        {
          if (jiritugov[i].v.sv[j] != jiritugov[n].v.sv[j])
            break;
        }
      if (j == OUT_INT_KOSUU)
        return (i);             /* 同じベクタ */
    }
  return (-1);                  /* 同じものがなかった */
}

/*
static void
classify_jiritugov()
{
  int k,j;
  int mac;
  for(k = 0 ; k < mhinsi; k++){
    for(j = 0 ; j < mactjv; j++){
      if(eqv(jiritugov[k].v.sv,actjv[j].sv)){
        jiritugov[k].vnum = j;
        break;
      }
    }
    if(j == mactjv){
      vcp(actjv[mactjv++].sv, jiritugov[k].v.sv);
    }
  }
}

static void
eqv(a,b)
int a[],b[];
{
  int k;
  for(k = 0 ; k < OUT_INT_KOSUU; k++){
    if(!(a[k] == b[k]))return(0);
  }
  return(1);
}

static void
vcp(a,b)
     int a[],b[];
{
  int k;
  for(k = 0 ; k < OUT_INT_KOSUU; k++){
    a[k] = b[k];
  }
}
*/

static void
init (argc, argv)
     int argc;
     char **argv;
{

  int c;
  extern int optind;
  extern char *optarg;

  while ((c = getopt (argc, argv, "h:")) != EOF)
    {
      switch (c)
        {
        case 'h':
          hinsi_file_name = optarg;
          break;
        }
    }
  if (optind)
    {
      optind--;
      argc -= optind;
      argv += optind;
    }

  if (argc != 2)
    {
      usage ();
      exit (1);
    }

  strcpy (outfile, argv[1]);

}

static void
usage ()
{
  fprintf (stderr, "Usage : %s [-h <hinsi filename>] <fzk.data filename>\n", com_name);
}