view Wnn/jserver/jbiki.c @ 1:790205f476c0

applied wnn_dec12.diff
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 13 Dec 2007 04:47:24 +0900
parents bbc77ca4def5
children ed4bb01eb317
line wrap: on
line source

/*
 *  $Id: jbiki.c,v 1.4 2003/06/07 02:22:23 hiroo Exp $
 */

/*
 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
 * This file is part of FreeWnn.
 * 
 * Copyright Kyoto University Research Institute for Mathematical Sciences
 *                 1987, 1988, 1989, 1990, 1991, 1992
 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
 * Copyright FreeWnn Project 1999, 2000, 2003
 *
 * Maintainer:  FreeWnn Project   <freewnn@tomo.gr.jp>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#include <stdio.h>

#include "commonhd.h"
#include "de_header.h"
#include "jdata.h"
#include "kaiseki.h"


#ifndef min
#define min(a, b) ((a > b)? b:a)
#define max(a, b) ((a < b)? b:a)
#endif

static int maxlevel;/** 読みの何文字目までマッチするエントリーがあったか */
static struct jdata **jmt;/** 候補を返す場所へのポインタ */
static int number;/** クライアントの辞書番号 */
static unsigned short *hinsistart;
static UCHAR *hindostart;
static UCHAR *hindo2start;

#ifdef  CONVERT_with_SiSheng
static unsigned short *sishengstart;
static unsigned int sisheng_int;
static w_char pan_yomi[256];
static char pan_tmp[256];
#endif /* CONVERT_with_SiSheng */

static int ud_biki (struct JT *, w_char *);
static int sd_biki ();
static int sd_biki_one ();
static int sdbinary ();

int
jishobiki (w_char *yomi,	/* 読みの先頭へのポインタ */
	   struct jdata **jmtx)	/* 結果を返す領域の先頭 */
{
  struct JT *jtp;
  int k;
#ifdef  CONVERT_with_SiSheng    /* TMP */
  char sisheng_si[LENGTHBUNSETSU];
  w_char yomi_tmp[LENGTHBUNSETSU];
  get_sisheng (yomi, sisheng_si, yomi_tmp);
  sisheng_int = 0;
  sscanf (sisheng_si, "%d", &sisheng_int);
#endif /* CONVERT_with_SiSheng */

  maxlevel = 0;
  jmt = jmtx;

  for (k = 0; k < c_env->jishomax; k++)
    {
      number = c_env->jisho[k];
      if (dic_table[c_env->jisho[k]].enablef)
        {
          jtp = (struct JT *) (files[dic_table[c_env->jisho[k]].body].area);
          hindo2start = (dic_table[c_env->jisho[k]].hindo != -1) ? jtp->hindo : NULL;
          hindostart = (dic_table[c_env->jisho[k]].hindo != -1) ? ((struct HJT *) (files[dic_table[c_env->jisho[k]].hindo].area))->hindo : jtp->hindo;
#ifdef  CONVERT_with_SiSheng
          if (jtp->syurui == CWNN_REV_DICT)
            sishengstart = jtp->sisheng;
          else
            sishengstart = 0;
#endif /* CONVERT_with_SiSheng */
          hinsistart = jtp->hinsi;
          if (jtp->syurui == WNN_UD_DICT)
            {
#ifdef  CONVERT_with_SiSheng
              if (ud_biki (jtp, yomi_tmp) == -1)
#else
              if (ud_biki (jtp, yomi) == -1)
#endif /* CONVERT_with_SiSheng */
                {
                  goto err;
                }
            }
#ifdef  CONVERT_with_SiSheng
          else if ((jtp->syurui & 0x00ff) == WNN_REV_DICT)
            {
              if (rd_biki (jtp, yomi_tmp, dic_table[c_env->jisho[k]].rev) == -1)
#else
          else if (jtp->syurui == WNN_REV_DICT)
            {
              if (rd_biki (jtp, yomi, dic_table[c_env->jisho[k]].rev) == -1)
#endif /* CONVERT_with_SiSheng */
                {
                  goto err;
                }
            }
          else
            {
#ifdef  CONVERT_with_SiSheng
              if (sd_biki (jtp, yomi_tmp) == -1)
#else
              if (sd_biki (jtp, yomi) == -1)
#endif /* CONVERT_with_SiSheng */
                {
                  goto err;
                }
            }
        }
    }
  return (maxlevel);
err:
  {
    error1 ("No More Jishobiki Area\n");
    for (k = 0; k < maxlevel; k++)
      {
        jmt[k] = NULL;
      }
    return (-1);
  }
}



static int
ud_biki (struct JT *jtl, w_char *yomi)
{
  struct jdata *jep;
  register struct uind2 *p;
  register int ind1;
  register int len;

  struct uind1 *tary;

  tary = jtl->table;

  for (ind1 = binary (tary, yomi, jtl->maxtable, jtl); ind1 >= 0; ind1 = tary[ind1].pter1)
    {

      for (p = ((struct uind2 *) ((tary[ind1].pter) + jtl->hontai));
	  ;
	  p = ((struct uind2 *) ((p->next) + jtl->hontai)))
        {
          len = p->yomi[0];
          if (jmt + len > jmt_end)
            return (-1);

          /* set jmt */
          if (*(jmt + len - 1))
            {
              for (jep = *(jmt + len - 1); jep->jptr != NULL; jep = jep->jptr);
              jep->jptr = j_e_p;
            }
          else
            {
              *(jmt + len - 1) = j_e_p;
            }

          maxlevel = max (maxlevel, len);
          if (j_e_p >= jmtw_end)
            return (-1);
          j_e_p->kanji1 = (UCHAR *)p->kanjipter;
          j_e_p->kanji2 = 0;
          j_e_p->serial = p->serial;
          j_e_p->kosuu = p->kosuu;
          j_e_p->jishono = number;
          j_e_p->jptr = NULL;
          j_e_p->hinsi = hinsistart + p->serial;
          j_e_p->hindo = hindostart + p->serial;
          if (hindo2start)
            j_e_p->hindo_in = hindo2start + p->serial;
          else
            j_e_p->hindo_in = NULL;
          j_e_p->which = D_YOMI;
          j_e_p += 1;
          if (p->next == ENDPTR)
            break;
        }
    }
  return (maxlevel);
}

/*
 * Find the element of user dict which matches longest with the yomi.
 */

int
binary1 (tary, yomi, end, jtl)
     register struct uind1 *tary;
     register w_char *yomi;
     register int end;
     struct JT *jtl;
{
  register int start = -1;
  register int key;

  for (; start + 1 < end;)
    {
      key = (start + end) / 2;
      if (Strcmpud (&tary[key], yomi, jtl) > 0)
        {
          end = key;
        }
      else
        {
          start = key;
        }
    }
  return (start);
}

int
binary (tary, yomi, end, jtl)
     register struct uind1 *tary;
     register w_char *yomi;
     int end;
     struct JT *jtl;
{
  register int start;
  register struct uind2 *hop;
  register int len;

  start = binary1 (tary, yomi, end, jtl);
  for (; start >= 0; start = tary[start].pter1)
    {
      hop = ((struct uind2 *) ((tary[start].pter) + jtl->hontai));
      len = hop->yomi[0];
      if (Substrud (&tary[start], yomi, len, jtl))
        {
          return (start);
        }
    }
  return (-1);
}

int
Strcmpud (ui1p, yomi, jtl)
     struct uind1 *ui1p;
     w_char *yomi;
     struct JT *jtl;
{
  unsigned int y1, y2;
  int tmp;
  int len;
  struct uind2 *hop;

  if (yomi[0] == 0)
    return (ui1p->yomi1 > 0);
  y1 = yomi[0] << 16 | yomi[1];
  if (tmp = (ui1p->yomi1 - y1))
    {
      return ((ui1p->yomi1 > y1) ? 1 : -1);
    }
  if (yomi[1] == 0)
    return (0);
  if (yomi[2] == 0)
    return (ui1p->yomi2 > 0);
  y2 = yomi[2] << 16 | yomi[3];
  if (tmp = (ui1p->yomi2 - y2))
    {
      return ((ui1p->yomi2 > y2) ? 1 : -1);
    }
  hop = ((struct uind2 *) ((ui1p->pter) + jtl->hontai));
  if ((len = hop->yomi[0]) < 4)
    return (0);
  if ((tmp = Strncmp (hop->yomi + 1, yomi + 4, len - 4)) != 0)
    {
      return (tmp);
    }
  return (0 - yomi[len]);
}

int
Substrud (ui1p, yomi, len, jtl)
     struct uind1 *ui1p;
     w_char *yomi;
     int len;
     struct JT *jtl;
{
  switch (len)
    {
    case 0:
      return (1);
    case 1:
      return ((ui1p->yomi1 >> 16 == yomi[0]));
    case 2:
      return (ui1p->yomi1 == (yomi[0] << 16 | yomi[1]));
    case 3:
      return ((ui1p->yomi1 == (yomi[0] << 16 | yomi[1])) && ui1p->yomi2 >> 16 == yomi[2]);
    case 4:
      return ((ui1p->yomi1 == (yomi[0] << 16 | yomi[1])) && (ui1p->yomi2 == (yomi[2] << 16 | yomi[3])));
    default:
      return ((ui1p->yomi1 == (yomi[0] << 16 | yomi[1])) && (ui1p->yomi2 == (yomi[2] << 16 | yomi[3])) && !(Strncmp (((struct uind2 *) (ui1p->pter + jtl->hontai))->yomi + 1, yomi + 4, len - 4)));
    }
}

int
Substrstud (yomi, ui1p, jtl)
     w_char *yomi;
     struct uind1 *ui1p;
     struct JT *jtl;
{
  int len = Strlen (yomi);

  switch (len)
    {
    case 0:
      return (1);
    case 1:
      return ((ui1p->yomi1 >> 16 == yomi[0]));
    case 2:
      return (ui1p->yomi1 == (yomi[0] << 16 | yomi[1]));
    case 3:
      return ((ui1p->yomi1 == (yomi[0] << 16 | yomi[1])) && ui1p->yomi2 >> 16 == yomi[2]);
    case 4:
      return ((ui1p->yomi1 == (yomi[0] << 16 | yomi[1])) && (ui1p->yomi2 == (yomi[2] << 16 | yomi[3])));
    default:
      if (len > (int) ((struct uind2 *) ((ui1p->pter) + jtl->hontai))->yomi[0])
        return (0);
      return ((ui1p->yomi1 == (yomi[0] << 16 | yomi[1])) && (ui1p->yomi2 == (yomi[2] << 16 | yomi[3])) && !(Strncmp (((struct uind2 *) ((ui1p->pter) + jtl->hontai))->yomi + 1, yomi + 4, len - 4)));
    }
}

int
Substrudud (ui1d, ui1s, jtl)
     register struct uind1 *ui1d, *ui1s;
     register struct JT *jtl;
{
  if (ui1d->yomi1 != ui1s->yomi1)
    {
      if ((ui1d->yomi1 == 0) || (((ui1d->yomi1 & 0xffff0000) == (ui1s->yomi1 & 0xffff0000)) && ((ui1d->yomi1 & 0x0000ffff) == 0)))
        {
          return (1);
        }
      else
        return (0);
    }
  else if (ui1d->yomi2 != ui1d->yomi2)
    {
      if ((ui1d->yomi2 == 0) || (((ui1d->yomi2 & 0xffff0000) == (ui1s->yomi2 & 0xffff0000)) && ((ui1d->yomi2 & 0x0000ffff) == 0)))
        {
          return (1);
        }
      else
        return (0);
    }
  else
    {
      return (!(Strncmp (
                         ((struct uind2 *) ((ui1d->pter) + jtl->hontai))->yomi + 1,
                         ((struct uind2 *) ((ui1s->pter) + jtl->hontai))->yomi + 1, ((struct uind2 *) ((ui1d->pter) + jtl->hontai))->yomi[0])));
    }
}

/* 
 *
 *
 *sd_biki
 *
 *
 */

UCHAR *hontaistart;

static int
sd_biki (jtl, yomi)
     struct JT *jtl;
     w_char *yomi;
{

  hontaistart = jtl->hontai;
  return (sd_biki_one (jtl->hontai, 0, yomi));
}

static int
sd_biki_one (hopter, level, yomi)
     char *hopter;
     w_char *yomi;
     int level;
{

  int tsize;
  w_char *charst;
  w_char *sumst;
  int *ptrst;
  int state;
  struct jdata *jep;
  int index;
  int cnt;
  int serial;

  switch ((state = *(w_char *) hopter))
    {
    case ST_NORMAL:
    case ST_NOPTER:
      tsize = *(w_char *) (hopter + 2);
      charst = (w_char *) (hopter + 12);
      sumst = ((w_char *) charst + tsize + 2);  /* + 2 keeps two zero words */
      ptrst = (int *) ((w_char *) sumst + tsize);
      if ((index = sdbinary (*yomi, charst, tsize)) < 0)
        return (maxlevel);

      if ((cnt = sumst[index] - sumst[index - 1]) > 0)
        {
          if (jmt + level >= jmt_end)
            return (-1);

          /* set jmt */
          if (*(jmt + level))
            {
              for (jep = *(jmt + level); jep->jptr != NULL; jep = jep->jptr);
              jep->jptr = j_e_p;
            }
          else
            {
              *(jmt + level) = j_e_p;
            }

          maxlevel = max (maxlevel, level + 1);
          if (j_e_p >= jmtw_end)
            return (-1);
          j_e_p->kanji1 = (UCHAR *) (hopter + 8);
          j_e_p->kanji2 = sumst[index - 1];
          j_e_p->serial = serial = *(int *) (hopter + 4) + sumst[index - 1];
          j_e_p->kosuu = cnt;
          j_e_p->jishono = number;
          j_e_p->jptr = NULL;
          j_e_p->hinsi = hinsistart + serial;
          j_e_p->hindo = hindostart + serial;
          if (hindo2start)
            j_e_p->hindo_in = hindo2start + serial;
          else
            j_e_p->hindo_in = NULL;
          j_e_p->which = D_YOMI;
          j_e_p += 1;
        }
      if (state == ST_NORMAL)
        {
          if (ptrst[index] != ENDPTR)
            {
              sd_biki_one (hontaistart + ptrst[index], level + 1, yomi + 1);
            }
        }
      break;
    case ST_NOENT:
      tsize = *(w_char *) (hopter + 2);
      charst = (w_char *) (hopter + 4);
      ptrst = (int *) AL_INT ((w_char *) charst + tsize);
      if ((index = sdbinary (*yomi, charst, tsize)) < 0)
        return (maxlevel);
      sd_biki_one (hontaistart + ptrst[index], level + 1, yomi + 1);
      break;
    case ST_SMALL:
      if (*yomi != *(w_char *) (hopter + 2))
        return (maxlevel);
      sd_biki_one (hopter + 4, level + 1, yomi + 1);
      break;

    }
  return (maxlevel);
}


static int
sdbinary (ch, chst, tsize)
     register w_char ch;
     register w_char *chst;
     int tsize;
{
  register w_char *chst1 = chst;
  register w_char *chend = chst + tsize;
  register w_char *key;

  for (; chst < chend;)
    {
      key = chst + ((chend - chst) >> 1);
      if (*key == ch)
        return (key - chst1);
      if (*key < ch)
        chst = key + 1;
      else
        chend = key - 1;
    }
  if (*chst == ch)
    return (chst - chst1);
  return (-1);
}

int
word_search (dic_no, yomi, jmtx)
     int dic_no;
     w_char *yomi;
             /** 読みの先頭へのポインタ */
     struct jdata **jmtx;
                    /** 結果を返す領域の先頭 */
{

  register struct JT *jtp;

  maxlevel = 0;
  jmt = jmtx;

  if (dic_no >= MAX_DIC || dic_no < 0 || dic_table[dic_no].body == -1)
    return (-1);
  number = dic_no;
  jtp = (struct JT *) (files[dic_table[dic_no].body].area);
  hindo2start = (dic_table[dic_no].hindo != -1) ? jtp->hindo : NULL;
  hindostart = (dic_table[dic_no].hindo != -1) ? ((struct HJT *) (files[dic_table[dic_no].hindo].area))->hindo : jtp->hindo;

  hinsistart = jtp->hinsi;
  if (jtp->syurui == WNN_UD_DICT)
    {
      if (ud_biki (jtp, yomi) == -1)
        return (-1);
#if     defined(CONVERT_by_STROKE) || defined(CONVERT_with_SiSheng)
    }
  else if ((jtp->syurui & 0x00ff) == WNN_REV_DICT)
    {
#else
    }
  else if (jtp->syurui == WNN_REV_DICT)
    {
#endif /* CONVERT_by_STROKE || CONVERT_with_SiSheng */
      if (rd_biki (jtp, yomi, dic_table[dic_no].rev) == -1)
        return (-1);
    }
  else
    {
      if (sd_biki (jtp, yomi) == -1)
        return (-1);
    }
  return (maxlevel);
}



/**************************************
 ******        Rev_Dic        *********
 **************************************/

int
rd_biki (jtl, yomi, which)
     struct JT *jtl;
     w_char *yomi;
     register int which;
{
  register struct rind2 *p;
  register int ind, serial;
  register int len;
  register int prev;
  register struct jdata **jepp;

  struct rind1 *tary;
  struct rind2 *ri2;
  UCHAR *kanji;

  tary = jtl->ri1[which];
  ri2 = jtl->ri2;
  kanji = jtl->kanji;

  for (ind = rd_binary (tary, yomi, jtl->maxri1[which], ri2, kanji, which); ind >= 0; ind = tary[ind].pter1)
    {
      p = tary[ind].pter + ri2;
      len = Get_kanji_len (p->kanjipter + kanji, which);
      if (jmt + len > jmt_end)
        return (-1);
      /* set jmt */
      jepp = &(jmt[len - 1]);
      for (; *jepp; jepp = &((*jepp)->jptr));
      maxlevel = max (maxlevel, len);
      prev = -2;                /* prev + 1 is not a serial number */
      for (;; p = p->next[which] + ri2)
        {
          serial = p - ri2;
          if (serial == prev + 1)
            {
              (j_e_p - 1)->kosuu++;
            }
          else
            {
              *jepp = j_e_p;
              if (j_e_p >= jmtw_end)
                return (-1);
              j_e_p->kanji1 = (UCHAR *)p->kanjipter; //xxx this may result in failure --yaz
              j_e_p->kanji2 = 0;
              j_e_p->serial = serial;
              j_e_p->kosuu = 1;
              j_e_p->jishono = number;
              j_e_p->jptr = NULL;
              j_e_p->hinsi = hinsistart + serial;
              j_e_p->hindo = hindostart + serial;
#ifdef  CONVERT_with_SiSheng
              if (sishengstart)
                {
                  j_e_p->sisheng = sishengstart + serial;
                  j_e_p->sisheng_int = sisheng_int;
                }
#endif /* CONVERT_with_SiSheng */
              if (hindo2start)
                j_e_p->hindo_in = hindo2start + serial;
              else
                j_e_p->hindo_in = NULL;
              j_e_p->which = which;
              j_e_p += 1;
              jepp = &((*jepp)->jptr);
            }
          prev = serial;
          if (p->next[which] == RD_ENDPTR)
            break;
        }
    }
  return (maxlevel);
}

/*
 * Find the element of user dict which matches longest with the yomi.
 */

int
rd_binary1 (tary, yomi, end, ri2, kanji, which)
     register struct rind1 *tary;
     register w_char *yomi;
     register int end;
     struct rind2 *ri2;
     UCHAR *kanji;
     int which;
{
  register w_char *s1, *s2;
  register int start = -1;
  register int key;

  for (; start + 1 < end;)
    {
      key = (start + end) / 2;
      s1 = KANJI_str (ri2[tary[key].pter].kanjipter + kanji, which);
#ifdef  CONVERT_with_SiSheng    /*      reform_dic      */
      s1 = biki_sisheng (s1, pan_tmp, pan_yomi);
#endif /* CONVERT_with_SiSheng */
      s2 = yomi;
      /* This is same as Strcmp(s1, s2) */
      for (; *s1 != 0 && *s1 == *s2; s1++, s2++);
      if (*s1 > *s2)
        {
          end = key;
        }
      else
        {
          start = key;
        }
    }
  return (start);
}

/* returns the entry which matches longest with yomi.*/
int
rd_binary (tary, yomi, end, ri2, kanji, which)
     register struct rind1 *tary;
     register w_char *yomi;
     register int end;
     struct rind2 *ri2;
     UCHAR *kanji;
     int which;
{
  register w_char *s1, *s2;
  register int start;
  /*
     register int len;
   */

  start = rd_binary1 (tary, yomi, end, ri2, kanji, which);
  for (; start != RD_ENDPTR; start = tary[start].pter1)
    {
      s1 = KANJI_str (ri2[tary[start].pter].kanjipter + kanji, which);
#ifdef  CONVERT_with_SiSheng    /*      reform_dic      */
      s1 = biki_sisheng (s1, pan_tmp, pan_yomi);
#endif /* CONVERT_with_SiSheng */
      s2 = yomi;
      /* This is same as Substr(s1, s2) */
      for (; *s1 != 0 && *s1 == *s2; s1++, s2++);
      if (!*s1)
        {
          return (start);
        }
    }
  return (-1);
}

/* returns the entry which matches accurately with yomi.*/

int
rd_binary_same (tary, yomi, end, ri2, kanji, which)
     register struct rind1 *tary;
     register w_char *yomi;
     register int end;
     struct rind2 *ri2;
     UCHAR *kanji;
     int which;
{
  register int start;
  /*
     register int len;
   */
  w_char *tmp;

  start = rd_binary1 (tary, yomi, end, ri2, kanji, which);
  for (; start >= 0; start = tary[start].pter1)
    {
      tmp = KANJI_str (ri2[tary[start].pter].kanjipter + kanji, which);
#ifdef  CONVERT_with_SiSheng    /*      reform_dic      */
      tmp = biki_sisheng (tmp, pan_tmp, pan_yomi);
#endif /* CONVERT_with_SiSheng */
      if (!Strcmp (tmp, yomi))
        {
          return (start);
        }
    }
  return (-1);
}


/*
 *   Gd_biki( Giji (Regular Dict) biki routine.
 *
 *
 *  GD Format
 *  [ - ]  moji no syuugou.
 *   p*   p no 0 kai ijou no kurikaesi(p = syuugou)
 *  []+  p no 1 kai ijou no kurikaesi(p = syuugou)
 *
 *
 *  ri2 is only used to point kanji
 */
#define GD_SET_START  '['
#define GD_SET_END    ']'
#define GD_KURIKAESI0 '*'
#define GD_KURIKAESI1 '+'
#define GD_ESCAPE    '\\'
#define GD_FROMTO     '-'