Mercurial > freewnn
diff Wnn/jserver/renbn_kai.c @ 0:bbc77ca4def5
initial import
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 13 Dec 2007 04:30:14 +0900 |
parents | |
children | ed4bb01eb317 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Wnn/jserver/renbn_kai.c Thu Dec 13 04:30:14 2007 +0900 @@ -0,0 +1,602 @@ +/* + * FreeWnn is a network-extensible Kana-to-Kanji conversion system. + * This file is part of FreeWnn. + * + * Copyright Kyoto University Research Institute for Mathematical Sciences + * 1987, 1988, 1989, 1990, 1991, 1992 + * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 + * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 + * Copyright FreeWnn Project 1999, 2000, 2002 + * + * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +static char rcs_id[] = "$Id: renbn_kai.c,v 1.4 2002/09/01 17:13:11 hiroo Exp $"; + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +/* #include <stdio.h> */ /* for debug only */ +#include "commonhd.h" +#include "de_header.h" +#include "fzk.h" +#include "kaiseki.h" + +static struct DSD_DBN *dcdbn_set (struct DSD_DBN *, struct DSD_SBN **, struct BZD *); +static struct DSD_SBN *dcdsbn_set (struct DSD_SBN *, struct SYO_BNSETSU *); +static int cnt_syo (struct SYO_BNSETSU *); +static void cnt_bzd (struct BZD *, int *, int *); +static int chk_yomi_endvect (int, int, int, int); +static int set_kata_giji_sbn (int, int, int, int, struct SYO_BNSETSU **); +static int set_kata_giji_bzd (int, int, int, int, struct BZD **, int); + +int +renbn_kai (int yomi_sno, /* 解析文字列 start index */ + int yomi_eno, /* 解析文字列 end index (end char の次) */ + int beginvect, /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ +#ifndef NO_FZK + w_char *fzkchar, +#endif /* NO_FZK */ + int endvect, /* 終端ベクタ */ + int endvect1, /* 終端ベクタ (予備) */ + int endvect2, /* bunsetsu 終端ベクタ */ + int kaidbno, /* 解析大文節数 */ + int kaisbno, /* 解析小文節数 */ + struct DSD_DBN **dsd_dbn) /* 決定大文節情報エリア pointer */ +{ + int dbn_cnt; + int sbn_cnt; + struct DSD_SBN *dsd_sbn; /* 決定大文節情報エリア pointer */ + + UINT dicidyno, /* 決定した大文節の index */ + buncnt = 0; /* 決定した大文節数 */ + struct BZD *rbzdptr; /* 決定の対象となるノード top pointer */ + struct BZD *brbzdptr, /* work pointer */ + *dicide_bp = 0, /* 決定した大文節の top pointer */ + *dicide_np, /* 決定した大文節への pointer */ + *wkbzdptr, /* work pointer */ + *maxbzd; /* 決定の対象となっている大文節の */ + /* 最大の評価値を持つノードへの pointer */ + int rtmknode; + extern int _status; + + dicidyno = yomi_sno - 1; + rbzdptr = 0; + rtmknode = 1; + + _status = 0; + + if (c_env->fzk_fid == -1) + { + wnn_errorno = WNN_FZK_FILE_NO_LOAD; + return (-1); + } + ft = (struct FT *) files[c_env->fzk_fid].area; + if (chk_yomi_endvect (yomi_sno, yomi_eno, endvect, endvect1) < 0) + return (-1); + while (dicidyno + 1 < yomi_eno) + { + if (rbzdptr == 0) + { + if (dbn_kai (dicidyno + 1, yomi_eno, beginvect, +#ifndef NO_FZK + fzkchar, +#endif /* NO_FZK */ + endvect, endvect1, kaisbno, &rbzdptr) < 0) + { + log_err ("CONVERSION ERROR."); + init_work_areas (); + return (-1); /* ERROR */ + } + if (rbzdptr == 0) + { + if (set_kata_giji_bzd (dicidyno + 1, yomi_eno, endvect, endvect1, &rbzdptr, buncnt) < 0) + return (-1); + buncnt++; + rtmknode = 0; + if (dicide_bp == 0) + dicide_bp = rbzdptr; + else + dicide_np->lnk_son = rbzdptr; + break; + } + } + /* 1 文節決定可能になるまでノードを作る */ + if (kaidbno == 1) + { + /* 各文節の評価値の足し算(son_v)は、mknode でやるんだけど kaidbno が 1 + つまり 1文節解析の場合は、そこを通らないからここでやろう */ + for (brbzdptr = rbzdptr->lnk_br; brbzdptr != 0; brbzdptr = brbzdptr->lnk_br) + brbzdptr->son_v = brbzdptr->v_jc; + } + else + { + while ((int) (rbzdptr->kbcnt) < kaidbno && rtmknode > 0) + { + for (brbzdptr = rbzdptr; brbzdptr != 0; brbzdptr = brbzdptr->lnk_br) + { + rtmknode = mknode (brbzdptr, yomi_eno, beginvect, +#ifndef NO_FZK + fzkchar, +#endif /* NO_FZK */ + endvect2, kaisbno); + brbzdptr->son_v /= brbzdptr->kbcnt; + if (rtmknode == 0) + { + brbzdptr->son_v += brbzdptr->v_jc; + } + else if (rtmknode == -1) + { + log_err ("CONVERSION ERROR."); + init_work_areas (); /* エラーでいいのかな */ + return (-1); /* ERROR */ + } + } + } + } + + /* 最大の評価を持つノードを見つける */ + for (brbzdptr = rbzdptr->lnk_br, maxbzd = rbzdptr; brbzdptr != 0; brbzdptr = wkbzdptr) + { + wkbzdptr = brbzdptr->lnk_br; + if (maxbzd->son_v <= brbzdptr->son_v) + { + clr_node (maxbzd); /* garbage collection */ + maxbzd = brbzdptr; + } + else + clr_node (brbzdptr); + } + + /* 1 文節決定 */ + buncnt++; /* 決定文節 count up */ + dicidyno = maxbzd->j_c; /* 決定した文節の top */ + /* kettei sita node no link */ + if (maxbzd->bend_m == yomi_sno) + { + dicide_bp = dicide_np = maxbzd; + } + else + { + dicide_np->lnk_son = maxbzd; + dicide_np = maxbzd; + } + rbzdptr = maxbzd->lnk_son; + } + + cnt_bzd (dicide_bp, &dbn_cnt, &sbn_cnt); + if ((*dsd_dbn = get_dsd_dbn (dbn_cnt)) <= (struct DSD_DBN *) 0) + { + init_work_areas (); + return (-1); + } + if ((dsd_sbn = get_dsd_sbn (sbn_cnt)) <= (struct DSD_SBN *) 0) + { + init_work_areas (); + return (-1); + } + + dcdbn_set (*dsd_dbn, &dsd_sbn, dicide_bp); + return (buncnt); +} + +/************************************************************/ +/* 単文節解析(大文節) routine */ +/************************************************************/ +int +tan_dai (int yomi_sno, /* 解析文字列 start index */ + int yomi_eno, /* 解析文字列 end index (end char の次) */ + int beginvect, /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ +#ifndef NO_FZK + w_char *fzkchar, +#endif /* NO_FZK */ + int endvect, /* 終端ベクタ */ + int endvect1, /* 終端ベクタ */ + int kaisbno, /* 解析小文節数 */ + struct DSD_DBN **dsd_dbn) /* 決定大文節情報エリア pointer */ +{ + int dbn_cnt; + int sbn_cnt; + struct DSD_SBN *dsd_sbn; /* 決定大文節情報エリア pointer */ + struct BZD *rbzdptr; /* 決定の対象となるノードの + トップポインタ */ + struct BZD *brbzdptr, *wkbzdptr; + struct BZD *maxbzd; /* 決定の対象となっている文節の + 最大の評価値を持つノードへのポインタ */ + extern int _status; + + if (c_env->fzk_fid == -1) + { + wnn_errorno = WNN_FZK_FILE_NO_LOAD; + return (-1); + } + ft = (struct FT *) files[c_env->fzk_fid].area; + + rbzdptr = 0; + _status = 0; + if (chk_yomi_endvect (yomi_sno, yomi_eno, endvect, endvect1) < 0) + return (-1); + if (dbn_kai (yomi_sno, yomi_eno, beginvect, +#ifndef NO_FZK + fzkchar, +#endif /* NO_FZK */ + endvect, endvect1, kaisbno, &rbzdptr) < 0) + { + init_work_areas (); + return (-1); /* ERROR */ + } + + /* 最大の評価値を持つノードを見つける */ + for (brbzdptr = rbzdptr, maxbzd = 0; brbzdptr != 0; brbzdptr = wkbzdptr) + { + wkbzdptr = brbzdptr->lnk_br; + if (brbzdptr->j_c == yomi_eno - 1) + { /* 文節長さ */ + if (maxbzd == 0) + { + maxbzd = brbzdptr; + } + else if (maxbzd->v_jc < brbzdptr->v_jc) + { + freebzd (maxbzd); + maxbzd = brbzdptr; + } + else + { + freebzd (brbzdptr); + } + } + else + { + clr_node (brbzdptr); + } + } + + if (maxbzd == 0) + { + if (set_kata_giji_bzd (yomi_sno, yomi_eno, endvect, endvect1, &maxbzd, 0) < 0) + return (-1); + } + cnt_bzd (maxbzd, &dbn_cnt, &sbn_cnt); + if ((*dsd_dbn = get_dsd_dbn (dbn_cnt)) <= (struct DSD_DBN *) 0) + { + init_work_areas (); + return (-1); + } + if ((dsd_sbn = get_dsd_sbn (sbn_cnt)) <= (struct DSD_SBN *) 0) + { + init_work_areas (); + return (-1); + } + + dcdbn_set (*dsd_dbn, &dsd_sbn, maxbzd); + return (1); +} + +/************************************************************/ +/* 単文節解析(小文節) routine */ +/************************************************************/ +int +tan_syo (int yomi_sno, /* 解析文字列 start index */ + int yomi_eno, /* 解析文字列 end index (end char の次) */ + int beginvect, /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ +#ifndef NO_FZK + w_char *fzkchar, +#endif /* NO_FZK */ + int endvect, /* 終端ベクタ */ + int endvect1, /* 終端ベクタ */ + struct DSD_SBN **dsd_sbn) /* 決定小文節情報エリア pointer */ +{ + int sbn_cnt; + struct SYO_BNSETSU *rsbnptr; /* 決定の対象となるノードのトップポインタ */ + struct SYO_BNSETSU *brsbnptr, *wksbnptr; + struct SYO_BNSETSU *maxsbn; /* 決定の対象となっている文節の + 最大の評価値を持つノードへのポインタ */ + int divid; + extern int _status; + + if (c_env->fzk_fid == -1) + { + wnn_errorno = WNN_FZK_FILE_NO_LOAD; + return (-1); + } + ft = (struct FT *) files[c_env->fzk_fid].area; + + rsbnptr = 0; + _status = 0; + if (chk_yomi_endvect (yomi_sno, yomi_eno, endvect, endvect1) < 0) + return (-1); + if (sbn_kai (yomi_sno, yomi_eno, endvect, endvect1, &rsbnptr, 1, 0) < 0) + { + init_work_areas (); + return (-1); /* ERROR */ + } + + /* 最大の評価値を持つノードを見つける */ + for (brsbnptr = rsbnptr, maxsbn = 0; brsbnptr != 0; brsbnptr = wksbnptr) + { + wksbnptr = brsbnptr->lnk_br; + if (brsbnptr->j_c == yomi_eno - 1) + { /* 文節長さ */ + if (maxsbn == 0) + { + maxsbn = brsbnptr; + divid = get_status (brsbnptr->kangovect, beginvect, +#ifndef NO_FZK + fzkchar, +#endif /* NO_FZK */ + &(brsbnptr->status)); + brsbnptr->v_jc = DIVID_HYOUKA (brsbnptr->v_jc, divid); + } + else + { + divid = get_status (brsbnptr->kangovect, beginvect, +#ifndef NO_FZK + fzkchar, +#endif /* NO_FZK */ + &(brsbnptr->status)); + brsbnptr->v_jc = DIVID_HYOUKA (brsbnptr->v_jc, divid); + if (beginvect != WNN_ALL_HINSI && ((brsbnptr->status == WNN_SENTOU && maxsbn->status != WNN_SENTOU) || (brsbnptr->status == WNN_CONNECT && maxsbn->status != WNN_CONNECT))) + { + freesbn (maxsbn); + maxsbn = brsbnptr; + } + else if (maxsbn->v_jc < brsbnptr->v_jc) + { + freesbn (maxsbn); + maxsbn = brsbnptr; + } + else + { + freesbn (brsbnptr); + } + } + } + else + { + freesbn (brsbnptr); + } + } + + if (maxsbn == 0) + { + if (set_kata_giji_sbn (yomi_sno, yomi_eno, endvect, endvect1, &maxsbn) < 0) + return (-1); + } + if (maxsbn->jentptr == 0 && maxsbn->status == WNN_NOT_CONNECT) + maxsbn->status = WNN_GIJI; + sbn_cnt = cnt_syo (maxsbn); + if ((*dsd_sbn = get_dsd_sbn (sbn_cnt)) <= (struct DSD_SBN *) 0) + { + init_work_areas (); + return (-1); + } + dcdsbn_set (*dsd_sbn, maxsbn); + freesbn (maxsbn); + return (1); +} + +/**********************************************/ +/* 決定した文節の情報をセットする */ +/**********************************************/ +static struct DSD_DBN * +dcdbn_set (struct DSD_DBN *dsd_dbn, + struct DSD_SBN **dsd_sbn, + struct BZD *bzd) +{ + struct DSD_DBN *nextp; +#ifdef CONVERT_from_TOP + struct DSD_DBN *dsd_dbn_head; + struct BZD *s_bzd; /* Buffer son's bzd */ + dsd_dbn_head = dsd_dbn; + while (bzd != 0) + { + nextp = dsd_dbn++; + nextp->bun_m = bzd->bend_m; + nextp->bun_jc = bzd->j_c; + nextp->sbncnt = bzd->sbn_cnt; + nextp->v_jc = bzd->v_jc; + + if (bzd->sbn->status == 0) + bzd->sbn->status = WNN_SENTOU; + + nextp->sbn = *dsd_sbn; + *dsd_sbn = dcdsbn_set (*dsd_sbn, bzd->sbn); + + s_bzd = bzd->lnk_son; + freebzd (bzd); + bzd = s_bzd; + } + return (dsd_dbn_head); +#else /* CONVERT_from_TOP */ + if (bzd == 0) + return (dsd_dbn); + nextp = dcdbn_set (dsd_dbn, dsd_sbn, bzd->lnk_son); + nextp->bun_m = bzd->bend_m; + nextp->bun_jc = bzd->j_c; + nextp->sbncnt = bzd->sbn_cnt; + nextp->v_jc = bzd->v_jc; + + if (bzd->sbn->status == 0) + bzd->sbn->status = WNN_SENTOU; + + nextp->sbn = *dsd_sbn; + *dsd_sbn = dcdsbn_set (*dsd_sbn, bzd->sbn); + freebzd (bzd); + return (++nextp); +#endif /* CONVERT_from_TOP */ +} + +static struct DSD_SBN * +dcdsbn_set (struct DSD_SBN *dsd_sbn, + struct SYO_BNSETSU *sbn) +{ + if (sbn == 0) + return (dsd_sbn); +#ifdef CONVERT_from_TOP + dsd_sbn = dcdsbn_set (dsd_sbn, sbn->parent); +#endif /* CONVERT_from_TOP */ + dsd_sbn->bun_m = sbn->bend_m; + dsd_sbn->bun_jc = sbn->j_c; + dsd_sbn->i_jc = sbn->i_jc; + dsd_sbn->jentptr = sbn->jentptr; + dsd_sbn->t_jc = sbn->t_jc; + dsd_sbn->hinsi = sbn->hinsi_fk; + dsd_sbn->kangovect = sbn->kangovect; + dsd_sbn->v_jc = sbn->v_jc; + dsd_sbn->status = sbn->status; + dsd_sbn->status_bkwd = sbn->status_bkwd; + dsd_sbn++; +#ifndef CONVERT_from_TOP + dsd_sbn = dcdsbn_set (dsd_sbn, sbn->parent); +#endif /* CONVERT_from_TOP */ + return (dsd_sbn); +} + +/* 1 大文節中の小文節の数 */ +static int +cnt_syo (struct SYO_BNSETSU *sbn) +{ + int cnt; + cnt = 0; + while (sbn) + { + cnt++; + sbn = sbn->parent; + } + return (cnt); +} + +/* 1 大文節の数 */ +static void +cnt_bzd (struct BZD *bzd, + int *dbn_cnt, + int *sbn_cnt) +{ + *dbn_cnt = 0; + *sbn_cnt = 0; + while (bzd) + { + *sbn_cnt += cnt_syo (bzd->sbn); + (*dbn_cnt)++; + bzd = bzd->lnk_son; + } +} + +static int +chk_yomi_endvect ( + int yomi_sno, /* 解析文字列 start index */ + int yomi_eno, /* 解析文字列 end index (end char の次) */ + int endvect, /* 終端ベクタ */ + int endvect1) /* 終端ベクタ */ +{ + if (yomi_sno == yomi_eno || (fzk_ckvt (endvect) == NO && fzk_ckvt (endvect1) == NO)) + { + wnn_errorno = WNN_NO_KOUHO; + log_err ("chk_yomi_endvect: cannot make tan-bunsetu kouho."); + return (-1); + } + return (0); +} + +static int +set_kata_giji_sbn ( + int yomi_sno, /* 解析文字列 start index */ + int yomi_eno, /* 解析文字列 end index (end char の次) */ + int endvect, /* 終端ベクタ */ + int endvect1, /* 終端ベクタ */ + struct SYO_BNSETSU **sbn) +{ + struct ICHBNP *ichbnpbp; /* ICHBNP のセーブ */ + int fzkcnt; + int tempi; + int connect_flg = NO; + + if (chk_yomi_endvect (yomi_sno, yomi_eno, endvect, endvect1) < 0) + return (-1); + + fzkcnt = fzk_kai (&bun[yomi_sno], &bun[yomi_eno], endvect, endvect1, &ichbnpbp); + if (fzkcnt <= 0) + { + log_err ("tan_syo(): cannot make tan-bunsetu kouho."); + init_work_areas (); + return (-1); /* ERROR */ + } + for (tempi = fzkcnt - 1; (int) tempi >= (int) 0; tempi--) + { + if (kan_ck_vector (giji_no, getfzkoh (ichbnpbp, tempi)->vector) == WNN_CONNECT_BK) + { + connect_flg = YES; + break; + } + } + if (tempi < 0) + { + for (tempi = fzkcnt - 1; (int) tempi >= (int) 0; tempi--) + { + if (kan_ck_vector (giji_no, getfzkoh1 (ichbnpbp, tempi)->vector) == WNN_CONNECT_BK) + { + break; + } + } + if (tempi < 0) + { + freeibsp (ichbnpbp); + wnn_errorno = WNN_NO_KOUHO; + log_err ("tan_syo(): cannot make tanbunsetu kouho."); + return (-1); + } + } + + if ((*sbn = getsbnsp ()) == 0) + return (-1); + + (*sbn)->j_c = yomi_eno - 1; + (*sbn)->i_jc = getfzkoh (ichbnpbp, tempi)->offset + yomi_sno; + (*sbn)->bend_m = yomi_sno; + (*sbn)->v_jc = 0; + (*sbn)->jentptr = 0; + (*sbn)->t_jc = WNN_KATAKANA; + (*sbn)->kangovect = ft->kango_hinsi_area[giji_no]; + (*sbn)->hinsi_fk = giji_no; + (*sbn)->status = WNN_GIJI; + (*sbn)->status_bkwd = connect_flg; + freeibsp (ichbnpbp); + return (1); +} + +static int +set_kata_giji_bzd ( + int yomi_sno, /* 解析文字列 start index */ + int yomi_eno, /* 解析文字列 end index (end char の次) */ + int endvect, /* 終端ベクタ */ + int endvect1, /* 終端ベクタ */ + struct BZD **bzd, + int buncnt) +{ + if ((*bzd = getbzdsp ()) == 0) + return (-1); + if (set_kata_giji_sbn (yomi_sno, yomi_eno, endvect, endvect1, &(*bzd)->sbn) < 0) + return (-1); + (*bzd)->j_c = yomi_eno - 1; + (*bzd)->bend_m = yomi_sno; + (*bzd)->v_jc = 0; + (*bzd)->sbn_cnt = 1; + (*bzd)->kbcnt = buncnt + 1; + return (1); +}