Mercurial > freewnn
diff Wnn/jserver/daibn_kai.c @ 0:bbc77ca4def5
initial import
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 13 Dec 2007 04:30:14 +0900 |
parents | |
children | 790205f476c0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Wnn/jserver/daibn_kai.c Thu Dec 13 04:30:14 2007 +0900 @@ -0,0 +1,622 @@ +/* + * $Id: daibn_kai.c,v 1.4 2002/05/12 22:51:16 hiroo Exp $ + */ + +/* + * FreeWnn is a network-extensible Kana-to-Kanji conversion system. + * This file is part of FreeWnn. + * + * Copyright Kyoto University Research Institute for Mathematical Sciences + * 1987, 1988, 1989, 1990, 1991, 1992 + * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 + * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 + * Copyright FreeWnn Project 1999, 2000, 2002 + * + * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/************************************************ + * 大文節解析 * + ************************************************/ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#if STDC_HEADERS +# include <stdlib.h> +#elif HAVE_MALLOC_H +# include <malloc.h> +#endif /* STDC_HEADERS */ +#include "commonhd.h" +#include "de_header.h" +#include "kaiseki.h" +#include "fzk.h" + +static int yomi_sno_tmp; +int _status = 0; +static int edagari_hyouka; + +int +#ifdef NO_FZK +dbn_kai (yomi_sno, yomi_eno, beginvect, endvect, endvect1, nmax, rbzd) +#else +dbn_kai (yomi_sno, yomi_eno, beginvect, fzkchar, endvect, endvect1, nmax, rbzd) +#endif /* NO_FZK */ + int yomi_sno; /* 被解析文字開始位置 */ + register int yomi_eno; /* 被解析文字終了位置(の次) */ + int beginvect; /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ +#ifndef NO_FZK + w_char *fzkchar; /* 前端の付属語文字列 */ +#endif /* NO_FZK */ + int endvect; /* 終端ベクトル */ + int endvect1; /* 終端ベクトル */ + register int nmax; + struct BZD **rbzd; + /* struct BZD *p; p = NULL; として &p を使う。ここに大文節候補の + リストが入る */ +{ + /* 大文節候補リストのカレントポインタ */ + int tmp; + struct SYO_BNSETSU *db_set; + register struct SYO_BNSETSU **sb_que_head; + struct BZD *bzd = 0; + int dbncnt = 0; + int setflg; + register struct SYO_BNSETSU *sb_one; + struct SYO_BNSETSU *sb_set; + register struct SYO_BNSETSU **sb_que_newcomer; + int divid; + int edagari_hyouka_sbn; + + edagari_hyouka = _MININT; + edagari_hyouka_sbn = _MININT; + yomi_sno_tmp = yomi_sno; + + sb_que_head = &db_set; + *sb_que_head = NULL; + if ((tmp = sbn_kai (yomi_sno, yomi_eno, endvect, endvect1, sb_que_head, 1, (struct SYO_BNSETSU *) 0)) <= 0) + return (tmp); + for (sb_one = *sb_que_head; sb_one != 0; sb_one = sb_one->lnk_br) + sb_one->son_v = sum_hyouka (sb_one); + + while (*sb_que_head != NULL) + { + sb_one = *sb_que_head; + *sb_que_head = sb_one->lnk_br; + sb_one->son_v = sum_hyouka (sb_one); + setflg = 0; + if (yomi_eno <= sb_one->j_c + 1) + { +#ifdef NO_FZK + divid = get_status (sb_one->kangovect, beginvect, &sb_one->status); +#else + divid = get_status (sb_one->kangovect, beginvect, fzkchar, &sb_one->status); +#endif /* NO_FZK */ + if (beginvect != WNN_ALL_HINSI && bzd != 0 && + bzd->j_c == sb_one->j_c && ((sb_one->status == WNN_SENTOU && bzd->sbn->status != WNN_SENTOU) || (sb_one->status == WNN_CONNECT && bzd->sbn->status != WNN_CONNECT))) + bzd->v_jc = 0; + if ((tmp = set_daibnsetu (rbzd, &bzd, sb_one, &setflg, divid)) < 0) + return (tmp); + dbncnt += tmp; + if (setflg == 0) + clr_sbn_node (sb_one); + } + else + { + if (kan_ckvt (sentou_no, sb_one->kangovect) == WNN_CONNECT_BK) + { + divid = 0; + sb_one->status = WNN_SENTOU; + } + else + { +/* divid = 2; */ + divid = -1; + sb_one->status = WNN_GIJI; + } + if ((tmp = set_daibnsetu (rbzd, &bzd, sb_one, &setflg, divid)) < 0) + return (tmp); + dbncnt += tmp; + + if (nmax > sb_one->kbcnt) + { +/****************************/ + if (_DIVID (edagari_hyouka_sbn, 3) < sb_one->son_v) + { + if (edagari_hyouka_sbn < sb_one->son_v) + { + edagari_hyouka_sbn = sb_one->son_v; + } +/****************************/ + sb_que_newcomer = &sb_set; + *sb_que_newcomer = NULL; + if ((tmp = sbn_kai (sb_one->j_c + 1, yomi_eno, sb_one->kangovect, WNN_VECT_NO, sb_que_newcomer, sb_one->kbcnt + 1, sb_one)) < 0) + return (tmp); /* ERROR */ +/****************************/ + } + else + { + tmp = 0; + } +/****************************/ + if (tmp > 0) + { + sb_one->reference += tmp; + for (sb_one = *sb_que_newcomer; sb_one != 0; sb_one = sb_one->lnk_br) + sb_one->son_v = sum_hyouka (sb_one); + if (*sb_que_head != 0) + *sb_que_head = que_reorder (*sb_que_head, *sb_que_newcomer); + else + *sb_que_head = *sb_que_newcomer; + } + else + { + if (setflg == 0) + clr_sbn_node (sb_one); + } + } + else + { + if (setflg == 0) + clr_sbn_node (sb_one); + } + } + } +/**********/ + { + struct BZD *bzd_sv; + bzd = *rbzd; + while (dbncnt > 0 && bzd->v_jc < _DIVID (edagari_hyouka, 2)) + { + *rbzd = bzd->lnk_br; + bzd->lnk_br = 0; + clr_node (bzd); + bzd = *rbzd; + dbncnt--; + } + for (; bzd != 0 && bzd->lnk_br != 0; bzd = bzd->lnk_br) + { + if (bzd->lnk_br->v_jc < _DIVID (edagari_hyouka, 2)) + { + bzd_sv = bzd->lnk_br->lnk_br; + bzd->lnk_br->lnk_br = 0; + clr_node (bzd->lnk_br); + bzd->lnk_br = bzd_sv; + dbncnt--; + } + } + } +/**********/ + return (dbncnt); +} + +/* 前(beginvect,fzkchar)と接続できるか調べ、 + 接続できるとき 1 + 大文節の先頭の時 0 + 接続できないとき -1 + を返す + get_jkt_status 参照 +*/ +int +#ifdef NO_FZK +get_status (kangovect, beginvect, status) +#else +get_status (kangovect, beginvect, fzkchar, status) +#endif /* NO_FZK */ + register int kangovect; + int beginvect; +#ifndef NO_FZK + w_char *fzkchar; +#endif /* NO_FZK */ + register short *status; +{ +#ifdef NO_FZK + if (zentan_able (kangovect, beginvect) == YES) + { +#else + if (zentan_able (kangovect, beginvect, fzkchar) == YES) + { +#endif /* NO_FZK */ + _status = 2; + if (beginvect == WNN_ALL_HINSI) + { + if (kan_ckvt (sentou_no, kangovect) == WNN_CONNECT_BK) + { + *status = WNN_SENTOU; +/* + } else if (jentptr == 0) { + *status = WNN_GIJI; + return (-1); +*/ + } + else + { + *status = WNN_NOT_CONNECT; + return (-1); + } + } + else if (beginvect == WNN_BUN_SENTOU) + { + *status = WNN_SENTOU; + } + else + { + if ( +#ifndef NO_FZK + (fzkchar == NULL || *fzkchar == NULL) && +#endif /* NO_FZK */ + beginvect == sentou_no) + { + *status = WNN_SENTOU; + } + else + { + *status = WNN_CONNECT; + return (1); + } + } + } + else if (_status < 2 && kan_ckvt (sentou_no, kangovect) == WNN_CONNECT_BK) + { + _status = 1; + *status = WNN_SENTOU; + } + else + { +/* + if (jentptr == 0) + *status = WNN_GIJI; + else +*/ + *status = WNN_NOT_CONNECT; + return (-1); + } + return (0); +} + +/* 文節先頭になれるか */ +/* 前端ベクタのチェック */ +int +#ifdef NO_FZK +zentan_able (v, hinsi) +#else +zentan_able (v, hinsi, fzkchar) +#endif /* NO_FZK */ + int v; + register int hinsi; /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ +#ifndef NO_FZK + w_char *fzkchar; +#endif /* NO_FZK */ +{ +#ifndef NO_FZK + register int ll; + unsigned short *buf; + struct ICHBNP *ichbnpbp; + w_char *fzk_buf; + int fzkcnt; + int fzklen; + w_char *rev_fzk (); +#endif /* NO_FZK */ + + if (hinsi == WNN_ALL_HINSI) + { + return (YES); + } + else if (hinsi == WNN_BUN_SENTOU) + { + return (kan_ckvt (sentou_no, v)); + } +#ifndef NO_FZK + else + { + if (fzkchar == NULL || *fzkchar == NULL) + { + if ((ll = wnn_get_fukugou_component_body (hinsi, &buf)) == 0) + { + error1 ("wnn_get_fukugou_component:error in zentan_able."); + return (-1); + } + for (; ll > 0; ll--) + { + if (kan_ckvt (*buf, v) == WNN_CONNECT_BK) + return (YES); + buf++; + } + } + else + { + fzklen = Strlen (fzkchar); + fzk_buf = rev_fzk (fzkchar, fzklen); + if (fzk_buf == 0) + return (NO); /* ええかげん */ + + fzkcnt = fzk_kai (fzk_buf, fzk_buf + fzklen, v, WNN_VECT_NO, &ichbnpbp); + if ((fzkcnt <= 0) || (getfzkoh (ichbnpbp, fzkcnt - 1)->offset != fzklen)) + { + freeibsp (ichbnpbp); + return (NO); + } + for (ll = wnn_get_fukugou_component_body (hinsi, &buf); ll > 0; ll--) + { + if (kan_ck_vector (*buf, getfzkoh (ichbnpbp, fzkcnt - 1)->vector) == WNN_CONNECT_BK) + { + freeibsp (ichbnpbp); + return (YES); + } + buf++; + } + freeibsp (ichbnpbp); + } + } +#endif /* NO_FZK */ + return (NO); +} + +#ifndef NO_FZK +w_char * +rev_fzk (fzkchar, len) + register w_char *fzkchar; + int len; +{ + static w_char *fzk = 0; + static int fzk_len = 0; + + if (fzk_len < len) + { + if (fzk != 0) + free (fzk); + if ((fzk = (w_char *) malloc ((len + 1) * sizeof (w_char))) == 0) + { + wnn_errorno = WNN_MALLOC_ERR; + fzk_len = 0; + return (fzk); + } + fzk_len = len; + } + + (void) Sreverse (fzk, fzkchar); + return (fzk); +} +#endif /* NO_FZK */ + +/* 小文節の並び替え 長さとベクタでソートする */ +/* que の先頭を返す */ +/* 長さもベクタも同じなら評価値の高い方だけにする */ +struct SYO_BNSETSU * +que_reorder (que, new) + register struct SYO_BNSETSU *que, *new; +{ + struct SYO_BNSETSU *que_sv; + register struct SYO_BNSETSU *q; + register struct SYO_BNSETSU *tmp; + register struct SYO_BNSETSU *next; + int flg; + + if (new == 0) + return (que); + if ((flg = sbjunjo (que, new)) < 0) + { + if (flg == -2) + { + if (cmp_hyouka (new, que) > 0) + { + tmp = que->lnk_br; + next = new->lnk_br; + clr_sbn_node (que); + que = new; + que->lnk_br = tmp; + } + else + { + next = new->lnk_br; + clr_sbn_node (new); + } + que_sv = que; + new = next; + } + else + que_sv = new; + } + else + que_sv = que; + + while (new != NULL) + { + next = new->lnk_br; + if ((flg = sbjunjo (que, new)) < 0) + { + if (flg == -2) + { + if (cmp_hyouka (new, que) > 0) + { + for (q = que_sv; q->lnk_br != que; q = q->lnk_br); + tmp = que->lnk_br; + clr_sbn_node (que); + que = q->lnk_br = new; + que->lnk_br = tmp; + } + else + { + clr_sbn_node (new); + } + } + else + { + tmp = que; + que = new; + que->lnk_br = tmp; + } + new = next; + continue; + } + while (((flg = sbjunjo (que, new)) > 0) && (que)->lnk_br != NULL) + que = (que->lnk_br); + tmp = que->lnk_br; + if (flg == -2) + { + if (cmp_hyouka (new, que) > 0) + { + for (q = que_sv; q->lnk_br != que; q = q->lnk_br); + clr_sbn_node (que); + que = q->lnk_br = new; + que->lnk_br = tmp; + } + else + { + clr_sbn_node (new); + } + } + else + { + que->lnk_br = new; + new->lnk_br = tmp; + } + new = next; + } + return (que_sv); +} + +/* que と new の順序 + 1: que が前 + 0: que の後に new + -1:new が前 + -2:同一順位 */ +int +sbjunjo (que, new) + register struct SYO_BNSETSU *que, *new; +{ + if (new == 0) + return (1); + if (que->j_c > new->j_c) + return (-1); + if (que->j_c < new->j_c) + { + if (que->lnk_br == 0) + return (0); + if (que->lnk_br->j_c > new->j_c) + return (0); + if (que->lnk_br->j_c < new->j_c) + return (1); + if (que->lnk_br->kangovect > new->kangovect) + return (0); + return (1); + } + if (que->kangovect == new->kangovect) + return (-2); + if (que->kangovect > new->kangovect) + return (-1); + if (que->lnk_br == 0) + return (0); + if (que->lnk_br->j_c > new->j_c) + return (0); + if (que->lnk_br->kangovect > new->kangovect) + return (0); + return (1); +} + + +/* 文節の先頭になれれば、大文節の候補をセットする + bzd に小文節を追加したなら 1 追加しなければ 0 を返す。 */ +int +set_daibnsetu (rbzd, bzd, sbn, setflg, divid) + struct BZD **rbzd; + register struct BZD **bzd; + register struct SYO_BNSETSU *sbn; + int *setflg; + int divid; +{ + register int ret; + int hyouka; + + hyouka = DIVID_HYOUKA (ave_hyouka (sbn), divid); + if (hyouka > edagari_hyouka) + edagari_hyouka = hyouka; + else if (hyouka < _DIVID (edagari_hyouka, 2)) + return (0); + + if (*bzd != 0) + { + if ((*bzd)->j_c == sbn->j_c) + { + /* 同じ長さ */ + if ((*bzd)->v_jc >= hyouka) + return (0); + clr_sbn_node ((*bzd)->sbn); + ret = 0; + } + else + { + if (((*bzd)->lnk_br = getbzdsp ()) == 0) + return (-1); + (*bzd) = (*bzd)->lnk_br; + ret = 1; + } + } + else + { + if ((*rbzd = *bzd = getbzdsp ()) == 0) + return (-1); + ret = 1; + } + (*bzd)->v_jc = hyouka; + (*bzd)->j_c = sbn->j_c; + (*bzd)->sbn_cnt = sbn->kbcnt; + (*bzd)->lnk_br = 0; + (*bzd)->lnk_son = 0; + (*bzd)->son_v = 0; + (*bzd)->sbn = sbn; + (*bzd)->kbcnt = 1; + sbn->reference++; + (*bzd)->bend_m = yomi_sno_tmp; + *setflg = 1; + return (ret); +} + +/* 大文節の評価関数 とりあえず */ + +/* sbn に含まれる小文節の評価値の合計 */ +int +sum_hyouka (sbn) + register struct SYO_BNSETSU *sbn; +{ + return (sbn->v_jc + (sbn->parent ? sbn->parent->son_v : 0)); +} + +int +ave_hyouka (sbn) + register struct SYO_BNSETSU *sbn; +{ + register int len; /* 大文節長 */ + + len = sbn->j_c - yomi_sno_tmp + 1; + return (hyoka_dbn (sbn->son_v, sbn->kbcnt, len)); +} + +/* 大文節の評価値 + 長さが同じで、前端ベクタも同じ2つの大文節のどちらを + 選ぶか決めるために使う + 小文節のリスト sbn1 の方が高いと正の値を返す + */ +int +cmp_hyouka (sbn1, sbn2) + register struct SYO_BNSETSU *sbn1; + register struct SYO_BNSETSU *sbn2; +{ + /* 取りあえず大文節の評価値は、小文節の評価値の平均と考えている */ + return (ave_hyouka (sbn1) - ave_hyouka (sbn2)); +}