Mercurial > freewnn
view Wnn/jserver/daibn_kai.c @ 18:e7e2aba67cb3
disabled build for cWnn and kWnn by default
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Mon, 14 Apr 2008 17:33:53 +0900 |
parents | 338478efe78d |
children | 466fe6732d8d |
line wrap: on
line source
/* * $Id: daibn_kai.c,v 1.4 2002/05/12 22:51:16 hiroo Exp $ */ /* * FreeWnn is a network-extensible Kana-to-Kanji conversion system. * This file is part of FreeWnn. * * Copyright Kyoto University Research Institute for Mathematical Sciences * 1987, 1988, 1989, 1990, 1991, 1992 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 * Copyright FreeWnn Project 1999, 2000, 2002 * * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /************************************************ * 大文節解析 * ************************************************/ #ifdef HAVE_CONFIG_H # include <config.h> #endif #include <stdio.h> #if STDC_HEADERS # include <stdlib.h> #elif HAVE_MALLOC_H # include <malloc.h> #endif /* STDC_HEADERS */ #include "commonhd.h" #include "de_header.h" #include "kaiseki.h" #include "fzk.h" static int yomi_sno_tmp; int _status = 0; static int edagari_hyouka; int #ifdef NO_FZK dbn_kai (yomi_sno, yomi_eno, beginvect, endvect, endvect1, nmax, rbzd) #else dbn_kai (yomi_sno, yomi_eno, beginvect, fzkchar, endvect, endvect1, nmax, rbzd) #endif /* NO_FZK */ int yomi_sno; /* 被解析文字開始位置 */ register int yomi_eno; /* 被解析文字終了位置(の次) */ int beginvect; /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ #ifndef NO_FZK w_char *fzkchar; /* 前端の付属語文字列 */ #endif /* NO_FZK */ int endvect; /* 終端ベクトル */ int endvect1; /* 終端ベクトル */ register int nmax; struct BZD **rbzd; /* struct BZD *p; p = NULL; として &p を使う。ここに大文節候補の リストが入る */ { /* 大文節候補リストのカレントポインタ */ int tmp; struct SYO_BNSETSU *db_set; register struct SYO_BNSETSU **sb_que_head; struct BZD *bzd = 0; int dbncnt = 0; int setflg; register struct SYO_BNSETSU *sb_one; struct SYO_BNSETSU *sb_set; register struct SYO_BNSETSU **sb_que_newcomer; int divid; int edagari_hyouka_sbn; edagari_hyouka = _MININT; edagari_hyouka_sbn = _MININT; yomi_sno_tmp = yomi_sno; sb_que_head = &db_set; *sb_que_head = NULL; if ((tmp = sbn_kai (yomi_sno, yomi_eno, endvect, endvect1, sb_que_head, 1, (struct SYO_BNSETSU *) 0)) <= 0) return (tmp); for (sb_one = *sb_que_head; sb_one != 0; sb_one = sb_one->lnk_br) sb_one->son_v = sum_hyouka (sb_one); while (*sb_que_head != NULL) { sb_one = *sb_que_head; *sb_que_head = sb_one->lnk_br; sb_one->son_v = sum_hyouka (sb_one); setflg = 0; if (yomi_eno <= sb_one->j_c + 1) { #ifdef NO_FZK divid = get_status (sb_one->kangovect, beginvect, &sb_one->status); #else divid = get_status (sb_one->kangovect, beginvect, fzkchar, &sb_one->status); #endif /* NO_FZK */ if (beginvect != WNN_ALL_HINSI && bzd != 0 && bzd->j_c == sb_one->j_c && ((sb_one->status == WNN_SENTOU && bzd->sbn->status != WNN_SENTOU) || (sb_one->status == WNN_CONNECT && bzd->sbn->status != WNN_CONNECT))) bzd->v_jc = 0; if ((tmp = set_daibnsetu (rbzd, &bzd, sb_one, &setflg, divid)) < 0) return (tmp); dbncnt += tmp; if (setflg == 0) clr_sbn_node (sb_one); } else { if (kan_ckvt (sentou_no, sb_one->kangovect) == WNN_CONNECT_BK) { divid = 0; sb_one->status = WNN_SENTOU; } else { /* divid = 2; */ divid = -1; sb_one->status = WNN_GIJI; } if ((tmp = set_daibnsetu (rbzd, &bzd, sb_one, &setflg, divid)) < 0) return (tmp); dbncnt += tmp; if (nmax > sb_one->kbcnt) { /****************************/ if (_DIVID (edagari_hyouka_sbn, 3) < sb_one->son_v) { if (edagari_hyouka_sbn < sb_one->son_v) { edagari_hyouka_sbn = sb_one->son_v; } /****************************/ sb_que_newcomer = &sb_set; *sb_que_newcomer = NULL; if ((tmp = sbn_kai (sb_one->j_c + 1, yomi_eno, sb_one->kangovect, WNN_VECT_NO, sb_que_newcomer, sb_one->kbcnt + 1, sb_one)) < 0) return (tmp); /* ERROR */ /****************************/ } else { tmp = 0; } /****************************/ if (tmp > 0) { sb_one->reference += tmp; for (sb_one = *sb_que_newcomer; sb_one != 0; sb_one = sb_one->lnk_br) sb_one->son_v = sum_hyouka (sb_one); if (*sb_que_head != 0) *sb_que_head = que_reorder (*sb_que_head, *sb_que_newcomer); else *sb_que_head = *sb_que_newcomer; } else { if (setflg == 0) clr_sbn_node (sb_one); } } else { if (setflg == 0) clr_sbn_node (sb_one); } } } /**********/ { struct BZD *bzd_sv; bzd = *rbzd; while (dbncnt > 0 && bzd->v_jc < _DIVID (edagari_hyouka, 2)) { *rbzd = bzd->lnk_br; bzd->lnk_br = 0; clr_node (bzd); bzd = *rbzd; dbncnt--; } for (; bzd != 0 && bzd->lnk_br != 0; bzd = bzd->lnk_br) { if (bzd->lnk_br->v_jc < _DIVID (edagari_hyouka, 2)) { bzd_sv = bzd->lnk_br->lnk_br; bzd->lnk_br->lnk_br = 0; clr_node (bzd->lnk_br); bzd->lnk_br = bzd_sv; dbncnt--; } } } /**********/ return (dbncnt); } /* 前(beginvect,fzkchar)と接続できるか調べ、 接続できるとき 1 大文節の先頭の時 0 接続できないとき -1 を返す get_jkt_status 参照 */ int #ifdef NO_FZK get_status (kangovect, beginvect, status) #else get_status (kangovect, beginvect, fzkchar, status) #endif /* NO_FZK */ register int kangovect; int beginvect; #ifndef NO_FZK w_char *fzkchar; #endif /* NO_FZK */ register short *status; { #ifdef NO_FZK if (zentan_able (kangovect, beginvect) == YES) { #else if (zentan_able (kangovect, beginvect, fzkchar) == YES) { #endif /* NO_FZK */ _status = 2; if (beginvect == WNN_ALL_HINSI) { if (kan_ckvt (sentou_no, kangovect) == WNN_CONNECT_BK) { *status = WNN_SENTOU; /* } else if (jentptr == 0) { *status = WNN_GIJI; return (-1); */ } else { *status = WNN_NOT_CONNECT; return (-1); } } else if (beginvect == WNN_BUN_SENTOU) { *status = WNN_SENTOU; } else { if ( #ifndef NO_FZK (fzkchar == NULL || *fzkchar == 0) && #endif /* NO_FZK */ beginvect == sentou_no) { *status = WNN_SENTOU; } else { *status = WNN_CONNECT; return (1); } } } else if (_status < 2 && kan_ckvt (sentou_no, kangovect) == WNN_CONNECT_BK) { _status = 1; *status = WNN_SENTOU; } else { /* if (jentptr == 0) *status = WNN_GIJI; else */ *status = WNN_NOT_CONNECT; return (-1); } return (0); } /* 文節先頭になれるか */ /* 前端ベクタのチェック */ int #ifdef NO_FZK zentan_able (v, hinsi) #else zentan_able (v, hinsi, fzkchar) #endif /* NO_FZK */ int v; register int hinsi; /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */ #ifndef NO_FZK w_char *fzkchar; #endif /* NO_FZK */ { #ifndef NO_FZK register int ll; unsigned short *buf; struct ICHBNP *ichbnpbp; w_char *fzk_buf; int fzkcnt; int fzklen; w_char *rev_fzk (); #endif /* NO_FZK */ if (hinsi == WNN_ALL_HINSI) { return (YES); } else if (hinsi == WNN_BUN_SENTOU) { return (kan_ckvt (sentou_no, v)); } #ifndef NO_FZK else { if (fzkchar == NULL || *fzkchar == 0) { if ((ll = wnn_get_fukugou_component_body (hinsi, &buf)) == 0) { error1 ("wnn_get_fukugou_component:error in zentan_able."); return (-1); } for (; ll > 0; ll--) { if (kan_ckvt (*buf, v) == WNN_CONNECT_BK) return (YES); buf++; } } else { fzklen = Strlen (fzkchar); fzk_buf = rev_fzk (fzkchar, fzklen); if (fzk_buf == 0) return (NO); /* ええかげん */ fzkcnt = fzk_kai (fzk_buf, fzk_buf + fzklen, v, WNN_VECT_NO, &ichbnpbp); if ((fzkcnt <= 0) || (getfzkoh (ichbnpbp, fzkcnt - 1)->offset != fzklen)) { freeibsp (ichbnpbp); return (NO); } for (ll = wnn_get_fukugou_component_body (hinsi, &buf); ll > 0; ll--) { if (kan_ck_vector (*buf, getfzkoh (ichbnpbp, fzkcnt - 1)->vector) == WNN_CONNECT_BK) { freeibsp (ichbnpbp); return (YES); } buf++; } freeibsp (ichbnpbp); } } #endif /* NO_FZK */ return (NO); } #ifndef NO_FZK w_char * rev_fzk (fzkchar, len) register w_char *fzkchar; int len; { static w_char *fzk = 0; static int fzk_len = 0; if (fzk_len < len) { if (fzk != 0) free (fzk); if ((fzk = (w_char *) malloc ((len + 1) * sizeof (w_char))) == 0) { wnn_errorno = WNN_MALLOC_ERR; fzk_len = 0; return (fzk); } fzk_len = len; } (void) Sreverse (fzk, fzkchar); return (fzk); } #endif /* NO_FZK */ /* 小文節の並び替え 長さとベクタでソートする */ /* que の先頭を返す */ /* 長さもベクタも同じなら評価値の高い方だけにする */ struct SYO_BNSETSU * que_reorder (que, new) register struct SYO_BNSETSU *que, *new; { struct SYO_BNSETSU *que_sv; register struct SYO_BNSETSU *q; register struct SYO_BNSETSU *tmp; register struct SYO_BNSETSU *next; int flg; if (new == 0) return (que); if ((flg = sbjunjo (que, new)) < 0) { if (flg == -2) { if (cmp_hyouka (new, que) > 0) { tmp = que->lnk_br; next = new->lnk_br; clr_sbn_node (que); que = new; que->lnk_br = tmp; } else { next = new->lnk_br; clr_sbn_node (new); } que_sv = que; new = next; } else que_sv = new; } else que_sv = que; while (new != NULL) { next = new->lnk_br; if ((flg = sbjunjo (que, new)) < 0) { if (flg == -2) { if (cmp_hyouka (new, que) > 0) { for (q = que_sv; q->lnk_br != que; q = q->lnk_br); tmp = que->lnk_br; clr_sbn_node (que); que = q->lnk_br = new; que->lnk_br = tmp; } else { clr_sbn_node (new); } } else { tmp = que; que = new; que->lnk_br = tmp; } new = next; continue; } while (((flg = sbjunjo (que, new)) > 0) && (que)->lnk_br != NULL) que = (que->lnk_br); tmp = que->lnk_br; if (flg == -2) { if (cmp_hyouka (new, que) > 0) { for (q = que_sv; q->lnk_br != que; q = q->lnk_br); clr_sbn_node (que); que = q->lnk_br = new; que->lnk_br = tmp; } else { clr_sbn_node (new); } } else { que->lnk_br = new; new->lnk_br = tmp; } new = next; } return (que_sv); } /* que と new の順序 1: que が前 0: que の後に new -1:new が前 -2:同一順位 */ int sbjunjo (que, new) register struct SYO_BNSETSU *que, *new; { if (new == 0) return (1); if (que->j_c > new->j_c) return (-1); if (que->j_c < new->j_c) { if (que->lnk_br == 0) return (0); if (que->lnk_br->j_c > new->j_c) return (0); if (que->lnk_br->j_c < new->j_c) return (1); if (que->lnk_br->kangovect > new->kangovect) return (0); return (1); } if (que->kangovect == new->kangovect) return (-2); if (que->kangovect > new->kangovect) return (-1); if (que->lnk_br == 0) return (0); if (que->lnk_br->j_c > new->j_c) return (0); if (que->lnk_br->kangovect > new->kangovect) return (0); return (1); } /* 文節の先頭になれれば、大文節の候補をセットする bzd に小文節を追加したなら 1 追加しなければ 0 を返す。 */ int set_daibnsetu (rbzd, bzd, sbn, setflg, divid) struct BZD **rbzd; register struct BZD **bzd; register struct SYO_BNSETSU *sbn; int *setflg; int divid; { register int ret; int hyouka; hyouka = DIVID_HYOUKA (ave_hyouka (sbn), divid); if (hyouka > edagari_hyouka) edagari_hyouka = hyouka; else if (hyouka < _DIVID (edagari_hyouka, 2)) return (0); if (*bzd != 0) { if ((*bzd)->j_c == sbn->j_c) { /* 同じ長さ */ if ((*bzd)->v_jc >= hyouka) return (0); clr_sbn_node ((*bzd)->sbn); ret = 0; } else { if (((*bzd)->lnk_br = getbzdsp ()) == 0) return (-1); (*bzd) = (*bzd)->lnk_br; ret = 1; } } else { if ((*rbzd = *bzd = getbzdsp ()) == 0) return (-1); ret = 1; } (*bzd)->v_jc = hyouka; (*bzd)->j_c = sbn->j_c; (*bzd)->sbn_cnt = sbn->kbcnt; (*bzd)->lnk_br = 0; (*bzd)->lnk_son = 0; (*bzd)->son_v = 0; (*bzd)->sbn = sbn; (*bzd)->kbcnt = 1; sbn->reference++; (*bzd)->bend_m = yomi_sno_tmp; *setflg = 1; return (ret); } /* 大文節の評価関数 とりあえず */ /* sbn に含まれる小文節の評価値の合計 */ int sum_hyouka (sbn) register struct SYO_BNSETSU *sbn; { return (sbn->v_jc + (sbn->parent ? sbn->parent->son_v : 0)); } int ave_hyouka (sbn) register struct SYO_BNSETSU *sbn; { register int len; /* 大文節長 */ len = sbn->j_c - yomi_sno_tmp + 1; return (hyoka_dbn (sbn->son_v, sbn->kbcnt, len)); } /* 大文節の評価値 長さが同じで、前端ベクタも同じ2つの大文節のどちらを 選ぶか決めるために使う 小文節のリスト sbn1 の方が高いと正の値を返す */ int cmp_hyouka (sbn1, sbn2) register struct SYO_BNSETSU *sbn1; register struct SYO_BNSETSU *sbn2; { /* 取りあえず大文節の評価値は、小文節の評価値の平均と考えている */ return (ave_hyouka (sbn1) - ave_hyouka (sbn2)); }