comparison Wnn/jserver/daibn_kai.c @ 0:bbc77ca4def5

initial import
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 13 Dec 2007 04:30:14 +0900
parents
children 790205f476c0
comparison
equal deleted inserted replaced
-1:000000000000 0:bbc77ca4def5
1 /*
2 * $Id: daibn_kai.c,v 1.4 2002/05/12 22:51:16 hiroo Exp $
3 */
4
5 /*
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7 * This file is part of FreeWnn.
8 *
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
10 * 1987, 1988, 1989, 1990, 1991, 1992
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13 * Copyright FreeWnn Project 1999, 2000, 2002
14 *
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 */
31
32 /************************************************
33 * 大文節解析 *
34 ************************************************/
35 #ifdef HAVE_CONFIG_H
36 # include <config.h>
37 #endif
38
39 #include <stdio.h>
40 #if STDC_HEADERS
41 # include <stdlib.h>
42 #elif HAVE_MALLOC_H
43 # include <malloc.h>
44 #endif /* STDC_HEADERS */
45 #include "commonhd.h"
46 #include "de_header.h"
47 #include "kaiseki.h"
48 #include "fzk.h"
49
/* Start position of the reading handed to the current dbn_kai() call.
   Written by dbn_kai(); read by set_daibnsetu() and ave_hyouka(). */
50 static int yomi_sno_tmp;
/* Connection state kept across get_status() calls: set to 2 when
   zentan_able() succeeds, set to 1 by the sentence-head fallback, and that
   fallback is only taken while the value is still below 2.  Nothing in this
   file resets it. */
51 int _status = 0;
/* Highest large-clause score seen during the current dbn_kai() call.
   Reset to _MININT by dbn_kai(), raised by set_daibnsetu(), and used by both
   as the reference value for pruning ("edagari") weak candidates. */
52 static int edagari_hyouka;
53
54 int
55 #ifdef NO_FZK
56 dbn_kai (yomi_sno, yomi_eno, beginvect, endvect, endvect1, nmax, rbzd)
57 #else
58 dbn_kai (yomi_sno, yomi_eno, beginvect, fzkchar, endvect, endvect1, nmax, rbzd)
59 #endif /* NO_FZK */
60 int yomi_sno; /* 被解析文字開始位置 */
61 register int yomi_eno; /* 被解析文字終了位置(の次) */
62 int beginvect; /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */
63 #ifndef NO_FZK
64 w_char *fzkchar; /* 前端の付属語文字列 */
65 #endif /* NO_FZK */
66 int endvect; /* 終端ベクトル */
67 int endvect1; /* 終端ベクトル */
68 register int nmax;
69 struct BZD **rbzd;
70 /* struct BZD *p; p = NULL; として &p を使う。ここに大文節候補の
71 リストが入る */
72 {
73 /* 大文節候補リストのカレントポインタ */
74 int tmp;
75 struct SYO_BNSETSU *db_set;
76 register struct SYO_BNSETSU **sb_que_head;
77 struct BZD *bzd = 0;
78 int dbncnt = 0;
79 int setflg;
80 register struct SYO_BNSETSU *sb_one;
81 struct SYO_BNSETSU *sb_set;
82 register struct SYO_BNSETSU **sb_que_newcomer;
83 int divid;
84 int edagari_hyouka_sbn;
85
86 edagari_hyouka = _MININT;
87 edagari_hyouka_sbn = _MININT;
88 yomi_sno_tmp = yomi_sno;
89
90 sb_que_head = &db_set;
91 *sb_que_head = NULL;
92 if ((tmp = sbn_kai (yomi_sno, yomi_eno, endvect, endvect1, sb_que_head, 1, (struct SYO_BNSETSU *) 0)) <= 0)
93 return (tmp);
94 for (sb_one = *sb_que_head; sb_one != 0; sb_one = sb_one->lnk_br)
95 sb_one->son_v = sum_hyouka (sb_one);
96
97 while (*sb_que_head != NULL)
98 {
99 sb_one = *sb_que_head;
100 *sb_que_head = sb_one->lnk_br;
101 sb_one->son_v = sum_hyouka (sb_one);
102 setflg = 0;
103 if (yomi_eno <= sb_one->j_c + 1)
104 {
105 #ifdef NO_FZK
106 divid = get_status (sb_one->kangovect, beginvect, &sb_one->status);
107 #else
108 divid = get_status (sb_one->kangovect, beginvect, fzkchar, &sb_one->status);
109 #endif /* NO_FZK */
110 if (beginvect != WNN_ALL_HINSI && bzd != 0 &&
111 bzd->j_c == sb_one->j_c && ((sb_one->status == WNN_SENTOU && bzd->sbn->status != WNN_SENTOU) || (sb_one->status == WNN_CONNECT && bzd->sbn->status != WNN_CONNECT)))
112 bzd->v_jc = 0;
113 if ((tmp = set_daibnsetu (rbzd, &bzd, sb_one, &setflg, divid)) < 0)
114 return (tmp);
115 dbncnt += tmp;
116 if (setflg == 0)
117 clr_sbn_node (sb_one);
118 }
119 else
120 {
121 if (kan_ckvt (sentou_no, sb_one->kangovect) == WNN_CONNECT_BK)
122 {
123 divid = 0;
124 sb_one->status = WNN_SENTOU;
125 }
126 else
127 {
128 /* divid = 2; */
129 divid = -1;
130 sb_one->status = WNN_GIJI;
131 }
132 if ((tmp = set_daibnsetu (rbzd, &bzd, sb_one, &setflg, divid)) < 0)
133 return (tmp);
134 dbncnt += tmp;
135
136 if (nmax > sb_one->kbcnt)
137 {
138 /****************************/
139 if (_DIVID (edagari_hyouka_sbn, 3) < sb_one->son_v)
140 {
141 if (edagari_hyouka_sbn < sb_one->son_v)
142 {
143 edagari_hyouka_sbn = sb_one->son_v;
144 }
145 /****************************/
146 sb_que_newcomer = &sb_set;
147 *sb_que_newcomer = NULL;
148 if ((tmp = sbn_kai (sb_one->j_c + 1, yomi_eno, sb_one->kangovect, WNN_VECT_NO, sb_que_newcomer, sb_one->kbcnt + 1, sb_one)) < 0)
149 return (tmp); /* ERROR */
150 /****************************/
151 }
152 else
153 {
154 tmp = 0;
155 }
156 /****************************/
157 if (tmp > 0)
158 {
159 sb_one->reference += tmp;
160 for (sb_one = *sb_que_newcomer; sb_one != 0; sb_one = sb_one->lnk_br)
161 sb_one->son_v = sum_hyouka (sb_one);
162 if (*sb_que_head != 0)
163 *sb_que_head = que_reorder (*sb_que_head, *sb_que_newcomer);
164 else
165 *sb_que_head = *sb_que_newcomer;
166 }
167 else
168 {
169 if (setflg == 0)
170 clr_sbn_node (sb_one);
171 }
172 }
173 else
174 {
175 if (setflg == 0)
176 clr_sbn_node (sb_one);
177 }
178 }
179 }
180 /**********/
181 {
182 struct BZD *bzd_sv;
183 bzd = *rbzd;
184 while (dbncnt > 0 && bzd->v_jc < _DIVID (edagari_hyouka, 2))
185 {
186 *rbzd = bzd->lnk_br;
187 bzd->lnk_br = 0;
188 clr_node (bzd);
189 bzd = *rbzd;
190 dbncnt--;
191 }
192 for (; bzd != 0 && bzd->lnk_br != 0; bzd = bzd->lnk_br)
193 {
194 if (bzd->lnk_br->v_jc < _DIVID (edagari_hyouka, 2))
195 {
196 bzd_sv = bzd->lnk_br->lnk_br;
197 bzd->lnk_br->lnk_br = 0;
198 clr_node (bzd->lnk_br);
199 bzd->lnk_br = bzd_sv;
200 dbncnt--;
201 }
202 }
203 }
204 /**********/
205 return (dbncnt);
206 }
207
208 /* 前(beginvect,fzkchar)と接続できるか調べ、
209 接続できるとき 1
210 大文節の先頭の時 0
211 接続できないとき -1
212 を返す
213 get_jkt_status 参照
214 */
215 int
216 #ifdef NO_FZK
217 get_status (kangovect, beginvect, status)
218 #else
219 get_status (kangovect, beginvect, fzkchar, status)
220 #endif /* NO_FZK */
221 register int kangovect;
222 int beginvect;
223 #ifndef NO_FZK
224 w_char *fzkchar;
225 #endif /* NO_FZK */
226 register short *status;
227 {
228 #ifdef NO_FZK
229 if (zentan_able (kangovect, beginvect) == YES)
230 {
231 #else
232 if (zentan_able (kangovect, beginvect, fzkchar) == YES)
233 {
234 #endif /* NO_FZK */
235 _status = 2;
236 if (beginvect == WNN_ALL_HINSI)
237 {
238 if (kan_ckvt (sentou_no, kangovect) == WNN_CONNECT_BK)
239 {
240 *status = WNN_SENTOU;
241 /*
242 } else if (jentptr == 0) {
243 *status = WNN_GIJI;
244 return (-1);
245 */
246 }
247 else
248 {
249 *status = WNN_NOT_CONNECT;
250 return (-1);
251 }
252 }
253 else if (beginvect == WNN_BUN_SENTOU)
254 {
255 *status = WNN_SENTOU;
256 }
257 else
258 {
259 if (
260 #ifndef NO_FZK
261 (fzkchar == NULL || *fzkchar == NULL) &&
262 #endif /* NO_FZK */
263 beginvect == sentou_no)
264 {
265 *status = WNN_SENTOU;
266 }
267 else
268 {
269 *status = WNN_CONNECT;
270 return (1);
271 }
272 }
273 }
274 else if (_status < 2 && kan_ckvt (sentou_no, kangovect) == WNN_CONNECT_BK)
275 {
276 _status = 1;
277 *status = WNN_SENTOU;
278 }
279 else
280 {
281 /*
282 if (jentptr == 0)
283 *status = WNN_GIJI;
284 else
285 */
286 *status = WNN_NOT_CONNECT;
287 return (-1);
288 }
289 return (0);
290 }
291
292 /* 文節先頭になれるか */
293 /* 前端ベクタのチェック */
294 int
295 #ifdef NO_FZK
296 zentan_able (v, hinsi)
297 #else
298 zentan_able (v, hinsi, fzkchar)
299 #endif /* NO_FZK */
300 int v;
301 register int hinsi; /* 前端ベクタ(-1:文節先頭、-2:なんでも)品詞No. */
302 #ifndef NO_FZK
303 w_char *fzkchar;
304 #endif /* NO_FZK */
305 {
306 #ifndef NO_FZK
307 register int ll;
308 unsigned short *buf;
309 struct ICHBNP *ichbnpbp;
310 w_char *fzk_buf;
311 int fzkcnt;
312 int fzklen;
313 w_char *rev_fzk ();
314 #endif /* NO_FZK */
315
316 if (hinsi == WNN_ALL_HINSI)
317 {
318 return (YES);
319 }
320 else if (hinsi == WNN_BUN_SENTOU)
321 {
322 return (kan_ckvt (sentou_no, v));
323 }
324 #ifndef NO_FZK
325 else
326 {
327 if (fzkchar == NULL || *fzkchar == NULL)
328 {
329 if ((ll = wnn_get_fukugou_component_body (hinsi, &buf)) == 0)
330 {
331 error1 ("wnn_get_fukugou_component:error in zentan_able.");
332 return (-1);
333 }
334 for (; ll > 0; ll--)
335 {
336 if (kan_ckvt (*buf, v) == WNN_CONNECT_BK)
337 return (YES);
338 buf++;
339 }
340 }
341 else
342 {
343 fzklen = Strlen (fzkchar);
344 fzk_buf = rev_fzk (fzkchar, fzklen);
345 if (fzk_buf == 0)
346 return (NO); /* ええかげん */
347
348 fzkcnt = fzk_kai (fzk_buf, fzk_buf + fzklen, v, WNN_VECT_NO, &ichbnpbp);
349 if ((fzkcnt <= 0) || (getfzkoh (ichbnpbp, fzkcnt - 1)->offset != fzklen))
350 {
351 freeibsp (ichbnpbp);
352 return (NO);
353 }
354 for (ll = wnn_get_fukugou_component_body (hinsi, &buf); ll > 0; ll--)
355 {
356 if (kan_ck_vector (*buf, getfzkoh (ichbnpbp, fzkcnt - 1)->vector) == WNN_CONNECT_BK)
357 {
358 freeibsp (ichbnpbp);
359 return (YES);
360 }
361 buf++;
362 }
363 freeibsp (ichbnpbp);
364 }
365 }
366 #endif /* NO_FZK */
367 return (NO);
368 }
369
370 #ifndef NO_FZK
371 w_char *
372 rev_fzk (fzkchar, len)
373 register w_char *fzkchar;
374 int len;
375 {
376 static w_char *fzk = 0;
377 static int fzk_len = 0;
378
379 if (fzk_len < len)
380 {
381 if (fzk != 0)
382 free (fzk);
383 if ((fzk = (w_char *) malloc ((len + 1) * sizeof (w_char))) == 0)
384 {
385 wnn_errorno = WNN_MALLOC_ERR;
386 fzk_len = 0;
387 return (fzk);
388 }
389 fzk_len = len;
390 }
391
392 (void) Sreverse (fzk, fzkchar);
393 return (fzk);
394 }
395 #endif /* NO_FZK */
396
397 /* 小文節の並び替え 長さとベクタでソートする */
398 /* que の先頭を返す */
399 /* 長さもベクタも同じなら評価値の高い方だけにする */
400 struct SYO_BNSETSU *
401 que_reorder (que, new)
402 register struct SYO_BNSETSU *que, *new;
403 {
404 struct SYO_BNSETSU *que_sv;
405 register struct SYO_BNSETSU *q;
406 register struct SYO_BNSETSU *tmp;
407 register struct SYO_BNSETSU *next;
408 int flg;
409
410 if (new == 0)
411 return (que);
412 if ((flg = sbjunjo (que, new)) < 0)
413 {
414 if (flg == -2)
415 {
416 if (cmp_hyouka (new, que) > 0)
417 {
418 tmp = que->lnk_br;
419 next = new->lnk_br;
420 clr_sbn_node (que);
421 que = new;
422 que->lnk_br = tmp;
423 }
424 else
425 {
426 next = new->lnk_br;
427 clr_sbn_node (new);
428 }
429 que_sv = que;
430 new = next;
431 }
432 else
433 que_sv = new;
434 }
435 else
436 que_sv = que;
437
438 while (new != NULL)
439 {
440 next = new->lnk_br;
441 if ((flg = sbjunjo (que, new)) < 0)
442 {
443 if (flg == -2)
444 {
445 if (cmp_hyouka (new, que) > 0)
446 {
447 for (q = que_sv; q->lnk_br != que; q = q->lnk_br);
448 tmp = que->lnk_br;
449 clr_sbn_node (que);
450 que = q->lnk_br = new;
451 que->lnk_br = tmp;
452 }
453 else
454 {
455 clr_sbn_node (new);
456 }
457 }
458 else
459 {
460 tmp = que;
461 que = new;
462 que->lnk_br = tmp;
463 }
464 new = next;
465 continue;
466 }
467 while (((flg = sbjunjo (que, new)) > 0) && (que)->lnk_br != NULL)
468 que = (que->lnk_br);
469 tmp = que->lnk_br;
470 if (flg == -2)
471 {
472 if (cmp_hyouka (new, que) > 0)
473 {
474 for (q = que_sv; q->lnk_br != que; q = q->lnk_br);
475 clr_sbn_node (que);
476 que = q->lnk_br = new;
477 que->lnk_br = tmp;
478 }
479 else
480 {
481 clr_sbn_node (new);
482 }
483 }
484 else
485 {
486 que->lnk_br = new;
487 new->lnk_br = tmp;
488 }
489 new = next;
490 }
491 return (que_sv);
492 }
493
494 /* que と new の順序
495 1: que が前
496 0: que の後に new
497 -1:new が前
498 -2:同一順位 */
499 int
500 sbjunjo (que, new)
501 register struct SYO_BNSETSU *que, *new;
502 {
503 if (new == 0)
504 return (1);
505 if (que->j_c > new->j_c)
506 return (-1);
507 if (que->j_c < new->j_c)
508 {
509 if (que->lnk_br == 0)
510 return (0);
511 if (que->lnk_br->j_c > new->j_c)
512 return (0);
513 if (que->lnk_br->j_c < new->j_c)
514 return (1);
515 if (que->lnk_br->kangovect > new->kangovect)
516 return (0);
517 return (1);
518 }
519 if (que->kangovect == new->kangovect)
520 return (-2);
521 if (que->kangovect > new->kangovect)
522 return (-1);
523 if (que->lnk_br == 0)
524 return (0);
525 if (que->lnk_br->j_c > new->j_c)
526 return (0);
527 if (que->lnk_br->kangovect > new->kangovect)
528 return (0);
529 return (1);
530 }
531
532
533 /* 文節の先頭になれれば、大文節の候補をセットする
534 bzd に小文節を追加したなら 1 追加しなければ 0 を返す。 */
535 int
536 set_daibnsetu (rbzd, bzd, sbn, setflg, divid)
537 struct BZD **rbzd;
538 register struct BZD **bzd;
539 register struct SYO_BNSETSU *sbn;
540 int *setflg;
541 int divid;
542 {
543 register int ret;
544 int hyouka;
545
546 hyouka = DIVID_HYOUKA (ave_hyouka (sbn), divid);
547 if (hyouka > edagari_hyouka)
548 edagari_hyouka = hyouka;
549 else if (hyouka < _DIVID (edagari_hyouka, 2))
550 return (0);
551
552 if (*bzd != 0)
553 {
554 if ((*bzd)->j_c == sbn->j_c)
555 {
556 /* 同じ長さ */
557 if ((*bzd)->v_jc >= hyouka)
558 return (0);
559 clr_sbn_node ((*bzd)->sbn);
560 ret = 0;
561 }
562 else
563 {
564 if (((*bzd)->lnk_br = getbzdsp ()) == 0)
565 return (-1);
566 (*bzd) = (*bzd)->lnk_br;
567 ret = 1;
568 }
569 }
570 else
571 {
572 if ((*rbzd = *bzd = getbzdsp ()) == 0)
573 return (-1);
574 ret = 1;
575 }
576 (*bzd)->v_jc = hyouka;
577 (*bzd)->j_c = sbn->j_c;
578 (*bzd)->sbn_cnt = sbn->kbcnt;
579 (*bzd)->lnk_br = 0;
580 (*bzd)->lnk_son = 0;
581 (*bzd)->son_v = 0;
582 (*bzd)->sbn = sbn;
583 (*bzd)->kbcnt = 1;
584 sbn->reference++;
585 (*bzd)->bend_m = yomi_sno_tmp;
586 *setflg = 1;
587 return (ret);
588 }
589
590 /* 大文節の評価関数 とりあえず */
591
592 /* sbn に含まれる小文節の評価値の合計 */
593 int
594 sum_hyouka (sbn)
595 register struct SYO_BNSETSU *sbn;
596 {
597 return (sbn->v_jc + (sbn->parent ? sbn->parent->son_v : 0));
598 }
599
600 int
601 ave_hyouka (sbn)
602 register struct SYO_BNSETSU *sbn;
603 {
604 register int len; /* 大文節長 */
605
606 len = sbn->j_c - yomi_sno_tmp + 1;
607 return (hyoka_dbn (sbn->son_v, sbn->kbcnt, len));
608 }
609
/*
 * Compare the evaluations of two large clauses.  Used to decide which of
 * two large clauses with the same length and the same front-end vector to
 * keep.  Returns a positive value when the small-clause chain sbn1 scores
 * higher than sbn2.
 */
int
cmp_hyouka (sbn1, sbn2)
     register struct SYO_BNSETSU *sbn1;
     register struct SYO_BNSETSU *sbn2;
{
  int score1;
  int score2;

  /* For now the evaluation of a large clause is the value computed by
     ave_hyouka() from its small-clause evaluations. */
  score1 = ave_hyouka (sbn1);
  score2 = ave_hyouka (sbn2);
  return (score1 - score2);
}