0
|
1 /*
|
|
2 * $Id: bnsetu_kai.c,v 1.5 2003/05/11 18:35:54 hiroo Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2003
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This program is free software; you can redistribute it and/or modify
|
|
18 * it under the terms of the GNU General Public License as published by
|
|
19 * the Free Software Foundation; either version 2 of the License, or
|
|
20 * (at your option) any later version.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
30 */
|
|
31
|
|
32 /************************************************
|
|
33 * 小文節解析 *
|
|
34 ************************************************/
|
|
35 #include <stdio.h>
|
|
36 #include <ctype.h>
|
|
37 #include "commonhd.h"
|
|
38 #include "de_header.h"
|
|
39 #include "jdata.h"
|
|
40
|
|
41 #include "fzk.h"
|
|
42 #include "kaiseki.h"
|
|
43
|
|
44 int
|
|
45 sbn_kai (yomi_sno, yomi_eno, endvect, endvect1, tsbnptr, bnst_num, parent)
|
|
46 int yomi_sno;
|
|
47 int yomi_eno;
|
|
48 int endvect; /* 終端ベクタ */
|
|
49 int endvect1; /* 終端ベクタ */
|
|
50 struct SYO_BNSETSU **tsbnptr; /* 小文節解析結果 */
|
|
51 int bnst_num; /* これまでに解析した小文節数 */
|
|
52 struct SYO_BNSETSU *parent; /* 親の幹語ノード */
|
|
53 {
|
|
54 int fzkcnt, hyokaval, hyokaval0 = 0, hinsidt, sbncnt;
|
|
55 unsigned short *hinsi_buf;
|
|
56 int hinsidt_fk; /* 複合品詞のセーブ */
|
|
57 register int i, /* 幹語の終わりのインデックス */
|
|
58 j, /* 文節始めのインデックス */
|
|
59 t, ii;
|
|
60 struct jdata *jentptr;
|
|
61 register struct SYO_BNSETSU *sbnptr = NULL;
|
|
62 struct SYO_BNSETSU *sbnptr_top = 0;
|
|
63 struct ICHBNP *ichbnpbp;
|
|
64 int k, ll;
|
|
65 struct SYO_BNSETSU *kanptr;
|
|
66 struct SYO_BNSETSU *kanptr_tmp = NULL;
|
|
67 struct SYO_BNSETSU *kanptr_before;
|
|
68 struct SYO_BNSETSU *giji_sbn;
|
|
69 int get_giji_flg = -1; /* 擬似文節を作れたか */
|
|
70
|
|
71 enum FLG
|
|
72 {
|
|
73 set,
|
|
74 noset
|
|
75 }
|
|
76 setflg;
|
|
77 enum FLG kanflg; /* 同じベクタの品詞があったか否か */
|
|
78 enum GFLG
|
|
79 {
|
|
80 get,
|
|
81 noget
|
|
82 }
|
|
83 getflg;
|
|
84 struct SYO_BNSETSU *getsbnsp ();
|
|
85 int kangovect;
|
|
86 int connect_flg;
|
|
87
|
|
88 if ((yomi_sno == yomi_eno) || (fzk_ckvt (endvect) == NO && fzk_ckvt (endvect1) == NO))
|
|
89 return (NOTHING);
|
|
90
|
|
91 if ((fzkcnt = fzk_kai (&bun[yomi_sno], &bun[yomi_eno], endvect, endvect1, &ichbnpbp)) <= 0)
|
|
92 return (fzkcnt); /* ERROR */
|
|
93 for (ii = 0; ii < fzkcnt; ii++)
|
|
94 getfzkoh (ichbnpbp, ii)->offset += yomi_sno;
|
|
95
|
|
96 for (ii = 0; ii < fzkcnt; ii++)
|
|
97 {
|
|
98 i = getfzkoh (ichbnpbp, ii)->offset;
|
|
99 if (jmtp[i] == (struct jdata **) UN_KNOWN) /* もう引いた? */
|
|
100 {
|
|
101 jmt_set (i); /* 辞書引き */
|
|
102 }
|
|
103 }
|
|
104
|
|
105 j = j_max (ichbnpbp, fzkcnt);
|
|
106 j = (j >= yomi_eno) ? yomi_eno - 1 : j;
|
|
107
|
|
108 setflg = noset;
|
|
109 if ((giji_sbn = getsbnsp ()) == NO)
|
|
110 return (-1); /* ERROR */
|
|
111 giji_sbn->kbcnt = bnst_num;
|
|
112 giji_sbn->bend_m = yomi_sno;
|
|
113 giji_sbn->parent = parent;
|
|
114
|
|
115 /* 疑似文節を先に取り出します(もしあれば) */
|
|
116 if (getgiji (yomi_sno, yomi_eno, giji_sbn, ichbnpbp, fzkcnt, bnst_num) >= 0)
|
|
117 {
|
|
118 sbncnt = 1;
|
|
119 getflg = get;
|
|
120 get_giji_flg = giji_sbn->j_c;
|
|
121 if (giji_sbn->j_c > j)
|
|
122 {
|
|
123 sbnptr_top = giji_sbn;
|
|
124 get_giji_flg = -1;
|
|
125 kanptr_tmp = giji_sbn->lnk_br;
|
|
126 }
|
|
127 }
|
|
128 else
|
|
129 {
|
|
130 sbnptr = giji_sbn;
|
|
131 sbncnt = 0;
|
|
132 getflg = noget;
|
|
133 }
|
|
134
|
|
135 for (; j >= yomi_sno; j--)
|
|
136 { /* 文節の長さ */
|
|
137 if ((setflg == noset) && (get_giji_flg == j))
|
|
138 {
|
|
139 giji_sbn->lnk_br = sbnptr_top;
|
|
140 sbnptr_top = giji_sbn;
|
|
141 setflg = set;
|
|
142 kanptr_tmp = giji_sbn->lnk_br;
|
|
143 }
|
|
144 for (ii = 0; (ii < fzkcnt) && ((i = (getfzkoh (ichbnpbp, ii)->offset)) <= j); ii++)
|
|
145 { /* 付属語 */
|
|
146 jentptr = (struct jdata *) C (i, j);
|
|
147 for (; jentptr; jentptr = jentptr->jptr)
|
|
148 { /* 辞書 */
|
|
149 for (t = 0; t < (jentptr->kosuu); t++)
|
|
150 { /* 幹語 */
|
|
151 if (((jentptr->hindo[t] & 0x7f) == 0x7f) || (jentptr->hindo_in && (jentptr->hindo_in[t] & 0x7f) == 0x7f))
|
|
152 continue; /*コメントアウトされた辞書エントリ */
|
|
153 #ifdef CONVERT_with_SiSheng
|
|
154 hyokaval = hyoka1 (jentptr->hindo[t],
|
|
155 (jentptr->hindo_in == 0 ? 0 : jentptr->hindo_in[t]),
|
|
156 (jentptr->sisheng == 0 ? 0 : diff_sisheng (jentptr->sisheng[t], jentptr->sisheng_int)), j - i / 2 - yomi_sno / 2 + 1, dic_table[jentptr->jishono].nice);
|
|
157 #else
|
|
158 hyokaval = hyoka1 (jentptr->hindo[t], (jentptr->hindo_in == 0 ? 0 : jentptr->hindo_in[t]), j - i + 1, j - yomi_sno + 1, dic_table[jentptr->jishono].nice);
|
|
159 #endif /* CONVERT_with_SiSheng */
|
|
160 if (bnst_num != 1)
|
|
161 {
|
|
162 if (parent != 0 && parent->status_bkwd == NO)
|
|
163 /* 前の文節が endvect に接続できないものは、
|
|
164 評価値を下げる */
|
|
165 hyokaval0 = _DIVID (hyokaval, 2);
|
|
166 else
|
|
167 hyokaval0 = hyokaval;
|
|
168 }
|
|
169 hinsidt_fk = hinsidt = jentptr->hinsi[t];
|
|
170 if ((ll = wnn_get_fukugou_component_body (hinsidt_fk, &hinsi_buf)) == 0)
|
|
171 {
|
|
172 error1 ("wnn_get_fukugou_component:erroe in bunsetukai.");
|
|
173 return (-1);
|
|
174 }
|
|
175 for (; ll > 0; ll--)
|
|
176 {
|
|
177 hinsidt = *hinsi_buf;
|
|
178 hinsi_buf++;
|
|
179 /* ここから後は単純品詞だけ考えればいい */
|
|
180 connect_flg = kan_ck_vector (hinsidt, (getfzkoh (ichbnpbp, ii)->vector));
|
|
181 if ((connect_flg == WNN_CONNECT_BK || (bnst_num == 1 && kan_ck_vector (hinsidt, (getfzkoh1 (ichbnpbp, ii)->vector)) == WNN_CONNECT_BK)))
|
|
182 {
|
|
183 /* 接続できる */
|
|
184 if (bnst_num == 1)
|
|
185 /* endvect に接続できないものは評価値を下げる */
|
|
186 hyokaval0 = (connect_flg != WNN_NOT_CONNECT_BK) ? hyokaval : _DIVID (hyokaval, 2);
|
|
187 kangovect = ft->kango_hinsi_area[hinsidt];
|
|
188 if (getflg == get)
|
|
189 {
|
|
190 getflg = noget;
|
|
191 if (0 == (sbnptr = getsbnsp ()))
|
|
192 return (-1); /* ERROR */
|
|
193 sbnptr->kbcnt = bnst_num;
|
|
194 sbnptr->bend_m = yomi_sno;
|
|
195 sbnptr->parent = parent;
|
|
196 }
|
|
197 sbnptr->j_c = j;
|
|
198
|
|
199 if (setflg == noset)
|
|
200 { /* この長さの文節は初めて */
|
|
201 sbnptr->lnk_br = sbnptr_top;
|
|
202 sbnptr_top = sbnptr;
|
|
203 setflg = set;
|
|
204 sbnptr->v_jc = hyokaval0;
|
|
205 sbnptr->t_jc = t;
|
|
206 sbnptr->jentptr = jentptr;
|
|
207 sbnptr->hinsi_fk = hinsidt_fk;
|
|
208 sbnptr->kangovect = kangovect;
|
|
209 sbnptr->i_jc = i;
|
|
210 /* yoku wakaran 9/8
|
|
211 if (endvect1 == WNN_VECT_NO)
|
|
212 sbnptr->status_bkwd = WNN_NOT_CONNECT_BK;
|
|
213 else
|
|
214 */
|
|
215 sbnptr->status_bkwd = connect_flg;
|
|
216 kanptr_tmp = sbnptr->lnk_br;
|
|
217 getflg = get;
|
|
218 sbncnt++;
|
|
219 }
|
|
220 else
|
|
221 {
|
|
222 kanflg = noset;
|
|
223 for (kanptr_before = kanptr = sbnptr_top; kanptr != kanptr_tmp; kanptr_before = kanptr, kanptr = kanptr->lnk_br)
|
|
224 {
|
|
225 if (kanptr->kangovect == kangovect)
|
|
226 {
|
|
227 /* 同じベクタの品詞があった */
|
|
228 if (hyokaval0 > kanptr->v_jc)
|
|
229 {
|
|
230 /* こっちの方が評価値が高い */
|
|
231 kanptr->v_jc = hyokaval0;
|
|
232 kanptr->t_jc = t;
|
|
233 kanptr->jentptr = jentptr;
|
|
234 kanptr->hinsi_fk = hinsidt_fk;
|
|
235 kanptr->i_jc = i;
|
|
236 /* yoku wakaran 9/8
|
|
237 if (endvect1 == WNN_VECT_NO)
|
|
238 kanptr->status_bkwd = WNN_NOT_CONNECT_BK;
|
|
239 else
|
|
240 */
|
|
241 kanptr->status_bkwd = connect_flg;
|
|
242 }
|
|
243 kanflg = set;
|
|
244 break;
|
|
245 }
|
|
246 else if (kanptr->kangovect > kangovect)
|
|
247 {
|
|
248 /* 同じベクタの品詞がなかった */
|
|
249 break;
|
|
250 }
|
|
251 } /* SORT & INSERT SYO_BN */
|
|
252
|
|
253 if (kanflg == noset)
|
|
254 {
|
|
255 /* 同じベクタの品詞がなかった */
|
|
256 if (kanptr == sbnptr_top)
|
|
257 {
|
|
258 sbnptr->lnk_br = kanptr;
|
|
259 sbnptr_top = sbnptr;
|
|
260 }
|
|
261 else
|
|
262 {
|
|
263 sbnptr->lnk_br = kanptr;
|
|
264 kanptr_before->lnk_br = sbnptr;
|
|
265 }
|
|
266 sbnptr->v_jc = hyokaval0;
|
|
267 sbnptr->t_jc = t;
|
|
268 sbnptr->jentptr = jentptr;
|
|
269 sbnptr->hinsi_fk = hinsidt_fk;
|
|
270 sbnptr->kangovect = kangovect;
|
|
271 sbnptr->i_jc = i;
|
|
272 /* yoku wakaran 9/8
|
|
273 if (endvect1 == WNN_VECT_NO)
|
|
274 sbnptr->status_bkwd = WNN_NOT_CONNECT_BK;
|
|
275 else
|
|
276 */
|
|
277 sbnptr->status_bkwd = connect_flg;
|
|
278 sbncnt++;
|
|
279 getflg = get;
|
|
280 }
|
|
281 }
|
|
282 } /* 接続できる */
|
|
283 } /* 単純品詞 */
|
|
284 } /* 幹語 */
|
|
285 } /* 辞書 */
|
|
286 } /* 付属語 */
|
|
287 setflg = noset; /* 次の長さの漢語はまだsetして無い */
|
|
288 } /* 文節の長さ */
|
|
289 if (sbncnt == NOTHING)
|
|
290 { /* 幹語がなかった */
|
|
291 for (k = fzkcnt - 1; k >= 0; k--)
|
|
292 {
|
|
293 if (kan_ck_vector (katakanago_no, getfzkoh (ichbnpbp, k)->vector) == WNN_CONNECT_BK)
|
|
294 {
|
|
295 k = getgiji (yomi_sno, yomi_eno, sbnptr, ichbnpbp, k + 1, bnst_num);
|
|
296 break;
|
|
297 }
|
|
298 }
|
|
299 if (k < 0)
|
|
300 {
|
|
301 if (bnst_num == 1)
|
|
302 {
|
|
303 /* 何が何でも疑似文節を作るんだい */
|
|
304 getgiji_f (yomi_sno, sbnptr);
|
|
305 sbncnt = 1;
|
|
306 sbnptr_top = sbnptr;
|
|
307 #ifdef nodef
|
|
308 wnn_errorno = WNN_NO_KOUHO;
|
|
309 error1 ("Cannot get bunsetsu kouho in bunsetu-kaiseki.\n");
|
|
310 #endif /* nodef */
|
|
311 }
|
|
312 else
|
|
313 {
|
|
314 freesbn (sbnptr);
|
|
315 sbncnt = 0;
|
|
316 sbnptr_top = 0;
|
|
317 }
|
|
318 }
|
|
319 else
|
|
320 {
|
|
321 sbncnt = 1;
|
|
322 sbnptr_top = sbnptr;
|
|
323 }
|
|
324 }
|
|
325 else if (getflg == noget)
|
|
326 {
|
|
327 freesbn (sbnptr);
|
|
328 }
|
|
329 *tsbnptr = sbnptr_top; /* 短いものから並べる */
|
|
330 freeibsp (ichbnpbp);
|
|
331 return (sbncnt);
|
|
332 }
|
|
333
|
|
334 /*******************************************/
|
|
335 /* max value in maxj[] search routine */
|
|
336 /*******************************************/
|
|
337 int
|
|
338 j_max (fzkbnp, fzkcnt)
|
|
339 register struct ICHBNP *fzkbnp;
|
|
340 int fzkcnt; /* max number of fzkkouho table */
|
|
341 {
|
|
342 register int i, mxj = -1, k, ofst;
|
|
343
|
|
344 for (i = 0; i < fzkcnt; i++)
|
|
345 {
|
|
346 if ((ofst = getfzkoh (fzkbnp, i)->offset) < maxchg)
|
|
347 if ((k = maxj[ofst]) > mxj)
|
|
348 mxj = k;
|
|
349 }
|
|
350 return (mxj);
|
|
351 }
|
|
352
|
|
353 /**********************************************/
|
|
354 /* 疑似幹語の獲得 */
|
|
355 /**********************************************/
|
|
356 #ifdef KOREAN
|
|
357 int
|
|
358 getgiji (yomi_sno, yomi_eno, sbnptr, fzkbnp, fzkcnt, bnst_num)
|
|
359 int yomi_sno;
|
|
360 int yomi_eno;
|
|
361 struct SYO_BNSETSU *sbnptr;
|
|
362 struct ICHBNP *fzkbnp;
|
|
363 int fzkcnt;
|
|
364 int bnst_num;
|
|
365 {
|
|
366 register int j_end;
|
|
367 int j_end_sv;
|
|
368 int *vector;
|
|
369 int *vector1;
|
|
370 int giji_hinsi;
|
|
371 int giji_hindo;
|
|
372 int len = 0;
|
|
373 w_char c = (w_char) 0;
|
|
374
|
|
375 j_end = getfzkoh (fzkbnp, fzkcnt - 1)->offset;
|
|
376 vector = getfzkoh (fzkbnp, fzkcnt - 1)->vector;
|
|
377 vector1 = getfzkoh1 (fzkbnp, fzkcnt - 1)->vector;
|
|
378
|
|
379 /* 分類 */
|
|
380 if ((isascii (c = bun[j_end]) && isdigit (c)) || isjdigit (c))
|
|
381 { /* 数字 */
|
|
382 giji_hinsi = suuji_no;
|
|
383 giji_hindo = c_env->suuji_val;
|
|
384 }
|
|
385 else if ((isascii (c) && isparen_e (c)) || isjparen_e (c))
|
|
386 { /* 閉括弧 */
|
|
387 giji_hinsi = toji_kakko_no;
|
|
388 giji_hindo = c_env->toji_kakko_val;
|
|
389 }
|
|
390 else if (is_g_eisuu (c))
|
|
391 { /* アスキー(アルファベット, 数字) */
|
|
392 giji_hinsi = eisuu_no;
|
|
393 giji_hindo = c_env->eisuu_val;
|
|
394 }
|
|
395 else if ((isascii (c) && isparen_s (c)) || isjparen_s (c))
|
|
396 { /* 開括弧 */
|
|
397 giji_hinsi = kai_kakko_no;
|
|
398 giji_hindo = c_env->kaikakko_val;
|
|
399 }
|
|
400 else if (ishangul (c))
|
|
401 { /* Hangul */
|
|
402 giji_hinsi = katakanago_no;
|
|
403 giji_hindo = 0;
|
|
404 }
|
|
405 else
|
|
406 { /* 記号、漢字 */
|
|
407 giji_hinsi = kigou_no;
|
|
408 giji_hindo = c_env->kigou_val;
|
|
409 }
|
|
410
|
|
411 /* 接続可能か */
|
|
412 if ((sbnptr->status_bkwd = kan_ck_vector (giji_hinsi, vector)) == WNN_CONNECT_BK)
|
|
413 goto _Can_connect;
|
|
414 if (bnst_num != 1)
|
|
415 return (-1);
|
|
416 if (kan_ck_vector (giji_hinsi, vector1) == WNN_NOT_CONNECT_BK)
|
|
417 {
|
|
418 if (giji_hinsi != suuji_no || kan_ck_vector (eisuu_no, vector1) == WNN_NOT_CONNECT_BK)
|
|
419 return (-1);
|
|
420 else
|
|
421 {
|
|
422 giji_hinsi = eisuu_no;
|
|
423 giji_hindo = c_env->eisuu_val;
|
|
424 }
|
|
425 }
|
|
426
|
|
427 _Can_connect:
|
|
428
|
|
429 /* 長さを調べる */
|
|
430 if (giji_hinsi == suuji_no)
|
|
431 {
|
|
432 for (; (j_end + len < yomi_eno) && ((isascii (bun[j_end + len]) && isdigit (bun[j_end + len])) || isjdigit (bun[j_end + len])); len++)
|
|
433 ;
|
|
434 if ((j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len]))
|
|
435 {
|
|
436 /* 英数 */
|
|
437 for (len++; (j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++)
|
|
438 ;
|
|
439 if (kan_ck_vector (eisuu_no, vector) == WNN_NOT_CONNECT_BK && (bnst_num != 1 || kan_ck_vector (eisuu_no, vector1)))
|
|
440 return (-1);
|
|
441 giji_hinsi = eisuu_no;
|
|
442 }
|
|
443 }
|
|
444 else if (giji_hinsi == eisuu_no)
|
|
445 {
|
|
446 for (; ((j_end + len) < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++)
|
|
447 ;
|
|
448 }
|
|
449 else if (giji_hinsi == toji_kakko_no)
|
|
450 { /* 閉括弧 */
|
|
451 len = 1;
|
|
452 }
|
|
453 else if (giji_hinsi == kigou_no)
|
|
454 { /* 記号 */
|
|
455 len = 1;
|
|
456 }
|
|
457 else if (giji_hinsi == kai_kakko_no)
|
|
458 { /* 開括弧 */
|
|
459 len = 1;
|
|
460 }
|
|
461 else if (giji_hinsi == giji_no)
|
|
462 {
|
|
463 len = 1;
|
|
464 }
|
|
465 else if (giji_hinsi == katakanago_no)
|
|
466 { /* Hangul */
|
|
467 for (len = 1; (j_end + len < yomi_eno) && ishangul (bun[j_end + len]); len++)
|
|
468 ;
|
|
469 }
|
|
470 else
|
|
471 {
|
|
472 if (j_end >= yomi_eno)
|
|
473 len = 0;
|
|
474 else
|
|
475 len = 1;
|
|
476 }
|
|
477
|
|
478 /* スペースは、そのあとの疑似文節にくっつける */
|
|
479 if ((giji_hinsi == eisuu_no) || (giji_hinsi == suuji_no) || (giji_hinsi == katakanago_no))
|
|
480 {
|
|
481 for (; ((j_end + len) < yomi_eno) && (isspace (bun[j_end + len]) || (isjspace (c))); len++)
|
|
482 ;
|
|
483 }
|
|
484
|
|
485 _Only_Fuzokugo:
|
|
486 /* 評価値の決定 */
|
|
487 sbnptr->v_jc = HYOKAVAL (giji_hindo, len, len + j_end - yomi_sno);
|
|
488 sbnptr->i_jc = j_end;
|
|
489 if (giji_hinsi == katakanago_no)
|
|
490 {
|
|
491 sbnptr->t_jc = WNN_KATAKANA;
|
|
492 sbnptr->v_jc = 0;
|
|
493 }
|
|
494 else if (giji_hinsi == suuji_no)
|
|
495 {
|
|
496 sbnptr->t_jc = c_env->giji.number;
|
|
497 }
|
|
498 else if (isascii (c))
|
|
499 {
|
|
500 if (giji_hinsi == eisuu_no)
|
|
501 {
|
|
502 sbnptr->t_jc = c_env->giji.eisuu;
|
|
503 }
|
|
504 else if (giji_hinsi == kigou_no || giji_hinsi == toji_kakko_no || giji_hinsi == kai_kakko_no)
|
|
505 {
|
|
506 sbnptr->t_jc = c_env->giji.kigou;
|
|
507 }
|
|
508 else
|
|
509 {
|
|
510 sbnptr->t_jc = -1;
|
|
511 }
|
|
512 sbnptr->v_jc += 2; /* 次候補よりも評価値を上げるため */
|
|
513 }
|
|
514 else
|
|
515 {
|
|
516 sbnptr->t_jc = -1;
|
|
517 }
|
|
518 sbnptr->hinsi_fk = giji_hinsi;
|
|
519 sbnptr->jentptr = 0;
|
|
520 sbnptr->kangovect = ft->kango_hinsi_area[giji_hinsi];
|
|
521 sbnptr->j_c = j_end + len - 1;
|
|
522 return (len);
|
|
523 }
|
|
524 #else /* !KOREAN */
|
|
525 int
|
|
526 getgiji (yomi_sno, yomi_eno, sbnptr, fzkbnp, fzkcnt, bnst_num)
|
|
527 int yomi_sno;
|
|
528 int yomi_eno;
|
|
529 struct SYO_BNSETSU *sbnptr;
|
|
530 struct ICHBNP *fzkbnp;
|
|
531 int fzkcnt;
|
|
532 int bnst_num;
|
|
533 {
|
|
534 register int j_end;
|
|
535 int j_end_sv;
|
|
536 int *vector;
|
|
537 #ifndef CHINESE
|
|
538 int *vector1;
|
|
539 #endif
|
|
540 int giji_hinsi;
|
|
541 int giji_hindo;
|
|
542 int len = 0;
|
|
543 w_char c = (w_char) 0;
|
|
544
|
|
545 j_end = getfzkoh (fzkbnp, fzkcnt - 1)->offset;
|
|
546 vector = getfzkoh (fzkbnp, fzkcnt - 1)->vector;
|
|
547 #ifndef CHINESE
|
|
548 vector1 = getfzkoh1 (fzkbnp, fzkcnt - 1)->vector;
|
|
549 #endif
|
|
550
|
|
551 if (j_end >= yomi_eno)
|
|
552 { /* 付属語だけの場合 */
|
|
553 giji_hinsi = fuzokugo_no;
|
|
554 giji_hindo = c_env->fuzokogo_val;
|
|
555 len = 0;
|
|
556 goto _Only_Fuzokugo;
|
|
557 }
|
|
558 /*#ifdef not_use*/
|
|
559 if (isjhira (bun[j_end])) /* ひらがなならバイバイ */
|
|
560 return (-1);
|
|
561 /*#endif*/
|
|
562 for (j_end_sv = j_end; j_end < yomi_eno; j_end++)
|
|
563 {
|
|
564 /* スペースは、その前の疑似文節にくっつける */
|
|
565 if (!(isspace (c = bun[j_end])) && !(isjspace (c)))
|
|
566 {
|
|
567 if (TOKUSYU (c) || check_bar_katakana (j_end, yomi_eno) || isjhira (c) || isjkanji (c))
|
|
568 { /* 片仮名 ひらがな 漢字 */
|
|
569 j_end = j_end_sv;
|
|
570 }
|
|
571 break;
|
|
572 }
|
|
573 }
|
|
574
|
|
575 if (j_end >= yomi_eno)
|
|
576 { /* スペースだけの場合 */
|
|
577 giji_hinsi = fuzokugo_no;
|
|
578 giji_hindo = c_env->fuzokogo_val;
|
|
579 len = 0;
|
|
580 goto _Only_Fuzokugo;
|
|
581 }
|
|
582
|
|
583 /* 分類 */
|
|
584 if ((isascii (c = bun[j_end]) && isdigit (c)) || isjdigit (c))
|
|
585 { /* 数字 */
|
|
586 giji_hinsi = suuji_no;
|
|
587 giji_hindo = c_env->suuji_val;
|
|
588 }
|
|
589 else if ((isascii (c) && isparen_e (c)) || isjparen_e (c))
|
|
590 { /* 閉括弧 */
|
|
591 giji_hinsi = toji_kakko_no;
|
|
592 giji_hindo = c_env->toji_kakko_val;
|
|
593 }
|
|
594 else if (TOKUSYU (c) || check_bar_katakana (j_end, yomi_eno))
|
|
595 { /* 片仮名 */
|
|
596 giji_hinsi = katakanago_no;
|
|
597 giji_hindo = c_env->kana_val;
|
|
598 }
|
|
599 else if (is_g_eisuu (c))
|
|
600 { /* アスキー(アルファベット, 数字) */
|
|
601 giji_hinsi = eisuu_no;
|
|
602 giji_hindo = c_env->eisuu_val;
|
|
603 }
|
|
604 else if ((isascii (c) && isparen_s (c)) || isjparen_s (c))
|
|
605 { /* 開括弧 */
|
|
606 giji_hinsi = kai_kakko_no;
|
|
607 giji_hindo = c_env->kaikakko_val;
|
|
608 #ifdef nodef
|
|
609 }
|
|
610 else if (isjhira (c))
|
|
611 { /* ひらがな */
|
|
612 giji_hinsi = giji_no;
|
|
613 giji_hindo = 0;
|
|
614 #endif
|
|
615 }
|
|
616 else
|
|
617 { /* 記号、漢字 */
|
|
618 giji_hinsi = kigou_no;
|
|
619 giji_hindo = c_env->kigou_val;
|
|
620 }
|
|
621
|
|
622 /* 接続可能か */
|
3
|
623 if ((sbnptr->status_bkwd = kan_ck_vector (giji_hinsi, vector)) == WNN_CONNECT_BK)
|
0
|
624 goto _Can_connect;
|
|
625 #ifdef nodef
|
|
626 /* 数字に接続できないが,アスキには接続できる */
|
|
627 if (giji_hinsi == suuji_no && kan_ck_vector (eisuu_no, vector) == WNN_CONNECT_BK)
|
|
628 {
|
|
629 giji_hinsi = eisuu_no;
|
|
630 giji_hindo = c_env->eisuu_val;
|
|
631 goto _Can_connect;
|
|
632 }
|
|
633 #endif /* nodef */
|
|
634 if (bnst_num != 1)
|
|
635 return (-1);
|
|
636 #ifndef CHINESE
|
|
637 if (kan_ck_vector (giji_hinsi, vector1) == WNN_NOT_CONNECT_BK)
|
|
638 {
|
|
639 if (giji_hinsi != suuji_no || kan_ck_vector (eisuu_no, vector1) == WNN_NOT_CONNECT_BK)
|
|
640 return (-1);
|
|
641 else
|
|
642 {
|
|
643 giji_hinsi = eisuu_no;
|
|
644 giji_hindo = c_env->eisuu_val;
|
|
645 }
|
|
646 }
|
|
647 #endif /* CHINESE */
|
|
648
|
|
649 _Can_connect:
|
|
650
|
|
651 /* 長さを調べる */
|
|
652 if (giji_hinsi == suuji_no)
|
|
653 {
|
|
654 for (; (j_end + len < yomi_eno) && ((isascii (bun[j_end + len]) && isdigit (bun[j_end + len])) || isjdigit (bun[j_end + len])); len++)
|
|
655 ;
|
|
656 #ifndef CHINESE
|
|
657 if ((j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len]))
|
|
658 {
|
|
659 /* 英数 */
|
|
660 for (len++; (j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++)
|
|
661 ;
|
|
662 if (kan_ck_vector (eisuu_no, vector) == WNN_NOT_CONNECT_BK && (bnst_num != 1 || kan_ck_vector (eisuu_no, vector1)))
|
|
663 return (-1);
|
|
664 giji_hinsi = eisuu_no;
|
|
665 #ifdef nodef
|
|
666 /* 半角数字 */
|
|
667 }
|
|
668 else
|
|
669 {
|
|
670 if (kan_ck_vector (suuji_no, vector) == WNN_NOT_CONNECT_BK)
|
|
671 return (-1);
|
|
672 #endif /* nodef */
|
|
673 }
|
|
674 #endif /* CHINESE */
|
|
675 }
|
|
676 else if (giji_hinsi == eisuu_no)
|
|
677 {
|
|
678 for (; ((j_end + len) < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++)
|
|
679 ;
|
|
680 }
|
|
681 else if (giji_hinsi == toji_kakko_no)
|
|
682 { /* 閉括弧 */
|
|
683 len = 1;
|
|
684 }
|
|
685 else if (giji_hinsi == kigou_no)
|
|
686 { /* 記号 */
|
|
687 len = 1;
|
|
688 }
|
|
689 else if (giji_hinsi == kai_kakko_no)
|
|
690 { /* 開括弧 */
|
|
691 len = 1;
|
|
692 }
|
|
693 else if (giji_hinsi == giji_no)
|
|
694 {
|
|
695 len = 1;
|
|
696 }
|
|
697 else if (giji_hinsi == katakanago_no)
|
|
698 {
|
|
699 /* カタカナ 半角カタカナ 濁点 半濁点 */
|
|
700 for (len = 1; (j_end + len < yomi_eno) && (TOKUSYU (bun[j_end + len]) || check_bar_katakana (j_end + len, yomi_eno)); len++)
|
|
701 ;
|
|
702 }
|
|
703 else
|
|
704 {
|
|
705 if (j_end >= yomi_eno)
|
|
706 len = 0;
|
|
707 else
|
|
708 len = 1;
|
|
709 }
|
|
710
|
|
711 /* スペースは、そのあとの疑似文節にくっつける */
|
|
712 if ((giji_hinsi == eisuu_no) || (giji_hinsi == suuji_no))
|
|
713 {
|
|
714 for (; ((j_end + len) < yomi_eno) && (isspace (bun[j_end + len]) || (isjspace (c))); len++)
|
|
715 ;
|
|
716 }
|
|
717
|
|
718 _Only_Fuzokugo:
|
|
719 /* 評価値の決定 */
|
|
720 sbnptr->v_jc = HYOKAVAL (giji_hindo, len, len + j_end - yomi_sno);
|
|
721 sbnptr->i_jc = j_end;
|
|
722 if (giji_hinsi == katakanago_no)
|
|
723 {
|
|
724 sbnptr->t_jc = WNN_KATAKANA;
|
|
725 }
|
|
726 else if (giji_hinsi == suuji_no)
|
|
727 {
|
|
728 sbnptr->t_jc = c_env->giji.number;
|
|
729 }
|
|
730 else if (isascii (c))
|
|
731 {
|
|
732 if (giji_hinsi == eisuu_no)
|
|
733 {
|
|
734 sbnptr->t_jc = c_env->giji.eisuu;
|
|
735 }
|
|
736 else if (giji_hinsi == kigou_no || giji_hinsi == toji_kakko_no || giji_hinsi == kai_kakko_no)
|
|
737 {
|
|
738 sbnptr->t_jc = c_env->giji.kigou;
|
|
739 }
|
|
740 else
|
|
741 {
|
|
742 sbnptr->t_jc = -1;
|
|
743 }
|
|
744 sbnptr->v_jc += 2; /* 次候補よりも評価値を上げるため */
|
|
745 }
|
|
746 else
|
|
747 {
|
|
748 sbnptr->t_jc = -1;
|
|
749 }
|
|
750 sbnptr->hinsi_fk = giji_hinsi;
|
|
751 sbnptr->jentptr = 0;
|
|
752 sbnptr->kangovect = ft->kango_hinsi_area[giji_hinsi];
|
|
753 sbnptr->j_c = j_end + len - 1;
|
|
754 return (len);
|
|
755 }
|
|
756 #endif /* KOREAN */
|
|
757
|
|
758 int
|
|
759 is_g_eisuu (c)
|
|
760 register w_char c;
|
|
761 {
|
|
762 register int i;
|
|
763
|
|
764 if ((isascii (c) && isalnum (c)) || isjalnum (c))
|
|
765 return (YES);
|
|
766 else
|
|
767 {
|
|
768 for (i = 0; i < 20; i++)
|
|
769 {
|
|
770 if (giji_eisuu[i] == 0xffff)
|
|
771 return (NO);
|
|
772 else if (giji_eisuu[i] == c)
|
|
773 return (YES);
|
|
774 }
|
|
775 }
|
|
776 return (NO);
|
|
777 }
|
|
778
|
|
779
|
|
780 /* 何が何でも疑似文節を作るんだい */
|
|
781 int
|
|
782 getgiji_f (yomi_sno, sbnptr)
|
|
783 register int yomi_sno;
|
|
784 register struct SYO_BNSETSU *sbnptr;
|
|
785 {
|
|
786 sbnptr->v_jc = HYOKAVAL (0, 1, 1);
|
|
787 sbnptr->i_jc = yomi_sno;
|
|
788 sbnptr->t_jc = -1;
|
|
789 sbnptr->hinsi_fk = giji_no;
|
|
790 sbnptr->jentptr = 0;
|
|
791 sbnptr->kangovect = ft->kango_hinsi_area[kigou_no];
|
|
792 sbnptr->j_c = yomi_sno;
|
|
793
|
|
794 return (1);
|
|
795 }
|
|
796
|
|
797 /* カタカナか? 長音記号の前は、カタカナでないとダメ */
|
|
798 int
|
|
799 check_bar_katakana (i, yomi_eno)
|
|
800 register int i;
|
|
801 register int yomi_eno;
|
|
802 {
|
|
803 register int j;
|
|
804 for (j = i; j < yomi_eno; j++)
|
|
805 {
|
|
806 if (isjkata (bun[j]))
|
|
807 return (YES);
|
|
808 if (!(BAR_CODE (bun[j])))
|
|
809 return (NO);
|
|
810 }
|
|
811 return (NO);
|
|
812 }
|
|
813
|
|
814 /*
|
|
815 * kan_ckvt hinsi の品詞がvectorに接続できるか
|
|
816 */
|
|
817
|
|
818 int
|
|
819 kan_ckvt (hinsi, vector)
|
|
820 register unsigned short hinsi; /* 品詞No. */
|
|
821 register int vector; /* 付属語前端ベクタ */
|
|
822 {
|
|
823 register int *v;
|
|
824 v = (int *) ((word_vector *) ft->kango_vect_area + vector);
|
|
825 return (kan_ck_vector (hinsi, v));
|
|
826 /*
|
|
827 return (kan_ck_vector(hinsi,
|
|
828 ((word_vector *) ft->kango_vect_area + vector)));
|
|
829 */
|
|
830 }
|
|
831
|
|
832 #ifdef change_macro
|
|
833 int
|
|
834 kan_ck_vector (hinsi, vector)
|
|
835 unsigned short hinsi; /* 品詞No. */
|
|
836 int vector[]; /* 付属語前端ベクタ */
|
|
837 {
|
|
838 register int wvect;
|
|
839
|
|
840 wvect = vector[hinsi / (sizeof (int) << 3)]; /* << 3 == * 8 */
|
|
841 wvect >>= (hinsi % (sizeof (int) << 3));
|
|
842 if ((wvect & 0x00000001) == 1)
|
|
843 return (WNN_CONNECT_BK);
|
|
844 else
|
|
845 return (WNN_NOT_CONNECT_BK);
|
|
846 }
|
|
847 #endif /* change_macro */
|