Mercurial > freewnn
annotate Wnn/jserver/bnsetu_kai.c @ 29:35bc1f2e3f14 default tip
minor fix
author | Yoshiki Yazawa <yaz@honeyplanet.jp> |
---|---|
date | Sat, 06 Mar 2010 23:55:24 +0900 |
parents | 466fe6732d8d |
children |
rev | line source |
---|---|
0 | 1 /* |
2 * $Id: bnsetu_kai.c,v 1.5 2003/05/11 18:35:54 hiroo Exp $ | |
3 */ | |
4 | |
5 /* | |
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system. | |
7 * This file is part of FreeWnn. | |
8 * | |
9 * Copyright Kyoto University Research Institute for Mathematical Sciences | |
10 * 1987, 1988, 1989, 1990, 1991, 1992 | |
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 | |
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 | |
13 * Copyright FreeWnn Project 1999, 2000, 2003 | |
14 * | |
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> | |
16 * | |
17 * This program is free software; you can redistribute it and/or modify | |
18 * it under the terms of the GNU General Public License as published by | |
19 * the Free Software Foundation; either version 2 of the License, or | |
20 * (at your option) any later version. | |
21 * | |
22 * This program is distributed in the hope that it will be useful, | |
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 * GNU General Public License for more details. | |
26 * | |
27 * You should have received a copy of the GNU General Public License | |
28 * along with this program; if not, write to the Free Software | |
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
30 */ | |
31 | |
32 /************************************************ | |
33 * 小文節解析 * | |
34 ************************************************/ | |
35 #include <stdio.h> | |
36 #include <ctype.h> | |
37 #include "commonhd.h" | |
38 #include "de_header.h" | |
39 #include "jdata.h" | |
40 | |
41 #include "fzk.h" | |
42 #include "kaiseki.h" | |
43 | |
/*
 * sbn_kai -- small-bunsetsu (sho-bunsetsu) analysis for the reading range
 * bun[yomi_sno .. yomi_eno).
 *
 * Parameters:
 *   yomi_sno, yomi_eno  start / end index into bun[] of the range to analyse.
 *   endvect, endvect1   terminal vectors passed on to fzk_ckvt() / fzk_kai().
 *   tsbnptr             out: receives the head of the SYO_BNSETSU list
 *                       (linked through lnk_br) built by this function.
 *   bnst_num            number of small bunsetsu analysed so far; stored in
 *                       each node's kbcnt.  The value 1 selects the special
 *                       handling that also tries the second vector (getfzkoh1).
 *   parent              node stored in each new node's parent field; when
 *                       bnst_num != 1 and parent->status_bkwd == NO the
 *                       score is halved with _DIVID().
 *
 * Returns:
 *   NOTHING when the range is empty or neither vector passes fzk_ckvt();
 *   the (<= 0) result of fzk_kai() when that call yields no candidates;
 *   -1 when getsbnsp() fails or wnn_get_fukugou_component_body() returns 0;
 *   otherwise sbncnt, the number of nodes counted while building the list.
 *
 * NOTE(review): the -1 returns taken after fzk_kai() succeeded do not call
 * freeibsp (ichbnpbp), unlike the normal exit at the bottom.  Whether this
 * leaks depends on how callers recover from errors -- confirm.
 */
44 int | |
45 sbn_kai (yomi_sno, yomi_eno, endvect, endvect1, tsbnptr, bnst_num, parent) | |
46 int yomi_sno; | |
47 int yomi_eno; | |
48 int endvect; /* terminal vector */ | |
49 int endvect1; /* terminal vector */ | |
50 struct SYO_BNSETSU **tsbnptr; /* small-bunsetsu analysis result */ | |
51 int bnst_num; /* number of small bunsetsu analysed so far */ | |
52 struct SYO_BNSETSU *parent; /* parent stem-word node */ | |
53 { | |
54 int fzkcnt, hyokaval, hyokaval0 = 0, hinsidt, sbncnt; | |
55 unsigned short *hinsi_buf; | |
56 int hinsidt_fk; /* saved compound part of speech */ | |
57 register int i, /* index of the end of the stem word */ | |
58 j, /* index of the start of the bunsetsu */ | |
59 t, ii; | |
60 struct jdata *jentptr; | |
61 register struct SYO_BNSETSU *sbnptr = NULL; | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
3
diff
changeset
|
62 struct SYO_BNSETSU *sbnptr_top = NULL; |
0 | 63 struct ICHBNP *ichbnpbp; |
64 int k, ll; | |
65 struct SYO_BNSETSU *kanptr; | |
66 struct SYO_BNSETSU *kanptr_tmp = NULL; | |
67 struct SYO_BNSETSU *kanptr_before; | |
68 struct SYO_BNSETSU *giji_sbn; | |
69 int get_giji_flg = -1; /* whether a pseudo-bunsetsu could be made */ | |
70 | |
71 enum FLG | |
72 { | |
73 set, | |
74 noset | |
75 } | |
76 setflg; | |
77 enum FLG kanflg; /* whether a part of speech with the same vector was found */ | |
78 enum GFLG | |
79 { | |
80 get, | |
81 noget | |
82 } | |
83 getflg; | |
84 struct SYO_BNSETSU *getsbnsp (); | |
85 int kangovect; | |
86 int connect_flg; | |
87 | |
/* Nothing to do for an empty range or when neither vector passes fzk_ckvt(). */
88 if ((yomi_sno == yomi_eno) || (fzk_ckvt (endvect) == NO && fzk_ckvt (endvect1) == NO)) | |
89 return (NOTHING); | |
90 | |
91 if ((fzkcnt = fzk_kai (&bun[yomi_sno], &bun[yomi_eno], endvect, endvect1, &ichbnpbp)) <= 0) | |
92 return (fzkcnt); /* ERROR */ | |
/* Shift every candidate offset by yomi_sno. */
93 for (ii = 0; ii < fzkcnt; ii++) | |
94 getfzkoh (ichbnpbp, ii)->offset += yomi_sno; | |
95 | |
96 for (ii = 0; ii < fzkcnt; ii++) | |
97 { | |
98 i = getfzkoh (ichbnpbp, ii)->offset; | |
99 if (jmtp[i] == (struct jdata **) UN_KNOWN) /* already looked up? */ | |
100 { | |
101 jmt_set (i); /* dictionary lookup */ | |
102 } | |
103 } | |
104 | |
/* Start from the largest j reported by j_max(), clamped to yomi_eno - 1. */
105 j = j_max (ichbnpbp, fzkcnt); | |
106 j = (j >= yomi_eno) ? yomi_eno - 1 : j; | |
107 | |
108 setflg = noset; | |
109 if ((giji_sbn = getsbnsp ()) == NO) | |
110 return (-1); /* ERROR */ | |
111 giji_sbn->kbcnt = bnst_num; | |
112 giji_sbn->bend_m = yomi_sno; | |
113 giji_sbn->parent = parent; | |
114 | |
115 /* Extract the pseudo-bunsetsu first (if there is one) */ | |
116 if (getgiji (yomi_sno, yomi_eno, giji_sbn, ichbnpbp, fzkcnt, bnst_num) >= 0) | |
117 { | |
118 sbncnt = 1; | |
119 getflg = get; | |
120 get_giji_flg = giji_sbn->j_c; | |
121 if (giji_sbn->j_c > j) | |
122 { | |
123 sbnptr_top = giji_sbn; | |
124 get_giji_flg = -1; | |
125 kanptr_tmp = giji_sbn->lnk_br; | |
126 } | |
127 } | |
128 else | |
129 { | |
130 sbnptr = giji_sbn; | |
131 sbncnt = 0; | |
132 getflg = noget; | |
133 } | |
134 | |
135 for (; j >= yomi_sno; j--) | |
136 { /* bunsetsu length */ | |
/* Link the pseudo-bunsetsu in when j reaches its recorded j_c. */
137 if ((setflg == noset) && (get_giji_flg == j)) | |
138 { | |
139 giji_sbn->lnk_br = sbnptr_top; | |
140 sbnptr_top = giji_sbn; | |
141 setflg = set; | |
142 kanptr_tmp = giji_sbn->lnk_br; | |
143 } | |
144 for (ii = 0; (ii < fzkcnt) && ((i = (getfzkoh (ichbnpbp, ii)->offset)) <= j); ii++) | |
145 { /* fuzokugo (ancillary word) */ | |
146 jentptr = (struct jdata *) C (i, j); | |
147 for (; jentptr; jentptr = jentptr->jptr) | |
148 { /* dictionary */ | |
149 for (t = 0; t < (jentptr->kosuu); t++) | |
150 { /* stem word */ | |
151 if (((jentptr->hindo[t] & 0x7f) == 0x7f) || (jentptr->hindo_in && (jentptr->hindo_in[t] & 0x7f) == 0x7f)) | |
152 continue; /* dictionary entry that has been commented out */ | |
153 #ifdef CONVERT_with_SiSheng | |
154 hyokaval = hyoka1 (jentptr->hindo[t], | |
155 (jentptr->hindo_in == 0 ? 0 : jentptr->hindo_in[t]), | |
156 (jentptr->sisheng == 0 ? 0 : diff_sisheng (jentptr->sisheng[t], jentptr->sisheng_int)), j - i / 2 - yomi_sno / 2 + 1, dic_table[jentptr->jishono].nice); | |
157 #else | |
158 hyokaval = hyoka1 (jentptr->hindo[t], (jentptr->hindo_in == 0 ? 0 : jentptr->hindo_in[t]), j - i + 1, j - yomi_sno + 1, dic_table[jentptr->jishono].nice); | |
159 #endif /* CONVERT_with_SiSheng */ | |
160 if (bnst_num != 1) | |
161 { | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
3
diff
changeset
|
162 if (parent != NULL && parent->status_bkwd == NO) |
0 | 163 /* When the previous bunsetsu cannot connect to endvect, |
164 lower the score */ | |
165 hyokaval0 = _DIVID (hyokaval, 2); | |
166 else | |
167 hyokaval0 = hyokaval; | |
168 } | |
169 hinsidt_fk = hinsidt = jentptr->hinsi[t]; | |
170 if ((ll = wnn_get_fukugou_component_body (hinsidt_fk, &hinsi_buf)) == 0) | |
171 { | |
172 error1 ("wnn_get_fukugou_component:erroe in bunsetukai."); | |
173 return (-1); | |
174 } | |
175 for (; ll > 0; ll--) | |
176 { | |
177 hinsidt = *hinsi_buf; | |
178 hinsi_buf++; | |
179 /* From here on only simple parts of speech need to be considered */ | |
180 connect_flg = kan_ck_vector (hinsidt, (getfzkoh (ichbnpbp, ii)->vector)); | |
181 if ((connect_flg == WNN_CONNECT_BK || (bnst_num == 1 && kan_ck_vector (hinsidt, (getfzkoh1 (ichbnpbp, ii)->vector)) == WNN_CONNECT_BK))) | |
182 { | |
183 /* can connect */ | |
184 if (bnst_num == 1) | |
185 /* lower the score of those that cannot connect to endvect */ | |
186 hyokaval0 = (connect_flg != WNN_NOT_CONNECT_BK) ? hyokaval : _DIVID (hyokaval, 2); | |
187 kangovect = ft->kango_hinsi_area[hinsidt]; | |
/* The previous node was consumed (getflg == get): allocate a fresh one. */
188 if (getflg == get) | |
189 { | |
190 getflg = noget; | |
191 if (0 == (sbnptr = getsbnsp ())) | |
192 return (-1); /* ERROR */ | |
193 sbnptr->kbcnt = bnst_num; | |
194 sbnptr->bend_m = yomi_sno; | |
195 sbnptr->parent = parent; | |
196 } | |
197 sbnptr->j_c = j; | |
198 | |
199 if (setflg == noset) | |
200 { /* first bunsetsu of this length */ | |
201 sbnptr->lnk_br = sbnptr_top; | |
202 sbnptr_top = sbnptr; | |
203 setflg = set; | |
204 sbnptr->v_jc = hyokaval0; | |
205 sbnptr->t_jc = t; | |
206 sbnptr->jentptr = jentptr; | |
207 sbnptr->hinsi_fk = hinsidt_fk; | |
208 sbnptr->kangovect = kangovect; | |
209 sbnptr->i_jc = i; | |
210 /* not well understood (9/8) | |
211 if (endvect1 == WNN_VECT_NO) | |
212 sbnptr->status_bkwd = WNN_NOT_CONNECT_BK; | |
213 else | |
214 */ | |
215 sbnptr->status_bkwd = connect_flg; | |
216 kanptr_tmp = sbnptr->lnk_br; | |
217 getflg = get; | |
218 sbncnt++; | |
219 } | |
220 else | |
221 { | |
222 kanflg = noset; | |
/* Scan the nodes of the current length (list head up to kanptr_tmp). */
223 for (kanptr_before = kanptr = sbnptr_top; kanptr != kanptr_tmp; kanptr_before = kanptr, kanptr = kanptr->lnk_br) | |
224 { | |
225 if (kanptr->kangovect == kangovect) | |
226 { | |
227 /* a part of speech with the same vector was found */ | |
228 if (hyokaval0 > kanptr->v_jc) | |
229 { | |
230 /* this one has the higher score */ | |
231 kanptr->v_jc = hyokaval0; | |
232 kanptr->t_jc = t; | |
233 kanptr->jentptr = jentptr; | |
234 kanptr->hinsi_fk = hinsidt_fk; | |
235 kanptr->i_jc = i; | |
236 /* not well understood (9/8) | |
237 if (endvect1 == WNN_VECT_NO) | |
238 kanptr->status_bkwd = WNN_NOT_CONNECT_BK; | |
239 else | |
240 */ | |
241 kanptr->status_bkwd = connect_flg; | |
242 } | |
243 kanflg = set; | |
244 break; | |
245 } | |
246 else if (kanptr->kangovect > kangovect) | |
247 { | |
248 /* no part of speech with the same vector */ | |
249 break; | |
250 } | |
251 } /* SORT & INSERT SYO_BN */ | |
252 | |
253 if (kanflg == noset) | |
254 { | |
255 /* no part of speech with the same vector: insert before kanptr */ | |
256 if (kanptr == sbnptr_top) | |
257 { | |
258 sbnptr->lnk_br = kanptr; | |
259 sbnptr_top = sbnptr; | |
260 } | |
261 else | |
262 { | |
263 sbnptr->lnk_br = kanptr; | |
264 kanptr_before->lnk_br = sbnptr; | |
265 } | |
266 sbnptr->v_jc = hyokaval0; | |
267 sbnptr->t_jc = t; | |
268 sbnptr->jentptr = jentptr; | |
269 sbnptr->hinsi_fk = hinsidt_fk; | |
270 sbnptr->kangovect = kangovect; | |
271 sbnptr->i_jc = i; | |
272 /* not well understood (9/8) | |
273 if (endvect1 == WNN_VECT_NO) | |
274 sbnptr->status_bkwd = WNN_NOT_CONNECT_BK; | |
275 else | |
276 */ | |
277 sbnptr->status_bkwd = connect_flg; | |
278 sbncnt++; | |
279 getflg = get; | |
280 } | |
281 } | |
282 } /* can connect */ | |
283 } /* simple part of speech */ | |
284 } /* stem word */ | |
285 } /* dictionary */ | |
286 } /* fuzokugo (ancillary word) */ | |
287 setflg = noset; /* nothing has been set yet for the next length */ | |
288 } /* bunsetsu length */ | |
289 if (sbncnt == NOTHING) | |
290 { /* no stem word was found */ | |
/* Retry getgiji() with only the first k + 1 candidates, where candidate k accepts katakanago_no. */
291 for (k = fzkcnt - 1; k >= 0; k--) | |
292 { | |
293 if (kan_ck_vector (katakanago_no, getfzkoh (ichbnpbp, k)->vector) == WNN_CONNECT_BK) | |
294 { | |
295 k = getgiji (yomi_sno, yomi_eno, sbnptr, ichbnpbp, k + 1, bnst_num); | |
296 break; | |
297 } | |
298 } | |
299 if (k < 0) | |
300 { | |
301 if (bnst_num == 1) | |
302 { | |
303 /* make a pseudo-bunsetsu no matter what */ | |
304 getgiji_f (yomi_sno, sbnptr); | |
305 sbncnt = 1; | |
306 sbnptr_top = sbnptr; | |
307 #ifdef nodef | |
308 wnn_errorno = WNN_NO_KOUHO; | |
309 error1 ("Cannot get bunsetsu kouho in bunsetu-kaiseki.\n"); | |
310 #endif /* nodef */ | |
311 } | |
312 else | |
313 { | |
314 freesbn (sbnptr); | |
315 sbncnt = 0; | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
3
diff
changeset
|
316 sbnptr_top = NULL; |
0 | 317 } |
318 } | |
319 else | |
320 { | |
321 sbncnt = 1; | |
322 sbnptr_top = sbnptr; | |
323 } | |
324 } | |
325 else if (getflg == noget) | |
326 { | |
/* The last allocated node was never linked into the list: release it. */
327 freesbn (sbnptr); | |
328 } | |
329 *tsbnptr = sbnptr_top; /* ordered from the shortest */ | |
330 freeibsp (ichbnpbp); | |
331 return (sbncnt); | |
332 } | |
333 | |
334 /*******************************************/ | |
335 /* max value in maxj[] search routine */ | |
336 /*******************************************/ | |
337 int | |
338 j_max (fzkbnp, fzkcnt) | |
339 register struct ICHBNP *fzkbnp; | |
340 int fzkcnt; /* max number of fzkkouho table */ | |
341 { | |
342 register int i, mxj = -1, k, ofst; | |
343 | |
344 for (i = 0; i < fzkcnt; i++) | |
345 { | |
346 if ((ofst = getfzkoh (fzkbnp, i)->offset) < maxchg) | |
347 if ((k = maxj[ofst]) > mxj) | |
348 mxj = k; | |
349 } | |
350 return (mxj); | |
351 } | |
352 | |
353 /**********************************************/ | |
354 /* 疑似幹語の獲得 */ | |
355 /**********************************************/ | |
356 #ifdef KOREAN | |
357 int | |
358 getgiji (yomi_sno, yomi_eno, sbnptr, fzkbnp, fzkcnt, bnst_num) | |
359 int yomi_sno; | |
360 int yomi_eno; | |
361 struct SYO_BNSETSU *sbnptr; | |
362 struct ICHBNP *fzkbnp; | |
363 int fzkcnt; | |
364 int bnst_num; | |
365 { | |
366 register int j_end; | |
367 int j_end_sv; | |
368 int *vector; | |
369 int *vector1; | |
370 int giji_hinsi; | |
371 int giji_hindo; | |
372 int len = 0; | |
373 w_char c = (w_char) 0; | |
374 | |
375 j_end = getfzkoh (fzkbnp, fzkcnt - 1)->offset; | |
376 vector = getfzkoh (fzkbnp, fzkcnt - 1)->vector; | |
377 vector1 = getfzkoh1 (fzkbnp, fzkcnt - 1)->vector; | |
378 | |
379 /* 分類 */ | |
380 if ((isascii (c = bun[j_end]) && isdigit (c)) || isjdigit (c)) | |
381 { /* 数字 */ | |
382 giji_hinsi = suuji_no; | |
383 giji_hindo = c_env->suuji_val; | |
384 } | |
385 else if ((isascii (c) && isparen_e (c)) || isjparen_e (c)) | |
386 { /* 閉括弧 */ | |
387 giji_hinsi = toji_kakko_no; | |
388 giji_hindo = c_env->toji_kakko_val; | |
389 } | |
390 else if (is_g_eisuu (c)) | |
391 { /* アスキー(アルファベット, 数字) */ | |
392 giji_hinsi = eisuu_no; | |
393 giji_hindo = c_env->eisuu_val; | |
394 } | |
395 else if ((isascii (c) && isparen_s (c)) || isjparen_s (c)) | |
396 { /* 開括弧 */ | |
397 giji_hinsi = kai_kakko_no; | |
398 giji_hindo = c_env->kaikakko_val; | |
399 } | |
400 else if (ishangul (c)) | |
401 { /* Hangul */ | |
402 giji_hinsi = katakanago_no; | |
403 giji_hindo = 0; | |
404 } | |
405 else | |
406 { /* 記号、漢字 */ | |
407 giji_hinsi = kigou_no; | |
408 giji_hindo = c_env->kigou_val; | |
409 } | |
410 | |
411 /* 接続可能か */ | |
412 if ((sbnptr->status_bkwd = kan_ck_vector (giji_hinsi, vector)) == WNN_CONNECT_BK) | |
413 goto _Can_connect; | |
414 if (bnst_num != 1) | |
415 return (-1); | |
416 if (kan_ck_vector (giji_hinsi, vector1) == WNN_NOT_CONNECT_BK) | |
417 { | |
418 if (giji_hinsi != suuji_no || kan_ck_vector (eisuu_no, vector1) == WNN_NOT_CONNECT_BK) | |
419 return (-1); | |
420 else | |
421 { | |
422 giji_hinsi = eisuu_no; | |
423 giji_hindo = c_env->eisuu_val; | |
424 } | |
425 } | |
426 | |
427 _Can_connect: | |
428 | |
429 /* 長さを調べる */ | |
430 if (giji_hinsi == suuji_no) | |
431 { | |
432 for (; (j_end + len < yomi_eno) && ((isascii (bun[j_end + len]) && isdigit (bun[j_end + len])) || isjdigit (bun[j_end + len])); len++) | |
433 ; | |
434 if ((j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len])) | |
435 { | |
436 /* 英数 */ | |
437 for (len++; (j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++) | |
438 ; | |
439 if (kan_ck_vector (eisuu_no, vector) == WNN_NOT_CONNECT_BK && (bnst_num != 1 || kan_ck_vector (eisuu_no, vector1))) | |
440 return (-1); | |
441 giji_hinsi = eisuu_no; | |
442 } | |
443 } | |
444 else if (giji_hinsi == eisuu_no) | |
445 { | |
446 for (; ((j_end + len) < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++) | |
447 ; | |
448 } | |
449 else if (giji_hinsi == toji_kakko_no) | |
450 { /* 閉括弧 */ | |
451 len = 1; | |
452 } | |
453 else if (giji_hinsi == kigou_no) | |
454 { /* 記号 */ | |
455 len = 1; | |
456 } | |
457 else if (giji_hinsi == kai_kakko_no) | |
458 { /* 開括弧 */ | |
459 len = 1; | |
460 } | |
461 else if (giji_hinsi == giji_no) | |
462 { | |
463 len = 1; | |
464 } | |
465 else if (giji_hinsi == katakanago_no) | |
466 { /* Hangul */ | |
467 for (len = 1; (j_end + len < yomi_eno) && ishangul (bun[j_end + len]); len++) | |
468 ; | |
469 } | |
470 else | |
471 { | |
472 if (j_end >= yomi_eno) | |
473 len = 0; | |
474 else | |
475 len = 1; | |
476 } | |
477 | |
478 /* スペースは、そのあとの疑似文節にくっつける */ | |
479 if ((giji_hinsi == eisuu_no) || (giji_hinsi == suuji_no) || (giji_hinsi == katakanago_no)) | |
480 { | |
481 for (; ((j_end + len) < yomi_eno) && (isspace (bun[j_end + len]) || (isjspace (c))); len++) | |
482 ; | |
483 } | |
484 | |
485 _Only_Fuzokugo: | |
486 /* 評価値の決定 */ | |
487 sbnptr->v_jc = HYOKAVAL (giji_hindo, len, len + j_end - yomi_sno); | |
488 sbnptr->i_jc = j_end; | |
489 if (giji_hinsi == katakanago_no) | |
490 { | |
491 sbnptr->t_jc = WNN_KATAKANA; | |
492 sbnptr->v_jc = 0; | |
493 } | |
494 else if (giji_hinsi == suuji_no) | |
495 { | |
496 sbnptr->t_jc = c_env->giji.number; | |
497 } | |
498 else if (isascii (c)) | |
499 { | |
500 if (giji_hinsi == eisuu_no) | |
501 { | |
502 sbnptr->t_jc = c_env->giji.eisuu; | |
503 } | |
504 else if (giji_hinsi == kigou_no || giji_hinsi == toji_kakko_no || giji_hinsi == kai_kakko_no) | |
505 { | |
506 sbnptr->t_jc = c_env->giji.kigou; | |
507 } | |
508 else | |
509 { | |
510 sbnptr->t_jc = -1; | |
511 } | |
512 sbnptr->v_jc += 2; /* 次候補よりも評価値を上げるため */ | |
513 } | |
514 else | |
515 { | |
516 sbnptr->t_jc = -1; | |
517 } | |
518 sbnptr->hinsi_fk = giji_hinsi; | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
3
diff
changeset
|
519 sbnptr->jentptr = NULL; |
0 | 520 sbnptr->kangovect = ft->kango_hinsi_area[giji_hinsi]; |
521 sbnptr->j_c = j_end + len - 1; | |
522 return (len); | |
523 } | |
524 #else /* !KOREAN */ | |
525 int | |
526 getgiji (yomi_sno, yomi_eno, sbnptr, fzkbnp, fzkcnt, bnst_num) | |
527 int yomi_sno; | |
528 int yomi_eno; | |
529 struct SYO_BNSETSU *sbnptr; | |
530 struct ICHBNP *fzkbnp; | |
531 int fzkcnt; | |
532 int bnst_num; | |
533 { | |
534 register int j_end; | |
535 int j_end_sv; | |
536 int *vector; | |
537 #ifndef CHINESE | |
538 int *vector1; | |
539 #endif | |
540 int giji_hinsi; | |
541 int giji_hindo; | |
542 int len = 0; | |
543 w_char c = (w_char) 0; | |
544 | |
545 j_end = getfzkoh (fzkbnp, fzkcnt - 1)->offset; | |
546 vector = getfzkoh (fzkbnp, fzkcnt - 1)->vector; | |
547 #ifndef CHINESE | |
548 vector1 = getfzkoh1 (fzkbnp, fzkcnt - 1)->vector; | |
549 #endif | |
550 | |
551 if (j_end >= yomi_eno) | |
552 { /* 付属語だけの場合 */ | |
553 giji_hinsi = fuzokugo_no; | |
554 giji_hindo = c_env->fuzokogo_val; | |
555 len = 0; | |
556 goto _Only_Fuzokugo; | |
557 } | |
558 /*#ifdef not_use*/ | |
559 if (isjhira (bun[j_end])) /* ひらがなならバイバイ */ | |
560 return (-1); | |
561 /*#endif*/ | |
562 for (j_end_sv = j_end; j_end < yomi_eno; j_end++) | |
563 { | |
564 /* スペースは、その前の疑似文節にくっつける */ | |
565 if (!(isspace (c = bun[j_end])) && !(isjspace (c))) | |
566 { | |
567 if (TOKUSYU (c) || check_bar_katakana (j_end, yomi_eno) || isjhira (c) || isjkanji (c)) | |
568 { /* 片仮名 ひらがな 漢字 */ | |
569 j_end = j_end_sv; | |
570 } | |
571 break; | |
572 } | |
573 } | |
574 | |
575 if (j_end >= yomi_eno) | |
576 { /* スペースだけの場合 */ | |
577 giji_hinsi = fuzokugo_no; | |
578 giji_hindo = c_env->fuzokogo_val; | |
579 len = 0; | |
580 goto _Only_Fuzokugo; | |
581 } | |
582 | |
583 /* 分類 */ | |
584 if ((isascii (c = bun[j_end]) && isdigit (c)) || isjdigit (c)) | |
585 { /* 数字 */ | |
586 giji_hinsi = suuji_no; | |
587 giji_hindo = c_env->suuji_val; | |
588 } | |
589 else if ((isascii (c) && isparen_e (c)) || isjparen_e (c)) | |
590 { /* 閉括弧 */ | |
591 giji_hinsi = toji_kakko_no; | |
592 giji_hindo = c_env->toji_kakko_val; | |
593 } | |
594 else if (TOKUSYU (c) || check_bar_katakana (j_end, yomi_eno)) | |
595 { /* 片仮名 */ | |
596 giji_hinsi = katakanago_no; | |
597 giji_hindo = c_env->kana_val; | |
598 } | |
599 else if (is_g_eisuu (c)) | |
600 { /* アスキー(アルファベット, 数字) */ | |
601 giji_hinsi = eisuu_no; | |
602 giji_hindo = c_env->eisuu_val; | |
603 } | |
604 else if ((isascii (c) && isparen_s (c)) || isjparen_s (c)) | |
605 { /* 開括弧 */ | |
606 giji_hinsi = kai_kakko_no; | |
607 giji_hindo = c_env->kaikakko_val; | |
608 #ifdef nodef | |
609 } | |
610 else if (isjhira (c)) | |
611 { /* ひらがな */ | |
612 giji_hinsi = giji_no; | |
613 giji_hindo = 0; | |
614 #endif | |
615 } | |
616 else | |
617 { /* 記号、漢字 */ | |
618 giji_hinsi = kigou_no; | |
619 giji_hindo = c_env->kigou_val; | |
620 } | |
621 | |
622 /* 接続可能か */ | |
3 | 623 if ((sbnptr->status_bkwd = kan_ck_vector (giji_hinsi, vector)) == WNN_CONNECT_BK) |
0 | 624 goto _Can_connect; |
625 #ifdef nodef | |
626 /* 数字に接続できないが,アスキには接続できる */ | |
627 if (giji_hinsi == suuji_no && kan_ck_vector (eisuu_no, vector) == WNN_CONNECT_BK) | |
628 { | |
629 giji_hinsi = eisuu_no; | |
630 giji_hindo = c_env->eisuu_val; | |
631 goto _Can_connect; | |
632 } | |
633 #endif /* nodef */ | |
634 if (bnst_num != 1) | |
635 return (-1); | |
636 #ifndef CHINESE | |
637 if (kan_ck_vector (giji_hinsi, vector1) == WNN_NOT_CONNECT_BK) | |
638 { | |
639 if (giji_hinsi != suuji_no || kan_ck_vector (eisuu_no, vector1) == WNN_NOT_CONNECT_BK) | |
640 return (-1); | |
641 else | |
642 { | |
643 giji_hinsi = eisuu_no; | |
644 giji_hindo = c_env->eisuu_val; | |
645 } | |
646 } | |
647 #endif /* CHINESE */ | |
648 | |
649 _Can_connect: | |
650 | |
651 /* 長さを調べる */ | |
652 if (giji_hinsi == suuji_no) | |
653 { | |
654 for (; (j_end + len < yomi_eno) && ((isascii (bun[j_end + len]) && isdigit (bun[j_end + len])) || isjdigit (bun[j_end + len])); len++) | |
655 ; | |
656 #ifndef CHINESE | |
657 if ((j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len])) | |
658 { | |
659 /* 英数 */ | |
660 for (len++; (j_end + len < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++) | |
661 ; | |
662 if (kan_ck_vector (eisuu_no, vector) == WNN_NOT_CONNECT_BK && (bnst_num != 1 || kan_ck_vector (eisuu_no, vector1))) | |
663 return (-1); | |
664 giji_hinsi = eisuu_no; | |
665 #ifdef nodef | |
666 /* 半角数字 */ | |
667 } | |
668 else | |
669 { | |
670 if (kan_ck_vector (suuji_no, vector) == WNN_NOT_CONNECT_BK) | |
671 return (-1); | |
672 #endif /* nodef */ | |
673 } | |
674 #endif /* CHINESE */ | |
675 } | |
676 else if (giji_hinsi == eisuu_no) | |
677 { | |
678 for (; ((j_end + len) < yomi_eno) && is_g_eisuu (bun[j_end + len]); len++) | |
679 ; | |
680 } | |
681 else if (giji_hinsi == toji_kakko_no) | |
682 { /* 閉括弧 */ | |
683 len = 1; | |
684 } | |
685 else if (giji_hinsi == kigou_no) | |
686 { /* 記号 */ | |
687 len = 1; | |
688 } | |
689 else if (giji_hinsi == kai_kakko_no) | |
690 { /* 開括弧 */ | |
691 len = 1; | |
692 } | |
693 else if (giji_hinsi == giji_no) | |
694 { | |
695 len = 1; | |
696 } | |
697 else if (giji_hinsi == katakanago_no) | |
698 { | |
699 /* カタカナ 半角カタカナ 濁点 半濁点 */ | |
700 for (len = 1; (j_end + len < yomi_eno) && (TOKUSYU (bun[j_end + len]) || check_bar_katakana (j_end + len, yomi_eno)); len++) | |
701 ; | |
702 } | |
703 else | |
704 { | |
705 if (j_end >= yomi_eno) | |
706 len = 0; | |
707 else | |
708 len = 1; | |
709 } | |
710 | |
711 /* スペースは、そのあとの疑似文節にくっつける */ | |
712 if ((giji_hinsi == eisuu_no) || (giji_hinsi == suuji_no)) | |
713 { | |
714 for (; ((j_end + len) < yomi_eno) && (isspace (bun[j_end + len]) || (isjspace (c))); len++) | |
715 ; | |
716 } | |
717 | |
718 _Only_Fuzokugo: | |
719 /* 評価値の決定 */ | |
720 sbnptr->v_jc = HYOKAVAL (giji_hindo, len, len + j_end - yomi_sno); | |
721 sbnptr->i_jc = j_end; | |
722 if (giji_hinsi == katakanago_no) | |
723 { | |
724 sbnptr->t_jc = WNN_KATAKANA; | |
725 } | |
726 else if (giji_hinsi == suuji_no) | |
727 { | |
728 sbnptr->t_jc = c_env->giji.number; | |
729 } | |
730 else if (isascii (c)) | |
731 { | |
732 if (giji_hinsi == eisuu_no) | |
733 { | |
734 sbnptr->t_jc = c_env->giji.eisuu; | |
735 } | |
736 else if (giji_hinsi == kigou_no || giji_hinsi == toji_kakko_no || giji_hinsi == kai_kakko_no) | |
737 { | |
738 sbnptr->t_jc = c_env->giji.kigou; | |
739 } | |
740 else | |
741 { | |
742 sbnptr->t_jc = -1; | |
743 } | |
744 sbnptr->v_jc += 2; /* 次候補よりも評価値を上げるため */ | |
745 } | |
746 else | |
747 { | |
748 sbnptr->t_jc = -1; | |
749 } | |
750 sbnptr->hinsi_fk = giji_hinsi; | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
3
diff
changeset
|
751 sbnptr->jentptr = NULL; |
0 | 752 sbnptr->kangovect = ft->kango_hinsi_area[giji_hinsi]; |
753 sbnptr->j_c = j_end + len - 1; | |
754 return (len); | |
755 } | |
756 #endif /* KOREAN */ | |
757 | |
758 int | |
759 is_g_eisuu (c) | |
760 register w_char c; | |
761 { | |
762 register int i; | |
763 | |
764 if ((isascii (c) && isalnum (c)) || isjalnum (c)) | |
765 return (YES); | |
766 else | |
767 { | |
768 for (i = 0; i < 20; i++) | |
769 { | |
770 if (giji_eisuu[i] == 0xffff) | |
771 return (NO); | |
772 else if (giji_eisuu[i] == c) | |
773 return (YES); | |
774 } | |
775 } | |
776 return (NO); | |
777 } | |
778 | |
779 | |
780 /* 何が何でも疑似文節を作るんだい */ | |
781 int | |
782 getgiji_f (yomi_sno, sbnptr) | |
783 register int yomi_sno; | |
784 register struct SYO_BNSETSU *sbnptr; | |
785 { | |
786 sbnptr->v_jc = HYOKAVAL (0, 1, 1); | |
787 sbnptr->i_jc = yomi_sno; | |
788 sbnptr->t_jc = -1; | |
789 sbnptr->hinsi_fk = giji_no; | |
25
466fe6732d8d
- fixed more NULL pointer related errata
Yoshiki Yazawa <yaz@honeyplanet.jp>
parents:
3
diff
changeset
|
790 sbnptr->jentptr = NULL; |
0 | 791 sbnptr->kangovect = ft->kango_hinsi_area[kigou_no]; |
792 sbnptr->j_c = yomi_sno; | |
793 | |
794 return (1); | |
795 } | |
796 | |
797 /* カタカナか? 長音記号の前は、カタカナでないとダメ */ | |
798 int | |
799 check_bar_katakana (i, yomi_eno) | |
800 register int i; | |
801 register int yomi_eno; | |
802 { | |
803 register int j; | |
804 for (j = i; j < yomi_eno; j++) | |
805 { | |
806 if (isjkata (bun[j])) | |
807 return (YES); | |
808 if (!(BAR_CODE (bun[j]))) | |
809 return (NO); | |
810 } | |
811 return (NO); | |
812 } | |
813 | |
814 /* | |
815 * kan_ckvt hinsi の品詞がvectorに接続できるか | |
816 */ | |
817 | |
818 int | |
819 kan_ckvt (hinsi, vector) | |
820 register unsigned short hinsi; /* 品詞No. */ | |
821 register int vector; /* 付属語前端ベクタ */ | |
822 { | |
823 register int *v; | |
824 v = (int *) ((word_vector *) ft->kango_vect_area + vector); | |
825 return (kan_ck_vector (hinsi, v)); | |
826 /* | |
827 return (kan_ck_vector(hinsi, | |
828 ((word_vector *) ft->kango_vect_area + vector))); | |
829 */ | |
830 } | |
831 | |
832 #ifdef change_macro | |
833 int | |
834 kan_ck_vector (hinsi, vector) | |
835 unsigned short hinsi; /* 品詞No. */ | |
836 int vector[]; /* 付属語前端ベクタ */ | |
837 { | |
838 register int wvect; | |
839 | |
840 wvect = vector[hinsi / (sizeof (int) << 3)]; /* << 3 == * 8 */ | |
841 wvect >>= (hinsi % (sizeof (int) << 3)); | |
842 if ((wvect & 0x00000001) == 1) | |
843 return (WNN_CONNECT_BK); | |
844 else | |
845 return (WNN_NOT_CONNECT_BK); | |
846 } | |
847 #endif /* change_macro */ |