0
|
1 /*
|
|
2 * $Id: fzk.c,v 1.7 2003/05/11 19:01:06 hiroo Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2002, 2003
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This program is free software; you can redistribute it and/or modify
|
|
18 * it under the terms of the GNU General Public License as published by
|
|
19 * the Free Software Foundation; either version 2 of the License, or
|
|
20 * (at your option) any later version.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
30 */
|
|
31
|
|
32 #ifdef HAVE_CONFIG_H
|
|
33 # include <config.h>
|
|
34 #endif
|
|
35
|
|
36 #include <stdio.h>
|
|
37 #if STDC_HEADERS
|
|
38 # include <stdlib.h>
|
|
39 # include <string.h>
|
|
40 #else
|
|
41 # if HAVE_MALLOC_H
|
|
42 # include <malloc.h>
|
|
43 # endif
|
|
44 #endif /* STDC_HEADERS */
|
|
45
|
|
46 #include "commonhd.h"
|
|
47 #include "de_header.h"
|
|
48 #include "fzk.h"
|
|
49 #include "kaiseki.h"
|
|
50
|
|
/*
 * Character input helpers used by the fzk-file tokenizer.
 * With a non-NULL stdio stream `pt` the character is read from / pushed
 * back to that stream; with a NULL `pt` the xgetc_cur()/xungetc_cur()
 * pair is used instead.  Both macros evaluate `pt` more than once.
 */
#define vgetc(pt) ((pt) ? getc ((pt)) : xgetc_cur ())
/* do { } while (0) makes `vungetc (k, fp);` a single statement, so it is
   also safe as the body of an unbraced if/else. */
#define vungetc(k, pt) \
  do { if (pt) { ungetc ((k), (pt)); } else { xungetc_cur ((k)); } } while (0)
|
|
53
|
|
54 #ifndef NO_FZK
|
|
55 static void link_job (int, struct FT *);
|
|
56 static int fzk_ken (w_char *, w_char *, word_vector *, struct fzkken *);
|
|
57 static int setfzk (struct fzkentry *, struct fzkken **, word_vector *, int);
|
|
58 static struct fzkentry *bsrch (w_char *);
|
|
59 #endif
|
|
60 static int kng_ckvt (word_vector *, int);
|
|
61 static int fzk_ck_vector (word_vector *);
|
|
62 static void fzk_orvt (int *, int *);
|
|
63 static int bittest (int *, int);
|
|
64 static int error_fzk (void);
|
|
65 static int error_eof (void);
|
|
66 static int get_decimal (FILE *);
|
|
67 static int get_hexsa (FILE *);
|
|
68 static int get_string (FILE *, unsigned char *);
|
|
69 static int check_eof (FILE *);
|
|
70
|
|
/* Counts taken from the header of the most recently loaded fzk file
   (assigned in fzk_ld). */
#ifndef NO_FZK
static int fzkvect_kosuu;       /* number of fuzokugo vectors */
#endif /* NO_FZK */
static int kango_vect_kosuu;    /* number of kango vectors */
static int kango_length;        /* number of kango hinsi (parts of speech) */
|
|
76
|
|
/*
 * Format of the fuzokugo (attached-word) file
 *
 * Header, in the order fzk_ld() reads it:
 *      number of fuzokugo                      (ft->fzklength)
 *      length of a connection vector           (fzkvect_l)
 *      number of fuzokugo vectors              (fzkvect_kosuu)
 *      length of a kango vector                (kango_vect_l)          V4.0
 *      number of kango vectors                 (kango_vect_kosuu)      V4.0
 *      number of kango hinsi (parts of speech) (kango_length)          V4.0
 *
 * Fuzokugo entries:
 *      fuzokugo reading string         number of fuzokugo vectors
 *              No.     connection vector
 *              ...     ...
 *      fuzokugo reading string         number of fuzokugo vectors
 *              No.     connection vector
 *              ...     ...
 *      ...
 *
 * Terminal vectors:
 *      terminal vector
 *      ...
 *
 * Kango connection vectors:                                            V4.0
 *      kango connection vector No.     kango connection vector
 *          kango connection vector: kango_vect_l words,
 *          fuzokugo connection vector: fzk_vect_l words
 *          [31,30,...,1][63,62,....32]...
 *      ...     ...
 *
 * Kango hinsi table:                                                   V4.0
 *      kango No.       kango connection vector No.
 *      ...     ...
 *
 * Pseudo hinsi:                                                        V4.0
 *      pseudo hinsi No.   kango connection vector No.
 *      pseudo hinsi vector (JKTVECT_KOSUU)
 */
|
|
117
|
|
118
|
|
119 struct FT *
|
|
120 fzk_read (FILE *fp)
|
|
121 {
|
|
122 struct FT *fzk_tbl;
|
|
123
|
|
124 fzk_tbl = fzk_ld (fp);
|
|
125 if (fzk_tbl != NULL)
|
|
126 {
|
|
127 #ifndef NO_FZK
|
|
128 link_job (fzk_tbl->fzklength, fzk_tbl);
|
|
129 #endif
|
|
130 }
|
|
131 return (fzk_tbl);
|
|
132 }
|
|
133
|
|
134 struct FT *
|
|
135 fzk_ld (FILE *fp)
|
|
136 {
|
|
137 struct FT *fzk_tbl;
|
|
138 int k, l, m;
|
|
139 int vect_count = 0;
|
|
140 #ifndef NO_FZK
|
|
141 struct fzkentry *ptr;
|
|
142 unsigned char charyomi[(YOMI_L + 1) * 2];
|
|
143 int fzklength;
|
|
144 #endif /* NO_FZK */
|
|
145 int fzkvect_l;
|
|
146 int kango_vect_l;
|
|
147
|
|
148 #ifndef NO_FZK
|
|
149 fzklength = get_decimal (fp); /* 付属語の個数 */
|
|
150 #endif /* NO_FZK */
|
|
151 fzkvect_l = get_decimal (fp); /* 付属語ベクタの長さ */
|
|
152 #ifndef NO_FZK
|
|
153 fzkvect_kosuu = get_decimal (fp); /* 付属語ベクタの個数 */
|
|
154 #endif /* NO_FZK */
|
|
155 kango_vect_l = get_decimal (fp); /* 幹語ベクタの長さ */
|
|
156 kango_vect_kosuu = get_decimal (fp); /* 幹語ベクタの個数 */
|
|
157 kango_length = get_decimal (fp); /* 幹語の品詞数 */
|
|
158
|
|
159 if (fzkvect_l > VECT_L)
|
|
160 {
|
|
161 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
162 log_err ("Sorry, your fuzokugo bit data has too many id.");
|
|
163 log_err ("Please change define VECT_L %d and compile again.", fzkvect_l);
|
|
164 return (NULL);
|
|
165 }
|
|
166
|
|
167 if (kango_vect_l > KANGO_VECT_L)
|
|
168 {
|
|
169 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
170 log_err ("Sorry, your kango bit data has too many id.");
|
|
171 log_err ("Please change define KANGO_VECT_L %d and compile again.", kango_vect_l);
|
|
172 return (NULL);
|
|
173 }
|
|
174
|
|
175 if (kango_length > KANGO_HINSI_MX)
|
|
176 {
|
|
177 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
178 log_err ("Sorry, your kango bit data has too many id.");
|
|
179 log_err ("Please change define KANGO_MX %d and compile again.", kango_length);
|
|
180 return (NULL);
|
|
181 }
|
|
182 if (kango_vect_kosuu > kango_length)
|
|
183 {
|
|
184 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
185 log_err ("KANGO vector kosuu is more than the number of KANGO hinsi.");
|
|
186 return (NULL);
|
|
187 }
|
|
188 if ((fzk_tbl = (struct FT *) malloc (sizeof (struct FT))) == NULL)
|
|
189 {
|
|
190 wnn_errorno = WNN_MALLOC_ERR;
|
|
191 log_err ("fzk_ld: malloc error.");
|
|
192 return (NULL);
|
|
193 }
|
|
194 /* clear struct */
|
|
195 bzero (fzk_tbl, sizeof (struct FT));
|
|
196
|
|
197 fzk_tbl->kango_hinsi_area = NULL;
|
|
198 fzk_tbl->kango_vect_area = NULL;
|
|
199 fzk_tbl->fzkvect_l = fzkvect_l;
|
|
200 fzk_tbl->kango_vect_l = kango_vect_l;
|
|
201 #ifndef NO_FZK
|
|
202 fzk_tbl->vect_area = NULL;
|
|
203 fzk_tbl->tablefuzokugo = NULL;
|
|
204 fzk_tbl->fzklength = fzklength;
|
|
205
|
|
206 if ((fzk_tbl->vect_area = (fzkvect *) calloc (fzkvect_kosuu, sizeof (fzkvect))) == NULL)
|
|
207 {
|
|
208 wnn_errorno = WNN_MALLOC_ERR;
|
|
209 log_err ("fzk_ld: malloc error.");
|
|
210 fzk_discard (fzk_tbl);
|
|
211 return (NULL);
|
|
212 }
|
|
213 #endif /* NO_FZK */
|
|
214
|
|
215 if ((fzk_tbl->kango_hinsi_area = (int *) calloc (kango_length, sizeof (int))) == NULL)
|
|
216 {
|
|
217 wnn_errorno = WNN_MALLOC_ERR;
|
|
218 error1 ("malloc error in fzk\n");
|
|
219 fzk_discard (fzk_tbl);
|
|
220 return (NULL);
|
|
221 }
|
|
222 if ((fzk_tbl->kango_vect_area = (word_vector *) calloc ((kango_vect_kosuu + SV_KOSUU), sizeof (word_vector))) == NULL)
|
|
223 {
|
|
224 wnn_errorno = WNN_MALLOC_ERR;
|
|
225 log_err ("fzk_ld: malloc error.");
|
|
226 fzk_discard (fzk_tbl);
|
|
227 return (NULL);
|
|
228 }
|
|
229
|
|
230 #ifndef NO_FZK
|
|
231 if ((fzk_tbl->tablefuzokugo = (struct fzkentry *) calloc (fzk_tbl->fzklength, sizeof (struct fzkentry))) == NULL)
|
|
232 {
|
|
233 wnn_errorno = WNN_MALLOC_ERR;
|
|
234 log_err ("fzk_ld: malloc error.");
|
|
235 fzk_discard (fzk_tbl);
|
|
236 return (NULL);
|
|
237 }
|
|
238 ptr = fzk_tbl->tablefuzokugo;
|
|
239
|
|
240 for (m = 0; m < fzk_tbl->fzklength; m++, ptr++)
|
|
241 {
|
|
242 get_string (fp, charyomi);
|
|
243 (void) Sstrcpy (ptr->yomi, charyomi);
|
|
244 ptr->yomi_su = Strlen (ptr->yomi);
|
|
245 ptr->kosu = get_decimal (fp);
|
|
246 ptr->pter = &fzk_tbl->vect_area[vect_count];
|
|
247 ptr->link = NULL;
|
|
248 for (l = 0; l < ptr->kosu; l++, vect_count++)
|
|
249 {
|
|
250 fzk_tbl->vect_area[vect_count].no = get_decimal (fp);
|
|
251 for (k = 0; k < fzkvect_l; k++)
|
|
252 {
|
|
253 fzk_tbl->vect_area[vect_count].vector[k] = get_hexsa (fp);
|
|
254 }
|
|
255 }
|
|
256 }
|
|
257 #endif /* !NO_FZK */
|
|
258 /* 終端 vector を SV_KOSUU 個読み取る */
|
|
259
|
|
260 for (l = 0; l < SV_KOSUU; l++)
|
|
261 {
|
|
262 for (k = 0; k < fzkvect_l; k++)
|
|
263 {
|
|
264 fzk_tbl->kango_vect_area[l].vector[k] = get_hexsa (fp);
|
|
265 }
|
|
266 }
|
|
267 /* 幹語接続ベクタを読み取る */
|
|
268 /* 幹語前端接続ベクタNo. - 幹語前端接続ベクタ */
|
|
269 for (m = 0; m < kango_vect_kosuu; m++)
|
|
270 {
|
|
271 vect_count = get_decimal (fp);
|
|
272 for (k = 0; k < fzkvect_l; k++)
|
|
273 {
|
|
274 fzk_tbl->kango_vect_area[SV_KOSUU + vect_count].vector[k] = get_hexsa (fp);
|
|
275 }
|
|
276 }
|
|
277 /* 幹語品詞No. - 幹語前端接続ベクタNo. */
|
|
278 for (m = 0; m < kango_length; m++)
|
|
279 {
|
|
280 vect_count = get_decimal (fp);
|
|
281 if ((k = get_decimal (fp)) != -1)
|
|
282 {
|
|
283 fzk_tbl->kango_hinsi_area[vect_count] = SV_KOSUU + k;
|
|
284 }
|
|
285 else
|
|
286 {
|
|
287 fzk_tbl->kango_hinsi_area[vect_count] = -1;
|
|
288 }
|
|
289 }
|
|
290
|
|
291 if (check_eof (fp) < 0)
|
|
292 {
|
|
293 wnn_errorno = WNN_BAD_FZK_FILE;
|
|
294 return (NULL);
|
|
295 }
|
|
296
|
|
297 /* 疑似品詞番号を調べる */
|
|
298 #ifdef nodef
|
|
299 if ((sentou_no = wnn_find_hinsi_by_name (WNN_SENTOU_MEI)) == -1)
|
|
300 {
|
|
301 giji_hinsi_err (WNN_SENTOU_MEI);
|
|
302 return (NULL);
|
|
303 }
|
|
304 if ((suuji_no = wnn_find_hinsi_by_name (WNN_SUUJI_MEI)) == -1)
|
|
305 {
|
|
306 giji_hinsi_err ("SENTOU");
|
|
307 return (NULL);
|
|
308 }
|
|
309 if ((katakanago_no = wnn_find_hinsi_by_name (WNN_KANA_MEI)) == -1)
|
|
310 {
|
|
311 giji_hinsi_err ("KANA");
|
|
312 return (NULL);
|
|
313 }
|
|
314 if ((eisuu_no = wnn_find_hinsi_by_name (WNN_EISUU_MEI)) == -1)
|
|
315 {
|
|
316 giji_hinsi_err ("EISUU");
|
|
317 return (NULL);
|
|
318 }
|
|
319 if ((kigou_no = wnn_find_hinsi_by_name (WNN_KIGOU_MEI)) == -1)
|
|
320 {
|
|
321 giji_hinsi_err ("KIGOU");
|
|
322 return (NULL);
|
|
323 }
|
|
324 if ((toji_kakko_no = wnn_find_hinsi_by_name (WNN_TOJIKAKKO_MEI)) == -1)
|
|
325 {
|
|
326 giji_hinsi_err ("TOJI_KKAKO");
|
|
327 return (NULL);
|
|
328 }
|
|
329 if ((fuzokugo_no = wnn_find_hinsi_by_name (WNN_FUZOKUGO_MEI)) == -1)
|
|
330 {
|
|
331 giji_hinsi_err ("FUZOKUGO");
|
|
332 return (NULL);
|
|
333 }
|
|
334 if ((kai_kakko_no = wnn_find_hinsi_by_name (WNN_KAIKKAKO_MEI)) == -1)
|
|
335 {
|
|
336 giji_hinsi_err ("KAIKAKKO");
|
|
337 return (NULL);
|
|
338 }
|
|
339 if ((giji_no = wnn_find_hinsi_by_name (WNN_GIJI_MEI)) == -1)
|
|
340 {
|
|
341 giji_hinsi_err ("GIJI");
|
|
342 return (NULL);
|
|
343 }
|
|
344 #endif /* nodef */
|
|
345 return (fzk_tbl);
|
|
346 }
|
|
347
|
|
348 #ifdef nodef
|
|
349 void
|
|
350 giji_hinsi_err (char *str)
|
|
351 {
|
|
352 wnn_errorno = WNN_GIJI_HINSI_ERR;
|
|
353 log_err ("GIJI hinsi (%s) is not defined in hinsi data file.", str);
|
|
354 }
|
|
355 #endif /* nodef */
|
|
356
|
|
357 void
|
|
358 fzk_discard (struct FT *fzk_tbl)
|
|
359 {
|
|
360 if (fzk_tbl->kango_hinsi_area != NULL)
|
|
361 free (fzk_tbl->kango_hinsi_area);
|
|
362 if (fzk_tbl->kango_vect_area != NULL)
|
|
363 free (fzk_tbl->kango_vect_area);
|
|
364 #ifndef NO_FZK
|
|
365 if (fzk_tbl->vect_area != NULL)
|
|
366 free (fzk_tbl->vect_area);
|
|
367 if (fzk_tbl->tablefuzokugo != NULL)
|
|
368 free (fzk_tbl->tablefuzokugo);
|
|
369 #endif
|
|
370 free (fzk_tbl);
|
|
371 }
|
|
372
|
|
373 #ifndef NO_FZK
|
|
374 static void
|
|
375 link_job (int x, struct FT *fzk_tbl)
|
|
376 {
|
|
377 int n;
|
|
378 struct fzkentry *pter_a, *pter_b;
|
|
379
|
|
380 for (pter_a = fzk_tbl->tablefuzokugo, x--, n = 0; n < x; n++, pter_a++)
|
|
381 {
|
|
382 for (pter_b = pter_a + 1; pter_b <= fzk_tbl->tablefuzokugo + x && Strncmp (pter_a->yomi, pter_b->yomi, pter_a->yomi_su) == 0; pter_b++)
|
|
383 pter_b->link = pter_a;
|
|
384 }
|
|
385 }
|
|
386 #endif
|
|
387
|
|
388
|
|
389 /*
|
|
390 * fzk_kai : fuzokugo kaiseki
|
|
391 */
|
|
392
|
|
393 int
|
|
394 fzk_kai (w_char *start, /* string start pointer */
|
|
395 w_char *end, /* string end pointer */
|
|
396 int syuutan_vect, /* 文節終端 vector */
|
|
397 int syuutan_vect1, /* 文節終端 vector 1 */
|
|
398 struct ICHBNP **ichbnp_p) /* ich-bunpou area 付属語候補 set pointer pointer */
|
|
399 {
|
|
400 #ifndef NO_FZK
|
|
401 static word_vector fzkwk[STRK_L + 1]; /* 付属語解析 work area */
|
|
402 static word_vector fzkwk1[STRK_L + 1]; /* 付属語解析 work area */
|
|
403 static int maxpoint = STRK_L;
|
|
404
|
|
405 struct fzkken fzkinf[YOMI_L + 1]; /* 検索 work area */
|
|
406 struct fzkken fzkinf1[YOMI_L + 1]; /* 検索 work area */
|
|
407 struct fzkken *fzkinfp;
|
|
408 struct ICHBNP *ichbnptr = NULL;
|
|
409 struct ICHBNP *wkptr = NULL;
|
|
410
|
|
411 int point; /* index */
|
|
412 int i, j; /* work index */
|
|
413 int cnt, n; /* counter */
|
|
414 word_vector *endvect; /* 文節終端 vector pointer */
|
|
415 word_vector *endvect1; /* 文節終端 vector 1 */
|
|
416
|
|
417 int fzkvect_l = ft->fzkvect_l;
|
|
418 int kango_vect_l = ft->kango_vect_l;
|
|
419
|
|
420 endvect = (word_vector *) ft->kango_vect_area + syuutan_vect;
|
|
421 if (syuutan_vect1 != WNN_VECT_NO)
|
|
422 endvect1 = (word_vector *) ft->kango_vect_area + syuutan_vect1;
|
|
423 else
|
|
424 endvect1 = NULL;
|
|
425
|
|
426 /*
|
|
427 * initialize
|
|
428 */
|
|
429 for (i = maxpoint; i >= 0; i--)
|
|
430 {
|
|
431 for (j = VECT_L - 1; j >= 0; j--)
|
|
432 {
|
|
433 fzkwk[i].vector[j] = 0;
|
|
434 fzkwk1[i].vector[j] = 0;
|
|
435 }
|
|
436 }
|
|
437 maxpoint = 0;
|
|
438 for (j = 0; j < fzkvect_l; j++)
|
|
439 {
|
|
440 fzkwk[0].vector[j] = endvect->vector[j];
|
|
441 if (endvect1 != 0)
|
|
442 fzkwk1[0].vector[j] = endvect1->vector[j];
|
|
443 }
|
|
444 /*
|
|
445 * 付属語解析
|
|
446 */
|
|
447
|
|
448 for (point = 0; point <= maxpoint; point++)
|
|
449 {
|
|
450 if (point > STRK_L)
|
|
451 {
|
|
452 wnn_errorno = WNN_WKAREA_FULL;
|
|
453 log_err ("fzk_kai: fuzokugo-kaiseki area is full.");
|
|
454 return (-1);
|
|
455 }
|
|
456 if (fzk_ck_vector (&fzkwk[point]))
|
|
457 {
|
|
458 /* 付属語検索 vector */
|
|
459 fzk_ken (start + point, end, &fzkwk[point], &fzkinf[0]);
|
|
460 for (fzkinfp = &fzkinf[0]; fzkinfp->ent_ptr; fzkinfp++)
|
|
461 {
|
|
462 n = (fzkinfp->ent_ptr)->yomi_su;
|
|
463 fzk_orvt (fzkwk[point + n].vector, fzkinfp->vector);
|
|
464 maxpoint = (maxpoint < point + n) ? point + n : maxpoint;
|
|
465 }
|
|
466 }
|
|
467 if (fzk_ck_vector (&fzkwk1[point]))
|
|
468 {
|
|
469 /* 付属語検索 vector 1 */
|
|
470 fzk_ken (start + point, end, &fzkwk1[point], &fzkinf1[0]);
|
|
471 for (fzkinfp = &fzkinf1[0]; fzkinfp->ent_ptr; fzkinfp++)
|
|
472 {
|
|
473 n = (fzkinfp->ent_ptr)->yomi_su;
|
|
474 fzk_orvt (fzkwk1[point + n].vector, fzkinfp->vector);
|
|
475 maxpoint = (maxpoint < point + n) ? point + n : maxpoint;
|
|
476 }
|
|
477 }
|
|
478 }
|
|
479
|
|
480
|
|
481 /*
|
|
482 * 付属語候補 set
|
|
483 */
|
|
484 for (point = cnt = 0, *ichbnp_p = NULL; point <= maxpoint; point++)
|
|
485 {
|
|
486 if (kng_ckvt (&fzkwk[point], kango_vect_l) ||
|
|
487 kng_ckvt (&fzkwk1[point], kango_vect_l))
|
|
488 {
|
|
489 if (!(n = cnt % FZKIBNO))
|
|
490 {
|
|
491 if ((wkptr = getibsp ()) == NULL)
|
|
492 {
|
|
493 if (*ichbnp_p != NULL)
|
|
494 (void) freeibsp (*ichbnp_p);
|
|
495 *ichbnp_p = NULL;
|
|
496 log_err ("fzk_kai: error.");
|
|
497 return (-1);
|
|
498 }
|
|
499 else
|
|
500 {
|
|
501 if (*ichbnp_p == NULL)
|
|
502 *ichbnp_p = wkptr;
|
|
503 else
|
|
504 ichbnptr->next_p = wkptr;
|
|
505 ichbnptr = wkptr;
|
|
506 }
|
|
507 }
|
|
508 ichbnptr->fzkib[n].offset = point;
|
|
509 for (i = 0; i < kango_vect_l; i++)
|
|
510 {
|
|
511 ichbnptr->fzkib[n].vector[i] = fzkwk[point].vector[i];
|
|
512 ichbnptr->fzkib1[n].vector[i] = fzkwk1[point].vector[i];
|
|
513 }
|
|
514 cnt++;
|
|
515 }
|
|
516 }
|
|
517 return (cnt);
|
|
518 #else /* NO_FZK */
|
|
519 struct ICHBNP *wkptr;
|
|
520
|
|
521 int i; /* work index */
|
|
522 word_vector *endvect; /* 文節終端 vector pointer */
|
|
523 word_vector *endvect1; /* 文節終端 vector 1 */
|
|
524
|
|
525 int kango_vect_l = ft->kango_vect_l;
|
|
526
|
|
527 endvect = (word_vector *) ft->kango_vect_area + syuutan_vect;
|
|
528 if (syuutan_vect1 != WNN_VECT_NO)
|
|
529 endvect1 = (word_vector *) ft->kango_vect_area + syuutan_vect1;
|
|
530 else
|
|
531 endvect1 = NULL;
|
|
532
|
|
533 *ichbnp_p = NULL;
|
|
534 if (kng_ckvt (endvect, kango_vect_l) || kng_ckvt (endvect1, kango_vect_l))
|
|
535 {
|
|
536 if ((wkptr = getibsp ()) == NULL)
|
|
537 {
|
|
538 log_err ("fzk_kai: error.");
|
|
539 return (-1);
|
|
540 }
|
|
541 else
|
|
542 {
|
|
543 *ichbnp_p = wkptr;
|
|
544 }
|
|
545 wkptr->fzkib[0].offset = 0;
|
|
546 for (i = 0; i < kango_vect_l; i++)
|
|
547 {
|
|
548 wkptr->fzkib[0].vector[i] = endvect->vector[i];
|
|
549 if (endvect1 != NULL)
|
|
550 wkptr->fzkib1[0].vector[i] = endvect1->vector[i];
|
|
551 }
|
|
552 }
|
|
553 return (1);
|
|
554 #endif /* NO_FZK */
|
|
555 }
|
|
556
|
|
557
|
|
558 /*
|
|
559 * kng_ckvt 幹語ベクタのチェック V4.0
|
|
560 */
|
|
561
|
|
562 static int
|
|
563 kng_ckvt (word_vector *wv, int kango_vect_l)
|
|
564 {
|
|
565 int i, rts;
|
|
566 int *v = wv->vector;
|
|
567
|
|
568 for (rts = 0, i = kango_vect_l; i > 0; i--)
|
|
569 rts |= *v++; /* OR cheak */
|
|
570 return (rts);
|
|
571 }
|
|
572
|
|
573
|
|
574 /*
|
|
575 * fzk_ckvt
|
|
576 */
|
|
577 int
|
|
578 fzk_ckvt (int vector)
|
|
579 {
|
|
580 return vector >= 0 &&
|
|
581 fzk_ck_vector (&ft->kango_vect_area[vector]);
|
|
582 }
|
|
583
|
|
584
|
|
585 static int
|
|
586 fzk_ck_vector (word_vector *wv)
|
|
587 {
|
|
588 int i, rts;
|
|
589 int *v = wv->vector;
|
|
590
|
|
591 rts = *v++ & ~0x01; /* 「先頭可」のビットを除く */
|
|
592 for (i = ft->fzkvect_l - 1; i > 0; i--)
|
|
593 rts |= *v++; /* OR cheak */
|
|
594 return (rts);
|
|
595 }
|
|
596
|
|
597
|
|
598 /*
|
|
599 * fzk_orvt
|
|
600 */
|
|
601
|
|
602 static void
|
|
603 fzk_orvt (int *vector1, int *vector2)
|
|
604 {
|
|
605 int i;
|
|
606
|
|
607 for (i = ft->fzkvect_l; i > 0; i--)
|
|
608 *vector1++ |= *vector2++; /* OR set */
|
|
609 }
|
|
610
|
|
611 #ifndef NO_FZK
|
|
612 static int
|
|
613 fzk_ken (w_char *start, w_char *end, word_vector *wv, struct fzkken *fzkptr)
|
|
614 {
|
|
615 int yomicnt;
|
|
616 int setno;
|
|
617 struct fzkken *ansptr;
|
|
618 struct fzkentry *search_ptr;
|
|
619 w_char key[YOMI_L + 1];
|
|
620 int fzkvect_l = ft->fzkvect_l;
|
|
621
|
|
622 ansptr = fzkptr;
|
|
623 for (yomicnt = end - start; yomicnt < YOMI_L; key[yomicnt++] = 0)
|
|
624 ;
|
|
625 for (yomicnt = 0; (yomicnt < YOMI_L) && (start < end); key[yomicnt++] = *start++)
|
|
626 ;
|
|
627 key[yomicnt] = 0;
|
5
|
628 for (; yomicnt > 0; key[--yomicnt] = 0)
|
0
|
629 {
|
|
630 search_ptr = (struct fzkentry *) bsrch (key);
|
|
631 if (search_ptr != NULL)
|
|
632 {
|
|
633 setno = setfzk (search_ptr, &ansptr, wv, fzkvect_l);
|
|
634 ansptr->ent_ptr = NULL;
|
|
635 return (setno);
|
|
636 }
|
|
637 }
|
|
638 fzkptr->ent_ptr = NULL;
|
|
639 return (NO);
|
|
640 }
|
|
641
|
|
642 /******************************************/
|
|
643 static int
|
|
644 setfzk (struct fzkentry *entry_ptr,
|
|
645 struct fzkken **answer_ptr,
|
|
646 word_vector *wv,
|
|
647 int fzkvect_l)
|
|
648 {
|
|
649 int setno;
|
|
650 int vectroop;
|
|
651 int wcnt, setflg;
|
|
652 fzkvect *vect_ptr;
|
|
653
|
|
654 if (entry_ptr != NULL)
|
|
655 {
|
|
656 setno = setfzk (entry_ptr->link, answer_ptr, wv, fzkvect_l);
|
|
657 for (wcnt = 0; wcnt < fzkvect_l; (*answer_ptr)->vector[wcnt++] = 0);
|
|
658 setflg = 0;
|
|
659 for (vectroop = entry_ptr->kosu, vect_ptr = entry_ptr->pter; vectroop > 0; vectroop--, vect_ptr++)
|
|
660 {
|
|
661 if (bittest (wv->vector, vect_ptr->no) > 0)
|
|
662 {
|
|
663 setflg = 1;
|
|
664 for (wcnt = 0; wcnt < fzkvect_l; wcnt++)
|
|
665 (*answer_ptr)->vector[wcnt] |= vect_ptr->vector[wcnt];
|
|
666 }
|
|
667 }
|
|
668 if (setflg != 0)
|
|
669 {
|
|
670 (*answer_ptr)->ent_ptr = entry_ptr;
|
|
671 (*answer_ptr)++;
|
|
672 return (setno + 1);
|
|
673 }
|
|
674 else
|
|
675 return (setno);
|
|
676 }
|
|
677 else
|
|
678 return (0);
|
|
679 }
|
|
680 #endif /* NO_FZK */
|
|
681
|
|
682 /***************************************************/
|
|
/*
 * bittest: test bit number `no` of the bit set stored in `vector`, where
 * bit 0 is the least significant bit of vector[0] and every word holds
 * sizeof (int) * 8 bits.
 *
 * Returns 1 when the bit is set and -1 when it is clear.  A negative `no`
 * names no bit and yields -1 (it used to be converted to a huge unsigned
 * index).  The caller must make sure `vector` is long enough for `no`.
 */
static int
bittest (int vector[], int no)
{
  unsigned int word;

  if (no < 0)
    return (-1);

  /* shift as unsigned so that the sign bit is handled portably */
  word = (unsigned int) vector[no / (sizeof (int) * 8)];
  word >>= (unsigned int) (no % (sizeof (int) * 8));
  if ((word & 0x00000001u) == 1u)
    return (1);
  else
    return (-1);
}
|
|
695
|
|
696 /*
|
|
697 int Strncmp(w_char *s1, w_char *s2, int n)
|
|
698 {
|
|
699 if(n == 0)return(0);
|
|
700 for (;n > 0 && *s1++ == *s2++;n--);
|
|
701 return (int)(*--s1 - *--s2);
|
|
702 }
|
|
703 */
|
|
704
|
|
705 #ifndef NO_FZK
|
|
706 static struct fzkentry *
|
|
707 bsrch (w_char *key_yomi)
|
|
708 {
|
|
709 int low, high, j, flg;
|
|
710
|
|
711 for (low = 0, high = ft->fzklength; low < high;)
|
|
712 {
|
|
713 j = (high + low) >> 1;
|
|
714 /*
|
|
715 flg = Strncmp(key_yomi, (ft->tablefuzokugo + j)->yomi, YOMI_L);
|
|
716 */
|
|
717 {
|
|
718 int n;
|
|
719 w_char *s1, *s2;
|
|
720 s1 = key_yomi;
|
|
721 s2 = (ft->tablefuzokugo + j)->yomi;
|
|
722 for (n = YOMI_L; n > 0 && *s1++ == *s2++; n--);
|
|
723 flg = (int) (*--s1 - *--s2);
|
|
724 }
|
|
725 if (flg > 0)
|
|
726 low = j + 1;
|
|
727 else if (flg < 0)
|
|
728 high = j;
|
|
729 else
|
|
730 return (ft->tablefuzokugo + j);
|
|
731 }
|
|
732 return (0);
|
|
733 }
|
|
734 #endif
|
|
735
|
|
736 static int
|
|
737 error_fzk (void)
|
|
738 {
|
|
739 wnn_errorno = WNN_NOT_FZK_FILE;
|
|
740 log_err ("Bad format in fzk_file.");
|
|
741 return (-1);
|
|
742 }
|
|
743
|
|
744 static int
|
|
745 error_eof (void)
|
|
746 {
|
|
747 wnn_errorno = WNN_NOT_FZK_FILE;
|
|
748 log_err ("Unecpected EOF in reading fzk_file.");
|
|
749 return (-1);
|
|
750 }
|
|
751
|
|
752 static int
|
|
753 get_decimal (FILE *fp)
|
|
754 {
|
|
755 unsigned char buf[24];
|
|
756 int k;
|
|
757 if (get_string (fp, buf) == EOF)
|
|
758 {
|
|
759 return (error_eof ());
|
|
760 }
|
|
761 if (sscanf ((char *) buf, "%d", &k) != 1)
|
|
762 {
|
|
763 return (error_fzk ());
|
|
764 }
|
|
765 return (k);
|
|
766 }
|
|
767
|
|
768 static int
|
|
769 get_hexsa (FILE *fp)
|
|
770 {
|
|
771 unsigned char buf[24];
|
|
772 int k;
|
|
773 if (get_string (fp, buf) == EOF)
|
|
774 {
|
|
775 return (error_eof ());
|
|
776 }
|
|
777 if (sscanf ((char *) buf, "%x", &k) != 1)
|
|
778 {
|
|
779 return (error_fzk ());
|
|
780 }
|
|
781 return (k);
|
|
782 }
|
|
783
|
|
784 static int
|
|
785 get_string (FILE *fp, unsigned char *buf)
|
|
786 {
|
|
787 unsigned char *c = buf;
|
|
788 int k;
|
|
789 for (; (k = vgetc (fp)) == ';' || k == '\n' || k == '\t' || k == ' ';)
|
|
790 {
|
|
791 if (k == ';')
|
|
792 {
|
|
793 for (; (k = vgetc (fp)) != '\n';)
|
|
794 {
|
|
795 if (k == EOF)
|
|
796 {
|
|
797 return (EOF);
|
|
798 }
|
|
799 }
|
|
800 }
|
|
801 }
|
|
802 if (k == EOF)
|
|
803 {
|
|
804 return (EOF);
|
|
805 }
|
|
806 vungetc (k, fp);
|
|
807 for (; (k = vgetc (fp)) != ';' && k != '\n' && k != '\t' && k != ' ' && k != EOF;)
|
|
808 {
|
|
809 *c++ = k;
|
|
810 }
|
|
811 *c = '\0';
|
|
812 return (0); /* not EOF */
|
|
813 }
|
|
814
|
|
815 static int
|
|
816 check_eof (FILE *fp)
|
|
817 {
|
|
818 unsigned char buf[24];
|
|
819 if (get_string (fp, buf) != EOF)
|
|
820 {
|
|
821 wnn_errorno = WNN_NOT_FZK_FILE;
|
|
822 log_err ("Not at the end of fzk_file.");
|
|
823 return (-1);
|
|
824 }
|
|
825 return (0);
|
|
826 }
|
|
827
|