0
|
1 /*
|
|
2 * $Id: fzk.c,v 1.7 2003/05/11 19:01:06 hiroo Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2002, 2003
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This program is free software; you can redistribute it and/or modify
|
|
18 * it under the terms of the GNU General Public License as published by
|
|
19 * the Free Software Foundation; either version 2 of the License, or
|
|
20 * (at your option) any later version.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
30 */
|
|
31
|
|
32 static char rcs_id[] = "$Id: fzk.c,v 1.7 2003/05/11 19:01:06 hiroo Exp $";
|
|
33
|
|
34 #ifdef HAVE_CONFIG_H
|
|
35 # include <config.h>
|
|
36 #endif
|
|
37
|
|
38 #include <stdio.h>
|
|
39 #if STDC_HEADERS
|
|
40 # include <stdlib.h>
|
|
41 # include <string.h>
|
|
42 #else
|
|
43 # if HAVE_MALLOC_H
|
|
44 # include <malloc.h>
|
|
45 # endif
|
|
46 #endif /* STDC_HEADERS */
|
|
47
|
|
48 #include "commonhd.h"
|
|
49 #include "de_header.h"
|
|
50 #include "fzk.h"
|
|
51 #include "kaiseki.h"
|
|
52
|
|
/*
 * Character input helpers.  When `pt` is a non-NULL stdio stream the
 * character is read from / pushed back to that stream; otherwise the call
 * goes through xgetc_cur() / xungetc_cur().
 * Both macros evaluate `pt` more than once, so pass an expression without
 * side effects.
 */
#define vgetc(pt) ((pt) ? getc ((pt)) : xgetc_cur ())
/* do { } while (0) makes the macro behave as one statement, so it is safe
   in an unbraced if/else followed by a semicolon. */
#define vungetc(k, pt) \
  do { if (pt) { ungetc ((k), (pt)); } else { xungetc_cur (k); } } while (0)
|
|
55
|
|
56 #ifndef NO_FZK
|
|
57 static void link_job (int, struct FT *);
|
|
58 static int fzk_ken (w_char *, w_char *, word_vector *, struct fzkken *);
|
|
59 static int setfzk (struct fzkentry *, struct fzkken **, word_vector *, int);
|
|
60 static struct fzkentry *bsrch (w_char *);
|
|
61 #endif
|
|
62 static int kng_ckvt (word_vector *, int);
|
|
63 static int fzk_ck_vector (word_vector *);
|
|
64 static void fzk_orvt (int *, int *);
|
|
65 static int bittest (int *, int);
|
|
66 static int error_fzk (void);
|
|
67 static int error_eof (void);
|
|
68 static int get_decimal (FILE *);
|
|
69 static int get_hexsa (FILE *);
|
|
70 static int get_string (FILE *, unsigned char *);
|
|
71 static int check_eof (FILE *);
|
|
72
|
|
73 #ifndef NO_FZK
|
|
74 static int fzkvect_kosuu; /* 付属語ベクタ数 */
|
|
75 #endif /* NO_FZK */
|
|
76 static int kango_vect_kosuu; /* 幹語ベクタの数 */
|
|
77 static int kango_length; /* 幹語数 */
|
|
78
|
|
79 /*
|
|
80 * 付属語ファイルのフォーマット
|
|
81
|
|
82 付属語の個数 (ft->fzklength)
|
|
83 接続ベクタの長さ (fzkvect_l)
|
|
84 付属語ベクタの個数 (fzkvect_kosuu)
|
|
85 幹語ベクタの長さ (kango_vect_l) V4.0
|
|
86 幹語の品詞数 (kango_length) V4.0
|
|
87 幹語ベクタの数 (kango_vect_kosuu) V4.0
|
|
88
|
|
89 付属語読み文字列 付属語ベクタ数
|
|
90 No. 接続ベクタ
|
|
91 … …
|
|
92 付属語読み文字列 付属語ベクタ数
|
|
93 No. 接続ベクタ
|
|
94 … …
|
|
95 … …
|
|
96 …
|
|
97 … …
|
|
98 〜
|
|
99
|
|
100 終端ベクタ
|
|
101 …
|
|
102 …
|
|
103 〜
|
|
104
|
|
105 幹語接続ベクタNo. 幹語接続ベクタ V4.0
|
|
106 幹語接続ベクタ kango_vect_l 付属語接続ベクタ fzk_vect_l
|
|
107 [31,30,...,1][63,62,....32]...
|
|
108 … …
|
|
109 … …
|
|
110 〜
|
|
111
|
|
112 幹語No. 幹語接続ベクタNo. V4.0
|
|
113 … …
|
|
114 … …
|
|
115 〜
|
|
116
|
|
117 疑似品詞No. 幹語接続ベクタNo. 疑似品詞ベクタ (JKTVECT_KOSUU) V4.0
|
|
118 */
|
|
119
|
|
120
|
|
121 struct FT *
|
|
122 fzk_read (FILE *fp)
|
|
123 {
|
|
124 struct FT *fzk_tbl;
|
|
125
|
|
126 fzk_tbl = fzk_ld (fp);
|
|
127 if (fzk_tbl != NULL)
|
|
128 {
|
|
129 #ifndef NO_FZK
|
|
130 link_job (fzk_tbl->fzklength, fzk_tbl);
|
|
131 #endif
|
|
132 }
|
|
133 return (fzk_tbl);
|
|
134 }
|
|
135
|
|
136 struct FT *
|
|
137 fzk_ld (FILE *fp)
|
|
138 {
|
|
139 struct FT *fzk_tbl;
|
|
140 int k, l, m;
|
|
141 int vect_count = 0;
|
|
142 #ifndef NO_FZK
|
|
143 struct fzkentry *ptr;
|
|
144 unsigned char charyomi[(YOMI_L + 1) * 2];
|
|
145 int fzklength;
|
|
146 #endif /* NO_FZK */
|
|
147 int fzkvect_l;
|
|
148 int kango_vect_l;
|
|
149
|
|
150 #ifndef NO_FZK
|
|
151 fzklength = get_decimal (fp); /* 付属語の個数 */
|
|
152 #endif /* NO_FZK */
|
|
153 fzkvect_l = get_decimal (fp); /* 付属語ベクタの長さ */
|
|
154 #ifndef NO_FZK
|
|
155 fzkvect_kosuu = get_decimal (fp); /* 付属語ベクタの個数 */
|
|
156 #endif /* NO_FZK */
|
|
157 kango_vect_l = get_decimal (fp); /* 幹語ベクタの長さ */
|
|
158 kango_vect_kosuu = get_decimal (fp); /* 幹語ベクタの個数 */
|
|
159 kango_length = get_decimal (fp); /* 幹語の品詞数 */
|
|
160
|
|
161 if (fzkvect_l > VECT_L)
|
|
162 {
|
|
163 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
164 log_err ("Sorry, your fuzokugo bit data has too many id.");
|
|
165 log_err ("Please change define VECT_L %d and compile again.", fzkvect_l);
|
|
166 return (NULL);
|
|
167 }
|
|
168
|
|
169 if (kango_vect_l > KANGO_VECT_L)
|
|
170 {
|
|
171 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
172 log_err ("Sorry, your kango bit data has too many id.");
|
|
173 log_err ("Please change define KANGO_VECT_L %d and compile again.", kango_vect_l);
|
|
174 return (NULL);
|
|
175 }
|
|
176
|
|
177 if (kango_length > KANGO_HINSI_MX)
|
|
178 {
|
|
179 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
180 log_err ("Sorry, your kango bit data has too many id.");
|
|
181 log_err ("Please change define KANGO_MX %d and compile again.", kango_length);
|
|
182 return (NULL);
|
|
183 }
|
|
184 if (kango_vect_kosuu > kango_length)
|
|
185 {
|
|
186 wnn_errorno = WNN_FZK_TOO_DEF;
|
|
187 log_err ("KANGO vector kosuu is more than the number of KANGO hinsi.");
|
|
188 return (NULL);
|
|
189 }
|
|
190 if ((fzk_tbl = (struct FT *) malloc (sizeof (struct FT))) == NULL)
|
|
191 {
|
|
192 wnn_errorno = WNN_MALLOC_ERR;
|
|
193 log_err ("fzk_ld: malloc error.");
|
|
194 return (NULL);
|
|
195 }
|
|
196 /* clear struct */
|
|
197 bzero (fzk_tbl, sizeof (struct FT));
|
|
198
|
|
199 fzk_tbl->kango_hinsi_area = NULL;
|
|
200 fzk_tbl->kango_vect_area = NULL;
|
|
201 fzk_tbl->fzkvect_l = fzkvect_l;
|
|
202 fzk_tbl->kango_vect_l = kango_vect_l;
|
|
203 #ifndef NO_FZK
|
|
204 fzk_tbl->vect_area = NULL;
|
|
205 fzk_tbl->tablefuzokugo = NULL;
|
|
206 fzk_tbl->fzklength = fzklength;
|
|
207
|
|
208 if ((fzk_tbl->vect_area = (fzkvect *) calloc (fzkvect_kosuu, sizeof (fzkvect))) == NULL)
|
|
209 {
|
|
210 wnn_errorno = WNN_MALLOC_ERR;
|
|
211 log_err ("fzk_ld: malloc error.");
|
|
212 fzk_discard (fzk_tbl);
|
|
213 return (NULL);
|
|
214 }
|
|
215 #endif /* NO_FZK */
|
|
216
|
|
217 if ((fzk_tbl->kango_hinsi_area = (int *) calloc (kango_length, sizeof (int))) == NULL)
|
|
218 {
|
|
219 wnn_errorno = WNN_MALLOC_ERR;
|
|
220 error1 ("malloc error in fzk\n");
|
|
221 fzk_discard (fzk_tbl);
|
|
222 return (NULL);
|
|
223 }
|
|
224 if ((fzk_tbl->kango_vect_area = (word_vector *) calloc ((kango_vect_kosuu + SV_KOSUU), sizeof (word_vector))) == NULL)
|
|
225 {
|
|
226 wnn_errorno = WNN_MALLOC_ERR;
|
|
227 log_err ("fzk_ld: malloc error.");
|
|
228 fzk_discard (fzk_tbl);
|
|
229 return (NULL);
|
|
230 }
|
|
231
|
|
232 #ifndef NO_FZK
|
|
233 if ((fzk_tbl->tablefuzokugo = (struct fzkentry *) calloc (fzk_tbl->fzklength, sizeof (struct fzkentry))) == NULL)
|
|
234 {
|
|
235 wnn_errorno = WNN_MALLOC_ERR;
|
|
236 log_err ("fzk_ld: malloc error.");
|
|
237 fzk_discard (fzk_tbl);
|
|
238 return (NULL);
|
|
239 }
|
|
240 ptr = fzk_tbl->tablefuzokugo;
|
|
241
|
|
242 for (m = 0; m < fzk_tbl->fzklength; m++, ptr++)
|
|
243 {
|
|
244 get_string (fp, charyomi);
|
|
245 (void) Sstrcpy (ptr->yomi, charyomi);
|
|
246 ptr->yomi_su = Strlen (ptr->yomi);
|
|
247 ptr->kosu = get_decimal (fp);
|
|
248 ptr->pter = &fzk_tbl->vect_area[vect_count];
|
|
249 ptr->link = NULL;
|
|
250 for (l = 0; l < ptr->kosu; l++, vect_count++)
|
|
251 {
|
|
252 fzk_tbl->vect_area[vect_count].no = get_decimal (fp);
|
|
253 for (k = 0; k < fzkvect_l; k++)
|
|
254 {
|
|
255 fzk_tbl->vect_area[vect_count].vector[k] = get_hexsa (fp);
|
|
256 }
|
|
257 }
|
|
258 }
|
|
259 #endif /* !NO_FZK */
|
|
260 /* 終端 vector を SV_KOSUU 個読み取る */
|
|
261
|
|
262 for (l = 0; l < SV_KOSUU; l++)
|
|
263 {
|
|
264 for (k = 0; k < fzkvect_l; k++)
|
|
265 {
|
|
266 fzk_tbl->kango_vect_area[l].vector[k] = get_hexsa (fp);
|
|
267 }
|
|
268 }
|
|
269 /* 幹語接続ベクタを読み取る */
|
|
270 /* 幹語前端接続ベクタNo. - 幹語前端接続ベクタ */
|
|
271 for (m = 0; m < kango_vect_kosuu; m++)
|
|
272 {
|
|
273 vect_count = get_decimal (fp);
|
|
274 for (k = 0; k < fzkvect_l; k++)
|
|
275 {
|
|
276 fzk_tbl->kango_vect_area[SV_KOSUU + vect_count].vector[k] = get_hexsa (fp);
|
|
277 }
|
|
278 }
|
|
279 /* 幹語品詞No. - 幹語前端接続ベクタNo. */
|
|
280 for (m = 0; m < kango_length; m++)
|
|
281 {
|
|
282 vect_count = get_decimal (fp);
|
|
283 if ((k = get_decimal (fp)) != -1)
|
|
284 {
|
|
285 fzk_tbl->kango_hinsi_area[vect_count] = SV_KOSUU + k;
|
|
286 }
|
|
287 else
|
|
288 {
|
|
289 fzk_tbl->kango_hinsi_area[vect_count] = -1;
|
|
290 }
|
|
291 }
|
|
292
|
|
293 if (check_eof (fp) < 0)
|
|
294 {
|
|
295 wnn_errorno = WNN_BAD_FZK_FILE;
|
|
296 return (NULL);
|
|
297 }
|
|
298
|
|
299 /* 疑似品詞番号を調べる */
|
|
300 #ifdef nodef
|
|
301 if ((sentou_no = wnn_find_hinsi_by_name (WNN_SENTOU_MEI)) == -1)
|
|
302 {
|
|
303 giji_hinsi_err (WNN_SENTOU_MEI);
|
|
304 return (NULL);
|
|
305 }
|
|
306 if ((suuji_no = wnn_find_hinsi_by_name (WNN_SUUJI_MEI)) == -1)
|
|
307 {
|
|
308 giji_hinsi_err ("SENTOU");
|
|
309 return (NULL);
|
|
310 }
|
|
311 if ((katakanago_no = wnn_find_hinsi_by_name (WNN_KANA_MEI)) == -1)
|
|
312 {
|
|
313 giji_hinsi_err ("KANA");
|
|
314 return (NULL);
|
|
315 }
|
|
316 if ((eisuu_no = wnn_find_hinsi_by_name (WNN_EISUU_MEI)) == -1)
|
|
317 {
|
|
318 giji_hinsi_err ("EISUU");
|
|
319 return (NULL);
|
|
320 }
|
|
321 if ((kigou_no = wnn_find_hinsi_by_name (WNN_KIGOU_MEI)) == -1)
|
|
322 {
|
|
323 giji_hinsi_err ("KIGOU");
|
|
324 return (NULL);
|
|
325 }
|
|
326 if ((toji_kakko_no = wnn_find_hinsi_by_name (WNN_TOJIKAKKO_MEI)) == -1)
|
|
327 {
|
|
328 giji_hinsi_err ("TOJI_KKAKO");
|
|
329 return (NULL);
|
|
330 }
|
|
331 if ((fuzokugo_no = wnn_find_hinsi_by_name (WNN_FUZOKUGO_MEI)) == -1)
|
|
332 {
|
|
333 giji_hinsi_err ("FUZOKUGO");
|
|
334 return (NULL);
|
|
335 }
|
|
336 if ((kai_kakko_no = wnn_find_hinsi_by_name (WNN_KAIKKAKO_MEI)) == -1)
|
|
337 {
|
|
338 giji_hinsi_err ("KAIKAKKO");
|
|
339 return (NULL);
|
|
340 }
|
|
341 if ((giji_no = wnn_find_hinsi_by_name (WNN_GIJI_MEI)) == -1)
|
|
342 {
|
|
343 giji_hinsi_err ("GIJI");
|
|
344 return (NULL);
|
|
345 }
|
|
346 #endif /* nodef */
|
|
347 return (fzk_tbl);
|
|
348 }
|
|
349
|
|
350 #ifdef nodef
|
|
351 void
|
|
352 giji_hinsi_err (char *str)
|
|
353 {
|
|
354 wnn_errorno = WNN_GIJI_HINSI_ERR;
|
|
355 log_err ("GIJI hinsi (%s) is not defined in hinsi data file.", str);
|
|
356 }
|
|
357 #endif /* nodef */
|
|
358
|
|
359 void
|
|
360 fzk_discard (struct FT *fzk_tbl)
|
|
361 {
|
|
362 if (fzk_tbl->kango_hinsi_area != NULL)
|
|
363 free (fzk_tbl->kango_hinsi_area);
|
|
364 if (fzk_tbl->kango_vect_area != NULL)
|
|
365 free (fzk_tbl->kango_vect_area);
|
|
366 #ifndef NO_FZK
|
|
367 if (fzk_tbl->vect_area != NULL)
|
|
368 free (fzk_tbl->vect_area);
|
|
369 if (fzk_tbl->tablefuzokugo != NULL)
|
|
370 free (fzk_tbl->tablefuzokugo);
|
|
371 #endif
|
|
372 free (fzk_tbl);
|
|
373 }
|
|
374
|
|
375 #ifndef NO_FZK
|
|
376 static void
|
|
377 link_job (int x, struct FT *fzk_tbl)
|
|
378 {
|
|
379 int n;
|
|
380 struct fzkentry *pter_a, *pter_b;
|
|
381
|
|
382 for (pter_a = fzk_tbl->tablefuzokugo, x--, n = 0; n < x; n++, pter_a++)
|
|
383 {
|
|
384 for (pter_b = pter_a + 1; pter_b <= fzk_tbl->tablefuzokugo + x && Strncmp (pter_a->yomi, pter_b->yomi, pter_a->yomi_su) == 0; pter_b++)
|
|
385 pter_b->link = pter_a;
|
|
386 }
|
|
387 }
|
|
388 #endif
|
|
389
|
|
390
|
|
391 /*
|
|
392 * fzk_kai : fuzokugo kaiseki
|
|
393 */
|
|
394
|
|
395 int
|
|
396 fzk_kai (w_char *start, /* string start pointer */
|
|
397 w_char *end, /* string end pointer */
|
|
398 int syuutan_vect, /* 文節終端 vector */
|
|
399 int syuutan_vect1, /* 文節終端 vector 1 */
|
|
400 struct ICHBNP **ichbnp_p) /* ich-bunpou area 付属語候補 set pointer pointer */
|
|
401 {
|
|
402 #ifndef NO_FZK
|
|
403 static word_vector fzkwk[STRK_L + 1]; /* 付属語解析 work area */
|
|
404 static word_vector fzkwk1[STRK_L + 1]; /* 付属語解析 work area */
|
|
405 static int maxpoint = STRK_L;
|
|
406
|
|
407 struct fzkken fzkinf[YOMI_L + 1]; /* 検索 work area */
|
|
408 struct fzkken fzkinf1[YOMI_L + 1]; /* 検索 work area */
|
|
409 struct fzkken *fzkinfp;
|
|
410 struct ICHBNP *ichbnptr = NULL;
|
|
411 struct ICHBNP *wkptr = NULL;
|
|
412
|
|
413 int point; /* index */
|
|
414 int i, j; /* work index */
|
|
415 int cnt, n; /* counter */
|
|
416 word_vector *endvect; /* 文節終端 vector pointer */
|
|
417 word_vector *endvect1; /* 文節終端 vector 1 */
|
|
418
|
|
419 int fzkvect_l = ft->fzkvect_l;
|
|
420 int kango_vect_l = ft->kango_vect_l;
|
|
421
|
|
422 endvect = (word_vector *) ft->kango_vect_area + syuutan_vect;
|
|
423 if (syuutan_vect1 != WNN_VECT_NO)
|
|
424 endvect1 = (word_vector *) ft->kango_vect_area + syuutan_vect1;
|
|
425 else
|
|
426 endvect1 = NULL;
|
|
427
|
|
428 /*
|
|
429 * initialize
|
|
430 */
|
|
431 for (i = maxpoint; i >= 0; i--)
|
|
432 {
|
|
433 for (j = VECT_L - 1; j >= 0; j--)
|
|
434 {
|
|
435 fzkwk[i].vector[j] = 0;
|
|
436 fzkwk1[i].vector[j] = 0;
|
|
437 }
|
|
438 }
|
|
439 maxpoint = 0;
|
|
440 for (j = 0; j < fzkvect_l; j++)
|
|
441 {
|
|
442 fzkwk[0].vector[j] = endvect->vector[j];
|
|
443 if (endvect1 != 0)
|
|
444 fzkwk1[0].vector[j] = endvect1->vector[j];
|
|
445 }
|
|
446 /*
|
|
447 * 付属語解析
|
|
448 */
|
|
449
|
|
450 for (point = 0; point <= maxpoint; point++)
|
|
451 {
|
|
452 if (point > STRK_L)
|
|
453 {
|
|
454 wnn_errorno = WNN_WKAREA_FULL;
|
|
455 log_err ("fzk_kai: fuzokugo-kaiseki area is full.");
|
|
456 return (-1);
|
|
457 }
|
|
458 if (fzk_ck_vector (&fzkwk[point]))
|
|
459 {
|
|
460 /* 付属語検索 vector */
|
|
461 fzk_ken (start + point, end, &fzkwk[point], &fzkinf[0]);
|
|
462 for (fzkinfp = &fzkinf[0]; fzkinfp->ent_ptr; fzkinfp++)
|
|
463 {
|
|
464 n = (fzkinfp->ent_ptr)->yomi_su;
|
|
465 fzk_orvt (fzkwk[point + n].vector, fzkinfp->vector);
|
|
466 maxpoint = (maxpoint < point + n) ? point + n : maxpoint;
|
|
467 }
|
|
468 }
|
|
469 if (fzk_ck_vector (&fzkwk1[point]))
|
|
470 {
|
|
471 /* 付属語検索 vector 1 */
|
|
472 fzk_ken (start + point, end, &fzkwk1[point], &fzkinf1[0]);
|
|
473 for (fzkinfp = &fzkinf1[0]; fzkinfp->ent_ptr; fzkinfp++)
|
|
474 {
|
|
475 n = (fzkinfp->ent_ptr)->yomi_su;
|
|
476 fzk_orvt (fzkwk1[point + n].vector, fzkinfp->vector);
|
|
477 maxpoint = (maxpoint < point + n) ? point + n : maxpoint;
|
|
478 }
|
|
479 }
|
|
480 }
|
|
481
|
|
482
|
|
483 /*
|
|
484 * 付属語候補 set
|
|
485 */
|
|
486 for (point = cnt = 0, *ichbnp_p = NULL; point <= maxpoint; point++)
|
|
487 {
|
|
488 if (kng_ckvt (&fzkwk[point], kango_vect_l) ||
|
|
489 kng_ckvt (&fzkwk1[point], kango_vect_l))
|
|
490 {
|
|
491 if (!(n = cnt % FZKIBNO))
|
|
492 {
|
|
493 if ((wkptr = getibsp ()) == NULL)
|
|
494 {
|
|
495 if (*ichbnp_p != NULL)
|
|
496 (void) freeibsp (*ichbnp_p);
|
|
497 *ichbnp_p = NULL;
|
|
498 log_err ("fzk_kai: error.");
|
|
499 return (-1);
|
|
500 }
|
|
501 else
|
|
502 {
|
|
503 if (*ichbnp_p == NULL)
|
|
504 *ichbnp_p = wkptr;
|
|
505 else
|
|
506 ichbnptr->next_p = wkptr;
|
|
507 ichbnptr = wkptr;
|
|
508 }
|
|
509 }
|
|
510 ichbnptr->fzkib[n].offset = point;
|
|
511 for (i = 0; i < kango_vect_l; i++)
|
|
512 {
|
|
513 ichbnptr->fzkib[n].vector[i] = fzkwk[point].vector[i];
|
|
514 ichbnptr->fzkib1[n].vector[i] = fzkwk1[point].vector[i];
|
|
515 }
|
|
516 cnt++;
|
|
517 }
|
|
518 }
|
|
519 return (cnt);
|
|
520 #else /* NO_FZK */
|
|
521 struct ICHBNP *wkptr;
|
|
522
|
|
523 int i; /* work index */
|
|
524 word_vector *endvect; /* 文節終端 vector pointer */
|
|
525 word_vector *endvect1; /* 文節終端 vector 1 */
|
|
526
|
|
527 int kango_vect_l = ft->kango_vect_l;
|
|
528
|
|
529 endvect = (word_vector *) ft->kango_vect_area + syuutan_vect;
|
|
530 if (syuutan_vect1 != WNN_VECT_NO)
|
|
531 endvect1 = (word_vector *) ft->kango_vect_area + syuutan_vect1;
|
|
532 else
|
|
533 endvect1 = NULL;
|
|
534
|
|
535 *ichbnp_p = NULL;
|
|
536 if (kng_ckvt (endvect, kango_vect_l) || kng_ckvt (endvect1, kango_vect_l))
|
|
537 {
|
|
538 if ((wkptr = getibsp ()) == NULL)
|
|
539 {
|
|
540 log_err ("fzk_kai: error.");
|
|
541 return (-1);
|
|
542 }
|
|
543 else
|
|
544 {
|
|
545 *ichbnp_p = wkptr;
|
|
546 }
|
|
547 wkptr->fzkib[0].offset = 0;
|
|
548 for (i = 0; i < kango_vect_l; i++)
|
|
549 {
|
|
550 wkptr->fzkib[0].vector[i] = endvect->vector[i];
|
|
551 if (endvect1 != NULL)
|
|
552 wkptr->fzkib1[0].vector[i] = endvect1->vector[i];
|
|
553 }
|
|
554 }
|
|
555 return (1);
|
|
556 #endif /* NO_FZK */
|
|
557 }
|
|
558
|
|
559
|
|
560 /*
|
|
561 * kng_ckvt 幹語ベクタのチェック V4.0
|
|
562 */
|
|
563
|
|
564 static int
|
|
565 kng_ckvt (word_vector *wv, int kango_vect_l)
|
|
566 {
|
|
567 int i, rts;
|
|
568 int *v = wv->vector;
|
|
569
|
|
570 for (rts = 0, i = kango_vect_l; i > 0; i--)
|
|
571 rts |= *v++; /* OR cheak */
|
|
572 return (rts);
|
|
573 }
|
|
574
|
|
575
|
|
576 /*
|
|
577 * fzk_ckvt
|
|
578 */
|
|
579 int
|
|
580 fzk_ckvt (int vector)
|
|
581 {
|
|
582 return vector >= 0 &&
|
|
583 fzk_ck_vector (&ft->kango_vect_area[vector]);
|
|
584 }
|
|
585
|
|
586
|
|
587 static int
|
|
588 fzk_ck_vector (word_vector *wv)
|
|
589 {
|
|
590 int i, rts;
|
|
591 int *v = wv->vector;
|
|
592
|
|
593 rts = *v++ & ~0x01; /* 「先頭可」のビットを除く */
|
|
594 for (i = ft->fzkvect_l - 1; i > 0; i--)
|
|
595 rts |= *v++; /* OR cheak */
|
|
596 return (rts);
|
|
597 }
|
|
598
|
|
599
|
|
600 /*
|
|
601 * fzk_orvt
|
|
602 */
|
|
603
|
|
604 static void
|
|
605 fzk_orvt (int *vector1, int *vector2)
|
|
606 {
|
|
607 int i;
|
|
608
|
|
609 for (i = ft->fzkvect_l; i > 0; i--)
|
|
610 *vector1++ |= *vector2++; /* OR set */
|
|
611 }
|
|
612
|
|
613 #ifndef NO_FZK
|
|
614 static int
|
|
615 fzk_ken (w_char *start, w_char *end, word_vector *wv, struct fzkken *fzkptr)
|
|
616 {
|
|
617 int yomicnt;
|
|
618 int setno;
|
|
619 struct fzkken *ansptr;
|
|
620 struct fzkentry *search_ptr;
|
|
621 w_char key[YOMI_L + 1];
|
|
622 int fzkvect_l = ft->fzkvect_l;
|
|
623
|
|
624 ansptr = fzkptr;
|
|
625 for (yomicnt = end - start; yomicnt < YOMI_L; key[yomicnt++] = 0)
|
|
626 ;
|
|
627 for (yomicnt = 0; (yomicnt < YOMI_L) && (start < end); key[yomicnt++] = *start++)
|
|
628 ;
|
|
629 key[yomicnt] = 0;
|
5
|
630 for (; yomicnt > 0; key[--yomicnt] = 0)
|
0
|
631 {
|
|
632 search_ptr = (struct fzkentry *) bsrch (key);
|
|
633 if (search_ptr != NULL)
|
|
634 {
|
|
635 setno = setfzk (search_ptr, &ansptr, wv, fzkvect_l);
|
|
636 ansptr->ent_ptr = NULL;
|
|
637 return (setno);
|
|
638 }
|
|
639 }
|
|
640 fzkptr->ent_ptr = NULL;
|
|
641 return (NO);
|
|
642 }
|
|
643
|
|
644 /******************************************/
|
|
645 static int
|
|
646 setfzk (struct fzkentry *entry_ptr,
|
|
647 struct fzkken **answer_ptr,
|
|
648 word_vector *wv,
|
|
649 int fzkvect_l)
|
|
650 {
|
|
651 int setno;
|
|
652 int vectroop;
|
|
653 int wcnt, setflg;
|
|
654 fzkvect *vect_ptr;
|
|
655
|
|
656 if (entry_ptr != NULL)
|
|
657 {
|
|
658 setno = setfzk (entry_ptr->link, answer_ptr, wv, fzkvect_l);
|
|
659 for (wcnt = 0; wcnt < fzkvect_l; (*answer_ptr)->vector[wcnt++] = 0);
|
|
660 setflg = 0;
|
|
661 for (vectroop = entry_ptr->kosu, vect_ptr = entry_ptr->pter; vectroop > 0; vectroop--, vect_ptr++)
|
|
662 {
|
|
663 if (bittest (wv->vector, vect_ptr->no) > 0)
|
|
664 {
|
|
665 setflg = 1;
|
|
666 for (wcnt = 0; wcnt < fzkvect_l; wcnt++)
|
|
667 (*answer_ptr)->vector[wcnt] |= vect_ptr->vector[wcnt];
|
|
668 }
|
|
669 }
|
|
670 if (setflg != 0)
|
|
671 {
|
|
672 (*answer_ptr)->ent_ptr = entry_ptr;
|
|
673 (*answer_ptr)++;
|
|
674 return (setno + 1);
|
|
675 }
|
|
676 else
|
|
677 return (setno);
|
|
678 }
|
|
679 else
|
|
680 return (0);
|
|
681 }
|
|
682 #endif /* NO_FZK */
|
|
683
|
|
684 /***************************************************/
|
|
/*
 * bittest: test bit number `no` of the bit vector `vector`, where bit 0 is
 * the least significant bit of vector[0] and each word holds
 * sizeof (int) * 8 bits.
 *
 * Returns 1 when the bit is set, -1 when it is clear.
 */
static int
bittest (int vector[], int no)
{
    int word = vector[no / (sizeof (int) * 8)];
    int shifted = word >> (int) (no % (sizeof (int) * 8));

    return ((shifted & 0x00000001) == 1) ? 1 : -1;
}
|
|
697
|
|
698 /*
|
|
699 int Strncmp(w_char *s1, w_char *s2, int n)
|
|
700 {
|
|
701 if(n == 0)return(0);
|
|
702 for (;n > 0 && *s1++ == *s2++;n--);
|
|
703 return (int)(*--s1 - *--s2);
|
|
704 }
|
|
705 */
|
|
706
|
|
707 #ifndef NO_FZK
|
|
708 static struct fzkentry *
|
|
709 bsrch (w_char *key_yomi)
|
|
710 {
|
|
711 int low, high, j, flg;
|
|
712
|
|
713 for (low = 0, high = ft->fzklength; low < high;)
|
|
714 {
|
|
715 j = (high + low) >> 1;
|
|
716 /*
|
|
717 flg = Strncmp(key_yomi, (ft->tablefuzokugo + j)->yomi, YOMI_L);
|
|
718 */
|
|
719 {
|
|
720 int n;
|
|
721 w_char *s1, *s2;
|
|
722 s1 = key_yomi;
|
|
723 s2 = (ft->tablefuzokugo + j)->yomi;
|
|
724 for (n = YOMI_L; n > 0 && *s1++ == *s2++; n--);
|
|
725 flg = (int) (*--s1 - *--s2);
|
|
726 }
|
|
727 if (flg > 0)
|
|
728 low = j + 1;
|
|
729 else if (flg < 0)
|
|
730 high = j;
|
|
731 else
|
|
732 return (ft->tablefuzokugo + j);
|
|
733 }
|
|
734 return (0);
|
|
735 }
|
|
736 #endif
|
|
737
|
|
738 static int
|
|
739 error_fzk (void)
|
|
740 {
|
|
741 wnn_errorno = WNN_NOT_FZK_FILE;
|
|
742 log_err ("Bad format in fzk_file.");
|
|
743 return (-1);
|
|
744 }
|
|
745
|
|
746 static int
|
|
747 error_eof (void)
|
|
748 {
|
|
749 wnn_errorno = WNN_NOT_FZK_FILE;
|
|
750 log_err ("Unecpected EOF in reading fzk_file.");
|
|
751 return (-1);
|
|
752 }
|
|
753
|
|
754 static int
|
|
755 get_decimal (FILE *fp)
|
|
756 {
|
|
757 unsigned char buf[24];
|
|
758 int k;
|
|
759 if (get_string (fp, buf) == EOF)
|
|
760 {
|
|
761 return (error_eof ());
|
|
762 }
|
|
763 if (sscanf ((char *) buf, "%d", &k) != 1)
|
|
764 {
|
|
765 return (error_fzk ());
|
|
766 }
|
|
767 return (k);
|
|
768 }
|
|
769
|
|
770 static int
|
|
771 get_hexsa (FILE *fp)
|
|
772 {
|
|
773 unsigned char buf[24];
|
|
774 int k;
|
|
775 if (get_string (fp, buf) == EOF)
|
|
776 {
|
|
777 return (error_eof ());
|
|
778 }
|
|
779 if (sscanf ((char *) buf, "%x", &k) != 1)
|
|
780 {
|
|
781 return (error_fzk ());
|
|
782 }
|
|
783 return (k);
|
|
784 }
|
|
785
|
|
786 static int
|
|
787 get_string (FILE *fp, unsigned char *buf)
|
|
788 {
|
|
789 unsigned char *c = buf;
|
|
790 int k;
|
|
791 for (; (k = vgetc (fp)) == ';' || k == '\n' || k == '\t' || k == ' ';)
|
|
792 {
|
|
793 if (k == ';')
|
|
794 {
|
|
795 for (; (k = vgetc (fp)) != '\n';)
|
|
796 {
|
|
797 if (k == EOF)
|
|
798 {
|
|
799 return (EOF);
|
|
800 }
|
|
801 }
|
|
802 }
|
|
803 }
|
|
804 if (k == EOF)
|
|
805 {
|
|
806 return (EOF);
|
|
807 }
|
|
808 vungetc (k, fp);
|
|
809 for (; (k = vgetc (fp)) != ';' && k != '\n' && k != '\t' && k != ' ' && k != EOF;)
|
|
810 {
|
|
811 *c++ = k;
|
|
812 }
|
|
813 *c = '\0';
|
|
814 return (0); /* not EOF */
|
|
815 }
|
|
816
|
|
817 static int
|
|
818 check_eof (FILE *fp)
|
|
819 {
|
|
820 unsigned char buf[24];
|
|
821 if (get_string (fp, buf) != EOF)
|
|
822 {
|
|
823 wnn_errorno = WNN_NOT_FZK_FILE;
|
|
824 log_err ("Not at the end of fzk_file.");
|
|
825 return (-1);
|
|
826 }
|
|
827 return (0);
|
|
828 }
|
|
829
|