Mercurial > freewnn
annotate Wnn/etc/gethinsi.c @ 23:e1aadb6dcbd4
- fixed the build rule for build_stamp.h. keywords in template for hg tip should surrounded by curly braces instead of hash marks.
- fixed usage of libtool in building jlib.V3. newer libtool does not work without mode specifier.
- replaced some libtool related files. (should remove these?)
author | Yoshiki Yazawa <yaz@honeyplanet.jp> |
---|---|
date | Fri, 23 Jan 2009 16:36:13 +0900 |
parents | c966456648ad |
children | 6bfa7ea3b75b |
rev | line source |
---|---|
0 | 1 /* |
2 * $Id: gethinsi.c,v 1.6 2002/03/24 01:25:13 hiroo Exp $ | |
3 */ | |
4 | |
5 /* | |
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system. | |
7 * This file is part of FreeWnn. | |
8 * | |
9 * Copyright Kyoto University Research Institute for Mathematical Sciences | |
10 * 1987, 1988, 1989, 1990, 1991, 1992 | |
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 | |
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 | |
13 * Copyright FreeWnn Project 1999, 2000, 2002 | |
14 * | |
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> | |
16 * | |
17 * This program is free software; you can redistribute it and/or modify | |
18 * it under the terms of the GNU General Public License as published by | |
19 * the Free Software Foundation; either version 2 of the License, or | |
20 * (at your option) any later version. | |
21 * | |
22 * This program is distributed in the hope that it will be useful, | |
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 * GNU General Public License for more details. | |
26 * | |
27 * You should have received a copy of the GNU General Public License | |
28 * along with this program; if not, write to the Free Software | |
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
30 */ | |
31 | |
32 /* 品詞ファイルの構造に関する定義 */ | |
33 | |
34 #ifdef HAVE_CONFIG_H | |
35 # include <config.h> | |
36 #endif | |
37 | |
38 #include <stdio.h> | |
39 #if STDC_HEADERS | |
40 # include <string.h> | |
41 #elif HAVE_STRINGS_H | |
42 # include <strings.h> | |
43 #endif /* STDC_HEADERS */ | |
44 #include "commonhd.h" | |
45 #include "wnn_config.h" | |
46 #include "wnnerror.h" | |
47 #include "jslib.h" | |
48 #include "hinsi_file.h" | |
49 #include "wnn_os.h" | |
50 #include "wnn_string.h" | |
51 | |
52 /* | |
53 wnn_loadhinsi(NULL) 品詞の情報を読み込む | |
54 | |
55 wnn_find_hinsi_by_name(c) 名前を与えて、品詞番号を取る | |
56 char *c; | |
57 | |
58 char *wnn_get_hinsi_name(k) 品詞番号から名前を取る | |
59 int k; | |
60 | |
61 int | |
62 wnn_get_fukugou_component(k,str, ) 複合品詞に対して 構成要素を求める | |
63 int k; 構成要素の個数が返値として返され、 | |
64 unsigned short **str; 構成要素は str 以降に返される。 | |
65 | |
66 #ifdef nodef | |
67 int wnn_get_hinsi_list(area) 品詞のリストを得る。 | |
68 品詞は、品詞名の配列として管理されている。 | |
69 配列の先頭番地を area に入れ、配列の大きさを返す。 | |
70 w_char ***area; | |
71 | |
72 int wnn_get_fukugou_list(area, start) 複合品詞のリストを得る | |
73 複合品詞は、複合品詞構造体の配列として管理されている。 | |
74 配列の先頭番地を area に入れ、配列の大きさを返す。 | |
75 n 番目の複合品詞の品詞番号は、FUKUGOU_START - n である。 | |
76 FUKUGOU_START の値を start に入れて返す。 | |
77 struct wnn_fukugou **area; | |
78 int *start; | |
79 int wnn_hinsi_node_component(name, area) | |
80 w_char **area; | |
81 w_char *name; | |
82 品詞ノード名から、その子どものノードの名前の列を得る。 | |
83 個数が返る。 | |
84 もし、ノードではなく本当の品詞名(リーフ)なら、0 が返る。 | |
85 ノードでも品詞名でもないとき、-1 が返る。 | |
86 #endif | |
87 */ | |
88 | |
89 extern int wnn_errorno; | |
90 | |
91 #ifdef JSERVER | |
92 /* must be #include "de_header.h" ? */ | |
93 extern void log_debug (); | |
94 #define error1 log_debug | |
95 #endif | |
96 | |
97 /* | |
98 *here start the real program | |
99 */ | |
100 | |
101 | |
102 int wnnerror_hinsi; | |
103 | |
104 static int hinsi_loaded = 0; | |
105 | |
106 static int line_no = 0; | |
107 | |
108 static w_char heap[HEAP_LEN]; | |
109 static w_char *hp = heap; | |
110 | |
111 static unsigned short wheap[WHEAP_LEN]; | |
112 static unsigned short *whp = wheap; | |
113 | |
114 #define SIZE 1024 | |
115 | |
116 static w_char *hinsi[MAXHINSI]; | |
117 static struct wnn_fukugou fukugou[MAXFUKUGOU]; | |
118 static struct wnn_hinsi_node node[MAXNODE]; | |
119 | |
120 int mhinsi = 0; | |
121 int mfukugou = 0; | |
122 int mnode = 0; | |
123 | |
124 static void | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
125 error_long (void) |
0 | 126 { |
127 wnnerror_hinsi = WNN_TOO_LONG_HINSI_FILE_LINE; | |
128 } | |
129 | |
130 static void | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
131 error_no_heap (void) |
0 | 132 { |
133 wnnerror_hinsi = WNN_TOO_BIG_HINSI_FILE; | |
134 } | |
135 | |
136 static int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
137 get_char0 (FILE *fp) |
0 | 138 { |
139 int c, d; | |
140 | |
141 for (; (c = getc (fp)) == COMMENT_CHAR || c == CONTINUE_CHAR || c == IGNORE_CHAR1 || c == IGNORE_CHAR2;) | |
142 { | |
143 if (c == CONTINUE_CHAR) | |
144 { | |
145 if ((d = getc (fp)) == EOF) | |
146 { | |
147 break; | |
148 } | |
149 if (d == '\n') | |
150 { | |
151 line_no += 1; | |
152 continue; | |
153 } | |
154 else | |
155 { | |
156 ungetc (d, fp); | |
157 break; | |
158 } | |
159 } | |
160 else if (c == COMMENT_CHAR) | |
161 { | |
162 for (;;) | |
163 { | |
164 if ((c = getc (fp)) == EOF) | |
165 { | |
166 return (EOF); | |
167 } | |
168 if (c == '\n') | |
169 { | |
170 ungetc (c, fp); | |
171 line_no += 1; | |
172 break; | |
173 } | |
174 } | |
175 } | |
176 } | |
177 if (c == '\n') | |
178 line_no += 1; | |
179 return (c); | |
180 } | |
181 | |
/*
 * Character reader layered on get_char0() that removes empty lines:
 * newlines at the very start of the input are dropped, and any run of
 * consecutive newlines is delivered as a single '\n'.
 *
 * The character that ended a newline run is kept in a static one-slot
 * lookahead and handed out by the next call.  Both the lookahead and the
 * "start of input already handled" flag are static, so they persist
 * across calls (and across files).
 */
static int
get_char (FILE *fp)
{
  static int pending = -1;      /* lookahead character, -1 when empty */
  static int started = 0;       /* leading newlines already skipped? */
  int cur;

  if (pending != -1)
    {
      cur = pending;
      pending = -1;
      return (cur);
    }

  cur = get_char0 (fp);
  if (!started)
    {
      /* remove all new lines in the head of the file */
      while (cur == '\n')
        cur = get_char0 (fp);
      started = 1;
    }
  if (cur == '\n')
    {
      /* Collapse the following newlines; remember what ended the run.  */
      do
        pending = get_char0 (fp);
      while (pending == '\n');
    }
  return (cur);
}
213 | |
214 /* get one phrase and return the separater */ | |
215 static int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
216 get_phrase (UCHAR *s0, int size, FILE *fp) |
0 | 217 { |
218 UCHAR *s = s0; | |
219 int c; | |
220 static int eof = 0; | |
221 | |
222 if (eof) | |
223 { | |
224 *s0 = 0; | |
225 return (EOF); | |
226 } | |
227 while ((c = get_char (fp)) != '\n' && c != DEVIDE_CHAR && c != NODE_CHAR && c != HINSI_SEPARATE_CHAR && c != EOF) | |
228 { | |
229 if (s - s0 >= size) | |
230 { | |
231 error_long (); | |
232 return (HINSI_ERR); | |
233 } | |
234 *s++ = c; | |
235 } | |
236 if (c == EOF) | |
237 eof = 1; | |
238 if (s - s0 >= size - 1) | |
239 { | |
240 error_long (); | |
241 return (HINSI_ERR); | |
242 } | |
243 *s++ = '\0'; | |
244 return (c); | |
245 } | |
246 | |
247 static int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
248 stradd (w_char **cp, char *str) |
0 | 249 { |
250 int len = strlen (str); | |
251 | |
252 if (hp + len + 1 >= heap + HEAP_LEN) | |
253 { | |
254 error_no_heap (); | |
255 return (-1); | |
256 } | |
257 *cp = hp; | |
258 wnn_Sstrcpy (hp, str); | |
259 hp += wnn_Strlen (hp) + 1; | |
260 return (0); | |
261 } | |
262 | |
263 static int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
264 w_stradd (unsigned short **cp, unsigned short *str) |
0 | 265 { |
266 | |
267 *cp = whp; | |
268 for (; *str != TERMINATE; str++, whp++) | |
269 { | |
270 if (whp >= wheap + WHEAP_LEN) | |
271 { | |
272 error_no_heap (); | |
273 return (-1); | |
274 } | |
275 *whp = *str; | |
276 } | |
277 *whp++ = TERMINATE; | |
278 return (0); | |
279 } | |
280 | |
281 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
282 wnn_loadhinsi (unsigned char *fname) |
0 | 283 { |
284 FILE *fp; | |
285 UCHAR buf[SIZE]; | |
286 unsigned short fukugou_str[MAXHINSI]; | |
287 int sep; | |
288 int h; | |
289 unsigned short *c; | |
290 char tmp[256]; | |
291 extern int wnn_find_hinsi_by_name (); | |
292 | |
293 if (fname == NULL) | |
294 { | |
295 #ifdef JSERVER | |
296 if (hinsi_loaded) | |
297 return (0); | |
298 #endif /* JSERVER */ | |
299 strcpy (tmp, LIBDIR); | |
300 strcat (tmp, HINSIDATA_FILE); | |
301 fname = (unsigned char *) tmp; | |
302 } | |
303 | |
304 #ifdef JSERVER | |
305 error1 ("Read HINSI DATA FILE %s\n", fname); | |
306 #endif /* JSERVER */ | |
307 | |
308 if ((fp = fopen ((char *) fname, "r")) == NULL) | |
309 { | |
310 wnnerror_hinsi = WNN_NO_HINSI_DATA_FILE; | |
311 goto err_1; | |
312 } | |
313 hinsi_loaded = 1; | |
314 | |
315 while ((sep = get_phrase (buf, SIZE, fp)) != EOF) | |
316 { | |
317 if (sep == HINSI_ERR) | |
318 { | |
319 goto err; /* wnnerror_hinsi set in get_phrase */ | |
320 } | |
321 if (buf[0] == YOYAKU_CHAR) | |
322 { /* yoyaku */ | |
323 if (sep != '\n') | |
324 { | |
325 wnnerror_hinsi = WNN_BAD_HINSI_FILE; | |
326 goto err; | |
327 } | |
328 hinsi[mhinsi++] = NULL; | |
329 } | |
330 else if (sep == '\n') | |
331 { /* hinsi */ | |
332 if (stradd (&hinsi[mhinsi++], buf)) | |
333 goto err; | |
334 } | |
335 else if (sep == DEVIDE_CHAR) | |
336 { /* fukugou */ | |
337 if (stradd (&fukugou[mfukugou].name, buf)) | |
338 goto err; | |
339 c = fukugou_str; | |
340 while ((sep = get_phrase (buf, SIZE, fp)) != EOF) | |
341 { | |
342 if (sep == -1) | |
343 { | |
344 goto err; /* wnnerror_hinsi set in get_phrase */ | |
345 } | |
346 if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n') | |
347 { | |
348 wnnerror_hinsi = WNN_BAD_HINSI_FILE; | |
349 goto err; | |
350 } | |
351 if ((h = wnn_find_hinsi_by_name (buf)) == -1 || h >= mhinsi) | |
352 { | |
353 wnnerror_hinsi = WNN_BAD_HINSI_FILE; | |
354 goto err; | |
355 } | |
356 *c++ = h; | |
357 if (sep == '\n' || sep == EOF) | |
358 break; | |
359 } | |
360 *c = TERMINATE; | |
361 if (w_stradd (&fukugou[mfukugou++].component, fukugou_str)) | |
362 goto err; | |
363 } | |
364 else if (sep == NODE_CHAR) | |
365 { | |
366 int first = 1; | |
367 w_char *dummy; | |
368 | |
369 node[mnode].kosuu = 0; | |
370 if (stradd (&node[mnode].name, buf)) | |
371 goto err; | |
372 while ((sep = get_phrase (buf, SIZE, fp)) != EOF) | |
373 { | |
374 if (sep == -1) | |
375 { | |
376 goto err; /* wnnerror_hinsi set in get_phrase */ | |
377 } | |
378 if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n') | |
379 { | |
380 wnnerror_hinsi = WNN_BAD_HINSI_FILE; | |
381 goto err; | |
382 } | |
383 node[mnode].kosuu++; | |
384 if (first) | |
385 { | |
386 if (stradd (&node[mnode].son, buf)) | |
387 goto err; | |
388 first = 0; | |
389 } | |
390 else | |
391 { | |
392 if (stradd (&dummy, buf)) | |
393 goto err; | |
394 } | |
395 if (sep == '\n' || sep == EOF) | |
396 break; | |
397 } | |
398 mnode++; | |
399 } | |
400 } | |
401 fclose (fp); | |
402 return (0); | |
403 err: | |
404 fclose (fp); | |
405 err_1: | |
406 #ifdef JSERVER | |
407 error1 ("Error reading HINSI DATA FILE %s\n", fname); | |
408 #endif /* JSERVER */ | |
409 return (HINSI_ERR); | |
410 } | |
411 | |
412 static int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
413 find_hinsi_by_name (register w_char *c) |
0 | 414 { |
415 register int k; | |
416 if (!hinsi_loaded) | |
417 { | |
418 if (wnn_loadhinsi (NULL) != 0) | |
419 { | |
420 return (-1); | |
421 } | |
422 } | |
423 for (k = 0; k < mhinsi; k++) | |
424 { | |
425 if (hinsi[k] && wnn_Strcmp (hinsi[k], c) == 0) | |
426 { | |
427 return (k); | |
428 } | |
429 } | |
430 for (k = 0; k < mfukugou; k++) | |
431 { | |
432 if (fukugou[k].name && wnn_Strcmp (fukugou[k].name, c) == 0) | |
433 { | |
434 return (FUKUGOU_START - k); | |
435 } | |
436 } | |
437 return (-1); | |
438 } | |
439 | |
440 | |
441 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
442 wnn_find_hinsi_by_name (register char *c) |
0 | 443 { |
444 w_char hin[WNN_HINSI_NAME_LEN]; | |
445 | |
446 wnn_Sstrcpy (hin, c); | |
447 return (find_hinsi_by_name (hin)); | |
448 } | |
449 | |
450 | |
451 static w_char * | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
452 get_hinsi_name (int k) |
0 | 453 { |
454 if (!hinsi_loaded) | |
455 { | |
456 if (wnn_loadhinsi (NULL) != 0) | |
457 { | |
458 return (NULL); | |
459 } | |
460 } | |
461 if (k < mhinsi && k >= 0) | |
462 { | |
463 return (hinsi[k]); | |
464 } | |
465 else if (k > FUKUGOU_START - mfukugou) | |
466 { | |
467 return (fukugou[FUKUGOU_START - k].name); | |
468 } | |
469 return (NULL); | |
470 } | |
471 | |
472 char * | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
473 wnn_get_hinsi_name (int k) |
0 | 474 { |
475 w_char *s; | |
476 static char hin[WNN_HINSI_NAME_LEN * 2]; | |
477 | |
478 if ((s = get_hinsi_name (k)) == NULL) | |
479 return (NULL); | |
480 wnn_sStrcpy (hin, s); | |
481 return (hin); | |
482 } | |
483 | |
484 #ifndef JSERVER | |
485 static | |
486 #endif /* JSERVER */ | |
487 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
488 wnn_get_fukugou_component_body (register int k, register unsigned short **shp) |
0 | 489 { |
490 static unsigned short tmp; | |
491 register unsigned short *s; | |
492 int index; /* need for NEWS-OS 6.0 */ | |
493 if (k < mhinsi && k >= 0) | |
494 { | |
495 tmp = k; | |
496 *shp = &tmp; | |
497 return (1); | |
498 } | |
499 if (k > FUKUGOU_START - mfukugou && k <= FUKUGOU_START) | |
500 { | |
501 index = FUKUGOU_START - k; | |
502 for (*shp = s = fukugou[index].component; *s != TERMINATE; s++); | |
503 /* | |
504 If next line in NEWS-OS 6.0, jserver down when kanji henkan. | |
505 for(*shp = s = fukugou[FUKUGOU_START - k].component;*s != TERMINATE;s++); | |
506 */ | |
507 return (s - *shp); | |
508 } | |
509 return (-1); | |
510 } | |
511 | |
512 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
513 wnn_get_fukugou_component (register int k, register unsigned short **shp) |
0 | 514 { |
515 if (!hinsi_loaded) | |
516 { | |
517 if (wnn_loadhinsi (NULL) != 0) | |
518 { | |
519 return (-1); | |
520 } | |
521 } | |
522 return (wnn_get_fukugou_component_body (k, shp)); | |
523 } | |
524 | |
525 | |
526 #ifdef JSERVER | |
527 | |
528 w_char * | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
529 wnn_hinsi_name (int no) |
0 | 530 { |
531 w_char *c; | |
532 if ((c = get_hinsi_name (no)) == NULL) | |
533 { | |
534 wnn_errorno = WNN_BAD_HINSI_NO; | |
535 } | |
536 return (c); | |
537 } | |
538 | |
539 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
540 wnn_hinsi_number (w_char *name) |
0 | 541 { |
542 int n; | |
543 if ((n = find_hinsi_by_name (name)) == -1) | |
544 { | |
545 wnn_errorno = WNN_BAD_HINSI_NAME; | |
546 } | |
547 return (n); | |
548 } | |
549 | |
550 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
551 wnn_hinsi_list (w_char *name, w_char **c, struct wnn_hinsi_node *mynode, int mmynode) |
0 | 552 { |
553 int k; | |
554 | |
555 if (mynode == NULL) | |
556 { | |
557 mynode = node; | |
558 mmynode = mnode; | |
559 } | |
560 if (!hinsi_loaded) | |
561 wnn_loadhinsi (NULL); | |
562 for (k = 0; k < mmynode; k++) | |
563 { | |
564 if (wnn_Strcmp (name, mynode[k].name) == 0) | |
565 { | |
566 *c = mynode[k].son; | |
567 return (mynode[k].kosuu); | |
568 } | |
569 } | |
570 if (find_hinsi_by_name (name) == -1) | |
571 { | |
572 wnn_errorno = WNN_BAD_HINSI_NAME; | |
573 return (-1); | |
574 } | |
575 return (0); | |
576 } | |
577 | |
578 int | |
22
c966456648ad
- fixed argument style in function definition
Yoshiki Yazawa <yaz@cc.rim.or.jp>
parents:
0
diff
changeset
|
579 wnn_has_hinsi (struct wnn_hinsi_node *mynode, int mmynode, w_char *name) |
0 | 580 { |
581 w_char *c; | |
582 int k, j; | |
583 if (mynode == NULL) | |
584 { | |
585 mynode = node; | |
586 mmynode = mnode; | |
587 } | |
588 for (k = 0; k < mmynode; k++) | |
589 { | |
590 if (wnn_Strcmp (name, mynode[k].name) == 0) | |
591 { | |
592 return (1); | |
593 } | |
594 else | |
595 { | |
596 c = mynode[k].son; | |
597 for (j = 0; j < mynode[k].kosuu; j++) | |
598 { | |
599 if (wnn_Strcmp (name, c) == 0) | |
600 { | |
601 return (1); | |
602 } | |
603 else | |
604 { | |
605 c += wnn_Strlen (c) + 1; | |
606 } | |
607 } | |
608 } | |
609 } | |
610 return (0); | |
611 } | |
612 | |
613 #endif |