Mercurial > freewnn
diff Wnn/etc/gethinsi.c @ 0:bbc77ca4def5
initial import
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 13 Dec 2007 04:30:14 +0900 |
parents | |
children | c966456648ad |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Wnn/etc/gethinsi.c Thu Dec 13 04:30:14 2007 +0900 @@ -0,0 +1,640 @@ +/* + * $Id: gethinsi.c,v 1.6 2002/03/24 01:25:13 hiroo Exp $ + */ + +/* + * FreeWnn is a network-extensible Kana-to-Kanji conversion system. + * This file is part of FreeWnn. + * + * Copyright Kyoto University Research Institute for Mathematical Sciences + * 1987, 1988, 1989, 1990, 1991, 1992 + * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 + * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 + * Copyright FreeWnn Project 1999, 2000, 2002 + * + * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* 品詞ファイルの構造に関する定義 */ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdio.h> +#if STDC_HEADERS +# include <string.h> +#elif HAVE_STRINGS_H +# include <strings.h> +#endif /* STDC_HEADERS */ +#include "commonhd.h" +#include "wnn_config.h" +#include "wnnerror.h" +#include "jslib.h" +#include "hinsi_file.h" +#include "wnn_os.h" +#include "wnn_string.h" + + /* + wnn_loadhinsi(NULL) 品詞の情報を読み込む + + wnn_find_hinsi_by_name(c) 名前を与えて、品詞番号を取る + char *c; + + char *wnn_get_hinsi_name(k) 品詞番号から名前を取る + int k; + + int + wnn_get_fukugou_component(k,str, ) 複合品詞に対して 構成要素を求める + int k; 構成要素の個数が返値として返され、 + unsigned short **str; 構成要素は str 以降に返される。 + + #ifdef nodef + int wnn_get_hinsi_list(area) 品詞のリストを得る。 + 品詞は、品詞名の配列として管理されている。 + 配列の先頭番地を area に入れ、配列の大きさを返す。 + w_char ***area; + + int wnn_get_fukugou_list(area, start) 複合品詞のリストを得る + 複合品詞は、複合品詞構造体の配列として管理されている。 + 配列の先頭番地を area に入れ、配列の大きさを返す。 + n 番目の複合品詞の品詞番号は、FUKUGOU_START - n である。 + FUKUGOU_START の値を start に入れて返す。 + struct wnn_fukugou **area; + int *start; + int wnn_hinsi_node_component(name, area) + w_char **area; + w_char *name; + 品詞ノード名から、その子どものノードの名前の列を得る。 + 個数が返る。 + もし、ノードではなく本当の品詞名(リーフ)なら、0 が返る。 + ノードでも品詞名でもないとき、-1 が返る。 + #endif + */ + +extern int wnn_errorno; + +#ifdef JSERVER +/* must be #include "de_header.h" ? */ +extern void log_debug (); +#define error1 log_debug +#endif + +/* + *here start the real program + */ + + +int wnnerror_hinsi; + +static int hinsi_loaded = 0; + +static int line_no = 0; + +static w_char heap[HEAP_LEN]; +static w_char *hp = heap; + +static unsigned short wheap[WHEAP_LEN]; +static unsigned short *whp = wheap; + +#define SIZE 1024 + +static w_char *hinsi[MAXHINSI]; +static struct wnn_fukugou fukugou[MAXFUKUGOU]; +static struct wnn_hinsi_node node[MAXNODE]; + +int mhinsi = 0; +int mfukugou = 0; +int mnode = 0; + +static void +error_long () +{ + wnnerror_hinsi = WNN_TOO_LONG_HINSI_FILE_LINE; +} + +static void +error_no_heap () +{ + wnnerror_hinsi = WNN_TOO_BIG_HINSI_FILE; +} + +static int +get_char0 (fp) + FILE *fp; +{ + int c, d; + + for (; (c = getc (fp)) == COMMENT_CHAR || c == CONTINUE_CHAR || c == IGNORE_CHAR1 || c == IGNORE_CHAR2;) + { + if (c == CONTINUE_CHAR) + { + if ((d = getc (fp)) == EOF) + { + break; + } + if (d == '\n') + { + line_no += 1; + continue; + } + else + { + ungetc (d, fp); + break; + } + } + else if (c == COMMENT_CHAR) + { + for (;;) + { + if ((c = getc (fp)) == EOF) + { + return (EOF); + } + if (c == '\n') + { + ungetc (c, fp); + line_no += 1; + break; + } + } + } + } + if (c == '\n') + line_no += 1; + return (c); +} + +static int +get_char (fp) /* remove null lines */ + FILE *fp; +{ + static int c = -1; + int d; + static int fufufu = 0; + + if (c != -1) + { + d = c; + c = -1; + return (d); + } + else + { + if (fufufu == 0) + { /* remove all new lines in the head of the file */ + for (; (d = get_char0 (fp)) == '\n';); + fufufu = 1; + } + else + { + d = get_char0 (fp); + } + if (d == '\n') + { + while ((c = get_char0 (fp)) == '\n'); + } + return (d); + } +} + +/* get one phrase and return the separater */ +static int +get_phrase (s0, size, fp) + UCHAR *s0; + int size; + FILE *fp; +{ + UCHAR *s = s0; + int c; + static int eof = 0; + + if (eof) + { + *s0 = 0; + return (EOF); + } + while ((c = get_char (fp)) != '\n' && c != DEVIDE_CHAR && c != NODE_CHAR && c != HINSI_SEPARATE_CHAR && c != EOF) + { + if (s - s0 >= size) + { + error_long (); + return (HINSI_ERR); + } + *s++ = c; + } + if (c == EOF) + eof = 1; + if (s - s0 >= size - 1) + { + error_long (); + return (HINSI_ERR); + } + *s++ = '\0'; + return (c); +} + +static int +stradd (cp, str) + w_char **cp; + char *str; +{ + int len = strlen (str); + + if (hp + len + 1 >= heap + HEAP_LEN) + { + error_no_heap (); + return (-1); + } + *cp = hp; + wnn_Sstrcpy (hp, str); + hp += wnn_Strlen (hp) + 1; + return (0); +} + +static int +w_stradd (cp, str) + unsigned short **cp; + unsigned short *str; +{ + + *cp = whp; + for (; *str != TERMINATE; str++, whp++) + { + if (whp >= wheap + WHEAP_LEN) + { + error_no_heap (); + return (-1); + } + *whp = *str; + } + *whp++ = TERMINATE; + return (0); +} + +int +wnn_loadhinsi (fname) + unsigned char *fname; +{ + FILE *fp; + UCHAR buf[SIZE]; + unsigned short fukugou_str[MAXHINSI]; + int sep; + int h; + unsigned short *c; + char tmp[256]; + extern int wnn_find_hinsi_by_name (); + + if (fname == NULL) + { +#ifdef JSERVER + if (hinsi_loaded) + return (0); +#endif /* JSERVER */ + strcpy (tmp, LIBDIR); + strcat (tmp, HINSIDATA_FILE); + fname = (unsigned char *) tmp; + } + +#ifdef JSERVER + error1 ("Read HINSI DATA FILE %s\n", fname); +#endif /* JSERVER */ + + if ((fp = fopen ((char *) fname, "r")) == NULL) + { + wnnerror_hinsi = WNN_NO_HINSI_DATA_FILE; + goto err_1; + } + hinsi_loaded = 1; + + while ((sep = get_phrase (buf, SIZE, fp)) != EOF) + { + if (sep == HINSI_ERR) + { + goto err; /* wnnerror_hinsi set in get_phrase */ + } + if (buf[0] == YOYAKU_CHAR) + { /* yoyaku */ + if (sep != '\n') + { + wnnerror_hinsi = WNN_BAD_HINSI_FILE; + goto err; + } + hinsi[mhinsi++] = NULL; + } + else if (sep == '\n') + { /* hinsi */ + if (stradd (&hinsi[mhinsi++], buf)) + goto err; + } + else if (sep == DEVIDE_CHAR) + { /* fukugou */ + if (stradd (&fukugou[mfukugou].name, buf)) + goto err; + c = fukugou_str; + while ((sep = get_phrase (buf, SIZE, fp)) != EOF) + { + if (sep == -1) + { + goto err; /* wnnerror_hinsi set in get_phrase */ + } + if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n') + { + wnnerror_hinsi = WNN_BAD_HINSI_FILE; + goto err; + } + if ((h = wnn_find_hinsi_by_name (buf)) == -1 || h >= mhinsi) + { + wnnerror_hinsi = WNN_BAD_HINSI_FILE; + goto err; + } + *c++ = h; + if (sep == '\n' || sep == EOF) + break; + } + *c = TERMINATE; + if (w_stradd (&fukugou[mfukugou++].component, fukugou_str)) + goto err; + } + else if (sep == NODE_CHAR) + { + int first = 1; + w_char *dummy; + + node[mnode].kosuu = 0; + if (stradd (&node[mnode].name, buf)) + goto err; + while ((sep = get_phrase (buf, SIZE, fp)) != EOF) + { + if (sep == -1) + { + goto err; /* wnnerror_hinsi set in get_phrase */ + } + if (sep != EOF && sep != HINSI_SEPARATE_CHAR && sep != '\n') + { + wnnerror_hinsi = WNN_BAD_HINSI_FILE; + goto err; + } + node[mnode].kosuu++; + if (first) + { + if (stradd (&node[mnode].son, buf)) + goto err; + first = 0; + } + else + { + if (stradd (&dummy, buf)) + goto err; + } + if (sep == '\n' || sep == EOF) + break; + } + mnode++; + } + } + fclose (fp); + return (0); +err: + fclose (fp); +err_1: +#ifdef JSERVER + error1 ("Error reading HINSI DATA FILE %s\n", fname); +#endif /* JSERVER */ + return (HINSI_ERR); +} + +static int +find_hinsi_by_name (c) + register w_char *c; +{ + register int k; + if (!hinsi_loaded) + { + if (wnn_loadhinsi (NULL) != 0) + { + return (-1); + } + } + for (k = 0; k < mhinsi; k++) + { + if (hinsi[k] && wnn_Strcmp (hinsi[k], c) == 0) + { + return (k); + } + } + for (k = 0; k < mfukugou; k++) + { + if (fukugou[k].name && wnn_Strcmp (fukugou[k].name, c) == 0) + { + return (FUKUGOU_START - k); + } + } + return (-1); +} + + +int +wnn_find_hinsi_by_name (c) + register char *c; +{ + w_char hin[WNN_HINSI_NAME_LEN]; + + wnn_Sstrcpy (hin, c); + return (find_hinsi_by_name (hin)); +} + + +static w_char * +get_hinsi_name (k) + int k; +{ + if (!hinsi_loaded) + { + if (wnn_loadhinsi (NULL) != 0) + { + return (NULL); + } + } + if (k < mhinsi && k >= 0) + { + return (hinsi[k]); + } + else if (k > FUKUGOU_START - mfukugou) + { + return (fukugou[FUKUGOU_START - k].name); + } + return (NULL); +} + +char * +wnn_get_hinsi_name (k) + int k; +{ + w_char *s; + static char hin[WNN_HINSI_NAME_LEN * 2]; + + if ((s = get_hinsi_name (k)) == NULL) + return (NULL); + wnn_sStrcpy (hin, s); + return (hin); +} + +#ifndef JSERVER +static +#endif /* JSERVER */ + int +wnn_get_fukugou_component_body (k, shp) + register int k; + register unsigned short **shp; +{ + static unsigned short tmp; + register unsigned short *s; + int index; /* need for NEWS-OS 6.0 */ + if (k < mhinsi && k >= 0) + { + tmp = k; + *shp = &tmp; + return (1); + } + if (k > FUKUGOU_START - mfukugou && k <= FUKUGOU_START) + { + index = FUKUGOU_START - k; + for (*shp = s = fukugou[index].component; *s != TERMINATE; s++); +/* + If next line in NEWS-OS 6.0, jserver down when kanji henkan. + for(*shp = s = fukugou[FUKUGOU_START - k].component;*s != TERMINATE;s++); +*/ + return (s - *shp); + } + return (-1); +} + +int +wnn_get_fukugou_component (k, shp) + register int k; + register unsigned short **shp; +{ + if (!hinsi_loaded) + { + if (wnn_loadhinsi (NULL) != 0) + { + return (-1); + } + } + return (wnn_get_fukugou_component_body (k, shp)); +} + + +#ifdef JSERVER + +w_char * +wnn_hinsi_name (no) + int no; +{ + w_char *c; + if ((c = get_hinsi_name (no)) == NULL) + { + wnn_errorno = WNN_BAD_HINSI_NO; + } + return (c); +} + +int +wnn_hinsi_number (name) + w_char *name; +{ + int n; + if ((n = find_hinsi_by_name (name)) == -1) + { + wnn_errorno = WNN_BAD_HINSI_NAME; + } + return (n); +} + +int +wnn_hinsi_list (name, c, mynode, mmynode) + w_char *name; + w_char **c; + struct wnn_hinsi_node *mynode; + int mmynode; +{ + int k; + + if (mynode == NULL) + { + mynode = node; + mmynode = mnode; + } + if (!hinsi_loaded) + wnn_loadhinsi (NULL); + for (k = 0; k < mmynode; k++) + { + if (wnn_Strcmp (name, mynode[k].name) == 0) + { + *c = mynode[k].son; + return (mynode[k].kosuu); + } + } + if (find_hinsi_by_name (name) == -1) + { + wnn_errorno = WNN_BAD_HINSI_NAME; + return (-1); + } + return (0); +} + +int +wnn_has_hinsi (mynode, mmynode, name) + struct wnn_hinsi_node *mynode; + int mmynode; + w_char *name; +{ + w_char *c; + int k, j; + if (mynode == NULL) + { + mynode = node; + mmynode = mnode; + } + for (k = 0; k < mmynode; k++) + { + if (wnn_Strcmp (name, mynode[k].name) == 0) + { + return (1); + } + else + { + c = mynode[k].son; + for (j = 0; j < mynode[k].kosuu; j++) + { + if (wnn_Strcmp (name, c) == 0) + { + return (1); + } + else + { + c += wnn_Strlen (c) + 1; + } + } + } + } + return (0); +} + +#endif