0
|
1 /*
|
|
2 * $Id: atod.c,v 1.13 2004/08/12 09:03:19 aono Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2002, 2004
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This program is free software; you can redistribute it and/or modify
|
|
18 * it under the terms of the GNU General Public License as published by
|
|
19 * the Free Software Foundation; either version 2 of the License, or
|
|
20 * (at your option) any later version.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
30 */
|
|
31
|
|
32 /*
|
|
33 UJIS 形式を、辞書登録可能形式, 及び固定形式辞書に変換するプログラム。
|
|
34 */
|
|
35
|
|
36 #ifndef lint
|
|
37 static char *rcs_id = "$Id: atod.c,v 1.13 2004/08/12 09:03:19 aono Exp $";
|
|
38 #endif /* lint */
|
|
39
|
|
40 #ifdef HAVE_CONFIG_H
|
|
41 # include <config.h>
|
|
42 #endif
|
|
43
|
|
44 #include <stdio.h>
|
|
45 #include <ctype.h>
|
|
46 #if STDC_HEADERS
|
|
47 # include <stdlib.h>
|
|
48 # include <string.h>
|
|
49 #else
|
|
50 # if HAVE_MALLOC_H
|
|
51 # include <malloc.h>
|
|
52 # endif
|
|
53 # if HAVE_STRINGS_H
|
|
54 # include <strings.h>
|
|
55 # endif
|
|
56 #endif /* STDC_HEADERS */
|
|
57 #if HAVE_UNISTD_H
|
|
58 # include <unistd.h>
|
|
59 #endif
|
|
60
|
|
61 #include "commonhd.h"
|
|
62 #include "wnn_config.h"
|
|
63 #include "jslib.h"
|
|
64 #include "jh.h"
|
|
65 #include "jdata.h"
|
|
66 #ifdef CHINESE
|
|
67 #include "cplib.h"
|
|
68 #endif
|
|
69 #include "getopt.h" /* GNU getopt in the stock */
|
|
70 #include "wnn_string.h"
|
|
71 #include "wnn_os.h"
|
|
72
|
|
73 extern int wnn_loadhinsi (), init_heap (), little_endian (), Sorted (), asshuku (), revdic ();
|
|
74 extern void ujis_header (), read_ujis (), reverse_yomi (), create_rev_dict (),
|
|
75 uniq_je (), sort_if_not_sorted (), output_header (), udytoS (), output_ri (), exit1 (), new_pwd (), rev_short_fun (), rev_w_char ();
|
|
76 extern
|
|
77 #ifdef BDIC_WRITE_CHECK
|
|
78 int
|
|
79 #else
|
|
80 void
|
|
81 #endif
|
|
82 put_n_EU_str ();
|
|
83 static void ujistoud (), output_dic_index (), usage (), sdic_sort (),
|
|
84 ujistosd (), not_enough_area (), get_pwd (), output_hindo (), output_hinsi (), output_kanji (), rev_dic_data (), set_pter1 (), output_comment (), output_hinsi_list ();
|
|
85 void upd_kanjicount (), output_dic_data ();
|
|
86
|
|
87 /* Switcher variable between UD and SD */
|
|
88
|
|
89 int which_dict = WNN_REV_DICT;
|
|
90 #ifdef CHINESE
|
|
91 extern int pzy_flag;
|
|
92 static void output_sisheng ();
|
|
93 #endif
|
|
94
|
|
95 /* Variables both for UD and SD */
|
|
96 int reverse_dict = NORMAL;
|
|
97
|
|
98 char *com_name;
|
|
99 int maxserial = MAX_ENTRIES;
|
|
100 int kanjicount = 0;
|
|
101 char outfile[LINE_SIZE];
|
|
102 struct wnn_file_head file_head;
|
|
103 struct JT jt;
|
|
104 UCHAR *hostart, *hoend; /* index 2 */
|
|
105 int to_esc = 0;
|
|
106
|
|
107 /* For SD */
|
|
108 #define HONTAI_PER_ENTRY 20
|
|
109 int node_count = 0;
|
|
110 UCHAR *hopter;
|
|
111
|
|
112
|
|
113 /* For UD */
|
|
114 struct uind1 *tary; /* index 1 */
|
|
115 int tnum = 0;
|
|
116 struct uind2 *uhopter;
|
|
117
|
|
118 char *hinsi_file_name = NULL;
|
|
119
|
|
120
|
|
121 void
|
|
122 init (int argc, char **argv)
|
|
123 {
|
|
124 int c;
|
|
125
|
|
126 maxserial = MAX_ENTRIES;
|
|
127 while ((c = getopt (argc, argv, "SURrs:P:p:Nneh:")) != EOF)
|
|
128 {
|
|
129 switch (c)
|
|
130 {
|
|
131 case 'S':
|
|
132 which_dict = WNN_STATIC_DICT;
|
|
133 break;
|
|
134 case 'U':
|
|
135 which_dict = WNN_UD_DICT;
|
|
136 break;
|
|
137 case 'R':
|
|
138 which_dict = WNN_REV_DICT;
|
|
139 break;
|
|
140 case 'r':
|
|
141 reverse_dict = REVERSE;
|
|
142 break;
|
|
143 case 'P':
|
|
144 get_pwd (optarg, file_head.file_passwd);
|
|
145 break;
|
|
146 case 'p':
|
|
147 get_pwd (optarg, jt.hpasswd);
|
|
148 break;
|
|
149 case 'N':
|
|
150 strcpy (file_head.file_passwd, "*");
|
|
151 break;
|
|
152 case 'n':
|
|
153 strcpy (jt.hpasswd, "*");
|
|
154 break;
|
|
155 case 'e':
|
|
156 to_esc = 1;
|
|
157 break;
|
|
158 case 'h':
|
|
159 hinsi_file_name = optarg;
|
|
160 break;
|
|
161 case 's':
|
|
162 if (sscanf (optarg, "%d", &maxserial) == 0)
|
|
163 {
|
|
164 usage ();
|
|
165 exit (1);
|
|
166 }
|
|
167 break;
|
|
168 }
|
|
169 }
|
|
170 if (to_esc && which_dict == WNN_REV_DICT)
|
|
171 {
|
|
172 fprintf (stderr, "You can't make the kanji component of reverse dictionary compact.\n");
|
|
173 exit (1);
|
|
174 }
|
|
175 if (optind)
|
|
176 {
|
|
177 optind--;
|
|
178 argc -= optind;
|
|
179 argv += optind;
|
|
180 }
|
|
181 if (argc != 2)
|
|
182 {
|
|
183 usage ();
|
|
184 exit (1);
|
|
185 }
|
|
186 strncpy (outfile, argv[1], LINE_SIZE-1);
|
|
187 outfile[LINE_SIZE-1] = '\0';
|
|
188 if (wnn_loadhinsi (hinsi_file_name) != 0)
|
|
189 {
|
|
190 fprintf (stderr, "Can't Open hinsi_file.\n");
|
|
191 exit (1);
|
|
192 }
|
|
193 if (init_heap (DEF_ENTRIES * HEAP_PER_LINE, DEF_ENTRIES * YOMI_PER_LINE, maxserial, DEF_ENTRIES, stdin) == -1)
|
|
194 exit (1);
|
|
195 }
|
|
196
|
|
197 void
|
|
198 alloc_area (void)
|
|
199 {
|
|
200 if (which_dict == WNN_STATIC_DICT)
|
|
201 {
|
|
202 if ((hostart = (UCHAR *) malloc (maxserial * HONTAI_PER_ENTRY)) == NULL)
|
|
203 {
|
|
204 fprintf (stderr, "Malloc Failed\n");
|
|
205 exit (1);
|
|
206 }
|
|
207 hopter = hostart;
|
|
208 hoend = (UCHAR *) hostart + maxserial * HONTAI_PER_ENTRY;
|
|
209 }
|
|
210 else
|
|
211 {
|
|
212 if ((tary = (struct uind1 *) malloc (jt.maxserial * sizeof (struct uind1))) == NULL ||
|
|
213 /* Too large? */
|
|
214 (hostart = (UCHAR *) malloc (jt.maxserial * (sizeof (struct uind2) + sizeof (w_char) * 2))) == NULL)
|
|
215 {
|
|
216 /* 2 must be enough? */
|
|
217 fprintf (stderr, "Malloc Failed\n");
|
|
218 exit (1);
|
|
219 }
|
|
220 hoend = (UCHAR *) ((char *) hostart + jt.maxserial * (sizeof (struct uind2) + sizeof (w_char) * 2));
|
|
221 }
|
|
222 }
|
|
223
|
|
/* Comparator passed to uniq_je() when building a non-static dictionary. */
extern int sort_func_je ();

/* Comparator used for static dictionaries; defined further down. */
int sort_func_sdic (const char *a, const char *b);

/* Stream of the dictionary being written; opened in main(). */
FILE *ofpter;
|
|
227
|
|
228 int
|
|
229 main (int argc, char** argv)
|
|
230 {
|
|
231 char *cswidth_name;
|
|
232 extern char *get_cswidth_name ();
|
|
233 extern void set_cswidth ();
|
|
234
|
|
235 com_name = argv[0];
|
|
236 init (argc, argv);
|
|
237
|
|
238 if (cswidth_name = get_cswidth_name (WNN_DEFAULT_LANG))
|
|
239 set_cswidth (create_cswidth (cswidth_name));
|
|
240
|
|
241 #ifdef CHINESE
|
|
242 ujis_header (&which_dict); /* read header of UJIS dic */
|
|
243 #else
|
|
244 ujis_header (); /* read header of UJIS dic */
|
|
245 #endif
|
|
246 /* like comment,total,hinsi */
|
|
247 #ifdef CHINESE
|
|
248 read_ujis (reverse_dict, to_esc, which_dict);
|
|
249
|
|
250 if (which_dict != CWNN_REV_DICT && which_dict != BWNN_REV_DICT)
|
|
251 reverse_yomi ();
|
|
252 #else
|
|
253 read_ujis (reverse_dict, to_esc, (which_dict == WNN_REV_DICT) ? 1 : 0);
|
|
254 #ifndef CONVERT_from_TOP
|
|
255 reverse_yomi ();
|
|
256 #endif
|
|
257 #endif
|
|
258
|
|
259 if ((ofpter = fopen (outfile, "w")) == NULL)
|
|
260 {
|
|
261 fprintf (stderr, "Can't open the output file %s.\n", outfile);
|
|
262 perror ("");
|
|
263 exit (1);
|
|
264 }
|
|
265
|
|
266 #ifdef CHINESE
|
|
267 if ((which_dict & 0xff) == WNN_REV_DICT)
|
|
268 {
|
|
269 #else
|
|
270 if (which_dict == WNN_REV_DICT)
|
|
271 {
|
|
272 #endif
|
|
273 create_rev_dict ();
|
|
274 }
|
|
275 else
|
|
276 {
|
|
277 alloc_area ();
|
|
278 if (which_dict == WNN_STATIC_DICT)
|
|
279 {
|
|
280 sdic_sort ();
|
|
281 uniq_je (sort_func_sdic);
|
|
282 output_dic_data ();
|
|
283 ujistosd (0, 0);
|
|
284 }
|
|
285 else
|
|
286 {
|
|
287 sort_if_not_sorted ();
|
|
288 uniq_je (sort_func_je);
|
|
289 output_dic_data ();
|
|
290 ujistoud ();
|
|
291 set_pter1 ();
|
|
292 }
|
|
293 }
|
|
294 output_dic_index ();
|
|
295 rewind (ofpter);
|
|
296 output_header (ofpter, &jt, &file_head);
|
|
297 #ifdef nodef
|
|
298 output_comment (ofpter); /* In order to change the byte order */
|
|
299 output_hinsi_list (ofpter); /* In order to change the byte order */
|
|
300 #endif
|
|
301 exit (0);
|
|
302 }
|
|
303
|
|
304
|
|
305 w_char *
|
|
306 addyomient (int tn, w_char* yomi)
|
|
307 {
|
|
308 int len = wnn_Strlen (yomi);
|
|
309 tary[tn].yomi2 = 0;
|
|
310 tary[tn].yomi1 = yomi[0] << 16;
|
|
311
|
|
312 uhopter->yomi[0] = len;
|
|
313 if (yomi[1])
|
|
314 {
|
|
315 tary[tn].yomi1 |= yomi[1];
|
|
316 if (yomi[2])
|
|
317 {
|
|
318 tary[tn].yomi2 = yomi[2] << 16;
|
|
319 if (yomi[3])
|
|
320 {
|
|
321 tary[tn].yomi2 |= yomi[3];
|
|
322 }
|
|
323 if (len > 4)
|
|
324 {
|
|
325 wnn_Strncpy (uhopter->yomi + 1, yomi + 4, len - 4);
|
|
326 return (uhopter->yomi + 1 + len - 4);
|
|
327 }
|
|
328 }
|
|
329 }
|
|
330 return (uhopter->yomi + 1);
|
|
331 }
|
|
332
|
|
333 static void
|
|
334 ujistoud (void)
|
|
335 {
|
|
336 w_char *yomi;
|
|
337 w_char dummy = 0; /* 2 byte yomi */
|
|
338 w_char *pyomi; /* maeno tangono yomi */
|
|
339 w_char *wcp;
|
|
340 int serial_count;
|
|
341 w_char *kosuup = NULL;
|
|
342
|
|
343 *(int *) hostart = 0;
|
|
344 uhopter = (struct uind2 *) ((int *) hostart + 1);
|
|
345
|
|
346 yomi = &dummy;
|
|
347
|
|
348 for (serial_count = 0; serial_count < jt.maxserial; serial_count++)
|
|
349 {
|
|
350 pyomi = yomi;
|
|
351 yomi = jeary[serial_count]->yomi;
|
|
352
|
|
353 if (wnn_Strcmp (yomi, pyomi))
|
|
354 {
|
|
355 tary[tnum++].pter = (char *) uhopter - (char *) hostart;
|
|
356 uhopter->next = 0;
|
|
357 uhopter->serial = serial_count;
|
|
358 uhopter->kanjipter = kanjicount;
|
|
359 uhopter->kosuu = 1;
|
|
360 kosuup = &uhopter->kosuu;
|
|
361 wcp = addyomient (tnum - 1, yomi);
|
|
362 uhopter = (struct uind2 *) (AL_INT (wcp));
|
|
363 }
|
|
364 else
|
|
365 {
|
|
366 if (kosuup)
|
|
367 *kosuup += 1;
|
|
368 }
|
|
369 upd_kanjicount (serial_count);
|
|
370 }
|
|
371 }
|
|
372
|
|
373 void
|
|
374 upd_kanjicount (int k)
|
|
375 {
|
|
376 kanjicount += *jeary[k]->kanji;
|
|
377 }
|
|
378
|
|
379 static void
|
|
380 set_pter1 (void)
|
|
381 {
|
|
382 int k;
|
|
383 int len;
|
|
384 w_char oyomi[LENGTHYOMI], nyomi[LENGTHYOMI];
|
|
385 /* May be a little slow, but simple! */
|
|
386 int lasts[LENGTHYOMI]; /* pter_to */
|
|
387
|
|
388 for (k = 0; k < LENGTHYOMI; k++)
|
|
389 {
|
|
390 lasts[k] = -1;
|
|
391 }
|
|
392
|
|
393 for (k = 0; k < tnum; k++)
|
|
394 {
|
|
395 len = ((struct uind2 *) ((char *) hostart + tary[k].pter))->yomi[0];
|
|
396 lasts[len] = k;
|
|
397 for (len--; len; len--)
|
|
398 {
|
|
399 if (lasts[len] >= 0)
|
|
400 {
|
|
401 udytoS (oyomi, lasts[len], hostart, tary);
|
|
402 udytoS (nyomi, k, hostart, tary);
|
|
403 if (wnn_Substr (oyomi, nyomi))
|
|
404 {
|
|
405 tary[k].pter1 = lasts[len];
|
|
406 break;
|
|
407 }
|
|
408 }
|
|
409 }
|
|
410 if (len == 0)
|
|
411 tary[k].pter1 = -1;
|
|
412 }
|
|
413 }
|
|
414
|
|
415 void
|
|
416 output_dic_data (void)
|
|
417 {
|
|
418
|
|
419 fprintf (stderr, "%d words are in this dictionary\n", jt.maxserial);
|
|
420
|
|
421 if (little_endian ())
|
|
422 {
|
|
423 rev_dic_data ();
|
|
424 }
|
|
425
|
|
426 jt.syurui = which_dict;
|
|
427 jt.gosuu = jt.maxserial;
|
|
428 output_header (ofpter, &jt, &file_head); /* dummy; Will be rewitten. */
|
|
429 output_comment (ofpter);
|
|
430 output_hinsi_list (ofpter);
|
|
431 output_hindo (ofpter);
|
|
432 output_hinsi (ofpter);
|
|
433 #ifdef CONVERT_with_SiSheng
|
|
434 if (which_dict == CWNN_REV_DICT)
|
|
435 output_sisheng (ofpter);
|
|
436 #endif
|
|
437 output_kanji (ofpter);
|
|
438 }
|
|
439
|
|
440 static void
|
|
441 output_dic_index (void)
|
|
442 {
|
|
443 if (which_dict == WNN_UD_DICT)
|
|
444 {
|
|
445 fprintf (stderr, " tnum = %d\n ind2= %d\n kanji = %d\n", tnum, (char *) uhopter - (char *) hostart, kanjicount);
|
|
446 jt.maxtable = tnum;
|
|
447 jt.maxhontai = (char *) uhopter - (char *) hostart;
|
|
448 jt.maxri2 = jt.maxri1[D_YOMI] = jt.maxri1[D_KANJI] = 0;
|
|
449 jt.hontai = hostart;
|
|
450 jt.table = tary;
|
|
451 jt.ri1[D_YOMI] = NULL;
|
|
452 jt.ri1[D_KANJI] = NULL;
|
|
453 jt.ri2 = NULL;
|
|
454 #ifdef CHINESE
|
|
455 }
|
|
456 else if ((which_dict & 0xff) == WNN_REV_DICT)
|
|
457 {
|
|
458 #else
|
|
459 }
|
|
460 else if (which_dict == WNN_REV_DICT)
|
|
461 {
|
|
462 #endif
|
|
463 fprintf (stderr, "kanji = %d\n", kanjicount);
|
|
464 jt.maxtable = 0;
|
|
465 jt.maxhontai = 0;
|
|
466 jt.maxri2 = jt.maxserial;
|
|
467 jt.hontai = NULL;
|
|
468 jt.table = NULL;
|
|
469 /* jt.ri1, jt.ri2 is set in create_rev_dict */
|
|
470 }
|
|
471 else
|
|
472 {
|
|
473 fprintf (stderr, "node_count = %d ind= %d\n kanji = %d\n", node_count, (char *) hopter - (char *) hostart, kanjicount);
|
|
474 jt.maxtable = 0;
|
|
475 jt.maxhontai = (char *) hopter - (char *) hostart;
|
|
476 jt.maxri2 = jt.maxri1[D_YOMI] = jt.maxri1[D_KANJI] = 0;
|
|
477 jt.hontai = hostart;
|
|
478 jt.table = NULL;
|
|
479 jt.ri1[D_YOMI] = NULL;
|
|
480 jt.ri1[D_KANJI] = NULL;
|
|
481 jt.ri2 = NULL;
|
|
482 }
|
|
483 jt.maxkanji = kanjicount;
|
|
484
|
|
485 if (little_endian ())
|
|
486 {
|
|
487 revdic (&jt, 1);
|
|
488 }
|
|
489
|
|
490 if (which_dict == WNN_UD_DICT)
|
|
491 {
|
|
492 fwrite ((char *) tary, sizeof (struct uind1), tnum, ofpter);
|
|
493 fwrite (hostart, 1, (char *) uhopter - (char *) hostart, ofpter);
|
|
494 #ifdef CHINESE
|
|
495 }
|
|
496 else if ((which_dict & 0xff) == WNN_REV_DICT)
|
|
497 {
|
|
498 #else
|
|
499 }
|
|
500 else if (which_dict == WNN_REV_DICT)
|
|
501 {
|
|
502 #endif
|
|
503 output_ri (ofpter);
|
|
504 }
|
|
505 else
|
|
506 {
|
|
507 fwrite (hostart, 1, (char *) hopter - (char *) hostart, ofpter);
|
|
508 }
|
|
509 }
|
|
510
|
|
511
|
|
512 static void
|
|
513 usage (void)
|
|
514 {
|
|
515 fprintf (stderr, "Usage : %s [-r -R -S -U -e -s maximum word count(default %d) -P passwd (or -N) -p hindo_passwd (or -n) -h hinsi_file_name] <dictonary filename>\n", com_name, MAX_ENTRIES);
|
|
516 fprintf (stderr, "Input the ascii dictionary from stdin\n");
|
|
517 fprintf (stderr, "-r is for creating dictionary with normal and reverse index\n");
|
|
518 fprintf (stderr, "-R is for creating reverse (implies updatable) dictionary. (default)\n");
|
|
519 fprintf (stderr, "-S is for creating static dictionary.\n");
|
|
520 fprintf (stderr, "-U is for creating updatable dictionary.\n");
|
|
521 fprintf (stderr, "-e is for compacting kanji string.\n");
|
|
522 exit1 ();
|
|
523 }
|
|
524
|
|
525 /* SD commands */
|
|
526
|
|
527 int
|
|
528 yStrcmp (w_char* a, w_char*b)
|
|
529 {
|
|
530 register int c, d;
|
|
531 for (; *a && *a == *b; a++, b++);
|
|
532 if (*a == 0)
|
|
533 return (-(int) *b);
|
|
534 if (*b == 0)
|
|
535 return ((int) *a);
|
|
536 c = *(a + 1);
|
|
537 d = *(b + 1);
|
|
538 if (c == 0 && d == 0)
|
|
539 return ((int) *a - (int) *b);
|
|
540 if (c == 0)
|
|
541 return (-1);
|
|
542 if (d == 0)
|
|
543 return (1);
|
|
544 return ((int) *a - (int) *b);
|
|
545 }
|
|
546
|
|
547 int
|
1
|
548 sort_func_sdic (const char* a, const char* b)
|
0
|
549 {
|
|
550 int tmp;
|
|
551 struct je *pa, *pb;
|
|
552 pa = *((struct je **) a);
|
|
553 pb = *((struct je **) b);
|
|
554 tmp = yStrcmp (pa->yomi, pb->yomi);
|
|
555 if (tmp)
|
|
556 return (tmp);
|
|
557 /* Changed the order to sort, from yomi->kanji->hinsi
|
|
558 * to yomi->hinsi->kanji, in order to separate (struct jdata) by
|
|
559 * hinsi'es.
|
|
560 */
|
|
561 if (pa->hinsi != pb->hinsi)
|
|
562 return ((int) (pa->hinsi) - (int) (pb->hinsi));
|
|
563 return (wnn_Strcmp (pa->kan, pb->kan));
|
|
564 }
|
|
565
|
|
566
|
|
567 static void
|
|
568 sdic_sort (void)
|
|
569 {
|
|
570 if (!Sorted ((char *) jeary, (int) jt.maxserial, sizeof (struct je *), sort_func_sdic))
|
|
571 {
|
|
572 qsort ((char *) jeary, (int) jt.maxserial, sizeof (struct je *), sort_func_sdic);
|
|
573 }
|
|
574 }
|
|
575
|
|
576
|
|
577 static w_char chartable[YOMI_KINDS];
|
|
578 static w_char numtable1[YOMI_KINDS];
|
|
579 static w_char *numtable = numtable1 + 1;
|
|
580 static int endt = 0;
|
|
581
|
|
582 int
|
|
583 analize_size (int start_je, int level,
|
|
584 int* statep, int* end_jep, int* mid_jep)
|
|
585 {
|
|
586 w_char *c = jeary[start_je]->yomi;
|
|
587 int je;
|
|
588 int level1 = level + 1;
|
|
589 int end_je, mid_je;
|
|
590 w_char y1, y2;
|
|
591 int je1, je2;
|
|
592
|
|
593 for (; endt-- > 0;)
|
|
594 {
|
|
595 chartable[endt] = numtable[endt] = 0;
|
|
596 }
|
|
597 endt = 0; /* This line is needed when endt is originally 0 */
|
|
598 for (je = start_je; je < jt.maxserial && wnn_Strncmp (c, jeary[je]->yomi, level) == 0 && jeary[je]->yomi[level1] == 0; je++);
|
|
599 *mid_jep = mid_je = je;
|
|
600 for (je = start_je; je < jt.maxserial && wnn_Strncmp (c, jeary[je]->yomi, level) == 0; je++);
|
|
601 *end_jep = end_je = je;
|
|
602
|
|
603 je1 = start_je;
|
|
604 je2 = mid_je;
|
|
605 y1 = (je1 >= mid_je) ? 0xffff : jeary[je1]->yomi[level];
|
|
606 y2 = (je2 >= end_je) ? 0xffff : jeary[je2]->yomi[level];
|
|
607 for (;;)
|
|
608 {
|
|
609 if (y1 > y2)
|
|
610 {
|
|
611 chartable[endt] = y2;
|
|
612 for (numtable[endt] = numtable[endt - 1]; je2 < end_je && jeary[je2]->yomi[level] == y2; je2++);
|
|
613 y2 = (je2 >= end_je) ? 0xffff : jeary[je2]->yomi[level];
|
|
614 endt++;
|
|
615 }
|
|
616 else if (y1 < y2)
|
|
617 {
|
|
618 chartable[endt] = y1;
|
|
619 for (numtable[endt] = numtable[endt - 1]; je1 < end_je && jeary[je1]->yomi[level] == y1; je1++, numtable[endt]++);
|
|
620 y1 = (je1 >= mid_je) ? 0xffff : jeary[je1]->yomi[level];
|
|
621 endt++;
|
|
622 }
|
|
623 else
|
|
624 {
|
|
625 if (y1 >= 0xffff && y2 >= 0xffff)
|
|
626 break;
|
|
627 chartable[endt] = y1;
|
|
628 for (; je2 < end_je && jeary[je2]->yomi[level] == y2; je2++);
|
|
629 for (numtable[endt] = numtable[endt - 1]; je1 < mid_je && jeary[je1]->yomi[level] == y1; je1++, numtable[endt]++);
|
|
630 y2 = (je2 >= end_je) ? 0xffff : jeary[je2]->yomi[level];
|
|
631 y1 = (je1 >= mid_je) ? 0xffff : jeary[je1]->yomi[level];
|
|
632 endt++;
|
|
633 }
|
|
634 }
|
|
635 if (mid_je == start_je)
|
|
636 {
|
|
637 if (endt == 1)
|
|
638 *statep = ST_SMALL;
|
|
639 else
|
|
640 *statep = ST_NOENT;
|
|
641 }
|
|
642 else if (mid_je == end_je)
|
|
643 *statep = ST_NOPTER;
|
|
644 else
|
|
645 *statep = ST_NORMAL;
|
|
646
|
|
647 return (endt);
|
|
648 }
|
|
649
|
|
650
|
|
651
|
|
652 static void
|
|
653 ujistosd (int start_je, int level)
|
|
654 {
|
|
655 int state;
|
|
656 int tsize;
|
|
657 w_char *charst;
|
|
658 w_char *sumst;
|
|
659 int *ptrst;
|
|
660 int mid_je, end_je;
|
|
661 int je;
|
|
662 int k;
|
|
663
|
|
664 node_count++;
|
|
665
|
|
666 tsize = analize_size (start_je, level, &state, &end_je, &mid_je);
|
|
667
|
|
668 *(w_char *) hopter = state;
|
|
669 hopter += 2;
|
|
670 switch (state)
|
|
671 {
|
|
672 case ST_NORMAL:
|
|
673 if (hopter + tsize * 8 + 12 + 16 >= hoend)
|
|
674 not_enough_area ();
|
|
675 *(w_char *) hopter = tsize;
|
|
676 hopter += 2;
|
|
677 *(int *) hopter = start_je;
|
|
678 hopter += 4;
|
|
679 *(int *) hopter = kanjicount;
|
|
680 hopter += 4;
|
|
681 charst = (w_char *) hopter;
|
|
682 sumst = ((w_char *) hopter + tsize + 2); /* + 2 keeps two zero words */
|
|
683 charst[tsize] = charst[tsize + 1] = 0;
|
|
684 ptrst = (int *) ((w_char *) sumst + tsize);
|
|
685 hopter = (UCHAR *) (ptrst + tsize);
|
|
686 for (k = 0; k < tsize; k++)
|
|
687 {
|
|
688 charst[k] = chartable[k];
|
|
689 sumst[k] = numtable[k];
|
|
690 }
|
|
691 for (k = start_je; k < mid_je; k++)
|
|
692 {
|
|
693 upd_kanjicount (k);
|
|
694 }
|
|
695 for (je = mid_je, k = 0; je < end_je;)
|
|
696 {
|
|
697 for (; k < tsize && charst[k] < jeary[je]->yomi[level]; k++)
|
|
698 ptrst[k] = ENDPTR;
|
|
699 if (k == tsize)
|
|
700 fprintf (stderr, "Error\n");
|
|
701 ptrst[k] = (char *) hopter - (char *) hostart;
|
|
702 ujistosd (je, level + 1);
|
|
703 for (; je < end_je && jeary[je]->yomi[level] == charst[k]; je++);
|
|
704 k++;
|
|
705 }
|
|
706 break;
|
|
707 case ST_NOENT:
|
|
708 if (hopter + tsize * 8 + 4 + 16 >= hoend)
|
|
709 not_enough_area ();
|
|
710 *(w_char *) hopter = tsize;
|
|
711 hopter += 2;
|
|
712 charst = (w_char *) hopter;
|
|
713 ptrst = (int *) AL_INT ((w_char *) charst + tsize);
|
|
714 hopter = (UCHAR *) (ptrst + tsize);
|
|
715 for (k = 0; k < tsize; k++)
|
|
716 {
|
|
717 charst[k] = chartable[k];
|
|
718 }
|
|
719 for (je = mid_je, k = 0; je < end_je;)
|
|
720 {
|
|
721 for (; k < tsize && charst[k] < (jeary[je]->yomi)[level]; k++)
|
|
722 ptrst[k] = ENDPTR;
|
|
723 if (k == tsize)
|
|
724 fprintf (stderr, "Error\n");
|
|
725 ptrst[k] = (char *) hopter - (char *) hostart;
|
|
726 ujistosd (je, level + 1);
|
|
727 for (; je < end_je && (jeary[je]->yomi)[level] == charst[k]; je++);
|
|
728 k++;
|
|
729 }
|
|
730 break;
|
|
731 case ST_NOPTER:
|
|
732 if (hopter + tsize * 4 + 12 + 16 >= hoend)
|
|
733 not_enough_area ();
|
|
734 *(w_char *) hopter = tsize;
|
|
735 hopter += 2;
|
|
736 *(int *) hopter = start_je;
|
|
737 hopter += 4;
|
|
738 *(int *) hopter = kanjicount;
|
|
739 hopter += 4;
|
|
740 charst = (w_char *) hopter;
|
|
741 sumst = ((w_char *) hopter + tsize + 2); /* + 2 keeps two zero words */
|
|
742 charst[tsize] = charst[tsize + 1] = 0;
|
|
743 hopter = (UCHAR *) ((w_char *) sumst + tsize);
|
|
744 for (k = 0; k < tsize; k++)
|
|
745 {
|
|
746 charst[k] = chartable[k];
|
|
747 sumst[k] = numtable[k];
|
|
748 }
|
|
749 for (k = start_je; k < mid_je; k++)
|
|
750 {
|
|
751 upd_kanjicount (k);
|
|
752 }
|
|
753 break;
|
|
754 case ST_SMALL:
|
|
755 if (hopter + 4 + 16 >= hoend)
|
|
756 not_enough_area ();
|
|
757 *(w_char *) hopter = chartable[0];
|
|
758 hopter += 2;
|
|
759 ujistosd (start_je, level + 1);
|
|
760 break;
|
|
761 }
|
|
762 }
|
|
763
|
|
764 #define INCR_HO 150
|
|
765
|
|
766 static void
|
|
767 not_enough_area (void)
|
|
768 {
|
|
769 /*
|
|
770 UCHAR *oldstart = hostart;
|
|
771 int size = hoend - hostart + INCR_HO * HONTAI_PER_ENTRY;
|
|
772 */
|
|
773 fprintf (stderr, "Sorry allocated area is exhausted.node_count = %d\n", node_count);
|
|
774 fprintf (stderr, "Retry with option -S <dict_size>.\n");
|
|
775 exit (1);
|
|
776
|
|
777 /*
|
|
778 * Only for Sdic
|
|
779 * Can't Realloc!! many pointeres exist on the stack!!
|
|
780 */
|
|
781
|
|
782 /*
|
|
783 fprintf(stderr, "Realloc.\n");
|
|
784 if((hostart = (char *)realloc(hostart, size)) == NULL){
|
|
785 fprintf(stderr, "Ralloc Failed\n");
|
|
786 exit(1);
|
|
787 }
|
|
788 hopter = hostart + (hopter - oldstart);
|
|
789 hoend = (char *)((char *)hostart + size);
|
|
790 */
|
|
791 }
|
|
792
|
|
793
|
|
794 static void
|
|
795 get_pwd (char* fname, char* crypted)
|
|
796 {
|
|
797 char pwd[WNN_PASSWD_LEN];
|
|
798 FILE *fp;
|
|
799
|
|
800 if ((fp = fopen (fname, "r")) == NULL)
|
|
801 {
|
|
802 fprintf (stderr, "No password_file %s.\n", fname);
|
|
803 exit (1);
|
|
804 }
|
|
805 fgets (pwd, WNN_PASSWD_LEN, fp);
|
|
806 fclose (fp);
|
|
807 new_pwd (pwd, crypted);
|
|
808 }
|
|
809
|
|
810 /* output dict routine's */
|
|
811 static void
|
|
812 output_comment (FILE* fp)
|
|
813 {
|
|
814 put_n_EU_str (fp, file_comment, jt.maxcomment);
|
|
815 }
|
|
816
|
|
817 static void
|
|
818 output_hinsi_list (FILE* fp)
|
|
819 {
|
|
820 put_n_EU_str (fp, hinsi_list, jt.maxhinsi_list);
|
|
821 }
|
|
822
|
|
823 static void
|
|
824 output_hindo (FILE* ofpter)
|
|
825 {
|
|
826 register int i;
|
|
827 for (i = 0; i < jt.maxserial; i++)
|
|
828 {
|
|
829 putc (asshuku (jeary[i]->hindo), ofpter);
|
|
830 }
|
|
831 }
|
|
832
|
|
833 static void
|
|
834 output_hinsi (FILE* ofpter)
|
|
835 {
|
|
836 int i;
|
|
837 short k;
|
|
838 int little_endianp = little_endian ();
|
|
839 /* hinsi ha koko de hikkuri kaesu */
|
|
840
|
|
841 for (i = 0; i < jt.maxserial; i++)
|
|
842 {
|
|
843 k = jeary[i]->hinsi;
|
|
844 if (little_endianp)
|
|
845 {
|
|
846 rev_short_fun (&k);
|
|
847 }
|
|
848 fwrite (&k, 1, 2, ofpter);
|
|
849 }
|
|
850 }
|
|
851
|
|
852 static void
|
|
853 output_kanji (FILE* ofpter)
|
|
854 {
|
|
855 int i;
|
|
856
|
|
857 for (i = 0; i < jt.maxserial; i++)
|
|
858 {
|
|
859 fwrite (jeary[i]->kanji, 1, *(jeary[i]->kanji), ofpter);
|
|
860 }
|
|
861 }
|
|
862
|
|
863 static void
|
|
864 rev_dic_data (void)
|
|
865 {
|
|
866 int i;
|
|
867
|
|
868 /* Comment and Hinsi list do not need to reverse. */
|
|
869 for (i = 0; i < jt.maxserial; i++)
|
|
870 {
|
|
871 rev_w_char ((w_char *) (jeary[i]->kanji + 2), *(jeary[i]->kanji) / 2 - 1);
|
|
872 }
|
|
873 }
|
|
874
|
|
#ifdef CONVERT_with_SiSheng
extern void put_short ();

/* Write the ss field of every entry to ofpter with put_short(). */
static void
output_sisheng (FILE *ofpter)
{
  int idx = 0;

  while (idx < jt.maxserial)
    {
      put_short (ofpter, jeary[idx]->ss);
      idx++;
    }
}
#endif /* CONVERT_with_SiSheng */
|