Mercurial > freewnn
comparison Wnn/jutil/atod.c @ 0:bbc77ca4def5
initial import
author | Yoshiki Yazawa <yaz@cc.rim.or.jp> |
---|---|
date | Thu, 13 Dec 2007 04:30:14 +0900 |
parents | |
children | 790205f476c0 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:bbc77ca4def5 |
---|---|
1 /* | |
2 * $Id: atod.c,v 1.13 2004/08/12 09:03:19 aono Exp $ | |
3 */ | |
4 | |
5 /* | |
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system. | |
7 * This file is part of FreeWnn. | |
8 * | |
9 * Copyright Kyoto University Research Institute for Mathematical Sciences | |
10 * 1987, 1988, 1989, 1990, 1991, 1992 | |
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999 | |
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992 | |
13 * Copyright FreeWnn Project 1999, 2000, 2002, 2004 | |
14 * | |
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp> | |
16 * | |
17 * This program is free software; you can redistribute it and/or modify | |
18 * it under the terms of the GNU General Public License as published by | |
19 * the Free Software Foundation; either version 2 of the License, or | |
20 * (at your option) any later version. | |
21 * | |
22 * This program is distributed in the hope that it will be useful, | |
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 * GNU General Public License for more details. | |
26 * | |
27 * You should have received a copy of the GNU General Public License | |
28 * along with this program; if not, write to the Free Software | |
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
30 */ | |
31 | |
32 /* | |
33 UJIS 形式を、辞書登録可能形式, 及び固定形式辞書に変換するプログラム。 | |
34 */ | |
35 | |
36 #ifndef lint | |
37 static char *rcs_id = "$Id: atod.c,v 1.13 2004/08/12 09:03:19 aono Exp $"; | |
38 #endif /* lint */ | |
39 | |
40 #ifdef HAVE_CONFIG_H | |
41 # include <config.h> | |
42 #endif | |
43 | |
44 #include <stdio.h> | |
45 #include <ctype.h> | |
46 #if STDC_HEADERS | |
47 # include <stdlib.h> | |
48 # include <string.h> | |
49 #else | |
50 # if HAVE_MALLOC_H | |
51 # include <malloc.h> | |
52 # endif | |
53 # if HAVE_STRINGS_H | |
54 # include <strings.h> | |
55 # endif | |
56 #endif /* STDC_HEADERS */ | |
57 #if HAVE_UNISTD_H | |
58 # include <unistd.h> | |
59 #endif | |
60 | |
61 #include "commonhd.h" | |
62 #include "wnn_config.h" | |
63 #include "jslib.h" | |
64 #include "jh.h" | |
65 #include "jdata.h" | |
66 #ifdef CHINESE | |
67 #include "cplib.h" | |
68 #endif | |
69 #include "getopt.h" /* GNU getopt in the stock */ | |
70 #include "wnn_string.h" | |
71 #include "wnn_os.h" | |
72 | |
/*
 * Old-style (unprototyped) declarations of routines defined in other
 * translation units.
 */
extern int wnn_loadhinsi ();
extern int init_heap ();
extern int little_endian ();
extern int Sorted ();
extern int asshuku ();
extern int revdic ();

extern void ujis_header ();
extern void read_ujis ();
extern void reverse_yomi ();
extern void create_rev_dict ();
extern void uniq_je ();
extern void sort_if_not_sorted ();
extern void output_header ();
extern void udytoS ();
extern void output_ri ();
extern void exit1 ();
extern void new_pwd ();
extern void rev_short_fun ();
extern void rev_w_char ();

/* The return type of put_n_EU_str depends on BDIC_WRITE_CHECK. */
#ifdef BDIC_WRITE_CHECK
extern int put_n_EU_str ();
#else
extern void put_n_EU_str ();
#endif

/* Routines private to this file. */
static void ujistoud ();
static void output_dic_index ();
static void usage ();
static void sdic_sort ();
static void ujistosd ();
static void not_enough_area ();
static void get_pwd ();
static void output_hindo ();
static void output_hinsi ();
static void output_kanji ();
static void rev_dic_data ();
static void set_pter1 ();
static void output_comment ();
static void output_hinsi_list ();

/* Routines defined in this file with external linkage. */
void upd_kanjicount ();
void output_dic_data ();
86 | |
87 /* Switcher variable between UD and SD */ | |
88 | |
89 int which_dict = WNN_REV_DICT; | |
90 #ifdef CHINESE | |
91 extern int pzy_flag; | |
92 static void output_sisheng (); | |
93 #endif | |
94 | |
95 /* Variables both for UD and SD */ | |
96 int reverse_dict = NORMAL; | |
97 | |
98 char *com_name; | |
99 int maxserial = MAX_ENTRIES; | |
100 int kanjicount = 0; | |
101 char outfile[LINE_SIZE]; | |
102 struct wnn_file_head file_head; | |
103 struct JT jt; | |
104 UCHAR *hostart, *hoend; /* index 2 */ | |
105 int to_esc = 0; | |
106 | |
107 /* For SD */ | |
108 #define HONTAI_PER_ENTRY 20 | |
109 int node_count = 0; | |
110 UCHAR *hopter; | |
111 | |
112 | |
113 /* For UD */ | |
114 struct uind1 *tary; /* index 1 */ | |
115 int tnum = 0; | |
116 struct uind2 *uhopter; | |
117 | |
118 char *hinsi_file_name = NULL; | |
119 | |
120 | |
121 void | |
122 init (int argc, char **argv) | |
123 { | |
124 int c; | |
125 | |
126 maxserial = MAX_ENTRIES; | |
127 while ((c = getopt (argc, argv, "SURrs:P:p:Nneh:")) != EOF) | |
128 { | |
129 switch (c) | |
130 { | |
131 case 'S': | |
132 which_dict = WNN_STATIC_DICT; | |
133 break; | |
134 case 'U': | |
135 which_dict = WNN_UD_DICT; | |
136 break; | |
137 case 'R': | |
138 which_dict = WNN_REV_DICT; | |
139 break; | |
140 case 'r': | |
141 reverse_dict = REVERSE; | |
142 break; | |
143 case 'P': | |
144 get_pwd (optarg, file_head.file_passwd); | |
145 break; | |
146 case 'p': | |
147 get_pwd (optarg, jt.hpasswd); | |
148 break; | |
149 case 'N': | |
150 strcpy (file_head.file_passwd, "*"); | |
151 break; | |
152 case 'n': | |
153 strcpy (jt.hpasswd, "*"); | |
154 break; | |
155 case 'e': | |
156 to_esc = 1; | |
157 break; | |
158 case 'h': | |
159 hinsi_file_name = optarg; | |
160 break; | |
161 case 's': | |
162 if (sscanf (optarg, "%d", &maxserial) == 0) | |
163 { | |
164 usage (); | |
165 exit (1); | |
166 } | |
167 break; | |
168 } | |
169 } | |
170 if (to_esc && which_dict == WNN_REV_DICT) | |
171 { | |
172 fprintf (stderr, "You can't make the kanji component of reverse dictionary compact.\n"); | |
173 exit (1); | |
174 } | |
175 if (optind) | |
176 { | |
177 optind--; | |
178 argc -= optind; | |
179 argv += optind; | |
180 } | |
181 if (argc != 2) | |
182 { | |
183 usage (); | |
184 exit (1); | |
185 } | |
186 strncpy (outfile, argv[1], LINE_SIZE-1); | |
187 outfile[LINE_SIZE-1] = '\0'; | |
188 if (wnn_loadhinsi (hinsi_file_name) != 0) | |
189 { | |
190 fprintf (stderr, "Can't Open hinsi_file.\n"); | |
191 exit (1); | |
192 } | |
193 if (init_heap (DEF_ENTRIES * HEAP_PER_LINE, DEF_ENTRIES * YOMI_PER_LINE, maxserial, DEF_ENTRIES, stdin) == -1) | |
194 exit (1); | |
195 } | |
196 | |
197 void | |
198 alloc_area (void) | |
199 { | |
200 if (which_dict == WNN_STATIC_DICT) | |
201 { | |
202 if ((hostart = (UCHAR *) malloc (maxserial * HONTAI_PER_ENTRY)) == NULL) | |
203 { | |
204 fprintf (stderr, "Malloc Failed\n"); | |
205 exit (1); | |
206 } | |
207 hopter = hostart; | |
208 hoend = (UCHAR *) hostart + maxserial * HONTAI_PER_ENTRY; | |
209 } | |
210 else | |
211 { | |
212 if ((tary = (struct uind1 *) malloc (jt.maxserial * sizeof (struct uind1))) == NULL || | |
213 /* Too large? */ | |
214 (hostart = (UCHAR *) malloc (jt.maxserial * (sizeof (struct uind2) + sizeof (w_char) * 2))) == NULL) | |
215 { | |
216 /* 2 must be enough? */ | |
217 fprintf (stderr, "Malloc Failed\n"); | |
218 exit (1); | |
219 } | |
220 hoend = (UCHAR *) ((char *) hostart + jt.maxserial * (sizeof (struct uind2) + sizeof (w_char) * 2)); | |
221 } | |
222 } | |
223 | |
/* Comparison functions handed to uniq_je() by main(). */
extern int sort_func_sdic ();
extern int sort_func_je ();

/* Stream of the dictionary file being written; opened in main(). */
FILE *ofpter;
227 | |
228 int | |
229 main (int argc, char** argv) | |
230 { | |
231 char *cswidth_name; | |
232 extern char *get_cswidth_name (); | |
233 extern void set_cswidth (); | |
234 | |
235 com_name = argv[0]; | |
236 init (argc, argv); | |
237 | |
238 if (cswidth_name = get_cswidth_name (WNN_DEFAULT_LANG)) | |
239 set_cswidth (create_cswidth (cswidth_name)); | |
240 | |
241 #ifdef CHINESE | |
242 ujis_header (&which_dict); /* read header of UJIS dic */ | |
243 #else | |
244 ujis_header (); /* read header of UJIS dic */ | |
245 #endif | |
246 /* like comment,total,hinsi */ | |
247 #ifdef CHINESE | |
248 read_ujis (reverse_dict, to_esc, which_dict); | |
249 | |
250 if (which_dict != CWNN_REV_DICT && which_dict != BWNN_REV_DICT) | |
251 reverse_yomi (); | |
252 #else | |
253 read_ujis (reverse_dict, to_esc, (which_dict == WNN_REV_DICT) ? 1 : 0); | |
254 #ifndef CONVERT_from_TOP | |
255 reverse_yomi (); | |
256 #endif | |
257 #endif | |
258 | |
259 if ((ofpter = fopen (outfile, "w")) == NULL) | |
260 { | |
261 fprintf (stderr, "Can't open the output file %s.\n", outfile); | |
262 perror (""); | |
263 exit (1); | |
264 } | |
265 | |
266 #ifdef CHINESE | |
267 if ((which_dict & 0xff) == WNN_REV_DICT) | |
268 { | |
269 #else | |
270 if (which_dict == WNN_REV_DICT) | |
271 { | |
272 #endif | |
273 create_rev_dict (); | |
274 } | |
275 else | |
276 { | |
277 alloc_area (); | |
278 if (which_dict == WNN_STATIC_DICT) | |
279 { | |
280 sdic_sort (); | |
281 uniq_je (sort_func_sdic); | |
282 output_dic_data (); | |
283 ujistosd (0, 0); | |
284 } | |
285 else | |
286 { | |
287 sort_if_not_sorted (); | |
288 uniq_je (sort_func_je); | |
289 output_dic_data (); | |
290 ujistoud (); | |
291 set_pter1 (); | |
292 } | |
293 } | |
294 output_dic_index (); | |
295 rewind (ofpter); | |
296 output_header (ofpter, &jt, &file_head); | |
297 #ifdef nodef | |
298 output_comment (ofpter); /* In order to change the byte order */ | |
299 output_hinsi_list (ofpter); /* In order to change the byte order */ | |
300 #endif | |
301 exit (0); | |
302 } | |
303 | |
304 | |
305 w_char * | |
306 addyomient (int tn, w_char* yomi) | |
307 { | |
308 int len = wnn_Strlen (yomi); | |
309 tary[tn].yomi2 = 0; | |
310 tary[tn].yomi1 = yomi[0] << 16; | |
311 | |
312 uhopter->yomi[0] = len; | |
313 if (yomi[1]) | |
314 { | |
315 tary[tn].yomi1 |= yomi[1]; | |
316 if (yomi[2]) | |
317 { | |
318 tary[tn].yomi2 = yomi[2] << 16; | |
319 if (yomi[3]) | |
320 { | |
321 tary[tn].yomi2 |= yomi[3]; | |
322 } | |
323 if (len > 4) | |
324 { | |
325 wnn_Strncpy (uhopter->yomi + 1, yomi + 4, len - 4); | |
326 return (uhopter->yomi + 1 + len - 4); | |
327 } | |
328 } | |
329 } | |
330 return (uhopter->yomi + 1); | |
331 } | |
332 | |
333 static void | |
334 ujistoud (void) | |
335 { | |
336 w_char *yomi; | |
337 w_char dummy = 0; /* 2 byte yomi */ | |
338 w_char *pyomi; /* maeno tangono yomi */ | |
339 w_char *wcp; | |
340 int serial_count; | |
341 w_char *kosuup = NULL; | |
342 | |
343 *(int *) hostart = 0; | |
344 uhopter = (struct uind2 *) ((int *) hostart + 1); | |
345 | |
346 yomi = &dummy; | |
347 | |
348 for (serial_count = 0; serial_count < jt.maxserial; serial_count++) | |
349 { | |
350 pyomi = yomi; | |
351 yomi = jeary[serial_count]->yomi; | |
352 | |
353 if (wnn_Strcmp (yomi, pyomi)) | |
354 { | |
355 tary[tnum++].pter = (char *) uhopter - (char *) hostart; | |
356 uhopter->next = 0; | |
357 uhopter->serial = serial_count; | |
358 uhopter->kanjipter = kanjicount; | |
359 uhopter->kosuu = 1; | |
360 kosuup = &uhopter->kosuu; | |
361 wcp = addyomient (tnum - 1, yomi); | |
362 uhopter = (struct uind2 *) (AL_INT (wcp)); | |
363 } | |
364 else | |
365 { | |
366 if (kosuup) | |
367 *kosuup += 1; | |
368 } | |
369 upd_kanjicount (serial_count); | |
370 } | |
371 } | |
372 | |
373 void | |
374 upd_kanjicount (int k) | |
375 { | |
376 kanjicount += *jeary[k]->kanji; | |
377 } | |
378 | |
379 static void | |
380 set_pter1 (void) | |
381 { | |
382 int k; | |
383 int len; | |
384 w_char oyomi[LENGTHYOMI], nyomi[LENGTHYOMI]; | |
385 /* May be a little slow, but simple! */ | |
386 int lasts[LENGTHYOMI]; /* pter_to */ | |
387 | |
388 for (k = 0; k < LENGTHYOMI; k++) | |
389 { | |
390 lasts[k] = -1; | |
391 } | |
392 | |
393 for (k = 0; k < tnum; k++) | |
394 { | |
395 len = ((struct uind2 *) ((char *) hostart + tary[k].pter))->yomi[0]; | |
396 lasts[len] = k; | |
397 for (len--; len; len--) | |
398 { | |
399 if (lasts[len] >= 0) | |
400 { | |
401 udytoS (oyomi, lasts[len], hostart, tary); | |
402 udytoS (nyomi, k, hostart, tary); | |
403 if (wnn_Substr (oyomi, nyomi)) | |
404 { | |
405 tary[k].pter1 = lasts[len]; | |
406 break; | |
407 } | |
408 } | |
409 } | |
410 if (len == 0) | |
411 tary[k].pter1 = -1; | |
412 } | |
413 } | |
414 | |
415 void | |
416 output_dic_data (void) | |
417 { | |
418 | |
419 fprintf (stderr, "%d words are in this dictionary\n", jt.maxserial); | |
420 | |
421 if (little_endian ()) | |
422 { | |
423 rev_dic_data (); | |
424 } | |
425 | |
426 jt.syurui = which_dict; | |
427 jt.gosuu = jt.maxserial; | |
428 output_header (ofpter, &jt, &file_head); /* dummy; Will be rewitten. */ | |
429 output_comment (ofpter); | |
430 output_hinsi_list (ofpter); | |
431 output_hindo (ofpter); | |
432 output_hinsi (ofpter); | |
433 #ifdef CONVERT_with_SiSheng | |
434 if (which_dict == CWNN_REV_DICT) | |
435 output_sisheng (ofpter); | |
436 #endif | |
437 output_kanji (ofpter); | |
438 } | |
439 | |
440 static void | |
441 output_dic_index (void) | |
442 { | |
443 if (which_dict == WNN_UD_DICT) | |
444 { | |
445 fprintf (stderr, " tnum = %d\n ind2= %d\n kanji = %d\n", tnum, (char *) uhopter - (char *) hostart, kanjicount); | |
446 jt.maxtable = tnum; | |
447 jt.maxhontai = (char *) uhopter - (char *) hostart; | |
448 jt.maxri2 = jt.maxri1[D_YOMI] = jt.maxri1[D_KANJI] = 0; | |
449 jt.hontai = hostart; | |
450 jt.table = tary; | |
451 jt.ri1[D_YOMI] = NULL; | |
452 jt.ri1[D_KANJI] = NULL; | |
453 jt.ri2 = NULL; | |
454 #ifdef CHINESE | |
455 } | |
456 else if ((which_dict & 0xff) == WNN_REV_DICT) | |
457 { | |
458 #else | |
459 } | |
460 else if (which_dict == WNN_REV_DICT) | |
461 { | |
462 #endif | |
463 fprintf (stderr, "kanji = %d\n", kanjicount); | |
464 jt.maxtable = 0; | |
465 jt.maxhontai = 0; | |
466 jt.maxri2 = jt.maxserial; | |
467 jt.hontai = NULL; | |
468 jt.table = NULL; | |
469 /* jt.ri1, jt.ri2 is set in create_rev_dict */ | |
470 } | |
471 else | |
472 { | |
473 fprintf (stderr, "node_count = %d ind= %d\n kanji = %d\n", node_count, (char *) hopter - (char *) hostart, kanjicount); | |
474 jt.maxtable = 0; | |
475 jt.maxhontai = (char *) hopter - (char *) hostart; | |
476 jt.maxri2 = jt.maxri1[D_YOMI] = jt.maxri1[D_KANJI] = 0; | |
477 jt.hontai = hostart; | |
478 jt.table = NULL; | |
479 jt.ri1[D_YOMI] = NULL; | |
480 jt.ri1[D_KANJI] = NULL; | |
481 jt.ri2 = NULL; | |
482 } | |
483 jt.maxkanji = kanjicount; | |
484 | |
485 if (little_endian ()) | |
486 { | |
487 revdic (&jt, 1); | |
488 } | |
489 | |
490 if (which_dict == WNN_UD_DICT) | |
491 { | |
492 fwrite ((char *) tary, sizeof (struct uind1), tnum, ofpter); | |
493 fwrite (hostart, 1, (char *) uhopter - (char *) hostart, ofpter); | |
494 #ifdef CHINESE | |
495 } | |
496 else if ((which_dict & 0xff) == WNN_REV_DICT) | |
497 { | |
498 #else | |
499 } | |
500 else if (which_dict == WNN_REV_DICT) | |
501 { | |
502 #endif | |
503 output_ri (ofpter); | |
504 } | |
505 else | |
506 { | |
507 fwrite (hostart, 1, (char *) hopter - (char *) hostart, ofpter); | |
508 } | |
509 } | |
510 | |
511 | |
512 static void | |
513 usage (void) | |
514 { | |
515 fprintf (stderr, "Usage : %s [-r -R -S -U -e -s maximum word count(default %d) -P passwd (or -N) -p hindo_passwd (or -n) -h hinsi_file_name] <dictonary filename>\n", com_name, MAX_ENTRIES); | |
516 fprintf (stderr, "Input the ascii dictionary from stdin\n"); | |
517 fprintf (stderr, "-r is for creating dictionary with normal and reverse index\n"); | |
518 fprintf (stderr, "-R is for creating reverse (implies updatable) dictionary. (default)\n"); | |
519 fprintf (stderr, "-S is for creating static dictionary.\n"); | |
520 fprintf (stderr, "-U is for creating updatable dictionary.\n"); | |
521 fprintf (stderr, "-e is for compacting kanji string.\n"); | |
522 exit1 (); | |
523 } | |
524 | |
525 /* SD commands */ | |
526 | |
527 int | |
528 yStrcmp (w_char* a, w_char*b) | |
529 { | |
530 register int c, d; | |
531 for (; *a && *a == *b; a++, b++); | |
532 if (*a == 0) | |
533 return (-(int) *b); | |
534 if (*b == 0) | |
535 return ((int) *a); | |
536 c = *(a + 1); | |
537 d = *(b + 1); | |
538 if (c == 0 && d == 0) | |
539 return ((int) *a - (int) *b); | |
540 if (c == 0) | |
541 return (-1); | |
542 if (d == 0) | |
543 return (1); | |
544 return ((int) *a - (int) *b); | |
545 } | |
546 | |
547 int | |
548 sort_func_sdic (char* a, char* b) | |
549 { | |
550 int tmp; | |
551 struct je *pa, *pb; | |
552 pa = *((struct je **) a); | |
553 pb = *((struct je **) b); | |
554 tmp = yStrcmp (pa->yomi, pb->yomi); | |
555 if (tmp) | |
556 return (tmp); | |
557 /* Changed the order to sort, from yomi->kanji->hinsi | |
558 * to yomi->hinsi->kanji, in order to separate (struct jdata) by | |
559 * hinsi'es. | |
560 */ | |
561 if (pa->hinsi != pb->hinsi) | |
562 return ((int) (pa->hinsi) - (int) (pb->hinsi)); | |
563 return (wnn_Strcmp (pa->kan, pb->kan)); | |
564 } | |
565 | |
566 | |
567 static void | |
568 sdic_sort (void) | |
569 { | |
570 if (!Sorted ((char *) jeary, (int) jt.maxserial, sizeof (struct je *), sort_func_sdic)) | |
571 { | |
572 qsort ((char *) jeary, (int) jt.maxserial, sizeof (struct je *), sort_func_sdic); | |
573 } | |
574 } | |
575 | |
576 | |
577 static w_char chartable[YOMI_KINDS]; | |
578 static w_char numtable1[YOMI_KINDS]; | |
579 static w_char *numtable = numtable1 + 1; | |
580 static int endt = 0; | |
581 | |
582 int | |
583 analize_size (int start_je, int level, | |
584 int* statep, int* end_jep, int* mid_jep) | |
585 { | |
586 w_char *c = jeary[start_je]->yomi; | |
587 int je; | |
588 int level1 = level + 1; | |
589 int end_je, mid_je; | |
590 w_char y1, y2; | |
591 int je1, je2; | |
592 | |
593 for (; endt-- > 0;) | |
594 { | |
595 chartable[endt] = numtable[endt] = 0; | |
596 } | |
597 endt = 0; /* This line is needed when endt is originally 0 */ | |
598 for (je = start_je; je < jt.maxserial && wnn_Strncmp (c, jeary[je]->yomi, level) == 0 && jeary[je]->yomi[level1] == 0; je++); | |
599 *mid_jep = mid_je = je; | |
600 for (je = start_je; je < jt.maxserial && wnn_Strncmp (c, jeary[je]->yomi, level) == 0; je++); | |
601 *end_jep = end_je = je; | |
602 | |
603 je1 = start_je; | |
604 je2 = mid_je; | |
605 y1 = (je1 >= mid_je) ? 0xffff : jeary[je1]->yomi[level]; | |
606 y2 = (je2 >= end_je) ? 0xffff : jeary[je2]->yomi[level]; | |
607 for (;;) | |
608 { | |
609 if (y1 > y2) | |
610 { | |
611 chartable[endt] = y2; | |
612 for (numtable[endt] = numtable[endt - 1]; je2 < end_je && jeary[je2]->yomi[level] == y2; je2++); | |
613 y2 = (je2 >= end_je) ? 0xffff : jeary[je2]->yomi[level]; | |
614 endt++; | |
615 } | |
616 else if (y1 < y2) | |
617 { | |
618 chartable[endt] = y1; | |
619 for (numtable[endt] = numtable[endt - 1]; je1 < end_je && jeary[je1]->yomi[level] == y1; je1++, numtable[endt]++); | |
620 y1 = (je1 >= mid_je) ? 0xffff : jeary[je1]->yomi[level]; | |
621 endt++; | |
622 } | |
623 else | |
624 { | |
625 if (y1 >= 0xffff && y2 >= 0xffff) | |
626 break; | |
627 chartable[endt] = y1; | |
628 for (; je2 < end_je && jeary[je2]->yomi[level] == y2; je2++); | |
629 for (numtable[endt] = numtable[endt - 1]; je1 < mid_je && jeary[je1]->yomi[level] == y1; je1++, numtable[endt]++); | |
630 y2 = (je2 >= end_je) ? 0xffff : jeary[je2]->yomi[level]; | |
631 y1 = (je1 >= mid_je) ? 0xffff : jeary[je1]->yomi[level]; | |
632 endt++; | |
633 } | |
634 } | |
635 if (mid_je == start_je) | |
636 { | |
637 if (endt == 1) | |
638 *statep = ST_SMALL; | |
639 else | |
640 *statep = ST_NOENT; | |
641 } | |
642 else if (mid_je == end_je) | |
643 *statep = ST_NOPTER; | |
644 else | |
645 *statep = ST_NORMAL; | |
646 | |
647 return (endt); | |
648 } | |
649 | |
650 | |
651 | |
652 static void | |
653 ujistosd (int start_je, int level) | |
654 { | |
655 int state; | |
656 int tsize; | |
657 w_char *charst; | |
658 w_char *sumst; | |
659 int *ptrst; | |
660 int mid_je, end_je; | |
661 int je; | |
662 int k; | |
663 | |
664 node_count++; | |
665 | |
666 tsize = analize_size (start_je, level, &state, &end_je, &mid_je); | |
667 | |
668 *(w_char *) hopter = state; | |
669 hopter += 2; | |
670 switch (state) | |
671 { | |
672 case ST_NORMAL: | |
673 if (hopter + tsize * 8 + 12 + 16 >= hoend) | |
674 not_enough_area (); | |
675 *(w_char *) hopter = tsize; | |
676 hopter += 2; | |
677 *(int *) hopter = start_je; | |
678 hopter += 4; | |
679 *(int *) hopter = kanjicount; | |
680 hopter += 4; | |
681 charst = (w_char *) hopter; | |
682 sumst = ((w_char *) hopter + tsize + 2); /* + 2 keeps two zero words */ | |
683 charst[tsize] = charst[tsize + 1] = 0; | |
684 ptrst = (int *) ((w_char *) sumst + tsize); | |
685 hopter = (UCHAR *) (ptrst + tsize); | |
686 for (k = 0; k < tsize; k++) | |
687 { | |
688 charst[k] = chartable[k]; | |
689 sumst[k] = numtable[k]; | |
690 } | |
691 for (k = start_je; k < mid_je; k++) | |
692 { | |
693 upd_kanjicount (k); | |
694 } | |
695 for (je = mid_je, k = 0; je < end_je;) | |
696 { | |
697 for (; k < tsize && charst[k] < jeary[je]->yomi[level]; k++) | |
698 ptrst[k] = ENDPTR; | |
699 if (k == tsize) | |
700 fprintf (stderr, "Error\n"); | |
701 ptrst[k] = (char *) hopter - (char *) hostart; | |
702 ujistosd (je, level + 1); | |
703 for (; je < end_je && jeary[je]->yomi[level] == charst[k]; je++); | |
704 k++; | |
705 } | |
706 break; | |
707 case ST_NOENT: | |
708 if (hopter + tsize * 8 + 4 + 16 >= hoend) | |
709 not_enough_area (); | |
710 *(w_char *) hopter = tsize; | |
711 hopter += 2; | |
712 charst = (w_char *) hopter; | |
713 ptrst = (int *) AL_INT ((w_char *) charst + tsize); | |
714 hopter = (UCHAR *) (ptrst + tsize); | |
715 for (k = 0; k < tsize; k++) | |
716 { | |
717 charst[k] = chartable[k]; | |
718 } | |
719 for (je = mid_je, k = 0; je < end_je;) | |
720 { | |
721 for (; k < tsize && charst[k] < (jeary[je]->yomi)[level]; k++) | |
722 ptrst[k] = ENDPTR; | |
723 if (k == tsize) | |
724 fprintf (stderr, "Error\n"); | |
725 ptrst[k] = (char *) hopter - (char *) hostart; | |
726 ujistosd (je, level + 1); | |
727 for (; je < end_je && (jeary[je]->yomi)[level] == charst[k]; je++); | |
728 k++; | |
729 } | |
730 break; | |
731 case ST_NOPTER: | |
732 if (hopter + tsize * 4 + 12 + 16 >= hoend) | |
733 not_enough_area (); | |
734 *(w_char *) hopter = tsize; | |
735 hopter += 2; | |
736 *(int *) hopter = start_je; | |
737 hopter += 4; | |
738 *(int *) hopter = kanjicount; | |
739 hopter += 4; | |
740 charst = (w_char *) hopter; | |
741 sumst = ((w_char *) hopter + tsize + 2); /* + 2 keeps two zero words */ | |
742 charst[tsize] = charst[tsize + 1] = 0; | |
743 hopter = (UCHAR *) ((w_char *) sumst + tsize); | |
744 for (k = 0; k < tsize; k++) | |
745 { | |
746 charst[k] = chartable[k]; | |
747 sumst[k] = numtable[k]; | |
748 } | |
749 for (k = start_je; k < mid_je; k++) | |
750 { | |
751 upd_kanjicount (k); | |
752 } | |
753 break; | |
754 case ST_SMALL: | |
755 if (hopter + 4 + 16 >= hoend) | |
756 not_enough_area (); | |
757 *(w_char *) hopter = chartable[0]; | |
758 hopter += 2; | |
759 ujistosd (start_je, level + 1); | |
760 break; | |
761 } | |
762 } | |
763 | |
764 #define INCR_HO 150 | |
765 | |
766 static void | |
767 not_enough_area (void) | |
768 { | |
769 /* | |
770 UCHAR *oldstart = hostart; | |
771 int size = hoend - hostart + INCR_HO * HONTAI_PER_ENTRY; | |
772 */ | |
773 fprintf (stderr, "Sorry allocated area is exhausted.node_count = %d\n", node_count); | |
774 fprintf (stderr, "Retry with option -S <dict_size>.\n"); | |
775 exit (1); | |
776 | |
777 /* | |
778 * Only for Sdic | |
779 * Can't Realloc!! many pointeres exist on the stack!! | |
780 */ | |
781 | |
782 /* | |
783 fprintf(stderr, "Realloc.\n"); | |
784 if((hostart = (char *)realloc(hostart, size)) == NULL){ | |
785 fprintf(stderr, "Ralloc Failed\n"); | |
786 exit(1); | |
787 } | |
788 hopter = hostart + (hopter - oldstart); | |
789 hoend = (char *)((char *)hostart + size); | |
790 */ | |
791 } | |
792 | |
793 | |
794 static void | |
795 get_pwd (char* fname, char* crypted) | |
796 { | |
797 char pwd[WNN_PASSWD_LEN]; | |
798 FILE *fp; | |
799 | |
800 if ((fp = fopen (fname, "r")) == NULL) | |
801 { | |
802 fprintf (stderr, "No password_file %s.\n", fname); | |
803 exit (1); | |
804 } | |
805 fgets (pwd, WNN_PASSWD_LEN, fp); | |
806 fclose (fp); | |
807 new_pwd (pwd, crypted); | |
808 } | |
809 | |
810 /* output dict routine's */ | |
811 static void | |
812 output_comment (FILE* fp) | |
813 { | |
814 put_n_EU_str (fp, file_comment, jt.maxcomment); | |
815 } | |
816 | |
817 static void | |
818 output_hinsi_list (FILE* fp) | |
819 { | |
820 put_n_EU_str (fp, hinsi_list, jt.maxhinsi_list); | |
821 } | |
822 | |
823 static void | |
824 output_hindo (FILE* ofpter) | |
825 { | |
826 register int i; | |
827 for (i = 0; i < jt.maxserial; i++) | |
828 { | |
829 putc (asshuku (jeary[i]->hindo), ofpter); | |
830 } | |
831 } | |
832 | |
833 static void | |
834 output_hinsi (FILE* ofpter) | |
835 { | |
836 int i; | |
837 short k; | |
838 int little_endianp = little_endian (); | |
839 /* hinsi ha koko de hikkuri kaesu */ | |
840 | |
841 for (i = 0; i < jt.maxserial; i++) | |
842 { | |
843 k = jeary[i]->hinsi; | |
844 if (little_endianp) | |
845 { | |
846 rev_short_fun (&k); | |
847 } | |
848 fwrite (&k, 1, 2, ofpter); | |
849 } | |
850 } | |
851 | |
852 static void | |
853 output_kanji (FILE* ofpter) | |
854 { | |
855 int i; | |
856 | |
857 for (i = 0; i < jt.maxserial; i++) | |
858 { | |
859 fwrite (jeary[i]->kanji, 1, *(jeary[i]->kanji), ofpter); | |
860 } | |
861 } | |
862 | |
863 static void | |
864 rev_dic_data (void) | |
865 { | |
866 int i; | |
867 | |
868 /* Comment and Hinsi list do not need to reverse. */ | |
869 for (i = 0; i < jt.maxserial; i++) | |
870 { | |
871 rev_w_char ((w_char *) (jeary[i]->kanji + 2), *(jeary[i]->kanji) / 2 - 1); | |
872 } | |
873 } | |
874 | |
#ifdef CONVERT_with_SiSheng
extern void put_short ();

/* Write the `ss' field of every entry to `ofpter' with put_short(). */
static void
output_sisheng (FILE* ofpter)
{
  int idx = 0;

  while (idx < jt.maxserial)
    {
      put_short (ofpter, jeary[idx]->ss);
      idx++;
    }
}
#endif /* CONVERT_with_SiSheng */