0
|
1 /*
|
|
2 * $Id: ujisf.c,v 1.7 2002/07/14 04:26:57 hiroo Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2002
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This program is free software; you can redistribute it and/or modify
|
|
18 * it under the terms of the GNU General Public License as published by
|
|
19 * the Free Software Foundation; either version 2 of the License, or
|
|
20 * (at your option) any later version.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
30 */
|
|
31
|
|
32 /*
|
|
33 * Ujis format <--> internal data.
|
|
34 */
|
|
35
|
|
36 #ifdef HAVE_CONFIG_H
|
|
37 # include <config.h>
|
|
38 #endif
|
|
39
|
|
40 #include <stdio.h>
|
|
41 #if STDC_HEADERS
|
|
42 # include <stdlib.h>
|
|
43 # include <string.h>
|
|
44 #else
|
|
45 # if HAVE_MALLOC_H
|
|
46 # include <malloc.h>
|
|
47 # endif
|
|
48 # if HAVE_STRINGS_H
|
|
49 # include <strings.h>
|
|
50 # endif
|
|
51 #endif /* STDC_HEADERS */
|
|
52
|
|
53 #include "commonhd.h"
|
|
54 #include "jslib.h"
|
|
55 #include "jh.h"
|
|
56 #include "jdata.h"
|
|
57 #include "wnn_os.h"
|
|
58 #include "wnn_string.h"
|
|
59
|
|
60 #ifdef CHINESE
|
|
61 #include "cplib.h"
|
|
62 int pzy_flag = CWNN_PINYIN; /* Pinyin or Zhuyin */
|
|
63 static void sisheng_num (), read_kanji_str_w ();
|
|
64 #endif
|
|
65
|
|
#ifndef min
/*
 * Smaller / larger of two values.
 * Both the arguments and the whole expansion are parenthesised so that
 * operands containing lower-precedence operators (|, &, ?:, ...) expand
 * correctly.  Each argument is still evaluated twice, so do not pass
 * expressions with side effects.
 */
#define min(a, b) (((a) > (b)) ? (b) : (a))
#define max(a, b) (((a) < (b)) ? (b) : (a))
#endif
|
|
70
|
|
71 extern unsigned char kanjiaddr ();
|
|
72 extern void Print_entry ();
|
|
73 extern int wnn_find_hinsi_by_name ();
|
|
74 #ifdef CHINESE
|
|
75 extern void cwnn_zy_str_analysis (), cwnn_py_str_analysis ();
|
|
76 #endif
|
|
77 int sort_func (), Sorted (), w_stradd ();
|
|
78 static void Kanjistradd (), bunpou_num (), read_kanji_str (), toesc ();
|
|
79 void exit1 ();
|
|
80
|
|
81 extern struct JT jt;
|
|
82
|
|
83 /* extern variables */
|
|
84
|
|
85 struct je **jeary;
|
|
86 int wnnerror;
|
|
87
|
|
88 #define WNN_HINSI_LEN 4096
|
|
89
|
|
90 w_char file_comment[WNN_COMMENT_LEN];
|
|
91 w_char hinsi_list[WNN_HINSI_LEN];
|
|
92
|
|
93
|
|
94
|
|
95 /* static variables */
|
|
96 static UCHAR *heap, *hp, *heapend;
|
|
97 static w_char *yomi_heap, *yhp, *yheapend;
|
|
98 static struct je *je_heap, *jehp, *jeheapend;
|
|
99 static FILE *ifpter;
|
|
100 static int maxline;
|
|
101
|
|
102 /* extern functions of this file are
|
|
103 ujis_header();
|
|
104 read_ujis(reversep, to_esc, which_dict);
|
|
105 reverse_yomi();
|
|
106 sort();
|
|
107 uniq_je(func);
|
|
108 output_ujis(opter, serial_out, esc_exp);
|
|
109 */
|
|
110
|
|
111 int lc;
|
|
112
|
|
113 static char stack[LINE_SIZE] = { 0 };
|
|
114
|
|
115 int
|
|
116 get_line (c)
|
|
117 register char *c;
|
|
118 {
|
|
119 if (stack[0])
|
|
120 {
|
|
121 strcpy (c, stack);
|
|
122 stack[0] = 0;
|
|
123 }
|
|
124 else
|
|
125 {
|
|
126 if (fgets (c, LINE_SIZE, ifpter) == NULL)
|
|
127 {
|
|
128 return (EOF);
|
|
129 }
|
|
130 }
|
|
131 return (0);
|
|
132 }
|
|
133
|
|
134 void
|
|
135 unget_line (c)
|
|
136 char *c;
|
|
137 {
|
|
138 strcpy (stack, c);
|
|
139 }
|
|
140
|
|
141
|
|
/*
 * Copy the next blank-delimited token of buf into str.
 * Leading spaces and tabs are skipped; the token ends at a space, tab,
 * newline or the end of the string.  str is always NUL-terminated.
 * Returns a pointer just past the token inside buf, or NULL (with str
 * set to the empty string) when no token is left on the line.
 */
char *
get_string (str, buf)
     register char *str;
     char *buf;
{
  register char *src = buf;

  while (*src == ' ' || *src == '\t')
    src++;

  if (*src == '\n' || *src == '\0')
    {
      *str = '\0';
      return (NULL);
    }

  while (!(*src == '\t' || *src == ' ' || *src == '\n' || *src == '\0'))
    *str++ = *src++;

  *str = '\0';
  return (src);
}
|
|
161
|
|
162 void
|
|
163 bad_line (bf)
|
|
164 char *bf;
|
|
165 {
|
|
166 static int badl = 0;
|
|
167
|
|
168 fprintf (stderr, "Bad line \"%s\"\n", bf);
|
|
169 fprintf (stderr, "Bad line omitted\n");
|
|
170 if (++badl > BADLMAX)
|
|
171 {
|
|
172 fprintf (stderr, "Too many bad lines.\n");
|
|
173 exit1 ();
|
|
174 }
|
|
175 }
|
|
176
|
|
/*
 * Report that the entry table is full and terminate via exit1().
 */
void
error_no_heap ()
{
  static const char *const message = "Heap area is exhausted.\n";

  fprintf (stderr, "%s", message);
  exit1 ();
}
|
|
183
|
|
184 static int
|
|
185 get_one_line (buffer, jep, rev, to_esc, which_dict)
|
|
186 char *buffer;
|
|
187 register struct je **jep;
|
|
188 int rev;
|
|
189 int to_esc;
|
|
190 int which_dict;
|
|
191 {
|
|
192 register char *c = buffer;
|
|
193 static char tmp[LINE_SIZE];
|
|
194 static char ckanji[LINE_SIZE];
|
|
195 static char cyomi[LINE_SIZE];
|
|
196 static w_char yomi[LINE_SIZE];
|
|
197 static w_char kanji[LINE_SIZE];
|
|
198 static w_char comm[LINE_SIZE];
|
|
199 #ifdef CHINESE
|
|
200 static w_char un_sisheng_yincod_str[LINE_SIZE];
|
|
201 static w_char yincod_str[LINE_SIZE];
|
|
202 static char csisheng[LINE_SIZE];
|
|
203 static w_char wtmp[LINE_SIZE];
|
|
204 #endif
|
|
205 char *c1;
|
|
206
|
|
207 if (jehp == jeheapend)
|
|
208 {
|
|
209 if ((jehp = je_heap = (struct je *) malloc ((HEAPINC * sizeof (struct je)))) == NULL)
|
|
210 {
|
|
211 fprintf (stderr, "Malloc Failed\n");
|
|
212 return (-1);
|
|
213 }
|
|
214 jeheapend = je_heap + HEAPINC;
|
|
215 }
|
|
216 *jep = jehp;
|
|
217 jehp++;
|
|
218
|
|
219 if (rev == REVERSE)
|
|
220 {
|
|
221 if ((c = get_string (ckanji, c)) == NULL)
|
|
222 return (1);
|
|
223 }
|
|
224 else
|
|
225 {
|
|
226 if ((c = get_string (cyomi, c)) == NULL)
|
|
227 return (1);
|
|
228 }
|
|
229 if (rev == REVERSE)
|
|
230 {
|
|
231 if ((c = get_string (cyomi, c)) == NULL)
|
|
232 return (-1);
|
|
233 }
|
|
234 else
|
|
235 {
|
|
236 if ((c = get_string (ckanji, c)) == NULL)
|
|
237 return (-1);
|
|
238 }
|
|
239 #ifdef CHINESE
|
|
240 /* here ,should seperate pinyin to two part */
|
|
241 /* one is usually pinyin string like Zhong.Guo. */
|
|
242 /* the is sisheng string like 23 */
|
|
243
|
|
244 if (which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT)
|
|
245 {
|
|
246 if (pzy_flag == CWNN_ZHUYIN)
|
|
247 cwnn_zy_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str);
|
|
248 else
|
|
249 cwnn_py_str_analysis (cyomi, csisheng, un_sisheng_yincod_str, yincod_str);
|
|
250
|
|
251 sisheng_num (csisheng, &((*jep)->ss));
|
|
252 read_kanji_str_w (wtmp, un_sisheng_yincod_str);
|
|
253 wnn_Strcpy (yomi, wtmp);
|
|
254 }
|
|
255 else
|
|
256 {
|
|
257 read_kanji_str (tmp, cyomi);
|
|
258 wnn_Sstrcpy (yomi, tmp);
|
|
259 }
|
|
260 #else
|
|
261 read_kanji_str (tmp, cyomi);
|
|
262 wnn_Sstrcpy (yomi, tmp);
|
|
263 #endif
|
|
264 if (wnn_Strlen (yomi) >= LENGTHYOMI)
|
|
265 {
|
|
266 fprintf (stderr, "YOMI is longer in line %d.\n", lc);
|
|
267 return (-1);
|
|
268 }
|
|
269 w_stradd (yomi, &((*jep)->yomi));
|
|
270
|
|
271 read_kanji_str (tmp, ckanji);
|
|
272 wnn_Sstrcpy (kanji, tmp);
|
|
273 if (wnn_Strlen (kanji) >= LENGTHYOMI)
|
|
274 {
|
|
275 fprintf (stderr, "KANJI is longer in line %d.\n", lc);
|
|
276 return (-1);
|
|
277 }
|
|
278 w_stradd (kanji, &((*jep)->kan));
|
|
279
|
|
280 if ((c = get_string (tmp, c)) == NULL)
|
|
281 return (-1);
|
|
282 bunpou_num (tmp, &((*jep)->hinsi));
|
|
283
|
|
284 if ((c = get_string (tmp, c)) == NULL)
|
|
285 return (-1);
|
|
286 if (tmp[0] == '-')
|
|
287 {
|
|
288 (*jep)->hindo = -1; /* Real hindo == -1 means Not to use it */
|
|
289 }
|
|
290 else
|
|
291 {
|
|
292 sscanf (tmp, "%d", &((*jep)->hindo));
|
|
293 }
|
|
294
|
|
295 if ((get_string (tmp, c)) == NULL)
|
|
296 {
|
|
297 c1 = NULL;
|
|
298 (*jep)->comm = NULL;
|
|
299 comm[0] = 0;
|
|
300 }
|
|
301 else
|
|
302 {
|
|
303 /* left entries are all considered as comment */
|
|
304 for (; *c == '\t' || *c == ' '; c++);
|
|
305 if (c[strlen (c) - 1] == '\n')
|
|
306 c[strlen (c) - 1] = '\0';
|
|
307 c1 = c;
|
|
308 wnn_Sstrcpy (comm, c1);
|
|
309 if (wnn_Strlen (comm) >= LENGTHYOMI)
|
|
310 {
|
|
311 fprintf (stderr, "COMMENT is longer in line %d.\n", lc);
|
|
312 return (-1);
|
|
313 }
|
|
314 w_stradd (comm, &((*jep)->comm));
|
|
315 }
|
|
316
|
|
317 if (to_esc)
|
|
318 {
|
|
319 toesc (ckanji, cyomi);
|
|
320 }
|
|
321 /*
|
|
322 if(strchr(ckanji, DIC_COMMENT_CHAR) ||
|
|
323 strchr(ckanji, DIC_YOMI_CHAR)){
|
|
324 fprintf(stderr, "Bad character in kanji\n");
|
|
325 return(-1);
|
|
326 }
|
|
327 if(which_dict){
|
|
328 if(strchr(cyomi, DIC_COMMENT_CHAR) ||
|
|
329 strchr(cyomi, DIC_YOMI_CHAR)){
|
|
330 fprintf(stderr, "Bad character in yomi\n");
|
|
331 return(-1);
|
|
332 }
|
|
333 }
|
|
334 */
|
|
335 Kanjistradd (kanji,
|
|
336 #ifdef CHINESE
|
|
337 ((which_dict == CWNN_REV_DICT || which_dict == BWNN_REV_DICT) ? yincod_str : ((which_dict == WNN_REV_DICT) ? yomi : NULL)),
|
|
338 #else
|
|
339 (which_dict) ? yomi : NULL,
|
|
340 #endif
|
|
341 comm, &(*jep)->kanji);
|
|
342 return (0);
|
|
343 }
|
|
344
|
|
345 static void
|
|
346 Kanjistradd (k, y, c, cp)
|
|
347 register UCHAR **cp;
|
|
348 w_char *k, *y, *c;
|
|
349 {
|
|
350 int len;
|
|
351 if (hp + LENGTHKANJI >= heapend)
|
|
352 {
|
|
353 if ((hp = heap = (UCHAR *) malloc ((HEAPINC * HEAP_PER_LINE))) == NULL)
|
|
354 {
|
|
355 fprintf (stderr, "Malloc Failed\n");
|
|
356 exit (1);
|
|
357 }
|
|
358 heapend = heap + (HEAPINC * HEAP_PER_LINE);
|
|
359 }
|
|
360 *cp = hp;
|
|
361 if ((len = kanjiaddr (hp, k, y, c)) >= LENGTHKANJI)
|
|
362 {
|
|
363 fprintf (stderr, "KANJI is longer in line %d.\n", lc);
|
|
364 exit (1);
|
|
365 }
|
|
366 hp += len;
|
|
367 }
|
|
368
|
|
369 int
|
|
370 w_stradd (str, cp)
|
|
371 register w_char **cp;
|
|
372 register w_char *str;
|
|
373 {
|
|
374 register int len = wnn_Strlen (str);
|
|
375
|
|
376 if (yhp + len + 1 >= yheapend)
|
|
377 {
|
|
378 if ((yhp = yomi_heap = (w_char *) malloc ((HEAPINC * sizeof (w_char)))) == NULL)
|
|
379 {
|
|
380 fprintf (stderr, "Malloc Failed\n");
|
|
381 return (-1);
|
|
382 }
|
|
383 yheapend = yomi_heap + HEAPINC;
|
|
384 }
|
|
385 *cp = yhp;
|
|
386 wnn_Strcpy (yhp, str);
|
|
387 yhp += len + 1;
|
|
388 return (0);
|
|
389 }
|
|
390
|
|
391 void
|
|
392 #ifdef CHINESE
|
|
393 ujis_header (which_dict)
|
|
394 int *which_dict;
|
|
395 #else
|
|
396 ujis_header ()
|
|
397 #endif
|
|
398 {
|
|
399 char buffer[LINE_SIZE];
|
|
400 char *c = buffer;
|
|
401 char str[LINE_SIZE];
|
|
402
|
|
403 jt.total = 0;
|
|
404 file_comment[0] = 0;
|
|
405 hinsi_list[0] = 0;
|
|
406
|
|
407 for (;;)
|
|
408 {
|
|
409 if (get_line (buffer) == EOF)
|
|
410 {
|
|
411 goto EOF_HEAD;
|
|
412 }
|
|
413 c = buffer;
|
|
414 if ((c = get_string (str, c)) == NULL)
|
|
415 continue;
|
|
416 if (strcmp (str, COMMENT) == 0)
|
|
417 {
|
|
418 /* for(;;){
|
|
419 if(get_line(buffer) == EOF){
|
|
420 goto EOF_EHAD;
|
|
421 }
|
|
422 if(buffer[0] == '\\'){
|
|
423 unget_line(buffer);
|
|
424 break;
|
|
425 }
|
|
426 if(wnn_Strlen(file_comment) + strlen(buffer)
|
|
427 >= WNN_COMMENT_LEN){
|
|
428 fprintf(stderr, "Too Long Comment.\n");
|
|
429 exit1();
|
|
430 }
|
|
431 wnn_Sstrcpy(file_comment + wnn_Strlen(file_comment), buffer);
|
|
432 }
|
|
433 */
|
|
434 get_string (str, c);
|
|
435 /*
|
|
436 if(str[strlen(str) - 1] == '\n'){
|
|
437 c[strlen(str) - 1] = '\0';
|
|
438 }
|
|
439 */
|
|
440 wnn_Sstrcpy (file_comment, str);
|
|
441 #ifdef CHINESE
|
|
442 }
|
|
443 else if (strcmp (str, PINYIN) == 0)
|
|
444 {
|
|
445 *which_dict = CWNN_REV_DICT;
|
|
446 pzy_flag = CWNN_PINYIN;
|
|
447 }
|
|
448 else if (strcmp (str, ZHUYIN) == 0)
|
|
449 {
|
|
450 *which_dict = CWNN_REV_DICT;
|
|
451 pzy_flag = CWNN_ZHUYIN;
|
|
452 }
|
|
453 else if (strcmp (str, BIXING) == 0)
|
|
454 {
|
|
455 *which_dict = BWNN_REV_DICT;
|
|
456 #endif
|
|
457 }
|
|
458 else if (strcmp (str, HINSI) == 0
|
|
459 #ifdef CHINESE
|
|
460 || strcmp (str, CHINSI) == 0
|
|
461 #endif
|
|
462 )
|
|
463 {
|
|
464 for (;;)
|
|
465 {
|
|
466 if (get_line (buffer) == EOF)
|
|
467 {
|
|
468 goto EOF_HEAD;
|
|
469 }
|
|
470 if (buffer[0] == '\\' || buffer[0] == '\n')
|
|
471 {
|
|
472 unget_line (buffer);
|
|
473 break;
|
|
474 }
|
|
475 wnn_Sstrcpy (hinsi_list + wnn_Strlen (hinsi_list), buffer);
|
|
476 }
|
|
477 }
|
|
478 else if (strcmp (str, TOTAL) == 0)
|
|
479 {
|
|
480 get_string (str, c);
|
|
481 jt.total = atoi (str);
|
|
482 }
|
|
483 else if (strcmp (str, DIC_NO) == 0)
|
|
484 { /* for word_reg.c */
|
|
485 get_string (str, c);
|
|
486 jt.total = atoi (str);
|
|
487 }
|
|
488 else
|
|
489 {
|
|
490 unget_line (buffer);
|
|
491 break;
|
|
492 }
|
|
493 }
|
|
494 EOF_HEAD:
|
|
495 jt.maxcomment = wnn_Strlen (file_comment);
|
|
496 jt.maxhinsi_list = wnn_Strlen (hinsi_list) + 1;
|
|
497 }
|
|
498
|
|
499 void
|
|
500 read_ujis (rev, to_esc, which_dict)
|
|
501 int rev;
|
|
502 int to_esc;
|
|
503 int which_dict;
|
|
504 {
|
|
505 char buffer[LINE_SIZE];
|
|
506 register int tmp;
|
|
507
|
|
508 for (lc = 0; get_line (buffer) != EOF;)
|
|
509 {
|
|
510 if ((tmp = get_one_line (buffer, jeary + lc, rev, to_esc, which_dict)) == -1)
|
|
511 {
|
|
512 bad_line (buffer);
|
|
513 }
|
|
514 else if (tmp == 0)
|
|
515 { /* succeed */
|
|
516 lc++;
|
|
517 if (lc > maxline)
|
|
518 {
|
|
519 error_no_heap ();
|
|
520 }
|
|
521 }
|
|
522 }
|
|
523 jt.maxserial = lc; /* i starts with 1 in order to leave 0 unused */
|
|
524 #ifdef CHINESE
|
|
525 jt.syurui = which_dict;
|
|
526 #endif
|
|
527 }
|
|
528
|
|
529 void
|
|
530 reverse_yomi ()
|
|
531 {
|
|
532 register int i;
|
|
533 w_char ytmp[LINE_SIZE];
|
|
534
|
|
535 for (i = 0; i < jt.maxserial; i++)
|
|
536 {
|
|
537 if (jeary[i]->yomi != 0)
|
|
538 { /* ºï½ü¤µ¤ì¤Æ¤Ê¤¤¤â¤Î¤À¤± */
|
|
539 wnn_Sreverse (ytmp, jeary[i]->yomi);
|
|
540 wnn_Strcpy (jeary[i]->yomi, ytmp);
|
|
541 wnn_Sreverse (ytmp, jeary[i]->kan);
|
|
542 wnn_Strcpy (jeary[i]->kan, ytmp);
|
|
543 }
|
|
544
|
|
545 }
|
|
546 }
|
|
547
|
|
548 extern char *wnn_get_hinsi_name ();
|
|
549
|
|
550 void
|
|
551 print_je (jep, opter, serial_out, esc_exp)
|
|
552 register FILE *opter;
|
|
553 register struct je *jep;
|
|
554 int serial_out;
|
|
555 int esc_exp;
|
|
556 {
|
|
557 /* if (jep->yomi != 0) { */
|
|
558 if (jep->hinsi != SAKUJO_HINSI)
|
|
559 {
|
|
560 Print_entry (jep->yomi, jep->kan, jep->comm, jep->hindo, 0, jep->hinsi, serial_out ? jep->serial : -1, opter, esc_exp);
|
|
561 }
|
|
562 }
|
|
563
|
|
564 #ifdef nodef
|
|
565 kprint (fp, kpter)
|
|
566 register FILE *fp;
|
|
567 register w_char *kpter;
|
|
568 {
|
|
569 char out_str[LENGTHKANJI];
|
|
570 register int out_len;
|
|
571 char tmp[LENGTHKANJI];
|
|
572
|
|
573 wnn_sStrcpy (tmp, kpter);
|
|
574 out_len = make_kanji_str (out_str, tmp);
|
|
575 fprintf (fp, "%s", out_str);
|
|
576 if (out_len < 8)
|
|
577 putc ('\t', fp);
|
|
578 if (out_len < 16)
|
|
579 putc ('\t', fp);
|
|
580 putc ('\t', fp);
|
|
581 }
|
|
582 #endif
|
|
583
|
|
584 void
|
|
585 output_ujis (opter, serial_out, esc_exp)
|
|
586 register FILE *opter;
|
|
587 int serial_out;
|
|
588 int esc_exp;
|
|
589 {
|
|
590 register struct je **jep;
|
|
591 char buffer[WNN_COMMENT_LEN + WNN_HINSI_LEN];
|
|
592 register int i;
|
|
593
|
|
594 wnn_sStrcpy (buffer, file_comment);
|
|
595 fprintf (opter, "%s\t%s\n", COMMENT, buffer);
|
|
596 fprintf (opter, "%s\t%d\n", TOTAL, jt.total);
|
|
597 wnn_sStrcpy (buffer, hinsi_list);
|
|
598 #ifdef CHINESE
|
|
599 fprintf (opter, "%s\n", CHINSI);
|
|
600 if (jt.syurui == CWNN_REV_DICT)
|
|
601 {
|
|
602 if (pzy_flag == CWNN_PINYIN)
|
|
603 {
|
|
604 fprintf (opter, "%s\n", PINYIN);
|
|
605 }
|
|
606 else
|
|
607 {
|
|
608 fprintf (opter, "%s\n", ZHUYIN);
|
|
609 }
|
|
610 }
|
|
611 else if (jt.syurui == BWNN_REV_DICT)
|
|
612 {
|
|
613 fprintf (opter, "%s\n", BIXING);
|
|
614 }
|
|
615 #else
|
|
616 fprintf (opter, "%s\n", HINSI);
|
|
617 #endif
|
|
618 fprintf (opter, "%s", buffer);
|
|
619 fprintf (opter, "\n");
|
|
620 for (jep = jeary, i = 0; i < jt.maxserial; i++, jep++)
|
|
621 {
|
|
622 print_je (*jep, opter, serial_out, esc_exp);
|
|
623 }
|
|
624 }
|
|
625
|
|
626 int
|
|
627 init_heap (hpb, yhpb, l, rl, ipf)
|
|
628 int hpb, yhpb, l, rl;
|
|
629 FILE *ipf;
|
|
630 {
|
|
631 jehp = je_heap = (struct je *) malloc ((rl * sizeof (struct je)));
|
|
632 hp = heap = (UCHAR *) malloc (hpb);
|
|
633 yhp = yomi_heap = (w_char *) malloc ((yhpb * sizeof (w_char)));
|
|
634 if ((jeary = (struct je **) calloc (l, sizeof (struct je *))) == NULL)
|
|
635 {
|
|
636 fprintf (stderr, "Malloc Failed\n");
|
|
637 return (-1);
|
|
638 }
|
|
639 ifpter = ipf;
|
|
640 maxline = l;
|
|
641 heapend = heap + hpb;
|
|
642 yheapend = yomi_heap + yhpb;
|
|
643 jeheapend = je_heap + rl;
|
|
644 return (0);
|
|
645 }
|
|
646
|
|
647 void
|
|
648 init_jeary ()
|
|
649 {
|
|
650 int k;
|
|
651 for (k = 0; k < jt.maxserial; k++)
|
|
652 {
|
|
653 jeary[k] = je_heap + k;
|
|
654 }
|
|
655 }
|
|
656
|
|
657 /* test program
|
|
658 main()
|
|
659 {
|
|
660 yhp = yomi_heap = (w_char *)malloc(100000);
|
|
661 jeary = (struct je *)malloc(100000);
|
|
662
|
|
663 ifpter = stdin;
|
|
664 ujis_header();
|
|
665 read_ujis();
|
|
666
|
|
667 sort();
|
|
668 uniq_je(func);
|
|
669 output_ujis(stdout, 0, 1);
|
|
670 }
|
|
671 */
|
|
672
|
|
/*
 * Terminate the program with exit status 1.
 */
void
exit1 ()
{
  const int status = 1;

  exit (status);
}
|
|
678
|
|
679 /* must be updated later */
|
|
680
|
|
/*
 * Convert the part-of-speech field a into its number and store it in *p.
 * The text is first looked up as a hinsi name; if that fails it is read
 * as a decimal number.  If neither works the program is terminated
 * through exit1().
 */
static void
bunpou_num (a, p)
     register char *a;
     register int *p;
{
  int hinsi = wnn_find_hinsi_by_name (a);

  if (hinsi != -1)
    {
      *p = hinsi;
      return;
    }

  /* sscanf() returns 1 only when a number was actually stored; it can
     return 0 or EOF otherwise, so test for success rather than for 0. */
  if (sscanf (a, "%d", p) != 1)
    {
      fprintf (stderr, "Bad hinsi name \"%s\".\n", a);
      exit1 ();
    }
}
|
|
703
|
|
704 #ifdef CHINESE
|
|
/*
 * Read the decimal number at the start of a into *p.
 * *p is left unchanged when a does not start with a number.
 */
static void
sisheng_num (a, p)
     register char *a;
     register int *p;
{
  (void) sscanf (a, "%d", p);
}
|
|
712 #endif
|
|
713
|
|
714 int
|
|
715 sort_func_je (a, b)
|
|
716 char *a, *b;
|
|
717 {
|
|
718 return (sort_func (a, b, D_YOMI));
|
|
719 }
|
|
720
|
|
721 int
|
|
722 sort_func_je_kanji (a, b)
|
|
723 char *a, *b;
|
|
724 {
|
|
725 return (sort_func (a, b, D_KANJI));
|
|
726 }
|
|
727
|
|
728 int
|
|
729 sort_func (a, b, which)
|
|
730 register char *a, *b;
|
|
731 int which;
|
|
732 {
|
|
733 register int tmp;
|
|
734 register struct je *pa, *pb;
|
|
735 pa = *((struct je **) a);
|
|
736 pb = *((struct je **) b);
|
|
737 if (pa->hinsi == SAKUJO_HINSI)
|
|
738 {
|
|
739 if (pb->hinsi == SAKUJO_HINSI)
|
|
740 return (0);
|
|
741 return (-1);
|
|
742 }
|
|
743 if (pb->hinsi == SAKUJO_HINSI)
|
|
744 return (1);
|
|
745
|
|
746 if (which == D_YOMI)
|
|
747 {
|
|
748
|
|
749 if (!(pa->yomi) || !(pb->yomi))
|
|
750 return (0);
|
|
751 tmp = wnn_Strcmp (pa->yomi, pb->yomi);
|
|
752 if (tmp)
|
|
753 return (tmp);
|
|
754
|
|
755 if (pa->hinsi != pb->hinsi)
|
|
756 {
|
|
757 return ((int) (pa->hinsi) - (int) (pb->hinsi));
|
|
758 }
|
|
759
|
|
760 #ifdef CHINESE
|
|
761 if (jt.syurui == CWNN_REV_DICT)
|
|
762 {
|
|
763 if (pa->ss != pb->ss)
|
|
764 {
|
|
765 return ((int) (pa->ss) - (int) (pb->ss));
|
|
766 }
|
|
767 }
|
|
768 #endif
|
|
769 if (!(pa->kan) || !(pb->kan))
|
|
770 return (0);
|
|
771 tmp = wnn_Strcmp (pa->kan, pb->kan);
|
|
772 if (tmp)
|
|
773 return (tmp);
|
|
774 }
|
|
775 else
|
|
776 {
|
|
777 if (!(pa->kan) || !(pb->kan))
|
|
778 return (0);
|
|
779 tmp = wnn_Strcmp (pa->kan, pb->kan);
|
|
780 if (tmp)
|
|
781 return (tmp);
|
|
782
|
|
783 if (pa->hinsi != pb->hinsi)
|
|
784 {
|
|
785 return ((int) (pa->hinsi) - (int) (pb->hinsi));
|
|
786 }
|
|
787 #ifdef CHINESE
|
|
788 if (jt.syurui == CWNN_REV_DICT)
|
|
789 {
|
|
790 if (pa->ss != pb->ss)
|
|
791 {
|
|
792 return ((int) (pa->ss) - (int) (pb->ss));
|
|
793 }
|
|
794 }
|
|
795 #endif
|
|
796 if (!(pa->yomi) || !(pb->yomi))
|
|
797 return (0);
|
|
798 tmp = wnn_Strcmp (pa->yomi, pb->yomi);
|
|
799 if (tmp)
|
|
800 return (tmp);
|
|
801 }
|
|
802 return (0);
|
|
803 }
|
|
804
|
|
805 void
|
|
806 sort ()
|
|
807 {
|
|
808 qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je);
|
|
809 }
|
|
810
|
|
811 void
|
|
812 sort_if_not_sorted ()
|
|
813 {
|
|
814 if (!Sorted ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je))
|
|
815 {
|
|
816 sort ();
|
|
817 }
|
|
818 }
|
|
819
|
|
820 void
|
|
821 sort_kanji ()
|
|
822 {
|
|
823 qsort ((char *) jeary, jt.maxserial, sizeof (struct je *), sort_func_je_kanji);
|
|
824 }
|
|
825
|
|
826 void
|
|
827 uniq_je (func)
|
|
828 int (*func) ();
|
|
829 {
|
|
830 int k;
|
|
831 struct je **prev, **jep;
|
|
832
|
|
833 if (jt.maxserial == 0)
|
|
834 return;
|
|
835 prev = &jeary[0];
|
|
836 for (k = 1; k < jt.maxserial; k++)
|
|
837 {
|
|
838 jep = &jeary[k];
|
|
839 if (func ((char *) jep, (char *) prev) == 0)
|
|
840 {
|
|
841 w_char tmp[LENGTHYOMI];
|
|
842 char tmp1[LENGTHYOMI];
|
|
843 char tmp2[LENGTHKANJI];
|
|
844 #ifdef CHINESE
|
|
845 char tmp3[LENGTHKANJI];
|
|
846 if (jt.syurui == BWNN_REV_DICT || jt.syurui == CWNN_REV_DICT)
|
|
847 {
|
|
848 wnn_Strcpy (tmp, (*jep)->yomi);
|
|
849 wnn_sStrcpy (tmp1, tmp);
|
|
850 wnn_Strcpy (tmp, (*jep)->kan);
|
|
851 wnn_sStrcpy (tmp2, tmp);
|
|
852 sprintf (tmp3, "%d", (*jep)->ss);
|
|
853 }
|
|
854 else
|
|
855 {
|
|
856 #endif
|
|
857 wnn_Sreverse (tmp, (*jep)->yomi);
|
|
858 wnn_sStrcpy (tmp1, tmp);
|
|
859 wnn_Sreverse (tmp, (*jep)->kan);
|
|
860 wnn_sStrcpy (tmp2, tmp);
|
|
861 #ifdef CHINESE
|
|
862 }
|
|
863 if (jt.syurui == CWNN_REV_DICT)
|
|
864 fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s),sisheng(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi), tmp3);
|
|
865 else
|
|
866 #endif
|
|
867 fprintf (stderr, "Entries with same yomi(%s), kanji(%s), hinsi(%s) are merged.\n", tmp1, tmp2, wnn_get_hinsi_name ((*jep)->hinsi));
|
|
868 }
|
|
869 else
|
|
870 {
|
|
871 prev++;
|
|
872 if (prev != jep)
|
|
873 {
|
|
874 *prev = *jep;
|
|
875 }
|
|
876 }
|
|
877 }
|
|
878 prev++;
|
|
879 jt.maxserial = prev - &jeary[0];
|
|
880 }
|
|
881
|
|
882 #ifdef nodef
|
|
883 make_kanji_str (o, c)
|
|
884 register UCHAR *o, *c;
|
|
885 {
|
|
886 register UCHAR *o0 = o;
|
|
887
|
|
888 for (; *c; c++)
|
|
889 {
|
|
890 if (*c == '\\')
|
|
891 {
|
|
892 *o++ = '\\';
|
|
893 *o++ = '\\';
|
|
894 }
|
|
895 else if (*c > 0x20)
|
|
896 {
|
|
897 *o++ = *c;
|
|
898 }
|
|
899 else
|
|
900 {
|
|
901 sprintf (o, "\\0%o", *c);
|
|
902 for (; *o; o++);
|
|
903 }
|
|
904 }
|
|
905 *o = 0;
|
|
906 return (o - o0);
|
|
907 }
|
|
908 #endif
|
|
909
|
|
/*
 * Decode the escaped byte string o into c.
 *
 *   "\0" followed by one or two octal digits -> the byte with that value
 *   "\" followed by any other character      -> that character
 *   anything else                            -> copied unchanged
 *
 * A backslash at the very end of the input is dropped, and "\0" without
 * any octal digit produces no output.  (The earlier code read past the
 * terminator in the first case and left an unwritten byte in the output
 * in the second.)  c must be large enough for the result; the output is
 * never longer than the input.
 */
static void
read_kanji_str (c, o)
     register char *c, *o;
{
  while (*o)
    {
      if (*o != '\\')
        {
          *c++ = *o++;
          continue;
        }

      o++;                      /* skip the backslash */
      if (*o == '\0')
        break;                  /* dangling backslash at end of input */

      if (*o != '0')
        {
          *c++ = *o++;          /* escaped ordinary character */
          continue;
        }

      o++;                      /* skip the '0' that introduces octal */
      if (*o < '0' || *o > '7')
        continue;               /* no digit: emit nothing */

      *c = (*o++ - '0');
      if (*o >= '0' && *o <= '7')
        {
          *c *= 8;
          *c |= (*o++ - '0');
        }
      c++;
    }
  *c = 0;
}
|
|
947
|
|
948 #ifdef CHINESE
|
|
949 static void
|
|
950 read_kanji_str_w (c, o)
|
|
951 register w_char *c, *o;
|
|
952 {
|
|
953 for (; *o; c++)
|
|
954 {
|
|
955 if (*o == (w_char) '\\')
|
|
956 {
|
|
957 if (*++o == (w_char) '0')
|
|
958 {
|
|
959 o += 1;
|
|
960 if (*o >= (w_char) '0' && *o <= (w_char) '7')
|
|
961 {
|
|
962 *c = (*o++ - (w_char) '0');
|
|
963 }
|
|
964 else
|
|
965 continue;
|
|
966 if (*o >= (w_char) '0' && *o <= (w_char) '7')
|
|
967 {
|
|
968 *c *= 8;
|
|
969 *c |= (*o++ - (w_char) '0');
|
|
970 }
|
|
971 else
|
|
972 continue;
|
|
973 }
|
|
974 else
|
|
975 {
|
|
976 *c = *o++;
|
|
977 }
|
|
978 }
|
|
979 else
|
|
980 {
|
|
981 *c = *o++;
|
|
982 }
|
|
983 }
|
|
984 *c = 0;
|
|
985 }
|
|
986 #endif
|
|
987
|
|
/*
 * Check whether an array is already in non-decreasing order.
 *
 *   st       - first element
 *   lc       - number of elements
 *   size     - size of one element in bytes
 *   sort_fun - comparator called with pointers to two adjacent elements
 *
 * Returns 1 if no adjacent pair compares greater than 0 (including the
 * case of fewer than two elements), 0 otherwise.
 */
int
Sorted (st, lc, size, sort_fun)
     register char *st;
     register int lc;
     int size;
     int (*sort_fun) ();
{
  int pairs = lc - 1;
  int i;

  for (i = 0; i < pairs; i++)
    {
      char *left = st + i * size;
      char *right = left + size;

      if (sort_fun (left, right) > 0)
        return (0);
    }
  return (1);
}
|
|
1005
|
|
/*
 * Test whether k is y with every two-byte character moved from lead
 * byte 0xa4 to lead byte 0xa5 (second byte unchanged).  The two-byte
 * sequence 0xa1 0xbc is accepted when it occurs in both strings at the
 * same position.  NOTE(review): these look like EUC-JP hiragana /
 * katakana rows and the long-vowel mark -- confirm.
 *
 * Returns 1 when the strings correspond over their whole length, 0
 * otherwise (including when a two-byte character is cut short).
 */
int
is_katakana (k, y)
     register char *k, *y;
{
  while (*k && *y)
    {
      /* 0xa1 0xbc must be present in BOTH strings; the second byte of k
         was previously never examined. */
      if (y[0] == (char) 0xa1 && k[0] == (char) 0xa1
          && y[1] == (char) 0xbc && k[1] == (char) 0xbc)
        {
          y += 2;
          k += 2;
          continue;
        }
      if (y[0] != (char) 0xa4 || k[0] != (char) 0xa5)
        return (0);
      /* be careful, char comparison. */
      if (y[1] == '\0' || y[1] != k[1])
        return (0);             /* truncated pair or different character */
      y += 2;
      k += 2;
    }
  /* Equal only if both strings ended together. */
  return (!(*k | *y));
}
|
|
1028
|
|
1029 static void
|
|
1030 toesc (ckanji, cyomi)
|
|
1031 char *ckanji, *cyomi;
|
|
1032 {
|
|
1033 if (strcmp (ckanji, cyomi) == 0)
|
|
1034 {
|
|
1035 strcpy (ckanji, DIC_HIRAGANA);
|
|
1036 }
|
|
1037 else if (is_katakana (ckanji, cyomi))
|
|
1038 {
|
|
1039 strcpy (ckanji, DIC_KATAKANA);
|
|
1040 }
|
|
1041 }
|