0
|
1 /*
|
|
2 * $Id: dic_atojis.c,v 1.4 2002/07/14 04:26:57 hiroo Exp $
|
|
3 */
|
|
4
|
|
5 /*
|
|
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
|
|
7 * This file is part of FreeWnn.
|
|
8 *
|
|
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
|
|
10 * 1987, 1988, 1989, 1990, 1991, 1992
|
|
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
|
|
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
|
|
13 * Copyright FreeWnn Project 1999, 2000, 2002
|
|
14 *
|
|
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
|
|
16 *
|
|
17 * This program is free software; you can redistribute it and/or modify
|
|
18 * it under the terms of the GNU General Public License as published by
|
|
19 * the Free Software Foundation; either version 2 of the License, or
|
|
20 * (at your option) any later version.
|
|
21 *
|
|
22 * This program is distributed in the hope that it will be useful,
|
|
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
25 * GNU General Public License for more details.
|
|
26 *
|
|
27 * You should have received a copy of the GNU General Public License
|
|
28 * along with this program; if not, write to the Free Software
|
|
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
30 */
|
|
31
|
|
32 #ifdef HAVE_CONFIG_H
|
|
33 # include <config.h>
|
|
34 #endif
|
|
35
|
|
36 #if STDC_HEADERS
|
|
37 # include <string.h>
|
|
38 #else
|
|
39 # if HAVE_STRINGS_H
|
|
40 # include <strings.h>
|
|
41 # endif
|
|
42 #endif /* STDC_HEADERS */
|
|
43
|
|
44 #include "commonhd.h"
|
|
45 #include "jslib.h"
|
|
46 #include "jdata.h"
|
|
47 #include "wnn_os.h"
|
|
48 #include "wnn_string.h"
|
|
49
|
|
/*
 * Pending "group unit" flag shared by kanji_giji_str(), getketa() and
 * getketa_o().  kanji_giji_str() sets it to 1 when it meets a '0' digit at a
 * non-zero position that is a multiple of four; getketa()/getketa_o() test it
 * to decide whether the unit of that four-digit group still has to be
 * emitted, and always leave it cleared.
 */
static int keta_4 = 0;
|
|
51
|
|
52 static w_char _tatojis[] = {
|
|
53 0xa1a1, 0xa1aa, 0xa1c9, 0xa1f4, 0xa1f0, 0xa1f3, 0xa1f5, 0xa1c7,
|
|
54 0xa1ca, 0xa1cb, 0xa1f6, 0xa1dc, 0xa1a4, 0xa1dd, 0xa1a5, 0xa1bf,
|
|
55 0xa3b0, 0xa3b1, 0xa3b2, 0xa3b3, 0xa3b4, 0xa3b5, 0xa3b6, 0xa3b7,
|
|
56 0xa3b8, 0xa3b9, 0xa1a7, 0xa1a8, 0xa1e3, 0xa1e1, 0xa1e4, 0xa1a9,
|
|
57 0xa1f7, 0xa3c1, 0xa3c2, 0xa3c3, 0xa3c4, 0xa3c5, 0xa3c6, 0xa3c7,
|
|
58 0xa3c8, 0xa3c9, 0xa3ca, 0xa3cb, 0xa3cc, 0xa3cd, 0xa3ce, 0xa3cf,
|
|
59 0xa3d0, 0xa3d1, 0xa3d2, 0xa3d3, 0xa3d4, 0xa3d5, 0xa3d6, 0xa3d7,
|
|
60 0xa3d8, 0xa3d9, 0xa3da, 0xa1ce, 0xa1ef, 0xa1cf, 0xa1b0, 0xa1b2,
|
|
61 0xa1ae, 0xa3e1, 0xa3e2, 0xa3e3, 0xa3e4, 0xa3e5, 0xa3e6, 0xa3e7,
|
|
62 0xa3e8, 0xa3e9, 0xa3ea, 0xa3eb, 0xa3ec, 0xa3ed, 0xa3ee, 0xa3ef,
|
|
63 0xa3f0, 0xa3f1, 0xa3f2, 0xa3f3, 0xa3f4, 0xa3f5, 0xa3f6, 0xa3f7,
|
|
64 0xa3f8, 0xa3f9, 0xa3fa, 0xa1d0, 0xa1c3, 0xa1d1, 0xa1b1, 0x007f
|
|
65 };
|
|
66
|
|
67
|
|
68 static int
|
|
69 atojis (c)
|
|
70 register w_char c;
|
|
71 {
|
|
72 if (c >= ' ' && c <= (unsigned char) '\376')
|
|
73 c = _tatojis[c - ' '];
|
|
74 return (c);
|
|
75 }
|
|
76
|
|
77 static w_char _tatokan[] = {
|
|
78 0xa1bb, 0xb0ec, 0xc6f3, 0xbbb0, 0xbbcd, /* ¡»°ìÆó»°»Í */
|
|
79 0xb8de, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5 /* ¸ÞÏ»¼·È¬¶å */
|
|
80 };
|
|
81
|
|
82 static w_char _tatokan_o[] = {
|
|
83 0xceed, 0xb0ed, 0xc6f5, 0xbbb2, 0xbbcd, /* Îí°íÆõ»²»Í */
|
|
84 0xb8e0, 0xcfbb, 0xbcb7, 0xc8ac, 0xb6e5 /* ¸àÏ»¼·È¬¶å */
|
|
85 };
|
|
86
|
|
87 static w_char
|
|
88 atokan (c)
|
|
89 register w_char c;
|
|
90 {
|
|
91 if (c >= '0' && c <= '9')
|
|
92 c = _tatokan[c - '0'];
|
|
93 return (c);
|
|
94 }
|
|
95
|
|
96 static w_char
|
|
97 atokan_o (c)
|
|
98 register w_char c;
|
|
99 {
|
|
100 if (c >= '0' && c <= '9')
|
|
101 c = _tatokan_o[c - '0'];
|
|
102 return (c);
|
|
103 }
|
|
104
|
|
105 static w_char _tatoket[] = {
|
|
106 0xbdbd, 0xc9b4, 0xc0e9, /* ½½É´Àé */
|
|
107 0xcbfc, 0xb2af, 0xc3fb, 0xb5fe, /* Ëü²¯Ãûµþ */
|
|
108 0xd4b6, 0xa4b7, 0xbef7, 0xb9c2, /* Ô¶¤·¾÷¹Â */
|
|
109 0xb4c2, 0xc0b5, 0xbadc, 0xb6cb /* ´ÂÀµºÜ¶Ë */
|
|
110 /* ¹±²Ïº» *//* °¤ÁεÀ *//* Æáͳ¾ *//* ÉÔ²Ä»×µÄ *//* ̵ÎÌÂç¿ô */
|
|
111 };
|
|
112
|
|
113 static w_char _tatoket_o[] = {
|
|
114 0xbda6, 0xeff9, 0xeff4, /* ½¦ïùïô */
|
|
115 0xe8df /* èß */
|
|
116 };
|
|
117
|
|
118 static void
|
|
119 getketa (k, kouho, col)
|
|
120 int k;
|
|
121 w_char kouho[];
|
|
122 int *col;
|
|
123 {
|
|
124 if (k % 4)
|
|
125 {
|
|
126 if (keta_4 != 0)
|
|
127 {
|
|
128 keta_4 = 0;
|
|
129 getketa (k / 4 * 4, kouho, col);
|
|
130 }
|
|
131 kouho[(*col)++] = _tatoket[k % 4 - 1];
|
|
132 }
|
|
133 else if ((k / 4 > 0) && (k / 4 < 13))
|
|
134 {
|
|
135 kouho[(*col)++] = _tatoket[k / 4 + 2];
|
|
136 }
|
|
137 else if (k / 4 == 13)
|
|
138 {
|
|
139 kouho[(*col)++] = 0xbabb;
|
|
140 kouho[(*col)++] = 0xb2cf;
|
|
141 kouho[(*col)++] = 0xb9b1; /* ¹±²Ïº» */
|
|
142 }
|
|
143 else if (k / 4 == 14)
|
|
144 {
|
|
145 kouho[(*col)++] = 0xb5c0;
|
|
146 kouho[(*col)++] = 0xc1ce;
|
|
147 kouho[(*col)++] = 0xb0a4; /* °¤ÁεÀ */
|
|
148 }
|
|
149 else if (k / 4 == 15)
|
|
150 {
|
|
151 kouho[(*col)++] = 0xc2be;
|
|
152 kouho[(*col)++] = 0xcdb3;
|
|
153 kouho[(*col)++] = 0xc6e1; /* Æáͳ¾ */
|
|
154 }
|
|
155 else if (k / 4 == 16)
|
|
156 {
|
|
157 kouho[(*col)++] = 0xb5c4;
|
|
158 kouho[(*col)++] = 0xbbd7;
|
|
159 kouho[(*col)++] = 0xb2c4;
|
|
160 kouho[(*col)++] = 0xc9d4; /* ÉÔ²Ä»×µÄ */
|
|
161 }
|
|
162 else if (k / 4 == 17)
|
|
163 {
|
|
164 kouho[(*col)++] = 0xbff4;
|
|
165 kouho[(*col)++] = 0xc2e7;
|
|
166 kouho[(*col)++] = 0xcecc;
|
|
167 kouho[(*col)++] = 0xccb5; /* ̵ÎÌÂç¿ô */
|
|
168 }
|
|
169 else
|
|
170 {
|
|
171 kouho[(*col)++] = 0xa1a9; /* ¡© */
|
|
172 }
|
|
173 keta_4 = 0;
|
|
174 }
|
|
175
|
|
176 static void
|
|
177 getketa_o (k, kouho, col)
|
|
178 int k;
|
|
179 w_char kouho[];
|
|
180 int *col;
|
|
181 {
|
|
182 if (k % 4)
|
|
183 {
|
|
184 if (keta_4 != 0)
|
|
185 {
|
|
186 keta_4 = 0;
|
|
187 if (k / 4 == 1)
|
|
188 kouho[(*col)++] = _tatoket_o[3]; /* èß */
|
|
189 else
|
|
190 getketa (k / 4 * 4, kouho, col);
|
|
191 }
|
|
192 kouho[(*col)++] = _tatoket_o[k % 4 - 1];
|
|
193 }
|
|
194 else if (k / 4 == 1)
|
|
195 {
|
|
196 kouho[(*col)++] = _tatoket_o[3]; /* èß */
|
|
197 }
|
|
198 else
|
|
199 {
|
|
200 getketa (k, kouho, col);
|
|
201 }
|
|
202 keta_4 = 0;
|
|
203 }
|
|
204
|
|
205 int
|
|
206 substr (c1, c2)
|
|
207 char *c1;
|
|
208 w_char *c2;
|
|
209 {
|
|
210 for (; *c1; c1++, c2++)
|
|
211 {
|
|
212 if (*c1 != *c2)
|
|
213 break;
|
|
214 }
|
|
215 if (*c1)
|
|
216 return (0);
|
|
217 return (1);
|
|
218 }
|
|
219
|
|
220 w_char *
|
|
221 kanji_giji_str (bun, bunl, c, kouho)
|
|
222 w_char *bun;
|
|
223 int bunl;
|
|
224 w_char *c;
|
|
225 w_char *kouho;
|
|
226 {
|
|
227 register int k;
|
|
228 w_char revkouho[LENGTHYOMI];
|
|
229 char tmp[LENGTHYOMI];
|
|
230 int col = 0, keta = 0;
|
|
231
|
|
232 if (substr (DIC_HIRAGANA, c))
|
|
233 {
|
|
234 for (k = bunl - 1; k >= 0; k--)
|
|
235 {
|
|
236 *kouho++ = bun[k];
|
|
237 }
|
|
238 }
|
|
239 else if (substr (DIC_KATAKANA, c))
|
|
240 { /* ¥«¥¿¥«¥Ê */
|
|
241 for (k = bunl - 1; k >= 0; k--)
|
|
242 {
|
|
243 if ((bun[k] & 0xff00) == 0xa400)
|
|
244 *kouho++ = bun[k] | 0x100;
|
|
245 else
|
|
246 *kouho++ = bun[k];
|
|
247 }
|
|
248 }
|
|
249 else if (substr (DIC_ZENKAKU, c))
|
|
250 { /* Á´³Ñ¿ô»ú *//* £±£²£³ */
|
|
251 for (k = bunl - 1; k >= 0; k--)
|
|
252 {
|
|
253 *kouho++ = atojis (bun[k]);
|
|
254 }
|
|
255 }
|
|
256 else if (substr (DIC_NUM_KAN, c))
|
|
257 { /* ´Á¿ô»ú *//* °ìÆó»° */
|
|
258 for (k = bunl - 1; k >= 0; k--)
|
|
259 {
|
|
260 *kouho++ = atokan (bun[k]);
|
|
261 }
|
|
262 }
|
|
263 else if (substr (DIC_NUM_HANCAN, c))
|
|
264 { /* Ⱦ³Ñ¿ô»ú *//* 1,234 */
|
|
265 for (k = 0; k < bunl; k++)
|
|
266 {
|
|
267 if ((keta != 0) && (keta % 3 == 0))
|
|
268 revkouho[col++] = ','; /* , */
|
|
269 revkouho[col++] = bun[k];
|
|
270 keta++;
|
|
271 }
|
|
272 while (--col >= 0)
|
|
273 {
|
|
274 *kouho++ = revkouho[col];
|
|
275 }
|
|
276 }
|
|
277 else if (substr (DIC_NUM_ZENCAN, c))
|
|
278 { /* Á´³Ñ¿ô»ú *//* £±¡¤£²£³£´ */
|
|
279 for (k = 0; k < bunl; k++)
|
|
280 {
|
|
281 if ((keta != 0) && (keta % 3 == 0))
|
|
282 revkouho[col++] = 0xa1a4; /* ¡¤ */
|
|
283 revkouho[col++] = atojis (bun[k]);
|
|
284 keta++;
|
|
285 }
|
|
286 while (--col >= 0)
|
|
287 {
|
|
288 *kouho++ = revkouho[col];
|
|
289 }
|
|
290 }
|
|
291 else if (substr (DIC_NUM_KANSUUJI, c))
|
|
292 { /* ´Á¿ô»ú *//* É´Æó½½»° */
|
|
293 keta_4 = 0;
|
|
294 for (k = 0; k < bunl; k++)
|
|
295 {
|
|
296 if (bun[k] != '0')
|
|
297 {
|
|
298 if (keta != 0)
|
|
299 getketa (keta, revkouho, &col);
|
|
300 if (bun[k] != '1' || keta % 4 == 0 || keta % 4 == 3)
|
|
301 revkouho[col++] = atokan (bun[k]);
|
|
302 }
|
|
303 else if ((keta != 0) && (keta % 4 == 0))
|
|
304 {
|
|
305 keta_4 = 1;
|
|
306 }
|
|
307 keta++;
|
|
308 }
|
|
309 if (col == 0)
|
|
310 *kouho++ = _tatokan[0];
|
|
311 while (--col >= 0)
|
|
312 {
|
|
313 *kouho++ = revkouho[col];
|
|
314 }
|
|
315 }
|
|
316 else if (substr (DIC_NUM_KANOLD, c))
|
|
317 { /* ´Á¿ô»ú *//* °íÉ´Æõ½¦»² chao */
|
|
318 keta_4 = 0;
|
|
319 for (k = 0; k < bunl; k++)
|
|
320 {
|
|
321 if (bun[k] != '0')
|
|
322 {
|
|
323 if (keta != 0)
|
|
324 getketa_o (keta, revkouho, &col);
|
|
325 if (bun[k] != '1' || keta % 4 == 0 || keta % 4 == 3)
|
|
326 revkouho[col++] = atokan_o (bun[k]);
|
|
327 }
|
|
328 else if ((keta != 0) && (keta % 4 == 0))
|
|
329 {
|
|
330 keta_4 = 1;
|
|
331 }
|
|
332 keta++;
|
|
333 }
|
|
334 if (col == 0)
|
|
335 *kouho++ = _tatokan_o[0];
|
|
336 while (--col >= 0)
|
|
337 {
|
|
338 *kouho++ = revkouho[col];
|
|
339 }
|
|
340 }
|
|
341 else if (substr (DIC_ESC, c))
|
|
342 {
|
|
343 *kouho++ = '\\';
|
|
344 }
|
|
345 else if (substr (DIC_HEX, c) || substr (DIC_HEXc, c))
|
|
346 {
|
|
347 int num;
|
|
348 wnn_sStrcpy (tmp, c + strlen (DIC_HEX));
|
|
349 sscanf (tmp, "%x", &num);
|
|
350 *kouho++ = num;
|
|
351 }
|
|
352 else if (substr (DIC_OCT, c))
|
|
353 {
|
|
354 int num;
|
|
355 wnn_sStrcpy (tmp, c + strlen (DIC_OCT));
|
|
356 sscanf (tmp, "%o", &num);
|
|
357 *kouho++ = num;
|
|
358 }
|
|
359 else
|
|
360 {
|
|
361 *kouho++ = *c++;
|
|
362 for (; *c && *c != '\\'; c++)
|
|
363 {
|
|
364 *kouho++ = *c;
|
|
365 }
|
|
366 if (*c == '\\')
|
|
367 *kouho++ = *c;
|
|
368 }
|
|
369 *kouho = 0;
|
|
370 return (kouho);
|
|
371 }
|
|
372
|
|
373
|
|
374 void
|
|
375 kanji_esc_str (w, oy, oyl)
|
|
376 w_char *w, *oy;
|
|
377 int oyl;
|
|
378 {
|
|
379 register w_char *ret;
|
|
380 register w_char *c = w;
|
|
381 w_char tmp[LENGTHKANJI];
|
|
382
|
|
383 for (; *c; c++)
|
|
384 {
|
|
385 if (*c == '\\')
|
|
386 break;
|
|
387 }
|
|
388 if (!*c)
|
|
389 return;
|
|
390 wnn_Strcpy (tmp, w);
|
|
391 c = tmp;
|
|
392 while (*c)
|
|
393 {
|
|
394 if (*c == '\\' && (ret = kanji_giji_str (oy, oyl, c, w)) != NULL)
|
|
395 {
|
|
396 w = ret;
|
|
397 for (++c; *c; c++)
|
|
398 {
|
|
399 if (*c == '\\')
|
|
400 {
|
|
401 c++;
|
|
402 break;
|
|
403 }
|
|
404 }
|
|
405 }
|
|
406 else
|
|
407 {
|
|
408 *w++ = *c++;
|
|
409 }
|
|
410 }
|
|
411 *w = 0;
|
|
412 }
|
|
413
|
|
414 void
|
|
415 Get_kanji (kptr, oy, oyl, kanji, yomi, comment)
|
|
416 UCHAR *kptr;
|
|
417 w_char *kanji, *comment, *yomi, *oy;
|
|
418 int oyl;
|
|
419 {
|
|
420 extern void get_kanji_str ();
|
|
421
|
|
422 get_kanji_str (kptr, kanji, yomi, comment);
|
|
423 if (kanji)
|
|
424 {
|
|
425 kanji_esc_str (kanji, oy, oyl);
|
|
426 }
|
|
427 if (yomi)
|
|
428 {
|
|
429 kanji_esc_str (yomi, oy, oyl);
|
|
430 }
|
|
431 }
|
|
432
|
|
433
|
|
434 void
|
|
435 Get_knj1 (kptr, oy, oyl, kanji2, kouho, yomi, comment)
|
|
436 UCHAR *kptr;
|
|
437 int kanji2;
|
|
438 w_char *kouho, *comment, *yomi, *oy;
|
|
439 int oyl;
|
|
440 {
|
|
441 int tcnt;
|
|
442 for (tcnt = 0; tcnt < kanji2; tcnt++)
|
|
443 {
|
|
444 kptr += *kptr;
|
|
445 }
|
|
446 Get_kanji (kptr, oy, oyl, kouho, yomi, comment);
|
|
447 return;
|
|
448 }
|