comparison Wnn/romkan/rk_bltinfn.c @ 0:bbc77ca4def5

initial import
author Yoshiki Yazawa <yaz@cc.rim.or.jp>
date Thu, 13 Dec 2007 04:30:14 +0900
parents
children a7ccf412ba02
comparison
equal deleted inserted replaced
-1:000000000000 0:bbc77ca4def5
1 /*
2 * $Id: rk_bltinfn.c,v 1.6 2005/04/10 15:26:38 aonoto Exp $
3 */
4
5 /*
6 * FreeWnn is a network-extensible Kana-to-Kanji conversion system.
7 * This file is part of FreeWnn.
8 *
9 * Copyright Kyoto University Research Institute for Mathematical Sciences
10 * 1987, 1988, 1989, 1990, 1991, 1992
11 * Copyright OMRON Corporation. 1987, 1988, 1989, 1990, 1991, 1992, 1999
12 * Copyright ASTEC, Inc. 1987, 1988, 1989, 1990, 1991, 1992
13 * Copyright FreeWnn Project 1999, 2000, 2002
14 *
15 * Maintainer: FreeWnn Project <freewnn@tomo.gr.jp>
16 *
17 * This library is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU Lesser General Public
19 * License as published by the Free Software Foundation; either
20 * version 2 of the License, or (at your option) any later version.
21 *
22 * This library is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 * Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public
28 * License along with this library; if not, write to the
29 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
30 * Boston, MA 02111-1307, USA.
31 */
32
/***********************************************************************
                        rk_bltinfn.c
                                                87.12. 6  revised and supplemented

        Defines the more complex of the built-in conversion functions.
        Most of them convert between full-width and half-width characters.
***********************************************************************/
40 /* Version 3.0 */
41
#ifdef HAVE_CONFIG_H
#  include <config.h>
#endif

#include <stdlib.h>

#if STDC_HEADERS
#  include <string.h>
#elif HAVE_STRINGS_H
#  include <strings.h>
#endif /* STDC_HEADERS */

#include "commonhd.h"
#include "wnn_config.h"
#include "rk_header.h"
55
/* Codes of the half-width characters.  Each one is the lead byte HNKAK1
   followed by the low byte given here. */
#define HKCHOU (HNKAK1 * 0x100 + 0xB0)  /* half-width prolonged sound mark */
#define HKDKTN (HNKAK1 * 0x100 + 0xDE)  /* half-width dakuten (voiced sound mark) */
#define HKHNDK (HNKAK1 * 0x100 + 0xDF)  /* half-width handakuten (semi-voiced sound mark) */
#define HKMARU (HNKAK1 * 0x100 + 0xA1)  /* half-width ideographic full stop */
#define HKHRKG (HNKAK1 * 0x100 + 0xA2)  /* half-width opening corner bracket */
#define HKTJKG (HNKAK1 * 0x100 + 0xA3)  /* half-width closing corner bracket */
#define HKTTEN (HNKAK1 * 0x100 + 0xA4)  /* half-width ideographic comma */
#define HKNKPT (HNKAK1 * 0x100 + 0xA5)  /* half-width middle dot */

/* Codes of the corresponding full-width characters. */
#define CHOUON (0xA1BC)         /* full-width prolonged sound mark */
#define DAKUTN (0xA1AB)         /* full-width dakuten (voiced sound mark) */
#define HNDAKU (0xA1AC)         /* full-width handakuten (semi-voiced sound mark) */
#define MNMARU (0xA1A3)         /* full-width ideographic full stop */ /* the name abbreviates MaNMARU ("manmaru") */
#define HRKKAG (0xA1D6)         /* full-width opening corner bracket */
#define TJIKAG (0xA1D7)         /* full-width closing corner bracket */
#define TOUTEN (0xA1A2)         /* full-width ideographic comma */
#define NKPOTU (0xA1A6)         /* full-width middle dot */
75
/* Current language name, consulted by to_zenalpha() and to_zenhira() to
   choose a conversion table.  Holds at most sizeof (_lang) - 1 characters
   plus the terminating NUL. */
static char _lang[6];

/**
 * Record the language name used by the conversion functions in this file.
 *
 * lang: NUL-terminated language name, or a null pointer.  Only the first
 *       sizeof (_lang) - 1 characters are kept; longer names are silently
 *       truncated.  A null pointer is treated as the empty name instead of
 *       being handed to strncpy().
 */
void
romkan_set_lang (lang)
     char *lang;
{
  if (!lang)
    lang = "";

  /* strncpy() zero-fills a short copy but does not terminate a full one,
     so the last byte is always set explicitly. */
  strncpy (_lang, lang, sizeof (_lang) - 1);
  _lang[sizeof (_lang) - 1] = '\0';
}
85
86 /** ASCIIʸ»ú¢ªÁ´³Ñ */
87 /* *INDENT-OFF* */
88 letter
89 to_zenalpha (l)
90 letter l;
91 /* *INDENT-ON* */
92 {
93 letter retval;
94
95 static uns_chr *data = (uns_chr *) "¡¡¡ª¡É¡ô¡ð¡ó¡õ¡Ç¡Ê¡Ë¡ö¡Ü¡¤¡Ý¡¥¡¿£°£±£²£³£´£µ£¶£·£¸£¹¡§¡¨¡ã¡á¡ä¡©\
96 ¡÷£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú¡Î¡ï¡Ï¡°¡²¡®£á£â£ã£ä£å\
97 £æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú¡Ð¡Ã¡Ñ¡±";
98
99 #ifdef CHINESE
100 static uns_chr *data_cn = (uns_chr *) "¡¡£¡¡±££¡ç£¥£¦£§£¨£©¡ù£«£¬¡ª£®£¯£°£±£²£³£´£µ£¶£·£¸£¹£º£»¡´£½¡µ£¿\
101 £À£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú£Û£¤£Ý£Þ£ß£à£á£â£ã£ä£å\
102 £æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú£û£ü£ý£þ";
103 #endif /* CHINESE */
104 #ifdef KOREAN
105 static uns_chr *data_ko = (uns_chr *) "¡¡£¡£¢£££¤£¥£¦£§£¨£©£ª£«£¬£­£®£¯£°£±£²£³£´£µ£¶£·£¸£¹£º£»£¼£½£¾£¿\
106 £À£Á£Â£Ã£Ä£Å£Æ£Ç£È£É£Ê£Ë£Ì£Í£Î£Ï£Ð£Ñ£Ò£Ó£Ô£Õ£Ö£×£Ø£Ù£Ú£Û£Ü£Ý£Þ£ß£à£á£â£ã£ä£å\
107 £æ£ç£è£é£ê£ë£ì£í£î£ï£ð£ñ£ò£ó£ô£õ£ö£÷£ø£ù£ú£û£ü£ý£þ";
108 #endif /* KOREAN */
109
110 if (' ' <= l && l <= '~')
111 {
112 l = (l - ' ') << 1;
113 #ifdef CHINESE
114 if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
115 {
116 retval = data_cn[l] << 8;
117 retval += data_cn[++l];
118 }
119 else
120 #endif
121 #ifdef KOREAN
122 if (!strcmp (_lang, WNN_K_LANG))
123 {
124 retval = data_ko[l] << 8;
125 retval += data_ko[++l];
126 }
127 else
128 #endif /* KOREAN */
129 {
130 retval = data[l] << 8;
131 retval += data[++l];
132 }
133 return (retval);
134 }
135 else
136 return (l);
137 }
138
/* Half-width katakana spellings, one string of two-byte characters per
   entry.  to_hankata() indexes this table with (in - KATBGN) for katakana
   and with (in - HIRBGN) for hiragana, and emits every two-byte character of
   the selected entry.  hank_setup() may rewrite the lead byte of the
   half-width characters at start-up. */
static char *hankdata[] = {
  "Ž§", "Ž±", "Ž¨", "Ž²", "Ž©", "Ž³", "Žª", "Ž´", "Ž«", "Žµ",
  "Ž¶", "Ž¶ŽÞ", "Ž·", "Ž·ŽÞ", "Ž¸", "Ž¸ŽÞ", "Ž¹", "Ž¹ŽÞ", "Žº", "ŽºŽÞ",
  "Ž»", "Ž»ŽÞ", "Ž¼", "Ž¼ŽÞ", "Ž½", "Ž½ŽÞ", "Ž¾", "Ž¾ŽÞ", "Ž¿", "Ž¿ŽÞ",
  "ŽÀ", "ŽÀŽÞ", "ŽÁ", "ŽÁŽÞ", "Ž¯", "ŽÂ", "ŽÂŽÞ", "ŽÃ", "ŽÃŽÞ", "ŽÄ", "ŽÄŽÞ",
  "ŽÅ", "ŽÆ", "ŽÇ", "ŽÈ", "ŽÉ",
  "ŽÊ", "ŽÊŽÞ", "ŽÊŽß", "ŽË", "ŽËŽÞ", "ŽËŽß", "ŽÌ", "ŽÌŽÞ", "ŽÌŽß",
  "ŽÍ", "ŽÍŽÞ", "ŽÍŽß", "ŽÎ", "ŽÎŽÞ", "ŽÎŽß",
  "ŽÏ", "ŽÐ", "ŽÑ", "ŽÒ", "ŽÓ",
  "Ž¬", "ŽÔ", "Ž­", "ŽÕ", "Ž®", "ŽÖ",
  "Ž×", "ŽØ", "ŽÙ", "ŽÚ", "ŽÛ",
  "¥î", "ŽÜ", "¥ð", "¥ñ", "Ž¦", "ŽÝ",
  "Ž³ŽÞ", "¥õ", "¥ö"
};                              /* Note: some entries are full-width characters. */
153
154 /** ¾å¤Îhankdata¤¬¡¢¼ÂºÝ¤Ë»È¤¦È¾³Ñ¥³¡¼¥É¤òɽ¤·¤Æ¤¤¤Ê¤¤¤È¤­¡¢¼ÂºÝ¤Î¤â¤Î¤Ë
155 ½¤Àµ¤¹¤ë¡£½é´üÀßÄê»þ¤Ë°ì²ó¤À¤±¸Æ¤Ö */
156 void
157 hank_setup ()
158 {
159 int i;
160 char *s, orig_hnkak1;
161
162 orig_hnkak1 = *hankdata[0];
163 /* *hankdata[] ¤Ç¤ÎȾ³Ñʸ»ú¤Î£±¥Ð¥¤¥È¤á¡£È¾³Ñʸ»ú¤Î£±¥Ð¥¤¥È¤á¤À¤±¤¬°Û¤Ê¤ë
164 ¤è¤¦¤Ê¾µ¡¼ï¤Ë°Ü¿¢¤¹¤ë¤È¤­¤Ï¡¢HNKAK1¤Îdefine¤òÊѤ¨¤ì¤ÐOK¡£Ã¢¤·romkan¤Î
165 ¥½¡¼¥¹Ãæ¤ÎȾ³Ñʸ»ú¡Ê¤³¤Î¥Õ¥¡¥¤¥ë¤Ë¤Î¤ß¸ºß¡Ë¤â¥³¥ó¥Ð¡¼¥È¤·¤Æ¡¢¤½¤Îµ¡¼ï
166 ¤Ë¹ç¤ï¤»¤ë¤Û¤¦¤¬Ë¾¤Þ¤·¤¤¡£¤·¤«¤·¡¢¥¨¥Ç¥£¥¿¤Ç¤³¤Î¥Õ¥¡¥¤¥ë¤ò½¤Àµ¤·¤¿¤ê
167 ¤¹¤ë¾ì¹ç¤Ë¡¢È¾³Ñʸ»ú¤Î°·¤¤¤¬¤¦¤Þ¤¯¤¤¤«¤Ê¤¤¾ì¹ç¤¬¤¢¤ë¤Î¤Ç¡¢ÆäË
168 ¥³¥ó¥Ð¡¼¥È¤ò¤·¤Ê¤¯¤È¤âÆ°ºî¤¹¤ë¤è¤¦¤Ë½èÃ֤Ϥ·¤Æ¤¢¤ë¡£¤½¤ì¤¬¡¢¤³¤Î
169 hank_setup()¤Ç¤¢¤ë¡£hankdata¤Ï¡¢½é´üÀßÄê»þ¤Ë hank_setup()¤Ë¤è¤Ã¤Æ
170 ¼ÂºÝ¤ÎȾ³Ñ¥³¡¼¥É¤Ëľ¤µ¤ì¤ë¡£ */
171
172 if (orig_hnkak1 == (char) HNKAK1)
173 return;
174 for (i = 0; i < numberof (hankdata); i++)
175 {
176 for (s = hankdata[i]; *s; s += 2)
177 if (*s == orig_hnkak1)
178 *s = HNKAK1;
179 }
180 }
181
182 /** ¤«¤Ê¢ªÈ¾³Ñ¥«¥¿¥«¥Ê¡£·ë²Ì¤¬Æóʸ»ú¤Ë¤Ê¤ë¤³¤È¤â¤¢¤ë¡£*/
183 void
184 to_hankata (in, outp)
185 letter in, **outp;
186 {
187 uns_chr *p, c;
188 letter *out;
189
190 out = *outp;
191 switch (in)
192 {
193 case CHOUON:
194 *out++ = HKCHOU;
195 break;
196 case DAKUTN:
197 *out++ = HKDKTN;
198 break;
199 case HNDAKU:
200 *out++ = HKHNDK;
201 break;
202 case MNMARU:
203 *out++ = HKMARU;
204 break;
205 case HRKKAG:
206 *out++ = HKHRKG;
207 break;
208 case TJIKAG:
209 *out++ = HKTJKG;
210 break;
211 case TOUTEN:
212 *out++ = HKTTEN;
213 break;
214 case NKPOTU:
215 *out++ = HKNKPT;
216 break;
217 default:
218 if (is_kata (in))
219 {
220 for (p = (uns_chr *) hankdata[in - KATBGN]; c = *p; p++)
221 *out++ = (c << 8) + *++p;
222 }
223 else if (is_hira (in))
224 {
225 for (p = (uns_chr *) hankdata[in - HIRBGN]; c = *p; p++)
226 *out++ = (c << 8) + *++p;
227 }
228 else
229 {
230 *out++ = in;
231 }
232 }
233 *out = EOLTTR;
234 *outp = out;
235 }
236
237 /** Ⱦ³Ñ¥«¥¿¥«¥Ê¢ª¤Ò¤é¤¬¤Ê¡£Ã¢¤·¡¢ÂùÅÀ¤ò»ý¤Äʸ»ú¤ò°ì¤Ä¤Ë¤Þ¤È¤á¤Æ¤Ï
238 ¤¯¤ì¤Ê¤¤¤Î¤ÇÃí°Õ¡£*/
239 /* *INDENT-OFF* */
240 letter
241 to_zenhira (l)
242 letter l;
243 /* *INDENT-ON* */
244 {
245 letter retval;
246
247 static uns_chr *data = (uns_chr *) "¡£¡Ö¡×¡¢¡¦¤ò¤¡¤£¤¥¤§¤©¤ã¤å¤ç¤Ã¡¼¤¢¤¤¤¦¤¨¤ª¤«¤­¤¯¤±¤³¤µ¤·¤¹¤»¤½¤¿\
248 ¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤Í¤Î¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤á¤â¤ä¤æ¤è¤é¤ê¤ë¤ì¤í¤ï¤ó¡«¡¬";
249
250 #ifdef CHINESE
251 static uns_chr *data_cn = (uns_chr *) "¡£¡¸¡¹¡¢¡¤¤ò¤¡¤£¤¥¤§¤©¤ã¤å¤ç¤Ã¡¼¤¢¤¤¤¦¤¨¤ªÂ𤭤¯¤±¤³¶È¤·¤¹¤»¤½¤¿\
252 ¤Á¤Ä¤Æ¤È¤Ê¤Ë¤Ì¤ÍµÄ¤Ï¤Ò¤Õ¤Ø¤Û¤Þ¤ß¤à¤áÒ²¤ä¤æ¤è¤é¤ê¤ë¤ì¤í¤ï¤ó¡å¡ã";
253 #endif /* CHINESE */
254
255 if (is_hankata (l))
256 {
257 l = (l - HKKBGN) << 1;
258 #ifdef CHINESE
259 if (!strcmp (_lang, WNN_C_LANG) || !strcmp (_lang, WNN_T_LANG))
260 {
261 retval = data_cn[l] << 8;
262 retval += data_cn[++l];
263 }
264 else
265 #endif
266 {
267 retval = data[l] << 8;
268 retval += data[++l];
269 }
270 return (retval);
271 }
272 else
273 return (l);
274 }
275
276 /** Ⱦ³Ñ¥«¥¿¥«¥Ê¢ªÁ´³Ñ¡£Ã¢¤·¡¢ÂùÅÀ¤ò»ý¤Äʸ»ú¤ò°ì¤Ä¤Ë¤Þ¤È¤á¤Æ¤Ï
277 ¤¯¤ì¤Ê¤¤¤Î¤ÇÃí°Õ¡£*/
278 /* *INDENT-OFF* */
279 letter
280 to_zenkata (l)
281 letter l;
282 /* *INDENT-ON* */
283 {
284 return (is_hankata (l) ? (l = to_zenhira (l), to_kata (l)) : l);
285 }
286
/* Build one byte of a bit vector from eight flags; b0 becomes the least
   significant bit and b7 the most significant.  Every argument is
   parenthesized so that an arbitrary expression may be passed as a flag. */
#define bitvec(b0, b1, b2, b3, b4, b5, b6, b7) ( \
        (char)(b0) | ((char)(b1) << 1) | ((char)(b2) << 2) | ((char)(b3) << 3) | \
        ((char)(b4) << 4) | ((char)(b5) << 5) | ((char)(b6) << 6) | ((char)(b7) << 7) \
)

/** Treat the char array h as a bit vector and test its i-th bit.
    The result is nonzero when the bit is set and zero otherwise.  h is
    parenthesized so that a pointer expression such as (tbl + 1) works. */
#define bitlook(h, i) ((h)[(i) >> 3] & (1 << ((i) & 7)))
295
/* Full-width codes used by handakuadd() and dakuadd(). */
#define KATRPT 0xA1B3           /* katakana iteration mark */
#define HIRRPT 0xA1B5           /* hiragana iteration mark */
#define KATA_U 0xA5A6           /* katakana U */
#define KAT_VU 0xA5F4           /* katakana VU */
#define HIR_KA 0xA4AB           /* hiragana KA */
#define HIR_HO 0xA4DB           /* hiragana HO */
#define KAT_KA 0xA5AB           /* katakana KA */
#define KAT_HO 0xA5DB           /* katakana HO */
#define HIR_HA 0xA4CF           /* hiragana HA */
#define KAT_HA 0xA5CF           /* katakana HA */
306
307 /** ¸å¤í¤ËȾÂùÅÀ¤ò¤¯¤Ã¤Ä¤±¤ë¡£·ë²Ì¤Ï°ìËô¤ÏÆóʸ»ú¡£*/
308 void
309 handakuadd (in, outp)
310 letter in, **outp;
311 {
312 if ((HIR_HA <= in && in <= HIR_HO) ? 0 == (in - HIR_HA) % 3 : (KAT_HA <= in && in <= KAT_HO && 0 == (in - KAT_HA) % 3))
313 {
314 *(*outp)++ = in + 2;
315 }
316 else
317 {
318 *(*outp)++ = in;
319 *(*outp)++ = HNDAKU;
320 }
321 **outp = EOLTTR;
322 }
323
324 /** ¸å¤í¤ËÂùÅÀ¤ò¤¯¤Ã¤Ä¤±¤ë¡£·ë²Ì¤Ï°ìËô¤ÏÆóʸ»ú¡£*/
325 void
326 dakuadd (in, outp)
327 letter in, **outp;
328 {
329 static char flgbit[] = {
330 bitvec (1, 0, 1, 0, 1, 0, 1, 0), /* ¤«¤¬¤­¤®¤¯¤°¤±¤² */
331 bitvec (1, 0, 1, 0, 1, 0, 1, 0), /* ¤³¤´¤µ¤¶¤·¤¸¤¹¤º */
332 bitvec (1, 0, 1, 0, 1, 0, 1, 0), /* ¤»¤¼¤½¤¾¤¿¤À¤Á¤Â */
333 bitvec (0, 1, 0, 1, 0, 1, 0, 0), /* ¤Ã¤Ä¤Å¤Æ¤Ç¤È¤É¤Ê */
334 bitvec (0, 0, 0, 0, 1, 0, 0, 1), /* ¤Ë¤Ì¤Í¤Î¤Ï¤Ð¤Ñ¤Ò */
335 bitvec (0, 0, 1, 0, 0, 1, 0, 0), /* ¤Ó¤Ô¤Õ¤Ö¤×¤Ø¤Ù¤Ú */
336 bitvec (1, 0, 0, 0, 0, 0, 0, 0) /* ¤Û */
337 };
338 letter c;
339
340 if ((HIR_KA <= in && in <= HIR_HO) ? (c = in - HIR_KA, 1) : (KAT_KA <= in && in <= KAT_HO && (c = in - KAT_KA, 1)))
341 {
342 if (bitlook (flgbit, c))
343 {
344 *(*outp)++ = in + 1;
345 }
346 else
347 {
348 *(*outp)++ = in;
349 *(*outp)++ = DAKUTN;
350 }
351 }
352 else
353 switch (in)
354 {
355 case KATRPT:
356 case HIRRPT:
357 *(*outp)++ = in + 1;
358 break;
359 case KATA_U:
360 *(*outp)++ = KAT_VU;
361 break;
362 default:
363 *(*outp)++ = in;
364 *(*outp)++ = DAKUTN;
365 }
366 **outp = EOLTTR;
367 }
368
369 /** in¤ÇÍ¿¤¨¤é¤ì¤¿¥³¡¼¥É¤òbase¿Ê¤Î¿ô»ú¤Ë¤·¤Æoutp¤ËÆþ¤ì¤ë¡£*/
370 void
371 to_digit (in, base, outp)
372 letter in, base, **outp;
373 {
374 letter c, vtol ();
375
376 if (c = in, c /= base)
377 to_digit (c, base, outp);
378 *(*outp)++ = vtol (in % base);
379 **outp = EOLTTR;
380 }