Mercurial > kinput2.yaz
comparison lib/ctext.c @ 0:92745d501b9a
initial import from kinput2-v3.1
author | Yoshiki Yazawa <yaz@honeyplanet.jp> |
---|---|
date | Mon, 08 Mar 2010 04:44:30 +0900 |
parents | |
children | 5a32b68b627d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:92745d501b9a |
---|---|
1 /* | |
2 * ctext.c -- Compound Text <-> Japanese Wide Character String converter | |
3 */ | |
4 | |
5 /****************************************************************************** | |
6 | |
7 - Designation sequences | |
8 1byte multi-byte | |
9 94char 96char 94char 96char | |
10 ------------------------------------------------------- | |
11 G0 : ESC ( F | -none- ESC $ ( F | -none- | |
12 G1 : ESC ) F | ESC - F ESC $ ) F | ESC $ - F | |
13 | |
14 - Final character F | |
15 1byte | |
16 94chars | |
17 B ASCII | |
18 I JIS KANA | |
19 J JIS-ROMAN | |
20 96chars | |
21 A 8859/1 right half | |
22 B 8859/2 right half | |
23 C 8859/3 right half | |
24 D 8859/4 right half | |
25 F 8859/7 right half | |
26 G 8859/6 right half | |
27 H 8859/8 right half | |
28 M 8859/9 (DIS) right half | |
29 multi-byte | |
30 94chars ^ 2 | |
31 A GB Hanzi | |
32 B JIS Kanji 1983 | |
33 C KS Hangul/Hanja | |
34 | |
35 ------------------------------------------------------------------------------- | |
36 COMPOUND_TEXT specification (Compound Text Encoding Version 1 -- MIT X Consortium Standard) | |
37 - Only G0 and G1 are used; G2 and G3 are not used. | |
38 - G0 is invoked into GL and G1 into GR, and this cannot be changed. | |
39 In other words, Locking Shift and Single Shift are not used. | |
40 - Initially, ISO Latin-1 is designated to G0/G1. | |
41 - ESC-$-F is not used to designate a multi-byte character set to G0; | |
42 ESC-$-(-F is used instead. | |
43 - The usable final characters are those listed above. | |
44 - The only C0 characters that may be used are NL, TAB and ESC. | |
45 - The only C1 character that may be used is CSI. | |
46 - Text-direction sequences are included: | |
47 left-to-right | |
48 right-to-left | |
49 return to the original direction | |
50 ******************************************************************************/ | |
51 | |
52 /* | |
53 * Copyright (c) 1989 Software Research Associates, Inc. | |
54 * | |
55 * Permission to use, copy, modify, and distribute this software and its | |
56 * documentation for any purpose and without fee is hereby granted, provided | |
57 * that the above copyright notice appear in all copies and that both that | |
58 * copyright notice and this permission notice appear in supporting | |
59 * documentation, and that the name of Software Research Associates not be | |
60 * used in advertising or publicity pertaining to distribution of the | |
61 * software without specific, written prior permission. Software Research | |
62 * Associates makes no representations about the suitability of this software | |
63 * for any purpose. It is provided "as is" without express or implied | |
64 * warranty. | |
65 * | |
66 * Author: Makoto Ishisone, Software Research Associates, Inc., Japan | |
67 * ishisone@sra.co.jp | |
68 */ | |
69 | |
#ifndef lint
/* RCS identification string; left out when building under lint. */
static char *rcsid = "$Id: ctext.c,v 2.6 1999/03/10 08:55:15 ishisone Exp $";
#endif

/* Wide character type used by the converters in this file. */
typedef unsigned short wchar;

/*
 * Supply NULL only when no header has defined it yet.  An unconditional
 * definition would be an incompatible redefinition of the macro that the
 * standard headers provide.
 */
#ifndef NULL
#define NULL	0
#endif

/*
 * Flag bits OR'ed onto the final character of a designation sequence to
 * record what kind of character set is currently designated.
 */
#define CS96	0x100	/* 96chars CS */
#define MBCS	0x200	/* Multibyte CS */
80 | |
81 /* convJWStoCT -- Japanese Wide Character String -> COMPOUND_TEXT */ | |
82 int | |
83 convJWStoCT(wstr, xstr, jisroman) | |
84 register wchar *wstr; | |
85 register unsigned char *xstr; | |
86 int jisroman; /* true $B$J$i$P(B G0 $B$N%-%c%i%/%?%;%C%H$H$7$F(B JIS ROMAN $B$r!"(B | |
87 * false $B$J$i$P(B ASCII $B$r;HMQ$9$k(B | |
88 */ | |
89 /* Wide Character string wstr $B$r(B COMPOUND_TEXT xstr $B$KJQ49$7!"(B | |
90 * $BJQ498e$N%P%$%H?t$rJV$9(B($B:G8e$N(B null byte $B$O4^$^$J$$(B)$B!#$b$7(B xstr $B$,(B | |
91 * NULL $B$J$i$PJQ49$O$;$:!"J8;z?t$N$_$rJV$9!#(B | |
92 */ | |
93 { | |
94 register int c; | |
95 register int g0, g1; | |
96 register int n = 0; | |
97 int g0cs; | |
98 | |
99 g0cs = jisroman ? 'J' : 'B'; | |
100 | |
101 g0 = 'B'; | |
102 g1 = CS96|'A'; | |
103 | |
104 /* | |
105 * G0, G1 $B$O<!$N$h$&$K;H$$J,$1$k(B | |
106 * G0: ASCII / JIS-ROMAN | |
107 * G1: $B4A;z(B / $B$+$J(B | |
108 */ | |
109 | |
110 while (c = *wstr++) { | |
111 switch (c & 0x8080) { | |
112 case 0: /* ASCII or C0 or DEL */ | |
113 if (g0 != g0cs) { | |
114 if (xstr) { | |
115 *xstr++ = '\033'; | |
116 *xstr++ = '('; | |
117 *xstr++ = g0cs; | |
118 } | |
119 n += 3; | |
120 g0 = g0cs; | |
121 /* | |
122 * We have to invalidate G1 here, | |
123 * which is unnecessary if Xlib | |
124 * implementation is sane. | |
125 */ | |
126 g1 = g0cs; | |
127 } | |
128 /* | |
129 * Of course it isn't necessary to disignate | |
130 * ASCII to G0 before a control character, but | |
131 * someone reported certain version of Xlib needs | |
132 * this. sigh. | |
133 */ | |
134 if (c < ' ' || c == 0x7f) { | |
135 /* C0 or DEL */ | |
136 if (c == '\t' || c == '\n') { | |
137 if (xstr) *xstr++ = c; | |
138 n++; | |
139 } | |
140 break; | |
141 } | |
142 if (xstr) *xstr++ = c & 0x7f; | |
143 n++; | |
144 break; | |
145 case 0x80: /* $B$+$J(B or C1 */ | |
146 if (0x80 <= c && c <= 0x9f) break; | |
147 if (g1 != 'I') { | |
148 if (xstr) { | |
149 *xstr++ = '\033'; | |
150 *xstr++ = ')'; | |
151 *xstr++ = 'I'; | |
152 } | |
153 n += 3; | |
154 g1 = 'I'; | |
155 g0 = 'I'; /* invalidate G0. see below */ | |
156 } | |
157 if (xstr) *xstr++ = c & 0xff; | |
158 n++; | |
159 break; | |
160 case 0x8080: /* $B4A;z(B */ | |
161 if (g1 != (MBCS|'B')) { | |
162 if (xstr) { | |
163 *xstr++ = '\033'; | |
164 *xstr++ = '$'; | |
165 *xstr++ = ')'; | |
166 *xstr++ = 'B'; | |
167 } | |
168 n += 4; | |
169 g1 = MBCS|'B'; | |
170 /* | |
171 * We have to invalidate G0 here, | |
172 * which is unnecessary if Xlib | |
173 * implementation is sane. | |
174 */ | |
175 g0 = MBCS|'B'; | |
176 } | |
177 if (xstr) { | |
178 *xstr++ = (c >> 8) & 0xff; | |
179 *xstr++ = c & 0xff; | |
180 } | |
181 n += 2; | |
182 break; | |
183 default: | |
184 /* $BL5;k$9$k(B */ | |
185 break; | |
186 } | |
187 } | |
188 if (xstr) *xstr = '\0'; | |
189 return n; | |
190 } | |
191 | |
/*
 * getesc -- scan the part of an escape sequence that follows ESC.
 *
 * str points at the octet after ESC and len is the number of octets
 * available from there.  Any run of intermediate characters
 * (02/00 .. 02/15) is skipped, then one final character (03/00 .. 07/14)
 * is consumed.
 *
 * Returns a pointer just past the final character, or NULL if the input
 * runs out first or the octet in the final position is out of range.
 */
static unsigned char *
getesc(unsigned char *str, int len)
{
	int c;

	/* intermediate characters are 02/00 through 02/15 */
	while (len > 0) {
		c = *str;
		if (c < 0x20 || 0x2f < c)
			break;
		len--;
		str++;
	}

	/* the final character is 03/00 through 07/14 */
	if (len <= 0)
		return (unsigned char *)NULL;
	c = *str++;
	if (c < 0x30 || 0x7e < c)
		return (unsigned char *)NULL;

	return str;
}
215 | |
/*
 * getcsi -- scan the part of a CSI sequence that follows CSI.
 *
 * str points at the octet after CSI and len is the number of octets
 * available from there.  Any run of parameter characters (03/00 .. 03/15)
 * is skipped, then any run of intermediate characters (02/00 .. 02/15),
 * and finally one final character (04/00 .. 07/14) is consumed.
 *
 * Returns a pointer just past the final character, or NULL if the input
 * runs out first or the octet in the final position is out of range.
 */
static unsigned char *
getcsi(unsigned char *str, int len)
{
	int c;

	/* parameter characters are 03/00 through 03/15 */
	while (len > 0) {
		c = *str;
		if (c < 0x30 || 0x3f < c)
			break;
		len--;
		str++;
	}

	/* intermediate characters are 02/00 through 02/15 */
	while (len > 0) {
		c = *str;
		if (c < 0x20 || 0x2f < c)
			break;
		len--;
		str++;
	}

	/* the final character is 04/00 through 07/14 */
	if (len <= 0)
		return (unsigned char *)NULL;
	c = *str++;
	if (c < 0x40 || 0x7e < c)
		return (unsigned char *)NULL;

	return str;
}
246 | |
247 /* convCTtoJWS -- COMPOUND_TEXT -> Japanese Wide Character String */ | |
248 int | |
249 convCTtoJWS(xstr, len, wstr) | |
250 register unsigned char *xstr; | |
251 int len; | |
252 wchar *wstr; | |
253 /* COMPOUND_TEXT xstr $B$r(B Wide Character string wstr $B$KJQ49$7!"(B | |
254 * $BJQ498e$NJ8;z?t$rJV$9(B($B:G8e$N(B null $BJ8;z$O4^$^$J$$(B)$B!#$b$7(B wstr $B$,(B | |
255 * NULL $B$J$i$PJQ49$O$;$:!"J8;z?t$N$_$rJV$9!#(B | |
256 */ | |
257 { | |
258 register int c; | |
259 int nskip; | |
260 int n = 0; | |
261 int g0, g1, gs; | |
262 unsigned char *xstr1; | |
263 | |
264 /* | |
265 * Compound Text $BCf$K$O(B null octet $B$,4^$^$l$k2DG=@-$,$"$k(B | |
266 * $B$=$3$GJ8;zNs$ND9$5(B len $B$r0z?t$G;XDj$G$-$k$h$&$K$7$F$"$k$N$@$,!"(B | |
267 * 0 $B$"$k$$$OIi$N;~$K$O(B (null octet $B$O$J$$$b$N$H$7$F(B) strlen() $B$G(B | |
268 * $BD9$5$rD4$Y$k(B | |
269 */ | |
270 if (len <= 0) { | |
271 len = strlen((char *)xstr); | |
272 } | |
273 | |
274 /* $B=i4|>uBV$O!"(BISO 8859/1 $B$,(B G0/G1 $B$KF~$C$F$$$k(B */ | |
275 g0 = 'B'; /* ASCII -> G0 */ | |
276 g1 = CS96|'A'; /* Latin/1 right hand part -> G1 */ | |
277 | |
278 while (len-- > 0) { | |
279 switch (c = *xstr++) { | |
280 case '\n': /* NEWLINE */ | |
281 case '\t': /* TAB */ | |
282 if (wstr) *wstr++ = c; | |
283 n++; | |
284 break; | |
285 case 0x9b: /* CSI */ | |
286 /* | |
287 * CSI $B$N0lHL7A$O(B | |
288 * CSI {P} {I} F | |
289 * $B%Q%i%a%?(B P $B$O(B 03/00 $B$+$i(B 03/15$B!"(B | |
290 * $BCf4VJ8;z(B I $B$O(B 02/00 $B$+$i(B 02/15$B!"(B | |
291 * $B=*C<J8;z(B F $B$O(B 04/00 $B$+$i(B 07/14 $B$NHO0O(B | |
292 */ | |
293 /* | |
294 * $B8=:_Dj5A$5$l$F$$$k$N$O(B directionality $B$@$1$G!"(B | |
295 * $B$=$l$O(B | |
296 * CSI-1-] begin left-to-right text | |
297 * CSI-2-] begin right-to-left text | |
298 * CSI-] end of string | |
299 * $B$G$"$k(B | |
300 * $B$,$H$j$"$($::#$O$3$l$rL5;k$9$k$N$G!"(BCSI $B$N(B | |
301 * $B%7!<%1%s%9$O$9$Y$FL5;k!"$H$$$&$3$H$K$J$k(B | |
302 */ | |
303 xstr1 = getcsi(xstr, len); | |
304 if (xstr1 == NULL) | |
305 return -1; | |
306 len -= xstr1 - xstr; | |
307 xstr = xstr1; | |
308 break; | |
309 case '\033': /* ESC */ | |
310 /* | |
311 * $B%(%9%1!<%W%7!<%1%s%9$N0lHL7A$O(B | |
312 * ESC {I} F | |
313 * $BCf4VJ8;z(B I $B$O(B 02/00 $B$+$i(B 02/15 $B$G!"(B | |
314 * $B=*C<J8;z(B F $B$O(B 03/00 $B$+$i(B 07/14 $B$NHO0O(B | |
315 */ | |
316 /* | |
317 * $B8=:_Dj5A$5$l$F$$$k$N$O!"(B | |
318 * $B%9%?%s%@!<%I%-%c%i%/%?%;%C%H(B | |
319 * ESC-(-F | |
320 * ESC-$-(-F | |
321 * ESC-)-F | |
322 * ESC---F | |
323 * ESC-$-)-F | |
324 * $B%N%s%9%?%s%@!<%I%-%c%i%/%?%;%C%H(B | |
325 * ESC-%-/-[0123] | |
326 * $B%9%?%s%@!<%I$J%-%c%i%/%?%;%C%H$O@5$7$/2r<a(B | |
327 * $B$7$J$/$F$O$J$i$J$$$7!"%N%s%9%?%s%@!<%I$J$b$N$O(B | |
328 * $BL5;k$9$k$1$l$I$b%G!<%?$r%9%-%C%W$9$kI,MW$,$"$k(B | |
329 */ | |
330 xstr1 = getesc(xstr, len); | |
331 if (xstr1 == NULL) | |
332 return -1; | |
333 len -= xstr1 - xstr; | |
334 switch (xstr1 - xstr) { | |
335 case 2: /* ESC - I - F */ | |
336 switch (*xstr++) { | |
337 case '(': /* 94chars CS -> G0 */ | |
338 g0 = *xstr; | |
339 break; | |
340 case ')': /* 94chars CS -> G1 */ | |
341 g1 = *xstr; | |
342 break; | |
343 case '-': /* 96chars CS -> G1 */ | |
344 g1 = *xstr | CS96; | |
345 break; | |
346 default: /* ignore */ | |
347 break; | |
348 } | |
349 break; | |
350 case 3: /* ESC - I - I - F */ | |
351 switch (*xstr++) { | |
352 case '$': | |
353 switch (*xstr++) { | |
354 case '(': /* 94chars MBCS -> G0 */ | |
355 g0 = *xstr | MBCS; | |
356 break; | |
357 case ')': /* 94chars MBCS -> G1 */ | |
358 g1 = *xstr | MBCS; | |
359 break; | |
360 case '-': /* 96chars MBCS -> G1 */ | |
361 g1 = *xstr | CS96 | MBCS; | |
362 break; | |
363 default: /* ignore */ | |
364 break; | |
365 } | |
366 break; | |
367 case '%': | |
368 if (*xstr++ != '/') { | |
369 /* unknown sequence */ | |
370 break; | |
371 } | |
372 /* | |
373 * $B%W%i%$%Y!<%H%(%s%3!<%G%#%s%0(B | |
374 * $B40A4$KL5;k$9$k(B | |
375 * $B$?$@$7$=$N$"$H$KB3$/%G!<%?$r(B | |
376 * $B%9%-%C%W$9$kI,MW$,$"$k(B | |
377 * ESC-%-/-F-M-L | |
378 */ | |
379 len -= 2; | |
380 if (len < 0) | |
381 return -1; | |
382 nskip = (*xstr1 & 0x7f) * 128 + | |
383 (*(xstr1 + 1) & 0x7f); | |
384 if ((len -= nskip) < 0) | |
385 return -1; | |
386 xstr1 += nskip + 2; | |
387 break; | |
388 default: | |
389 break; | |
390 } | |
391 break; | |
392 default: | |
393 break; | |
394 } | |
395 xstr = xstr1; | |
396 break; | |
397 default: | |
398 if (!(c & 0x60)) { | |
399 /* | |
400 * NL/TAB/ESC/CSI $B0J30$N(B C0 or C1 | |
401 * $B$3$l$OL@$i$+$K%(%i!<(B | |
402 */ | |
403 return -1; | |
404 } | |
405 gs = (c & 0x80) ? g1 : g0; | |
406 c &= 0x7f; | |
407 if (gs & MBCS) { | |
408 switch (gs & 0x70) { | |
409 case 0x70: /* 4byte/char */ | |
410 if (--len < 0) return -1; | |
411 c = (c << 8) | (*xstr++ & 0x7f); | |
412 case 0x60: /* 3byte/char */ | |
413 if (--len < 0) return -1; | |
414 c = (c << 8) | (*xstr++ & 0x7f); | |
415 case 0x50: /* 2byte/char */ | |
416 case 0x40: /* 2byte/char */ | |
417 if (--len < 0) return -1; | |
418 c = (c << 8) | (*xstr++ & 0x7f); | |
419 break; | |
420 default: | |
421 return -1; | |
422 } | |
423 } | |
424 if (wstr) { | |
425 switch (gs) { | |
426 case 'B': | |
427 case 'J': | |
428 *wstr++ = c; | |
429 n++; | |
430 break; | |
431 case 'I': | |
432 *wstr++ = 0x80 | c; | |
433 n++; | |
434 break; | |
435 case MBCS|'B': | |
436 *wstr++ = 0x8080 | c; | |
437 n++; | |
438 break; | |
439 } | |
440 } else { | |
441 switch (gs) { | |
442 case 'B': | |
443 case 'J': | |
444 case 'I': | |
445 n++; | |
446 break; | |
447 case MBCS|'B': | |
448 n++; | |
449 break; | |
450 } | |
451 } | |
452 break; | |
453 } | |
454 } | |
455 if (wstr) *wstr = 0; | |
456 return n; | |
457 } |