diff lib/ctext.c @ 0:92745d501b9a

initial import from kinput2-v3.1
author Yoshiki Yazawa <yaz@honeyplanet.jp>
date Mon, 08 Mar 2010 04:44:30 +0900
parents
children 5a32b68b627d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lib/ctext.c	Mon Mar 08 04:44:30 2010 +0900
@@ -0,0 +1,457 @@
+/*
+ * ctext.c -- Compound Text <-> Japanese Wide Character String converter
+ */
+
+/******************************************************************************
+
+- Designation
+		1byte			multi-byte
+		94char      96char	94char       96char
+	-------------------------------------------------------
+	G0  :	ESC ( F	 |  -none-	ESC $ ( F  |  -none-
+	G1  :	ESC ) F  |  ESC - F	ESC $ ) F  |  ESC $ - F
+
+- Final character F
+	1byte
+	    94chars
+		B	ASCII
+		I	JIS KANA
+		J	JIS-ROMAN
+	    96chars
+		A	8859/1 right half
+		B	8859/2 right half
+		C	8859/3 right half
+		D	8859/4 right half
+		F	8859/7 right half
+		G	8859/6 right half
+		H	8859/8 right half
+		M	8859/9 (DIS) right half
+	multi-byte
+	    94chars ^ 2
+		A	GB Hanzi
+		B	JIS Kanji 1983
+		C	KS Hangul/Hanja
+
+-------------------------------------------------------------------------------
+COMPOUND_TEXT specification (Compound Text Encoding Version 1 -- MIT X Consortium Standard)
+- Only G0 and G1 are used; G2 and G3 are not used.
+- G0 is invoked into GL and G1 into GR, and this cannot be changed.
+  That is, Locking Shift and Single Shift are not used.
+- Initially, ISO Latin-1 is designated to G0/G1.
+- ESC-$-F is not used to designate a multibyte character set to G0;
+  ESC-$-(-F is used instead.
+- The usable final characters are those listed above.
+- In C0, the only usable characters are NL, TAB and ESC.
+- In C1, the only usable character is CSI.
+- Text directionality sequences are included:
+	left to right
+	right to left
+	return to the original direction
+******************************************************************************/
+
+/*
+ * Copyright (c) 1989  Software Research Associates, Inc.
+ *
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted, provided
+ * that the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Software Research Associates not be
+ * used in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.  Software Research
+ * Associates makes no representations about the suitability of this software
+ * for any purpose.  It is provided "as is" without express or implied
+ * warranty.
+ *
+ * Author:  Makoto Ishisone, Software Research Associates, Inc., Japan
+ *		ishisone@sra.co.jp
+ */
+
+#ifndef lint
+static char *rcsid = "$Id: ctext.c,v 2.6 1999/03/10 08:55:15 ishisone Exp $";
+#endif
+
+typedef unsigned short	wchar;	/* wide character; convJWStoCT classifies it by its 0x8000 and 0x0080 bits */
+
+#define NULL	0
+
+#define CS96	0x100	/* flag OR-ed with a final character in g0/g1: 96chars CS */
+#define MBCS	0x200	/* flag OR-ed with a final character in g0/g1: Multibyte CS */
+
+/* convJWStoCT -- Japanese Wide Character String -> COMPOUND_TEXT */
+int
+convJWStoCT(wstr, xstr, jisroman)
+register wchar *wstr;
+register unsigned char *xstr;
+int jisroman;	/* if true, JIS ROMAN is used as the G0 character set;
+		 * if false, ASCII is used
+		 */
+/* Converts the 0-terminated wide character string wstr to COMPOUND_TEXT in xstr
+ * and returns the number of bytes produced (not counting the final null byte).
+ * If xstr is NULL, nothing is written and only the byte count is returned.
+ */
+{
+	register int	c;
+	register int	g0, g1;
+	register int	n = 0;
+	int		g0cs;
+	
+	g0cs = jisroman ? 'J' : 'B';	/* final character wanted in G0 */
+
+	g0 = 'B';	/* tracked state starts as ASCII in G0 ... */
+	g1 = CS96|'A';	/* ... and the 8859/1 right half in G1 */
+	
+	/*
+	 * G0 and G1 are used as follows:
+	 *  G0: ASCII / JIS-ROMAN
+	 *  G1: Kanji / Kana
+	 */
+	
+	while (c = *wstr++) {	/* a 0 element ends the input */
+		switch (c & 0x8080) {
+		case 0:		/* ASCII or C0 or DEL */
+			if (g0 != g0cs) {
+				if (xstr) {
+					*xstr++ = '\033';
+					*xstr++ = '(';
+					*xstr++ = g0cs;
+				}
+				n += 3;
+				g0 = g0cs;
+				/*
+				 * We have to invalidate G1 here (so that it is
+				 * designated again before its next use), which
+				 * is unnecessary if Xlib implementation is sane.
+				 */
+				g1 = g0cs;
+			}
+			/*
+			 * Of course it isn't necessary to designate
+			 * ASCII to G0 before a control character, but
+			 * someone reported certain version of Xlib needs
+			 * this. sigh.
+			 */
+			if (c < ' ' || c == 0x7f) {
+				/* C0 or DEL: only TAB and NL are passed through */
+				if (c == '\t' || c == '\n') {
+					if (xstr) *xstr++ = c;
+					n++;
+				}
+				break;
+			}
+			if (xstr) *xstr++ = c & 0x7f;
+			n++;
+			break;
+		case 0x80:	/* Kana or C1 */
+			if (0x80 <= c && c <= 0x9f) break;	/* C1 is dropped */
+			if (g1 != 'I') {
+				if (xstr) {
+					*xstr++ = '\033';
+					*xstr++ = ')';
+					*xstr++ = 'I';
+				}
+				n += 3;
+				g1 = 'I';
+				g0 = 'I';	/* invalidate G0. see the Kanji case below */
+			}
+			if (xstr) *xstr++ = c & 0xff;
+			n++;
+			break;
+		case 0x8080:	/* Kanji */
+			if (g1 != (MBCS|'B')) {
+				if (xstr) {
+					*xstr++ = '\033';
+					*xstr++ = '$';
+					*xstr++ = ')';
+					*xstr++ = 'B';
+				}
+				n += 4;
+				g1 = MBCS|'B';
+				/*
+				 * We have to invalidate G0 here (so that it is
+				 * designated again before its next use), which
+				 * is unnecessary if Xlib implementation is sane.
+				 */
+				g0 = MBCS|'B';
+			}
+			if (xstr) {
+				*xstr++ = (c >> 8) & 0xff;
+				*xstr++ = c & 0xff;
+			}
+			n += 2;
+			break;
+		default:
+			/* 0x8000 set but 0x0080 clear: ignore */
+			break;
+		}
+	}
+	if (xstr) *xstr = '\0';
+	return n;
+}
+
+static unsigned char *
+getesc(str, len)
+unsigned char *str;
+int len;
+{
+	register int	c;
+
+	/* Scan the intermediate characters and the final character that follow
+	 * the ESC of an escape sequence; at most len bytes at str are examined.
+	 */
+	/* intermediate characters range from 02/00 to 02/15 */
+	while (len > 0) {
+		c = *str;
+		if (c < 0x20 || 0x2f < c)
+			break;
+		len--, str++;
+	}
+	/* the final character ranges from 03/00 to 07/14 */
+	if (--len < 0 || (c = *str++) < 0x30 || 0x7e < c)
+		return (unsigned char *)NULL;	/* input exhausted or bad final character */
+
+	return str;	/* points just past the final character */
+}
+
+static unsigned char *
+getcsi(str, len)
+unsigned char *str;
+int len;
+{
+	register int	c;
+
+	/* Scan the parameter characters, intermediate characters and the final
+	 * character that follow the CSI of a CSI sequence; at most len bytes at str are examined.
+	 */
+	/* parameters range from 03/00 to 03/15 */
+	while (len > 0) {
+		c = *str;
+		if (c < 0x30 || 0x3f < c)
+			break;
+		len--, str++;
+	}
+	/* intermediate characters range from 02/00 to 02/15 */
+	while (len > 0) {
+		c = *str;
+		if (c < 0x20 || 0x2f < c)
+			break;
+		len--, str++;
+	}
+	/* the final character ranges from 04/00 to 07/14 */
+	if (--len < 0 || (c = *str++) < 0x40 || 0x7e < c)
+		return (unsigned char *)NULL;	/* input exhausted or bad final character */
+
+	return str;	/* points just past the final character */
+}
+
+/* convCTtoJWS -- COMPOUND_TEXT -> Japanese Wide Character String */
+int
+convCTtoJWS(xstr, len, wstr)
+register unsigned char *xstr;
+int len;
+wchar *wstr;
+/* Converts COMPOUND_TEXT xstr to the wide character string wstr and returns the
+ * number of characters produced (not counting the final null character), or -1
+ * on malformed input.  If wstr is NULL, nothing is stored; only the count is returned.
+ */
+{
+	register int	c;
+	int	nskip;
+	int	n = 0;
+	int	g0, g1, gs;
+	unsigned char	*xstr1;
+
+	/*
+	 * Compound Text may contain null octets, so the string length len
+	 * can be given as an argument; when it is 0 or negative, the length
+	 * is obtained with strlen() (assuming there is no null octet).
+	 * NOTE(review): no declaration of strlen() is visible in this file.
+	 */
+	if (len <= 0) {
+		len = strlen((char *)xstr);
+	}
+
+	/* initial state: ISO 8859/1 is in G0/G1 */
+	g0 = 'B';	/* ASCII -> G0 */
+	g1 = CS96|'A';	/* Latin/1 right hand part -> G1 */
+
+	while (len-- > 0) {
+		switch (c = *xstr++) {
+		case '\n':	/* NEWLINE */
+		case '\t':	/* TAB */
+			if (wstr) *wstr++ = c;
+			n++;
+			break;
+		case 0x9b:	/* CSI */
+			/*
+			 * The general form of a CSI sequence is
+			 *	CSI {P} {I} F
+			 * where parameter P is in 03/00 .. 03/15,
+			 * intermediate character I is in 02/00 .. 02/15 and
+			 * final character F is in 04/00 .. 07/14
+			 */
+			/*
+			 * The only sequences currently defined are the
+			 * directionality ones:
+			 *	CSI-1-]		begin left-to-right text
+			 *	CSI-2-]		begin right-to-left text
+			 *	CSI-]		end of string
+			 * They are ignored for the time being, so in effect
+			 * every CSI sequence is skipped: getcsi() finds its
+			 * end and the bytes in between are discarded
+			 */
+			xstr1 = getcsi(xstr, len);
+			if (xstr1 == NULL)
+				return -1;
+			len -= xstr1 - xstr;
+			xstr = xstr1;
+			break;
+		case '\033':	/* ESC */
+			/*
+			 * The general form of an escape sequence is
+			 *	ESC {I} F
+			 * where intermediate character I is in 02/00 .. 02/15
+			 * and final character F is in 03/00 .. 07/14
+			 */
+			/*
+			 * Currently defined are
+			 *   standard character sets
+			 *	ESC-(-F
+			 *	ESC-$-(-F
+			 *	ESC-)-F
+			 *	ESC---F
+			 *	ESC-$-)-F
+			 *   non-standard character sets
+			 *	ESC-%-/-[0123]
+			 * Standard character sets have to be interpreted
+			 * correctly; non-standard ones are ignored,
+			 * but their data still has to be skipped
+			 */
+			xstr1 = getesc(xstr, len);
+			if (xstr1 == NULL)
+				return -1;
+			len -= xstr1 - xstr;
+			switch (xstr1 - xstr) {	/* number of bytes following ESC */
+			case 2:		/* ESC - I - F */
+				switch (*xstr++) {
+				case '(':	/* 94chars CS -> G0 */
+					g0 = *xstr;
+					break;
+				case ')':	/* 94chars CS -> G1 */
+					g1 = *xstr;
+					break;
+				case '-':	/* 96chars CS -> G1 */
+					g1 = *xstr | CS96;
+					break;
+				default:	/* ignore */
+					break;
+				}
+				break;
+			case 3:		/* ESC - I - I - F */
+				switch (*xstr++) {
+				case '$':
+					switch (*xstr++) {
+					case '(':	/* 94chars MBCS -> G0 */
+						g0 = *xstr | MBCS;
+						break;
+					case ')':	/* 94chars MBCS -> G1 */
+						g1 = *xstr | MBCS;
+						break;
+					case '-':	/* 96chars MBCS -> G1 */
+						g1 = *xstr | CS96 | MBCS;
+						break;
+					default:	/* ignore */
+						break;
+					}
+					break;
+				case '%':
+					if (*xstr++ != '/') {
+						/* unknown sequence */
+						break;
+					}
+					/*
+					 * Private encoding:
+					 * ignored entirely.
+					 * However, the data that follows
+					 * it has to be skipped
+					 *	ESC-%-/-F-M-L
+					 */
+					len -= 2;	/* the M and L octets */
+					if (len < 0)
+						return -1;
+					nskip = (*xstr1 & 0x7f) * 128 +
+					    (*(xstr1 + 1) & 0x7f);	/* data length taken from M and L */
+					if ((len -= nskip) < 0)
+						return -1;
+					xstr1 += nskip + 2;
+					break;
+				default:
+					break;
+				}
+				break;
+			default:
+				break;
+			}
+			xstr = xstr1;
+			break;
+		default:
+			if (!(c & 0x60)) {
+				/*
+				 * C0 or C1 other than NL/TAB/ESC/CSI:
+				 * this is clearly an error
+				 */
+				return -1;
+			}
+			gs = (c & 0x80) ? g1 : g0;	/* high bit set: G1 set, else G0 set */
+			c &= 0x7f;
+			if (gs & MBCS) {
+				switch (gs & 0x70) {	/* final character decides bytes per character */
+				case 0x70:	/* 4byte/char */
+					if (--len < 0) return -1;
+					c = (c << 8) | (*xstr++ & 0x7f);	/* FALLTHROUGH */
+				case 0x60:	/* 3byte/char */
+					if (--len < 0) return -1;
+					c = (c << 8) | (*xstr++ & 0x7f);	/* FALLTHROUGH */
+				case 0x50:	/* 2byte/char */
+				case 0x40:	/* 2byte/char */
+					if (--len < 0) return -1;
+					c = (c << 8) | (*xstr++ & 0x7f);
+					break;
+				default:
+					return -1;
+				}
+			}
+			if (wstr) {
+				switch (gs) {	/* characters of any other set are dropped */
+				case 'B':
+				case 'J':
+					*wstr++ = c;
+					n++;
+					break;
+				case 'I':
+					*wstr++ = 0x80 | c;
+					n++;
+					break;
+				case MBCS|'B':
+					*wstr++ = 0x8080 | c;
+					n++;
+					break;
+				}
+			} else {
+				switch (gs) {	/* count only; same sets as in the storing branch */
+				case 'B':
+				case 'J':
+				case 'I':
+					n++;
+					break;
+				case MBCS|'B':
+					n++;
+					break;
+				}
+			}
+			break;
+		}
+	}
+	if (wstr) *wstr = 0;
+	return n;
+}