diff src/charset.c @ 17052:d0d7b244b1d0

Initial revision
author Karl Heuer <kwzh@gnu.org>
date Thu, 20 Feb 1997 07:02:49 +0000
parents
children 70194012fb3a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/charset.c	Thu Feb 20 07:02:49 1997 +0000
@@ -0,0 +1,1452 @@
+/* Multilingual characters handler.
+   Ver.1.0
+
+   Copyright (C) 1995 Free Software Foundation, Inc.
+   Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* At first, see the document in `charset.h' to understand the code in
+   this file.  */
+
+#include <stdio.h>
+
+#ifdef emacs
+
+#include <sys/types.h>
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "charset.h"
+#include "coding.h"
+
+#else  /* not emacs */
+
+#include "mulelib.h"
+
+#endif /* emacs */
+
+Lisp_Object Qcharset, Qascii, Qcomposition;
+
+/* Declaration of special leading-codes.  */
+int leading_code_composition;	/* for composite characters */
+int leading_code_private_11;	/* for private DIMENSION1 of 1-column */
+int leading_code_private_12;	/* for private DIMENSION1 of 2-column */
+int leading_code_private_21;	/* for private DIMENSION2 of 1-column */
+int leading_code_private_22;	/* for private DIMENSION2 of 2-column */
+
+/* Declaration of special charsets.  */
+int charset_ascii;		/* ASCII */
+int charset_composition;	/* for a composite character */
+int charset_latin_iso8859_1;	/* ISO8859-1 (Latin-1) */
+int charset_jisx0208_1978;	/* JISX0208.1978 (Japanese Kanji old set) */
+int charset_jisx0208;		/* JISX0208.1983 (Japanese Kanji) */
+int charset_katakana_jisx0201;	/* JISX0201.Kana (Japanese Katakana) */
+int charset_latin_jisx0201;	/* JISX0201.Roman (Japanese Roman) */
+int charset_big5_1;		/* Big5 Level 1 (Chinese Traditional) */
+int charset_big5_2;		/* Big5 Level 2 (Chinese Traditional) */
+
+Lisp_Object Qcharset_table;
+
+/* A char-table containing information of each character set.  */
+Lisp_Object Vcharset_table;
+
+/* A vector of charset symbol indexed by charset-id.  This is used
+   only for returning charset symbol from C functions.  */
+Lisp_Object Vcharset_symbol_table;
+
+/* A list of charset symbols ever defined.  */
+Lisp_Object Vcharset_list;
+
+/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
+int bytes_by_char_head[256];
+int width_by_char_head[256];
+
+/* Mapping table from ISO2022's charset (specified by DIMENSION,
+   CHARS, and FINAL-CHAR) to Emacs' charset.  */
+int iso_charset_table[2][2][128];
+
+/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
+unsigned char *_fetch_multibyte_char_p;
+int _fetch_multibyte_char_len;
+
+/* Set STR a pointer to the multi-byte form of the character C.  If C
+   is not a composite character, the multi-byte form is set in WORKBUF
+   and STR points WORKBUF.  The caller should allocate at least 4-byte
+   area at WORKBUF in advance.  Returns the length of the multi-byte
+   form.
+
+   Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
+   function directly if C can be an ASCII character.  */
+
+int
+non_ascii_char_to_string (c, workbuf, str)
+     int c;
+     unsigned char *workbuf, **str;
+{
+  int charset;
+  unsigned char c1, c2;
+
+  if (COMPOSITE_CHAR_P (c))
+    {
+      int cmpchar_id = COMPOSITE_CHAR_ID (c);
+
+      if (cmpchar_id < n_cmpchars)
+	{
+	  *str = cmpchar_table[cmpchar_id]->data;
+	  return cmpchar_table[cmpchar_id]->len;
+	}
+      else
+	{
+	  *str = workbuf;
+	  return 0;
+	}
+    }
+
+  SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
+
+  *str = workbuf;
+  *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
+  if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
+    workbuf++;
+  *workbuf++ = c1 | 0x80;
+  if (c2)
+    *workbuf++ = c2 | 0x80;
+
+  return (workbuf - *str);
+}
+
+/* Return a non-ASCII character of which multi-byte form is at STR of
+   length LEN.  If ACTUAL_LEN is not NULL, the actual length of the
+   character is set to the address ACTUAL_LEN.
+
+   Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
+   directly if STR can hold an ASCII character.  */
+
+string_to_non_ascii_char (str, len, actual_len)
+     unsigned char *str;
+     int len, *actual_len;
+{
+  int charset;
+  unsigned char c1, c2;
+  register int c;
+
+  if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
+    {
+      if (actual_len)
+	*actual_len = 1;
+      return (int) *str;
+    }
+
+  c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
+
+  if (actual_len)
+    *actual_len = (charset == CHARSET_COMPOSITION
+		   ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
+		   : BYTES_BY_CHAR_HEAD (*str));
+  return c;
+}
+
+/* Return the length of the multi-byte form at string STR of length LEN.  */
+int
+multibyte_form_length (str, len)
+     unsigned char *str;
+     int len;
+{
+  int charset;
+  unsigned char c1, c2;
+  register int c;
+
+  if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII)
+    return 1;
+
+  return (charset == CHARSET_COMPOSITION
+	  ? cmpchar_table[(c1 << 7) | c2]->len
+	  : BYTES_BY_CHAR_HEAD (*str));
+}
+
+/* Check if string STR of length LEN contains valid multi-byte form of
+   a character.  If valid, charset and position codes of the character
+   is set at *CHARSET, *C1, and *C2, and return 0.  If not valid,
+   return -1.  This should be used only in the macro SPLIT_STRING
+   which checks range of STR in advance.  */
+
+split_non_ascii_string (str, len, charset, c1, c2)
+     register unsigned char *str, *c1, *c2;
+     register int len, *charset;
+{
+  register unsigned int cs = *str++;
+
+  if (cs == LEADING_CODE_COMPOSITION)
+    {
+      int cmpchar_id = str_cmpchar_id (str - 1, len);
+
+      if (cmpchar_id < 0)
+	return -1;
+      *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
+    }
+  else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
+	   && CHARSET_DEFINED_P (cs))
+    {
+      *charset = cs;
+      if (*str < 0xA0)
+	return -1;
+      *c1 = (*str++) & 0x7F;
+      if (CHARSET_DIMENSION (cs) == 2)
+	{
+	  if (*str < 0xA0)
+	    return -1;
+	  *c2 = (*str++) & 0x7F;
+	}
+    }
+  else
+    return -1;
+  return 0;
+}
+
+/* Update the table Vcharset_table with the given arguments (see the
+   document of `define-charset' for the meaning of each argument).
+   Several other table contents are also updated.  The caller should
+   check the validity of CHARSET-ID and the remaining arguments in
+   advance.  */
+
+void
+update_charset_table (charset_id, dimension, chars, width, direction,
+		      iso_final_char, iso_graphic_plane,
+		      short_name, long_name, description)
+     Lisp_Object charset_id, dimension, chars, width, direction;
+     Lisp_Object iso_final_char, iso_graphic_plane;
+     Lisp_Object short_name, long_name, description;
+{
+  int charset = XINT (charset_id);
+  int bytes;
+  unsigned char leading_code_base, leading_code_ext;
+
+  if (NILP (Faref (Vcharset_table, charset_id)))
+    Faset (Vcharset_table, charset_id,
+	   Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil));
+
+  /* Get byte length of multibyte form, base leading-code, and
+     extended leading-code of the charset.  See the comment under the
+     title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
+  bytes = XINT (dimension);
+  if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
+    {
+      /* Official charset, it doesn't have an extended leading-code.  */
+      if (charset != CHARSET_ASCII)
+	bytes += 1; /* For a base leading-code.  */
+      leading_code_base = charset;
+      leading_code_ext = 0;
+    }
+  else
+    {
+      /* Private charset.  */
+      bytes += 2; /* For base and extended leading-codes.  */
+      leading_code_base
+	= (charset < LEADING_CODE_EXT_12
+	   ? LEADING_CODE_PRIVATE_11
+	   : (charset < LEADING_CODE_EXT_21
+	      ? LEADING_CODE_PRIVATE_12
+	      : (charset < LEADING_CODE_EXT_22
+		 ? LEADING_CODE_PRIVATE_21
+		 : LEADING_CODE_PRIVATE_22)));
+      leading_code_ext = charset;
+    } 
+
+  CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
+  CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
+  CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
+  CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
+  CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
+  CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
+  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
+    = make_number (leading_code_base);
+  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
+    = make_number (leading_code_ext);
+  CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
+  CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
+    = iso_graphic_plane;
+  CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
+  CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
+  CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
+  CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
+
+  {
+    /* If we have already defined a charset which has the same
+       DIMENSION, CHARS and ISO-FINAL-CHAR but the different
+       DIRECTION, we must update the entry REVERSE-CHARSET of both
+       charsets.  If there's no such charset, the value of the entry
+       is set to nil.  */
+    int i;
+
+    for (i = 0; i < MAX_CHARSET; i++)
+      if (!NILP (CHARSET_TABLE_ENTRY (i)))
+	{
+	  if (CHARSET_DIMENSION (i) == XINT (dimension)
+	      && CHARSET_CHARS (i) == XINT (chars)
+	      && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
+	      && CHARSET_DIRECTION (i) != XINT (direction))
+	    {
+	      CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
+		= make_number (i);
+	      CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
+	      break;
+	    }
+	}
+    if (i >= MAX_CHARSET)
+      /* No such a charset.  */
+      CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
+	= make_number (-1);
+  }
+
+  if (charset != CHARSET_ASCII
+      && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
+    {
+      /* Update tables bytes_by_char_head and width_by_char_head.  */
+      bytes_by_char_head[leading_code_base] = bytes;
+      width_by_char_head[leading_code_base] = XINT (width);
+
+      /* Update table emacs_code_class.  */
+      emacs_code_class[charset] = (bytes == 2
+				   ? EMACS_leading_code_2
+				   : (bytes == 3
+				      ? EMACS_leading_code_3
+				      : EMACS_leading_code_4));
+    }
+
+  /* Update table iso_charset_table.  */
+  if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
+    ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
+}
+
+#ifdef emacs
+
+/* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
+   is invalid.  */
+int
+get_charset_id (charset_symbol)
+     Lisp_Object charset_symbol;
+{
+  Lisp_Object val;
+  int charset;
+
+  return ((SYMBOLP (charset_symbol)
+	   && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
+	   && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
+	       CHARSET_VALID_P (charset)))
+	  ? charset : -1);
+}
+
+/* Return an identification number for a new private charset of
+   DIMENSION and WIDTH.  If there's no more room for the new charset,
+   return 0.  */
+Lisp_Object
+get_new_private_charset_id (dimension, width)
+     int dimension, width;
+{
+  int charset, from, to;
+
+  if (dimension == 1)
+    {
+      if (width == 1)
+	from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
+      else
+	from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
+    }
+  else
+    {
+      if (width == 1)
+	from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
+      else
+	from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX - 1;
+    }
+
+  for (charset = from; charset < to; charset++)
+    if (!CHARSET_DEFINED_P (charset)) break;
+
+  return make_number (charset < to ? charset : 0);
+}
+
+DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
+  "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
+If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
+ treated as a private charset.\n\
+INFO-VECTOR is a vector of the format:\n\
+   [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
+    SHORT-NAME LONG-NAME DESCRIPTION]\n\
+The meanings of each elements is as follows:\n\
+DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
+CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
+WIDTH (integer) is the number of columns a character in the charset\n\
+occupies on the screen: one of 0, 1, and 2.\n\
+\n\
+DIRECTION (integer) is the rendering direction of characters in the\n\
+charset when rendering.  If 0, render from right to left, else\n\
+render from left to right.\n\
+\n\
+ISO-FINAL-CHAR (character) is the final character of the\n\
+corresponding ISO 2022 charset.\n\
+\n\
+ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
+while encoding to variants of ISO 2022 coding system, one of the\n\
+following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
+\n\
+SHORT-NAME (string) is the short name to refer to the charset.\n\
+\n\
+LONG-NAME (string) is the long name to refer to the charset.\n\
+\n\
+DESCRIPTION (string) is the description string of the charset.")
+  (charset_id, charset_symbol, info_vector)
+     Lisp_Object charset_id, charset_symbol, info_vector;
+{
+  Lisp_Object *vec;
+
+  if (!NILP (charset_id))
+    CHECK_NUMBER (charset_id, 0);
+  CHECK_SYMBOL (charset_symbol, 1);
+  CHECK_VECTOR (info_vector, 2);
+
+  if (! NILP (charset_id))
+    {
+      if (! CHARSET_VALID_P (XINT (charset_id)))
+	error ("Invalid CHARSET: %d", XINT (charset_id));
+      else if (CHARSET_DEFINED_P (XINT (charset_id)))
+	error ("Already defined charset: %d", XINT (charset_id));
+    }
+
+  vec = XVECTOR (info_vector)->contents;
+  if (XVECTOR (info_vector)->size != 9
+      || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
+      || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
+      || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
+      || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
+      || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
+      || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
+      || !STRINGP (vec[6])
+      || !STRINGP (vec[7])
+      || !STRINGP (vec[8]))
+    error ("Invalid info-vector argument for defining charset %s",
+	   XSYMBOL (charset_symbol)->name->data);
+
+  if (NILP (charset_id))
+    {
+      charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
+      if (XINT (charset_id) == 0)
+	error ("There's no room for a new private charset %s",
+	       XSYMBOL (charset_symbol)->name->data);
+    }
+
+  update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
+			vec[4], vec[5], vec[6], vec[7], vec[8]);
+  Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id));
+  CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
+  Vcharset_list = Fcons (charset_symbol, Vcharset_list);
+  return Qnil;
+}
+
+DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
+       4, 4, 0,
+  "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
+CHARSET should be defined by `defined-charset' in advance.")
+  (dimension, chars, final_char, charset_symbol)
+     Lisp_Object dimension, chars, final_char, charset_symbol;
+{
+  int charset;
+
+  CHECK_NUMBER (dimension, 0);
+  CHECK_NUMBER (chars, 1);
+  CHECK_NUMBER (final_char, 2);
+  CHECK_SYMBOL (charset_symbol, 3);
+
+  if (XINT (dimension) != 1 && XINT (dimension) != 2)
+    error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
+  if (XINT (chars) != 94 && XINT (chars) != 96)
+    error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
+  if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
+    error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
+  if ((charset = get_charset_id (charset_symbol)) < 0)
+    error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
+
+  ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
+  return Qnil;
+}
+
+/* Return number of different charsets in STR of length LEN.  In
+   addition, for each found charset N, CHARSETS[N] is set 1.  The
+   caller should allocate CHARSETS (MAX_CHARSET bytes) in advance.  */
+
+int
+find_charset_in_str (str, len, charsets)
+     unsigned char *str, *charsets;
+     int len;
+{
+  int num = 0;
+
+  while (len > 0)
+    {
+      int bytes = BYTES_BY_CHAR_HEAD (*str);
+      int charset = CHARSET_AT (str);
+
+      if (!charsets[charset])
+	{
+	  charsets[charset] = 1;
+	  num += 1;
+	}
+      str += bytes;
+      len -= bytes;
+    }
+  return num;
+}
+
+DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
+       2, 2, 0,
+  "Return a list of charsets in the region between BEG and END.\n\
+BEG and END are buffer positions.")
+  (beg, end)
+     Lisp_Object beg, end;
+{
+  char charsets[MAX_CHARSET];
+  int from, to, stop, i;
+  Lisp_Object val;
+
+  validate_region (&beg, &end);
+  from = XFASTINT (beg);
+  stop = to = XFASTINT (end);
+  if (from < GPT && GPT < to)
+    stop = GPT;
+  bzero (charsets, MAX_CHARSET);
+  while (1)
+    {
+      find_charset_in_str (POS_ADDR (from), stop - from, charsets);
+      if (stop < to)
+	from = stop, stop = to;
+      else
+	break;
+    }
+  val = Qnil;
+  for (i = MAX_CHARSET - 1; i >= 0; i--)
+    if (charsets[i])
+      val = Fcons (CHARSET_SYMBOL (i), val);
+  return val;
+}
+
+DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
+       1, 1, 0,
+  "Return a list of charsets in STR.")
+  (str)
+     Lisp_Object str;
+{
+  char charsets[MAX_CHARSET];
+  int i;
+  Lisp_Object val;
+
+  CHECK_STRING (str, 0);
+  bzero (charsets, MAX_CHARSET);
+  find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
+  val = Qnil;
+  for (i = MAX_CHARSET - 1; i >= 0; i--)
+    if (charsets[i])
+      val = Fcons (CHARSET_SYMBOL (i), val);
+  return val;
+}
+
+DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
+  "Return a character of CHARSET and position-codes CODE1 and CODE2.\n\
+CODE1 and CODE2 are optional, but if you don't supply\n\
+ sufficient position-codes, return a generic character which stands for\n\
+all characters or group of characters in the character sets.\n\
+A generic character can be an argument of `modify-syntax-entry' and\n\
+`modify-category-entry'.")
+  (charset, code1, code2)
+     Lisp_Object charset, code1, code2;
+{
+  CHECK_NUMBER (charset, 0);
+
+  if (NILP (code1))
+    XSETFASTINT (code1, 0);
+  else
+    CHECK_NUMBER (code1, 1);
+  if (NILP (code2))
+    XSETFASTINT (code2, 0);
+  else
+    CHECK_NUMBER (code2, 2);
+
+  if (!CHARSET_DEFINED_P (XINT (charset)))
+    error ("Invalid charset: %d", XINT (charset));
+
+  return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
+}
+
+DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
+  "Return list of charset and one or two position-codes of CHAR.")
+  (ch)
+     Lisp_Object ch;
+{
+  Lisp_Object val;
+  int charset;
+  unsigned char c1, c2;
+
+  CHECK_NUMBER (ch, 0);
+  SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
+  return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2)
+	  ? Fcons (CHARSET_SYMBOL (charset),
+		   Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
+	  : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
+}
+
+DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
+  "Return charset of CHAR.")
+  (ch)
+     Lisp_Object ch;
+{
+  CHECK_NUMBER (ch, 0);
+
+  return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
+}
+
+DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
+  "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.")
+  (dimension, chars, final_char)
+     Lisp_Object dimension, chars, final_char;
+{
+  int charset;
+
+  CHECK_NUMBER (dimension, 0);
+  CHECK_NUMBER (chars, 1);
+  CHECK_NUMBER (final_char, 2);
+
+  if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
+    return Qnil;
+  return CHARSET_SYMBOL (charset);
+}
+
+DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
+  "Return byte length of multi-byte form of CHAR.")
+  (ch)
+     Lisp_Object ch;
+{
+  Lisp_Object val;
+  int bytes;
+
+  CHECK_NUMBER (ch, 0);
+  if (COMPOSITE_CHAR_P (XFASTINT (ch)))
+    {
+      unsigned int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
+
+      bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
+    }
+  else
+    {
+      int charset = CHAR_CHARSET (XFASTINT (ch));
+
+      bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
+    }
+
+  XSETFASTINT (val, bytes);
+  return val;
+}
+
+/* Return the width of character of which multi-byte form starts with
+   C.  The width is measured by how many columns occupied on the
+   screen when displayed in the current buffer.  */
+
+#define ONE_BYTE_CHAR_WIDTH(c)					     	\
+  (c < 0x20							     	\
+   ? (c == '\t'							     	\
+      ? current_buffer->tab_width				     	\
+      : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))	\
+   : (c < 0x7f							     	\
+      ? 1							     	\
+      : (c == 0x7F						     	\
+	 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)		     	\
+	 : ((! NILP (current_buffer->enable_multibyte_characters)    	\
+	     && BASE_LEADING_CODE_P (c))			     	\
+	    ? WIDTH_BY_CHAR_HEAD (c)				     	\
+	    : 4))))						     	\
+
+
+DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
+  "Return width of CHAR when displayed in the current buffer.\n\
+The width is measured by how many columns it occupies on the screen.")
+  (ch)
+       Lisp_Object ch;
+{
+  Lisp_Object val;
+  int c;
+
+  CHECK_NUMBER (ch, 0);
+
+  c = XFASTINT (ch);
+  if (SINGLE_BYTE_CHAR_P (c))
+    XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
+  else if (COMPOSITE_CHAR_P (c))
+    {
+      int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
+      XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
+    }
+  else
+    {
+      int charset = CHAR_CHARSET (c);
+
+      XSETFASTINT (val, CHARSET_WIDTH (charset));
+    }
+  return val;
+}
+
+/* Return width of string STR of length LEN when displayed in the
+   current buffer.  The width is measured by how many columns it
+   occupies on the screen.  */
+int
+strwidth (str, len)
+     unsigned char *str;
+     int len;
+{
+  unsigned char *endp = str + len;
+  int width = 0;
+
+  while (str < endp) {
+    if (*str == LEADING_CODE_COMPOSITION)
+      {
+	int id = str_cmpchar_id (str, endp - str);
+
+	if (id < 0)
+	  {
+	    width += 4;
+	    str++;
+	  }
+	else
+	  {
+	    width += cmpchar_table[id]->width;
+	    str += cmpchar_table[id]->len;
+	  }
+      }
+    else
+      {
+	width += ONE_BYTE_CHAR_WIDTH (*str);
+	str += BYTES_BY_CHAR_HEAD (*str);
+      }
+  }
+  return width;
+}
+
+DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
+  "Return width of STRING when displayed in the current buffer.\n\
+Width is measured by how many columns it occupies on the screen.\n\
+When calculating width of a multi-byte character in STRING,\n\
+ only the base leading-code is considered and the validity of\n\
+ the following bytes are not checked.")
+  (str)
+     Lisp_Object str;
+{
+  Lisp_Object val;
+
+  CHECK_STRING (str, 0);
+  XSETFASTINT (val, strwidth (XSTRING (str)->data, XSTRING (str)->size));
+  return val;
+}
+
+DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
+  "Return the direction of CHAR.\n\
+The returned value is 0 for left-to-right and 1 for right-to-left.")
+  (ch)
+     Lisp_Object ch;
+{
+  int charset;
+
+  CHECK_NUMBER (ch, 0);
+  charset = CHAR_CHARSET (XFASTINT (ch));
+  if (!CHARSET_DEFINED_P (charset))
+    error ("Invalid character: %d", XINT (ch));
+  return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
+}
+
+DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
+  "Return number of characters in STRING.")
+  (str)
+     Lisp_Object str;
+{
+  Lisp_Object val;
+  unsigned char *p, *endp;
+  int chars;
+
+  CHECK_STRING (str, 0);
+
+  p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
+  chars = 0;
+  while (p < endp)
+    {
+      if (*p == LEADING_CODE_COMPOSITION)
+	{
+	  p++;
+	  while (p < endp && ! CHAR_HEAD_P (p)) p++;
+	}
+      else
+	p += BYTES_BY_CHAR_HEAD (*p);
+      chars++;
+    }
+
+  XSETFASTINT (val, chars);
+  return val;
+}
+
+DEFUN ("char-boundary-p", Fchar_boundary_p, Schar_boundary_p, 1, 1, 0,
+  "Return non-nil value if POS is at character boundary of multibyte form.\n\
+The return value is:\n\
+ 0 if POS is at an ASCII character or at the end of range,\n\
+ 1 if POS is at a head of 2-byte length multi-byte form,\n\
+ 2 if POS is at a head of 3-byte length multi-byte form,\n\
+ 3 if POS is at a head of 4-byte length multi-byte form,\n\
+ 4 if POS is at a head of multi-byte form of a composite character.\n\
+If POS is out of range or not at character boundary, return NIL.")
+  (pos)
+     Lisp_Object pos;
+{
+  Lisp_Object val;
+  int n;
+
+  CHECK_NUMBER_COERCE_MARKER (pos, 0);
+
+  n = XINT (pos);
+  if (n < BEGV || n > ZV)
+    return Qnil;
+
+  if (n == ZV || NILP (current_buffer->enable_multibyte_characters))
+    XSETFASTINT (val, 0);
+  else
+    {
+      unsigned char *p = POS_ADDR (n);
+
+      if (SINGLE_BYTE_CHAR_P (*p))
+	XSETFASTINT (val, 0);
+      else if (*p == LEADING_CODE_COMPOSITION)
+	XSETFASTINT (val, 4);
+      else if (BYTES_BY_CHAR_HEAD (*p) > 1)
+	XSETFASTINT (val, BYTES_BY_CHAR_HEAD (*p) - 1);
+      else
+	val = Qnil;
+    }
+  return val;
+}
+
+DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
+  "Concatenate all the argument characters and make the result a string.")
+  (nargs, args)
+     int nargs;
+     Lisp_Object *args;
+{
+  int i, n = XINT (nargs);
+  unsigned char *buf
+    = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
+  unsigned char *p = buf;
+  Lisp_Object val;
+
+  for (i = 0; i < n; i++)
+    {
+      int c, len;
+      unsigned char *str;
+
+      if (!INTEGERP (args[i]))
+	{
+	  free (buf);
+	  CHECK_NUMBER (args[i], 0);
+	}
+      c = XINT (args[i]);
+      len = CHAR_STRING (c, p, str);
+      if (p != str)
+	/* C is a composite character.  */
+	bcopy (str, p, len);
+      p += len;
+    }
+
+  val = make_string (buf, p - buf);
+  free (buf);
+  return val;
+}
+
+#endif /* emacs */
+
+/*** Composite characters staffs ***/
+
+/* Each composite character is identified by CMPCHAR-ID which is
+   assigned when Emacs needs the character code of the composite
+   character (e.g. when displaying it on the screen).  See the
+   document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
+   composite character is represented in Emacs.  */
+
+/* If `static' is defined, it means that it is defined to null string. */
+#ifndef static
+/* The following function is copied from lread.c.  */
+static int
+hash_string (ptr, len)
+     unsigned char *ptr;
+     int len;
+{
+  register unsigned char *p = ptr;
+  register unsigned char *end = p + len;
+  register unsigned char c;
+  register int hash = 0;
+
+  while (p != end)
+    {
+      c = *p++;
+      if (c >= 0140) c -= 40;
+      hash = ((hash<<3) + (hash>>28) + c);
+    }
+  return hash & 07777777777;
+}
+#endif
+
+/* Table of pointers to the structure `cmpchar_info' indexed by
+   CMPCHAR-ID.  */
+struct cmpchar_info **cmpchar_table;
+/* The current size of `cmpchar_table'.  */
+static int cmpchar_table_size;
+/* Number of the current composite characters.  */
+int n_cmpchars;
+
+#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
+
+static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
+
+/* Each element of `cmpchar_hash_table' is a pointer to an array of
+   integer, where the 1st element is the size of the array, the 2nd
+   element is how many elements are actually used in the array, and
+   the remaining elements are CMPCHAR-IDs of composite characters of
+   the same hash value.  */
+#define CMPCHAR_HASH_SIZE(table) table[0]
+#define CMPCHAR_HASH_USED(table) table[1]
+#define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
+
+/* Return CMPCHAR-ID of the composite character in STR of the length
+   LEN.  If the composite character has not yet been registered,
+   register it in `cmpchar_table' and assign new CMPCHAR-ID.  This
+   is the sole function for assigning CMPCHAR-ID.  */
+int
+str_cmpchar_id (str, len)
+     unsigned char *str;
+     int len;
+{
+  int hash_idx, *hashp;
+  unsigned char *buf;
+  int embedded_rule;		/* 1 if composition rule is embedded.  */
+  int chars;			/* number of components.  */
+  int i;
+  struct cmpchar_info *cmpcharp;
+
+  if (len < 5)
+    /* Any composite char have at least 3-byte length.  */
+    return -1;
+
+  /* The second byte 0xFF means compostion rule is embedded.  */
+  embedded_rule = (str[1] == 0xFF);
+
+  /* At first, get the actual length of the composite character.  */
+  {
+    unsigned char *p, *endp = str + 1, *lastp = str + len;
+    int bytes;
+
+    while (endp < lastp && ! CHAR_HEAD_P (endp)) endp++;
+    chars = 0;
+    p = str + 1 + embedded_rule;
+    while (p < endp)
+      {
+	/* No need of checking if *P is 0xA0 because
+	 BYTES_BY_CHAR_HEAD (0x80) surely returns 2.  */
+	p += (bytes = BYTES_BY_CHAR_HEAD (*p - 0x20) + embedded_rule);
+	chars++;
+      }
+    len = (p -= embedded_rule) - str;
+    if (p > endp)
+      len -= - bytes, chars--;
+
+    if (chars < 2 || chars > MAX_COMPONENT_COUNT)
+      /* Invalid number of components.  */
+      return -1;
+  }
+  hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
+  hashp = cmpchar_hash_table[hash_idx];
+
+  /* Then, look into the hash table.  */
+  if (hashp != NULL)
+    /* Find the correct one among composite characters of the same
+       hash value.  */
+    for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
+      {
+	cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
+	if (len == cmpcharp->len
+	    && ! bcmp (str, cmpcharp->data, len))
+	  return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
+      }
+
+  /* We have to register the composite character in cmpchar_table.  */
+  /* Make the entry in hash table.  */
+  if (hashp == NULL)
+    {
+      /* Make a table for 8 composite characters initially.  */
+      hashp = (cmpchar_hash_table[hash_idx]
+	       = (int *) xmalloc (sizeof (int) * (2 + 8)));
+      CMPCHAR_HASH_SIZE (hashp) = 10;
+      CMPCHAR_HASH_USED (hashp) = 2;
+    }
+  else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
+    {
+      CMPCHAR_HASH_SIZE (hashp) += 8;
+      hashp = (cmpchar_hash_table[hash_idx]
+	       = (int *) xrealloc (hashp, 
+				   sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
+    }
+  CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
+  CMPCHAR_HASH_USED (hashp)++;
+
+  /* Set information of the composite character in cmpchar_table.  */
+  if (cmpchar_table_size == 0)
+    {
+      /* This is the first composite character to be registered.  */
+      cmpchar_table_size = 256;
+      cmpchar_table
+	= (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
+					    * cmpchar_table_size);
+    }
+  else if (cmpchar_table_size <= n_cmpchars)
+    {
+      cmpchar_table_size += 256;
+      cmpchar_table
+	= (struct cmpchar_info **) xrealloc (cmpchar_table,
+					     sizeof (cmpchar_table[0])
+					     * cmpchar_table_size);
+    }
+
+  cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
+
+  cmpcharp->len = len;
+  cmpcharp->data = (unsigned char *) xmalloc (len + 1);
+  bcopy (str, cmpcharp->data, len);
+  cmpcharp->data[len] = 0;
+  cmpcharp->glyph_len = chars;
+  cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
+  if (embedded_rule)
+    {
+      cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
+      cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
+    }
+  else
+    {
+      cmpcharp->cmp_rule = NULL;
+      cmpcharp->col_offset = NULL;
+    }
+
+  /* Setup GLYPH data and composition rules (if any) so as not to make
+     them every time on displaying.  */
+  {
+    unsigned char *bufp;
+    int width;
+    float leftmost = 0.0, rightmost = 1.0;
+
+    if (embedded_rule)
+      /* At first, col_offset[N] is set to relative to col_offset[0].  */
+      cmpcharp->col_offset[0] = 0;
+
+    for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
+      {
+	if (embedded_rule)
+	  cmpcharp->cmp_rule[i] = *bufp++;
+
+	if (*bufp == 0xA0)	/* This is an ASCII character.  */
+	  {
+	    cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
+	    width = 1;
+	    bufp++;
+	  }
+	else			/* Multibyte character.  */
+	  {
+	    /* Make `bufp' point normal multi-byte form temporally.  */
+	    *bufp -= 0x20;
+	    cmpcharp->glyph[i]
+	      = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0), 0);
+	    width = WIDTH_BY_CHAR_HEAD (*bufp);
+	    *bufp += 0x20;
+	    bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
+	  }
+
+	if (embedded_rule && i > 0)
+	  {
+	    /* Reference points (global_ref and new_ref) are
+	       encoded as below:
+	       
+	       0--1--2 -- ascent
+	       |     |
+	       |     |
+	       |  4 -+--- center
+	    -- 3     5 -- baseline
+	       |     |
+	       6--7--8 -- descent
+
+	       Now, we calculate the column offset of the new glyph
+	       from the left edge of the first glyph.  This can avoid
+	       the same calculation everytime displaying this
+	       composite character.  */
+
+	    /* Reference points of global glyph and new glyph.  */
+	    int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
+	    int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
+	    /* Column offset relative to the first glyph.  */
+	    float left = (leftmost
+			  + (global_ref % 3) * (rightmost - leftmost) / 2.0
+			  - (new_ref % 3) * width / 2.0);
+
+	    cmpcharp->col_offset[i] = left;
+	    if (left < leftmost)
+	      leftmost = left;
+	    if (left + width > rightmost)
+	      rightmost = left + width;
+	  }
+	else
+	  {
+	    if (width > rightmost)
+	      rightmost = width;
+	  }
+      }
+    if (embedded_rule)
+      {
+	/* Now col_offset[N] are relative to the left edge of the
+           first component.  Make them relative to the left edge of
+           overall glyph.  */
+	for (i = 0; i < chars; i++)
+	  cmpcharp->col_offset[i] -= leftmost;
+	/* Make rightmost holds width of overall glyph.  */
+	rightmost -= leftmost;
+      }
+
+    cmpcharp->width = rightmost;
+    if (cmpcharp->width < rightmost)
+      /* To get a ceiling integer value.  */
+      cmpcharp->width++;
+  }
+
+  cmpchar_table[n_cmpchars] = cmpcharp;
+
+  return n_cmpchars++;
+}
+
+/* Return the Nth element of the composite character C.  */
+int
+cmpchar_component (c, n)
+     unsigned int c, n;
+{
+  int id = COMPOSITE_CHAR_ID (c);
+
+  if (id >= n_cmpchars		/* C is not a valid composite character.  */
+      || n >= cmpchar_table[id]->glyph_len) /* No such component.  */
+    return -1;
+  /* No face data is stored in glyph code.  */
+  return ((int) (cmpchar_table[id]->glyph[n]));
+}
+
+DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
+  "T if CHAR is a composite character.")
+  (ch)
+     Lisp_Object ch;
+{
+  CHECK_NUMBER (ch, 0);
+  return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
+}
+
+DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
+       2, 2, 0,
+  "Return the IDXth component character of composite character CHARACTER.")
+  (character, idx)
+     Lisp_Object character, idx;
+{
+  int c;
+
+  CHECK_NUMBER (character, 0);
+  CHECK_NUMBER (idx, 1);
+
+  if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
+    args_out_of_range (character, idx);
+
+  return make_number (c);
+}
+
+DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
+       2, 2, 0,
+  "Return the IDXth composition rule embedded in composite character CHARACTER.
+The returned rule is for composing the IDXth component
+on the (IDX-1)th component.  If IDX is 0, the returned value is always 255.")
+  (character, idx)
+     Lisp_Object character, idx;
+{
+  int id, i;
+
+  CHECK_NUMBER (character, 0);
+  CHECK_NUMBER (idx, 1);
+
+  id = COMPOSITE_CHAR_ID (XINT (character));
+  if (id < 0 || id >= n_cmpchars)
+    error ("Invalid composite character: %d", XINT (character));
+  i = XINT (idx);
+  if (i > cmpchar_table[id]->glyph_len)
+    args_out_of_range (character, idx);
+
+  return make_number (cmpchar_table[id]->cmp_rule[i]);
+}
+
+DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
+       Scmpchar_cmp_rule_p, 1, 1, 0,
+  "Return non-nil if composite character CHARACTER contains a embedded rule.")
+  (character)
+     Lisp_Object character;
+{
+  int id;
+
+  CHECK_NUMBER (character, 0);
+  id = COMPOSITE_CHAR_ID (XINT (character));
+  if (id < 0 || id >= n_cmpchars)
+    error ("Invalid composite character: %d", XINT (character));
+
+  return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
+}
+
+DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
+       Scmpchar_cmp_count, 1, 1, 0,
+  "Return number of compoents of composite character CHARACTER.")
+  (character)
+     Lisp_Object character;
+{
+  int id;
+
+  CHECK_NUMBER (character, 0);
+  id = COMPOSITE_CHAR_ID (XINT (character));
+  if (id < 0 || id >= n_cmpchars)
+    error ("Invalid composite character: %d", XINT (character));
+
+  return (make_number (cmpchar_table[id]->glyph_len));
+}
+
+DEFUN ("compose-string", Fcompose_string, Scompose_string,
+       1, 1, 0,
+  "Return one char string composed from all characters in STRING.")
+  (str)
+     Lisp_Object str;
+{
+  unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
+  int len, i;
+
+  CHECK_STRING (str, 0);
+
+  buf[0] = LEADING_CODE_COMPOSITION;
+  p = XSTRING (str)->data;
+  pend = p + XSTRING (str)->size;
+  i = 1;
+  while (p < pend)
+    {
+      if (*p < 0x20 || *p == 127) /* control code */
+	error ("Invalid component character: %d", *p);
+      else if (*p < 0x80)	/* ASCII */
+	{
+	  if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
+	    error ("Too long string to be composed: %s", XSTRING (str)->data);
+	  /* Prepend an ASCII charset indicator 0xA0, set MSB of the
+             code itself.  */
+	  buf[i++] = 0xA0;
+	  buf[i++] = *p++ + 0x80;
+	}
+      else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
+	{
+	  /* Already composed.  Eliminate the heading
+             LEADING_CODE_COMPOSITION, keep the remaining bytes
+             unchanged.  */
+	  p++;
+	  ptemp = p;
+	  while (! CHAR_HEAD_P (p)) p++;
+	  if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
+	    error ("Too long string to be composed: %s", XSTRING (str)->data);
+	  bcopy (ptemp, buf + i, p - ptemp);
+	  i += p - ptemp;
+	}
+      else			/* multibyte char */
+	{
+	  /* Add 0x20 to the base leading-code, keep the remaining
+             bytes unchanged.  */
+	  len = BYTES_BY_CHAR_HEAD (*p);
+	  if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
+	    error ("Too long string to be composed: %s", XSTRING (str)->data);
+	  bcopy (p, buf + i, len);
+	  buf[i] += 0x20;
+	  p += len, i += len;
+	}
+    }
+
+  if (i < 5)
+    /* STR contains only one character, which can't be composed.  */
+    error ("Too short string to be composed: %s", XSTRING (str)->data);
+
+  return make_string (buf, i);
+}
+
+
+charset_id_internal (charset_name)
+     char *charset_name;
+{
+  Lisp_Object val = Fget (intern (charset_name), Qcharset);
+
+  if (!VECTORP (val))
+    error ("Charset %s is not defined", charset_name);
+
+  return (XINT (XVECTOR (val)->contents[0]));
+}
+
+DEFUN ("setup-special-charsets", Fsetup_special_charsets,
+       Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
+   ()
+{
+  charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
+  charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
+  charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
+  charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
+  charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
+  charset_big5_1 = charset_id_internal ("chinese-big5-1");
+  charset_big5_2 = charset_id_internal ("chinese-big5-2");
+  return Qnil;
+}
+
+init_charset_once ()
+{
+  int i, j, k;
+
+  staticpro (&Vcharset_table);
+  staticpro (&Vcharset_symbol_table);
+
+  /* This has to be done here, before we call Fmake_char_table.  */
+  Qcharset_table = intern ("charset-table");
+  staticpro (&Qcharset_table);
+
+  /* Intern this now in case it isn't already done.
+     Setting this variable twice is harmless.
+     But don't staticpro it here--that is done in alloc.c.  */
+  Qchar_table_extra_slots = intern ("char-table-extra-slots");
+
+  /* Now we are ready to set up this property, so we can
+     create the charset table.  */
+  Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
+  Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
+
+  Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET), Qnil);
+
+  /* Setup tables.  */
+  for (i = 0; i < 2; i++)
+    for (j = 0; j < 2; j++)
+      for (k = 0; k < 128; k++)
+	iso_charset_table [i][j][k] = -1;
+
+  bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
+  cmpchar_table_size = n_cmpchars = 0;
+
+  for (i = 0; i < 256; i++)
+    BYTES_BY_CHAR_HEAD (i) = 1;
+  BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
+  BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
+  BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
+  BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
+  /* The following doesn't reflect the actual bytes, but just to tell
+     that it is a start of a multibyte character.  */
+  BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
+
+  for (i = 0; i < 128; i++)
+    WIDTH_BY_CHAR_HEAD (i) = 1;
+  for (; i < 256; i++)
+    WIDTH_BY_CHAR_HEAD (i) = 4;
+  WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
+  WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
+  WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
+  WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
+}
+
+#ifdef emacs
+
+syms_of_charset ()
+{
+  Qascii = intern ("ascii");
+  staticpro (&Qascii);
+
+  Qcharset = intern ("charset");
+  staticpro (&Qcharset);
+
+  /* Define ASCII charset now.  */
+  update_charset_table (make_number (CHARSET_ASCII),
+			make_number (1), make_number (94),
+			make_number (1),
+			make_number (0),
+			make_number ('B'),
+			make_number (0),
+			build_string ("ASCII"),
+			build_string ("ASCII"),
+			build_string ("ASCII (ISO646 IRV)"));
+  CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
+  Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
+
+  Qcomposition = intern ("composition");
+  staticpro (&Qcomposition);
+  CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
+
+  defsubr (&Sdefine_charset);
+  defsubr (&Sdeclare_equiv_charset);
+  defsubr (&Sfind_charset_region);
+  defsubr (&Sfind_charset_string);
+  defsubr (&Smake_char_internal);
+  defsubr (&Ssplit_char);
+  defsubr (&Schar_charset);
+  defsubr (&Siso_charset);
+  defsubr (&Schar_bytes);
+  defsubr (&Schar_width);
+  defsubr (&Sstring_width);
+  defsubr (&Schar_direction);
+  defsubr (&Schars_in_string);
+  defsubr (&Schar_boundary_p);
+  defsubr (&Sconcat_chars);
+  defsubr (&Scmpcharp);
+  defsubr (&Scmpchar_component);
+  defsubr (&Scmpchar_cmp_rule);
+  defsubr (&Scmpchar_cmp_rule_p);
+  defsubr (&Scmpchar_cmp_count);
+  defsubr (&Scompose_string);
+  defsubr (&Ssetup_special_charsets);
+
+  DEFVAR_LISP ("charset-list", &Vcharset_list,
+    "List of charsets ever defined.");
+  Vcharset_list = Fcons (Qascii, Qnil);
+
+  DEFVAR_INT ("leading-code-composition", &leading_code_composition,
+    "Leading-code of composite characters.");
+  leading_code_composition = LEADING_CODE_COMPOSITION;
+
+  DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
+    "Leading-code of private TYPE9N charset of column-width 1.");
+  leading_code_private_11 = LEADING_CODE_PRIVATE_11;
+
+  DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
+    "Leading-code of private TYPE9N charset of column-width 2.");
+  leading_code_private_12 = LEADING_CODE_PRIVATE_12;
+
+  DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
+    "Leading-code of private TYPE9Nx9N charset of column-width 1.");
+  leading_code_private_21 = LEADING_CODE_PRIVATE_21;
+
+  DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
+    "Leading-code of private TYPE9Nx9N charset of column-width 2.");
+  leading_code_private_22 = LEADING_CODE_PRIVATE_22;
+}
+
+#endif /* emacs */