changeset 88455:bae0bd953f61

(struct charset_map_entries): New struct. (load_charset_map): Renamed from parse_charset_map. New args entries and n_entries. Caller changed. (load_charset_map_from_file): Renamed from load_charset_map. Caller changed. New arg control_flag. Call load_charset_map at the tail. (load_charset_map_from_vector): New function. (Fdefine_charset_internal): Setup charset.compact_codes_p. (encode_char): If the charset is compact, change a character index to a code point.
author Kenichi Handa <handa@m17n.org>
date Tue, 07 May 2002 04:50:29 +0000
parents 47b395dd6f2c
children a7b309f72920
files src/charset.c
diffstat 1 files changed, 185 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/src/charset.c	Tue May 07 04:50:09 2002 +0000
+++ b/src/charset.c	Tue May 07 04:50:29 2002 +0000
@@ -153,12 +153,20 @@
 
 
 
-/* Set to 1 when a charset map is loaded to warn that a buffer text
-   and a string data may be relocated.  */
+/* Set to 1 to warn that a charset map is loaded and thus a buffer
+   text and a string data may be relocated.  */
 int charset_map_loaded;
 
-/* Parse the mapping vector MAP which has this form:
-	[CODE0 CHAR0 CODE1 CHAR1 ... ]
+struct charset_map_entries
+{
+  struct {
+    unsigned from, to;
+    int c;
+  } entry[0x10000];
+  struct charset_map_entries *next;
+};
+
+/* Load the mapping information for CHARSET from ENTRIES.
 
    If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
 
@@ -170,9 +178,10 @@
    setup it too.  */
 
 static void
-parse_charset_map (charset, map, control_flag)
+load_charset_map (charset, entries, n_entries, control_flag)
   struct charset *charset;
-  Lisp_Object map;
+  struct charset_map_entries *entries;
+  int n_entries;
   int control_flag;
 {
   Lisp_Object vec, table;
@@ -180,12 +189,14 @@
   unsigned max_code = CHARSET_MAX_CODE (charset);
   int ascii_compatible_p = charset->ascii_compatible_p;
   int min_char, max_char, nonascii_min_char;
-  int size;
   int i;
   int first;
   unsigned char *fast_map = charset->fast_map;
 
-  if (control_flag)
+  if (n_entries <= 0)
+    return;
+
+  if (control_flag > 0)
     {
       int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
       unsigned invalid_code = CHARSET_INVALID_CODE (charset);
@@ -199,37 +210,53 @@
       charset_map_loaded = 1;
     }
 
-  size = ASIZE (map);
+  min_char = max_char = entries->entry[0].c;
   nonascii_min_char = MAX_CHAR;
-  CHARSET_COMPACT_CODES_P (charset) = 1;
-  for (first = 1, i = 0; i < size; i += 2)
+  for (i = 0; i < n_entries; i++)
     {
-      Lisp_Object val;
-      unsigned code;
+      unsigned from, to;
       int c, char_index;
+      int idx = i % 0x10000;
 
-      val = AREF (map, i);
-      CHECK_NATNUM (val);
-      code = XFASTINT (val);
-      val = AREF (map, i + 1);
-      CHECK_NATNUM (val);
-      c = XFASTINT (val);
+      if (i > 0 && idx == 0)
+	entries = entries->next;
+      from = entries->entry[idx].from;
+      to = entries->entry[idx].to;
+      c = entries->entry[idx].c;
 
-      if (code < min_code || code > max_code)
-	continue;
-      char_index = CODE_POINT_TO_INDEX (charset, code);
-      if (char_index < 0
-	  || c > MAX_CHAR)
-	continue;
-	
       if (control_flag < 2)
 	{
-	  if (first)
+	  if (control_flag == 1)
 	    {
-	      min_char = max_char = c;
-	      first = 0;
+	      unsigned code = from;
+	      int from_index, to_index;
+
+	      from_index = CODE_POINT_TO_INDEX (charset, from);
+	      if (from == to)
+		to_index = from_index;
+	      else
+		to_index = CODE_POINT_TO_INDEX (charset, to);
+	      if (from_index < 0 || to_index < 0)
+		continue;
+	      if (CHARSET_COMPACT_CODES_P (charset))
+		while (1)
+		  {
+		    ASET (vec, from_index, make_number (c));
+		    CHAR_TABLE_SET (table, c, make_number (code));
+		    if (from_index == to_index)
+		      break;
+		    from_index++, c++;
+		    code = INDEX_TO_CODE_POINT (charset, from_index);
+		  }
+	      else
+		for (; from_index <= to_index; from_index++, c++)
+		  {
+		    ASET (vec, from_index, make_number (c));
+		    CHAR_TABLE_SET (table, c, make_number (from_index));
+		  }
 	    }
-	  else if (c > max_char)
+
+	  if (c > max_char)
 	    max_char = c;
 	  else if (c < min_char)
 	    min_char = c;
@@ -239,27 +266,12 @@
 
 	  CHARSET_FAST_MAP_SET (c, fast_map);
 	}
-
-      if (control_flag)
+      else
 	{
-	  if (control_flag == 1)
+	  for (; from <= to; from++)
 	    {
-	      if (char_index >= ASIZE (vec))
-		abort ();
-	      ASET (vec, char_index, make_number (c));
-	      if (code > 0x7FFFFFF)
-		{
-		  CHAR_TABLE_SET (table, c,
-				  Fcons (make_number (code >> 16),
-					 make_number (code & 0xFFFF)));
-		  CHARSET_COMPACT_CODES_P (charset) = 0;
-		}
-	      else
-		CHAR_TABLE_SET (table, c, make_number (code));
-	    }
-	  else
-	    {
-	      int c1 = DECODE_CHAR (charset, code);
+	      int c1 = DECODE_CHAR (charset, from);
+
 	      if (c1 >= 0)
 		{
 		  CHAR_TABLE_SET (table, c, make_number (c1));
@@ -277,7 +289,7 @@
       CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
 				    ? nonascii_min_char : min_char);
       CHARSET_MAX_CHAR (charset) = max_char;
-      if (control_flag)
+      if (control_flag == 1)
 	{
 	  CHARSET_DECODER (charset) = vec;
 	  CHARSET_ENCODER (charset) = table;
@@ -325,36 +337,43 @@
   else
     while ((c = getc (fp)) != EOF && isdigit (c))
       n = (n * 10) + c - '0';
+  if (c != EOF)
+    ungetc (c, fp);
   return n;
 }
 
 
 /* Return a mapping vector for CHARSET loaded from MAPFILE.
-   Each line of MAPFILE has this form:
-	0xAAAA 0xBBBB
-   where 0xAAAA is a code-point and 0xBBBB is the corresponding
-   character code.
+   Each line of MAPFILE has this form
+	0xAAAA 0xCCCC
+   where 0xAAAA is a code-point and 0xCCCC is the corresponding
+   character code, or this form
+	0xAAAA-0xBBBB 0xCCCC
+   where 0xAAAA and 0xBBBB are code-points specifying a range, and
+   0xCCCC is the first character code of the range.
+
    The returned vector has this form:
 	[ CODE1 CHAR1 CODE2 CHAR2 .... ]
-*/
+   where CODE1 is a code-point or a cons of code-points specifying a
+   range.  */
 
 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
 
-static Lisp_Object
-load_charset_map (charset, mapfile)
+static void
+load_charset_map_from_file (charset, mapfile, control_flag)
      struct charset *charset;
      Lisp_Object mapfile;
+     int control_flag;
 {
+  unsigned min_code = CHARSET_MIN_CODE (charset);
+  unsigned max_code = CHARSET_MAX_CODE (charset);
   int fd;
   FILE *fp;
-  int num;
-  unsigned *numbers_table[256];
-  int numbers_table_used;
-  unsigned *numbers;
   int eof;
   Lisp_Object suffixes;
-  Lisp_Object vec;
   int i;
+  struct charset_map_entries *head, *entries;
+  int n_entries;
 
   suffixes = Fcons (build_string (".map"),
 		    Fcons (build_string (".TXT"), Qnil));
@@ -365,42 +384,114 @@
       || ! (fp = fdopen (fd, "r")))
     {
       add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
-      return Qnil;
+      return;
     }
 
-  numbers_table_used = 0;
-  num = 0;
+  head = entries = ((struct charset_map_entries *)
+		    alloca (sizeof (struct charset_map_entries)));
+  n_entries = 0;
   eof = 0;
   while (1)
     {
-      unsigned n = read_hex (fp, &eof);
+      unsigned from, to;
+      int c;
+      int idx;
 
+      from = read_hex (fp, &eof);
       if (eof)
 	break;
-      if ((num % 0x10000) == 0)
+      if (getc (fp) == '-')
+	to = read_hex (fp, &eof);
+      else
+	to = from;
+      c = (int) read_hex (fp, &eof);
+
+      if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
+	continue;
+
+      if (n_entries > 0 && (n_entries % 0x10000) == 0)
 	{
-	  if (numbers_table_used == 256)
-	    break;
-	  numbers = (unsigned *) alloca (sizeof (unsigned) * 0x10000);
-	  numbers_table[numbers_table_used++] = numbers;	  
+	  entries->next = ((struct charset_map_entries *)
+			   alloca (sizeof (struct charset_map_entries)));
+	  entries = entries->next;
 	}
-      *numbers++ = n;
-      num++;
+      idx = n_entries % 0x10000;
+      entries->entry[idx].from = from;
+      entries->entry[idx].to = to;
+      entries->entry[idx].c = c;
+      n_entries++;
     }
   fclose (fp);
   close (fd);
 
-  vec = Fmake_vector (make_number (num), Qnil);
-  for (i = 0; i < num; i++, numbers++)
+  load_charset_map (charset, head, n_entries, control_flag);
+}
+
+static void
+load_charset_map_from_vector (charset, vec, control_flag)
+     struct charset *charset;
+     Lisp_Object vec;
+     int control_flag;
+{
+  unsigned min_code = CHARSET_MIN_CODE (charset);
+  unsigned max_code = CHARSET_MAX_CODE (charset);
+  struct charset_map_entries *head, *entries;
+  int n_entries;
+  int len = ASIZE (vec);
+  int i;
+
+  if (len % 2 == 1)
     {
-      if ((i % 0x10000) == 0)
-	numbers = numbers_table[i / 0x10000];
-      ASET (vec, i, make_number (*numbers));
+      add_to_log ("Failure in loading charset map: %V", vec, Qnil);
+      return;
     }
 
-  charset_map_loaded = 1;
+  head = entries = ((struct charset_map_entries *)
+		    alloca (sizeof (struct charset_map_entries)));
+  n_entries = 0;
+  for (i = 0; i < len; i += 2)
+    {
+      Lisp_Object val, val2;
+      unsigned from, to;
+      int c;
+      int idx;
 
-  return vec;
+      val = AREF (vec, i);
+      if (CONSP (val))
+	{
+	  val2 = XCDR (val);
+	  val = XCAR (val);
+	  CHECK_NATNUM (val);
+	  CHECK_NATNUM (val2);
+	  from = XFASTINT (val);
+	  to = XFASTINT (val2);
+	}
+      else
+	{
+	  CHECK_NATNUM (val);
+	  from = to = XFASTINT (val);
+	}
+      val = AREF (vec, i + 1);
+      CHECK_NATNUM (val);
+      c = XFASTINT (val);
+
+      if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
+	continue;
+
+      if ((n_entries % 0x10000) == 0)
+	{
+	  entries->next = ((struct charset_map_entries *)
+			   alloca (sizeof (struct charset_map_entries)));
+	  entries = entries->next;
+	}
+      idx = n_entries % 0x10000;
+      entries->entry[idx].from = from;
+      entries->entry[idx].to = to;
+      entries->entry[idx].c = c;
+      n_entries++;
+    }
+
+  load_charset_map (charset, head, n_entries, control_flag);
 }
 
 static void
@@ -413,8 +504,9 @@
 
       map = CHARSET_MAP (charset);
       if (STRINGP (map))
-	map = load_charset_map (charset, map);
-      parse_charset_map (charset, map, 1);
+	load_charset_map_from_file (charset, map, 1);
+      else
+	load_charset_map_from_vector (charset, map, 1);
       CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
     }
 }
@@ -621,6 +713,8 @@
 		      | (charset.code_space[9] << 16)
 		      | (charset.code_space[13] << 24));
 
+  charset.compact_codes_p = charset.max_code < 0x1000000;
+
   val = args[charset_arg_invalid_code];
   if (NILP (val))
     {
@@ -708,9 +802,9 @@
       val = args[charset_arg_map];
       ASET (attrs, charset_map, val);
       if (STRINGP (val))
-	val = load_charset_map (&charset, val);
-      CHECK_VECTOR (val);
-      parse_charset_map (&charset, val, 0);
+	load_charset_map_from_file (&charset, val, 0);
+      else
+	load_charset_map_from_vector (&charset, val, 0);
       charset.method = CHARSET_METHOD_MAP_DEFERRED;
     }
   else if (! NILP (args[charset_arg_parents]))
@@ -901,8 +995,9 @@
   if (NILP (unify_map))
     unify_map = CHARSET_UNIFY_MAP (cs);
   if (STRINGP (unify_map))
-    unify_map = load_charset_map (cs, unify_map);
-  parse_charset_map (cs, unify_map, 2);
+    load_charset_map_from_file (cs, unify_map, 2);
+  else
+    load_charset_map_from_vector (cs, unify_map, 2);
   CHARSET_UNIFIED_P (cs) = 1;
   return Qnil;
 }
@@ -1277,10 +1372,9 @@
       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
 	return CHARSET_INVALID_CODE (charset);
       val = CHAR_TABLE_REF (encoder, c);
-      if (CONSP (val))
-	code = (XINT (XCAR (val)) << 16) | XINT (XCDR (val));
-      else
-	code = XINT (val);
+      code = XINT (val);
+      if (! CHARSET_COMPACT_CODES_P (charset))
+	code = INDEX_TO_CODE_POINT (charset, code);
     }
   else
     {