Mercurial > emacs
changeset 88745:f247f70ed2c3
(load_charset_map): Set the default value of encoder
and deunifier char-tables to nil.
(map_charset_chars): Argument changed. Callers changed. Use
map_char_table_for_charset instead of map_char_table.
(Fmap_charset_chars): New optional args from_code and to_code.
(Fdefine_charset_internal): Adjusted for the change of
`define-charset' (:parents -> :subset or :superset).
(charset_work): New variable.
(encode_char): Adjusted for the change of
Fdefine_charset_internal.
(syms_of_charset): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Wed, 12 Jun 2002 00:13:57 +0000 |
parents | 5b9a72e491d4 |
children | 388c82972bb9 |
files | src/charset.c |
diffstat | 1 files changed, 176 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- a/src/charset.c Wed Jun 12 00:13:36 2002 +0000
+++ b/src/charset.c Wed Jun 12 00:13:57 2002 +0000
@@ -205,13 +205,12 @@
   if (control_flag > 0)
     {
       int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
-      unsigned invalid_code = CHARSET_INVALID_CODE (charset);
 
-      table = Fmake_char_table (Qnil, make_number (invalid_code));
+      table = Fmake_char_table (Qnil, Qnil);
       if (control_flag == 1)
         vec = Fmake_vector (make_number (n), make_number (-1));
       else if (! CHAR_TABLE_P (Vchar_unify_table))
-        Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1));
+        Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
       charset_map_loaded = 1;
     }
 
@@ -551,26 +550,41 @@
 
 
 void
-map_charset_chars (c_function, function, charset_symbol, arg)
-     void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object));
-     Lisp_Object function, charset_symbol, arg;
+map_charset_chars (c_function, function, arg,
+                   charset, from, to)
+     void (*c_function) P_ ((Lisp_Object, Lisp_Object));
+     Lisp_Object function, arg;
+     struct charset *charset;
+     unsigned from, to;
+
 {
-  int id;
-  struct charset *charset;
   Lisp_Object range;
-
-  CHECK_CHARSET_GET_ID (charset_symbol, id);
-  charset = CHARSET_FROM_ID (id);
+  int partial;
 
   if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
     load_charset (charset);
 
+  partial = (from > CHARSET_MIN_CODE (charset)
+             || to < CHARSET_MAX_CODE (charset));
+
+  if (CHARSET_UNIFIED_P (charset)
+      && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
+    {
+      map_char_table_for_charset (c_function, function,
+                                  CHARSET_DEUNIFIER (charset), arg,
+                                  partial ? charset : NULL, from, to);
+    }
+
   if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
     {
-      range = Fcons (make_number (CHARSET_MIN_CHAR (charset)),
-                     make_number (CHARSET_MAX_CHAR (charset)));
+      int from_idx = CODE_POINT_TO_INDEX (charset, from);
+      int to_idx = CODE_POINT_TO_INDEX (charset, to);
+      int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
+      int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
+
+      range = Fcons (make_number (from_c), make_number (to_c));
       if (NILP (function))
-        (*c_function) (arg, range, Qnil);
+        (*c_function) (range, arg);
       else
         call2 (function, range, arg);
     }
@@ -578,86 +592,87 @@
     {
       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
         return;
-      if (CHARSET_ASCII_COMPATIBLE_P (charset))
+      if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
         {
-          range = Fcons (make_number (0), make_number (127));
+          range = Fcons (make_number (from), make_number (to));
+          if (to >= 128)
+            XSETCAR (range, make_number (127));
+
           if (NILP (function))
-            (*c_function) (arg, range, Qnil);
+            (*c_function) (range, arg);
           else
             call2 (function, range, arg);
         }
-      map_char_table (c_function, function, CHARSET_ENCODER (charset), arg,
-                      0, NULL);
+      map_char_table_for_charset (c_function, function,
+                                  CHARSET_ENCODER (charset), arg,
+                                  partial ? charset : NULL, from, to);
     }
-  else /* i.e. CHARSET_METHOD_PARENT */
+  else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
     {
-      int from, to, c;
-      unsigned code;
-      int i, j, k, l;
-      int *code_space = CHARSET_CODE_SPACE (charset);
-      Lisp_Object val;
+      Lisp_Object subset_info;
+      int offset;
 
-      range = Fcons (Qnil, Qnil);
-      from = to = -2;
-      for (i = code_space[12]; i <= code_space[13]; i++)
-        for (j = code_space[8]; j <= code_space[9]; j++)
-          for (k = code_space[4]; k <= code_space[5]; k++)
-            for (l = code_space[0]; l <= code_space[1]; l++)
-              {
-                code = (i << 24) | (j << 16) | (k << 8) | l;
-                c = DECODE_CHAR (charset, code);
-                if (c == to + 1)
-                  {
-                    to++;
-                    continue;
-                  }
-                if (from >= 0)
-                  {
-                    if (from < to)
-                      {
-                        XSETCAR (range, make_number (from));
-                        XSETCDR (range, make_number (to));
-                        val = range;
-                      }
-                    else
-                      val = make_number (from);
-                    if (NILP (function))
-                      (*c_function) (arg, val, Qnil);
-                    else
-                      call2 (function, val, arg);
-                  }
-                from = to = (c < 0 ? -2 : c);
-              }
-      if (from >= 0)
+      subset_info = CHARSET_SUBSET (charset);
+      charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+      offset = XINT (AREF (subset_info, 3));
+      from -= offset;
+      if (from < XFASTINT (AREF (subset_info, 1)))
+        from = XFASTINT (AREF (subset_info, 1));
+      to -= offset;
+      if (to > XFASTINT (AREF (subset_info, 2)))
+        to = XFASTINT (AREF (subset_info, 2));
+      map_charset_chars (c_function, function, arg, charset, from, to);
+    }
+  else /* i.e. CHARSET_METHOD_SUPERSET */
+    {
+      Lisp_Object parents;
+
+      for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
+           parents = XCDR (parents))
         {
-          if (from < to)
-            {
-              XSETCAR (range, make_number (from));
-              XSETCDR (range, make_number (to));
-              val = range;
-            }
-          else
-            val = make_number (from);
-          if (NILP (function))
-            (*c_function) (arg, val, Qnil);
-          else
-            call2 (function, val, arg);
+          int offset;
+          unsigned this_from, this_to;
+
+          charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
+          offset = XINT (XCDR (XCAR (parents)));
+          this_from = from - offset;
+          this_to = to - offset;
+          if (this_from < CHARSET_MIN_CODE (charset))
+            this_from = CHARSET_MIN_CODE (charset);
+          if (this_to > CHARSET_MAX_CODE (charset))
+            this_to = CHARSET_MAX_CODE (charset);
+          map_charset_chars (c_function, function, arg, charset, from, to);
         }
     }
 }
 
 
-DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0,
+
+DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
        doc: /* Call FUNCTION for all characters in CHARSET.
-FUNCTION is called with an argument RANGE and optional 2nd
+FUNCTION is called with an argument RANGE and the optional 3rd
 argument ARG.
-RANGE is either a cons (FROM . TO), where FROM and TO indicate a range of
-characters contained in CHARSET or a single character in the case that
-FROM and TO would be equal. (The charset mapping may have gaps.)*/)
-     (function, charset, arg)
-     Lisp_Object function, charset, arg;
+RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
+characters contained in CHARSET.
+
+The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
+range of code points of targer characters. */)
+     (function, charset, arg, from_code, to_code)
+     Lisp_Object function, charset, arg, from_code, to_code;
 {
-  map_charset_chars (NULL, function, charset, arg);
+  struct charset *cs;
+
+  CHECK_CHARSET_GET_CHARSET (charset, cs);
+  if (NILP (from_code))
+    from_code = 0;
+  if (from_code < CHARSET_MIN_CODE (cs))
+    from_code = CHARSET_MIN_CODE (cs);
+  if (NILP (to_code))
+    to_code = 0xFFFFFFFF;
+  if (to_code > CHARSET_MAX_CODE (cs))
+    to_code = CHARSET_MAX_CODE (cs);
+
+  map_charset_chars (NULL, function, arg, cs, from_code, to_code);
   return Qnil;
 }
 
@@ -893,13 +908,45 @@
       load_charset_map_from_vector (&charset, val, 0);
       charset.method = CHARSET_METHOD_MAP_DEFERRED;
     }
-  else if (! NILP (args[charset_arg_parents]))
+  else if (! NILP (args[charset_arg_subset]))
     {
-      val = args[charset_arg_parents];
-      CHECK_LIST (val);
-      charset.method = CHARSET_METHOD_INHERIT;
+      Lisp_Object parent;
+      Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
+      struct charset *parent_charset;
+
+      val = args[charset_arg_subset];
+      parent = Fcar (val);
+      CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
+      parent_min_code = Fnth (make_number (1), val);
+      CHECK_NATNUM (parent_min_code);
+      parent_max_code = Fnth (make_number (2), val);
+      CHECK_NATNUM (parent_max_code);
+      parent_code_offset = Fnth (make_number (3), val);
+      CHECK_NUMBER (parent_code_offset);
+      val = Fmake_vector (make_number (4), Qnil);
+      ASET (val, 0, make_number (parent_charset->id));
+      ASET (val, 1, parent_min_code);
+      ASET (val, 2, parent_max_code);
+      ASET (val, 3, parent_code_offset);
+      ASET (attrs, charset_subset, val);
+
+      charset.method = CHARSET_METHOD_SUBSET;
+      /* Here, we just copy the parent's fast_map. It's not accurate,
+         but at least it works for quickly detecting which character
+         DOESN'T belong to this charset. */
+      for (i = 0; i < 190; i++)
+        charset.fast_map[i] = parent_charset->fast_map[i];
+
+      /* We also copy these for parents. */
+      charset.min_char = parent_charset->min_char;
+      charset.max_char = parent_charset->max_char;
+    }
+  else if (! NILP (args[charset_arg_superset]))
+    {
+      val = args[charset_arg_superset];
+      charset.method = CHARSET_METHOD_SUPERSET;
       val = Fcopy_sequence (val);
-      ASET (attrs, charset_parents, val);
+      ASET (attrs, charset_superset, val);
 
       charset.min_char = MAX_CHAR;
       charset.max_char = 0;
@@ -1351,17 +1398,30 @@
       method = CHARSET_METHOD (charset);
     }
 
-  if (method == CHARSET_METHOD_INHERIT)
+  if (method == CHARSET_METHOD_SUBSET)
+    {
+      Lisp_Object subset_info;
+
+      subset_info = CHARSET_SUBSET (charset);
+      charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+      code -= XINT (AREF (subset_info, 3));
+      if (code < XFASTINT (AREF (subset_info, 1))
+          || code > XFASTINT (AREF (subset_info, 2)))
+        c = -1;
+      else
+        c = DECODE_CHAR (charset, code);
+    }
+  else if (method == CHARSET_METHOD_SUPERSET)
     {
       Lisp_Object parents;
 
-      parents = CHARSET_PARENTS (charset);
+      parents = CHARSET_SUPERSET (charset);
       c = -1;
       for (; CONSP (parents); parents = XCDR (parents))
         {
           int id = XINT (XCAR (XCAR (parents)));
           int code_offset = XINT (XCDR (XCAR (parents)));
-          unsigned this_code = code + code_offset;
+          unsigned this_code = code - code_offset;
 
           charset = CHARSET_FROM_ID (id);
           if ((c = DECODE_CHAR (charset, this_code)) >= 0)
@@ -1398,6 +1458,8 @@
   return c;
 }
 
+/* Variable used temporarily by the macro ENCODE_CHAR. */
+Lisp_Object charset_work;
 
 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
    CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
@@ -1412,8 +1474,7 @@
 
   if (CHARSET_UNIFIED_P (charset))
     {
-      Lisp_Object deunifier;
-      int deunified;
+      Lisp_Object deunifier, deunified;
 
       deunifier = CHARSET_DEUNIFIER (charset);
       if (! CHAR_TABLE_P (deunifier))
@@ -1421,20 +1482,36 @@
           Funify_charset (CHARSET_NAME (charset), Qnil);
           deunifier = CHARSET_DEUNIFIER (charset);
         }
-      deunified = XINT (CHAR_TABLE_REF (deunifier, c));
-      if (deunified > 0)
-        c = deunified;
+      deunified = CHAR_TABLE_REF (deunifier, c);
+      if (! NILP (deunified))
+        c = XINT (deunified);
     }
 
   if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
       || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
     return CHARSET_INVALID_CODE (charset);
 
-  if (method == CHARSET_METHOD_INHERIT)
+  if (method == CHARSET_METHOD_SUBSET)
+    {
+      Lisp_Object subset_info;
+      struct charset *this_charset;
+
+      subset_info = CHARSET_SUBSET (charset);
+      this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
+      code = ENCODE_CHAR (this_charset, c);
+      if (code == CHARSET_INVALID_CODE (this_charset)
+          || code < XFASTINT (AREF (subset_info, 1))
+          || code > XFASTINT (AREF (subset_info, 2)))
+        return CHARSET_INVALID_CODE (charset);
+      code += XINT (AREF (subset_info, 3));
+      return code;
+    }
+
+  if (method == CHARSET_METHOD_SUPERSET)
     {
       Lisp_Object parents;
 
-      parents = CHARSET_PARENTS (charset);
+      parents = CHARSET_SUPERSET (charset);
       for (; CONSP (parents); parents = XCDR (parents))
         {
           int id = XINT (XCAR (XCAR (parents)));
@@ -1445,7 +1522,7 @@
           if (code != CHARSET_INVALID_CODE (this_charset)
               && (code_offset < 0 || code >= code_offset))
             {
-              code -= code_offset;
+              code += code_offset;
               if (code >= charset->min_code && code <= charset->max_code
                   && CODE_POINT_TO_INDEX (charset, code) >= 0)
                 return code;
@@ -1469,6 +1546,8 @@
       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
         return CHARSET_INVALID_CODE (charset);
       val = CHAR_TABLE_REF (encoder, c);
+      if (NILP (val))
+        return CHARSET_INVALID_CODE (charset);
       code = XINT (val);
       if (! CHARSET_COMPACT_CODES_P (charset))
         code = INDEX_TO_CODE_POINT (charset, code);
@@ -1962,7 +2041,8 @@
   args[charset_arg_invalid_code] = Qnil;
   args[charset_arg_code_offset] = make_number (0);
   args[charset_arg_map] = Qnil;
-  args[charset_arg_parents] = Qnil;
+  args[charset_arg_subset] = Qnil;
+  args[charset_arg_superset] = Qnil;
   args[charset_arg_unify_map] = Qnil;
   /* The actual plist is set by mule-conf.el. */
   plist[1] = args[charset_arg_name];
@@ -1993,7 +2073,8 @@
   args[charset_arg_invalid_code] = Qnil;
   args[charset_arg_code_offset] = make_number (0);
   args[charset_arg_map] = Qnil;
-  args[charset_arg_parents] = Qnil;
+  args[charset_arg_subset] = Qnil;
+  args[charset_arg_superset] = Qnil;
   args[charset_arg_unify_map] = Qnil;
   /* The actual plist is set by mule-conf.el. */
   plist[1] = args[charset_arg_name];