comparison src/charset.c @ 88745:f247f70ed2c3

(load_charset_map): Set the default value of encoder and deunifier char-tables to nil. (map_charset_chars): Argument changed. Callers changed. Use map_char_table_for_charset instead of map_char_table. (Fmap_charset_chars): New optional args from_code and to_code. (Fdefine_charset_internal): Adjusted for the change of `define-charset' (:parents -> :subset or :superset). (charset_work): New variable. (encode_char): Adjusted for the change of Fdefine_charset_internal. (syms_of_charset): Likewise.
author Kenichi Handa <handa@m17n.org>
date Wed, 12 Jun 2002 00:13:57 +0000
parents 8646a672489b
children c2d4566b3cde
comparison
equal deleted inserted replaced
88744:5b9a72e491d4 88745:f247f70ed2c3
203 return; 203 return;
204 204
205 if (control_flag > 0) 205 if (control_flag > 0)
206 { 206 {
207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; 207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
208 unsigned invalid_code = CHARSET_INVALID_CODE (charset); 208
209 209 table = Fmake_char_table (Qnil, Qnil);
210 table = Fmake_char_table (Qnil, make_number (invalid_code));
211 if (control_flag == 1) 210 if (control_flag == 1)
212 vec = Fmake_vector (make_number (n), make_number (-1)); 211 vec = Fmake_vector (make_number (n), make_number (-1));
213 else if (! CHAR_TABLE_P (Vchar_unify_table)) 212 else if (! CHAR_TABLE_P (Vchar_unify_table))
214 Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1)); 213 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
215 214
216 charset_map_loaded = 1; 215 charset_map_loaded = 1;
217 } 216 }
218 217
219 min_char = max_char = entries->entry[0].c; 218 min_char = max_char = entries->entry[0].c;
549 return (CHARSETP (object) ? Qt : Qnil); 548 return (CHARSETP (object) ? Qt : Qnil);
550 } 549 }
551 550
552 551
553 void 552 void
554 map_charset_chars (c_function, function, charset_symbol, arg) 553 map_charset_chars (c_function, function, arg,
555 void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object)); 554 charset, from, to)
556 Lisp_Object function, charset_symbol, arg; 555 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
557 { 556 Lisp_Object function, arg;
558 int id; 557 struct charset *charset;
559 struct charset *charset; 558 unsigned from, to;
559
560 {
560 Lisp_Object range; 561 Lisp_Object range;
561 562 int partial;
562 CHECK_CHARSET_GET_ID (charset_symbol, id);
563 charset = CHARSET_FROM_ID (id);
564 563
565 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) 564 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
566 load_charset (charset); 565 load_charset (charset);
567 566
567 partial = (from > CHARSET_MIN_CODE (charset)
568 || to < CHARSET_MAX_CODE (charset));
569
570 if (CHARSET_UNIFIED_P (charset)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
572 {
573 map_char_table_for_charset (c_function, function,
574 CHARSET_DEUNIFIER (charset), arg,
575 partial ? charset : NULL, from, to);
576 }
577
568 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET) 578 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
569 { 579 {
570 range = Fcons (make_number (CHARSET_MIN_CHAR (charset)), 580 int from_idx = CODE_POINT_TO_INDEX (charset, from);
571 make_number (CHARSET_MAX_CHAR (charset))); 581 int to_idx = CODE_POINT_TO_INDEX (charset, to);
582 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
583 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
584
585 range = Fcons (make_number (from_c), make_number (to_c));
572 if (NILP (function)) 586 if (NILP (function))
573 (*c_function) (arg, range, Qnil); 587 (*c_function) (range, arg);
574 else 588 else
575 call2 (function, range, arg); 589 call2 (function, range, arg);
576 } 590 }
577 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) 591 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
578 { 592 {
579 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) 593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
580 return; 594 return;
581 if (CHARSET_ASCII_COMPATIBLE_P (charset)) 595 if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
582 { 596 {
583 range = Fcons (make_number (0), make_number (127)); 597 range = Fcons (make_number (from), make_number (to));
598 if (to >= 128)
599 XSETCAR (range, make_number (127));
600
584 if (NILP (function)) 601 if (NILP (function))
585 (*c_function) (arg, range, Qnil); 602 (*c_function) (range, arg);
586 else 603 else
587 call2 (function, range, arg); 604 call2 (function, range, arg);
588 } 605 }
589 map_char_table (c_function, function, CHARSET_ENCODER (charset), arg, 606 map_char_table_for_charset (c_function, function,
590 0, NULL); 607 CHARSET_ENCODER (charset), arg,
591 } 608 partial ? charset : NULL, from, to);
592 else /* i.e. CHARSET_METHOD_PARENT */ 609 }
593 { 610 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
594 int from, to, c; 611 {
595 unsigned code; 612 Lisp_Object subset_info;
596 int i, j, k, l; 613 int offset;
597 int *code_space = CHARSET_CODE_SPACE (charset); 614
598 Lisp_Object val; 615 subset_info = CHARSET_SUBSET (charset);
599 616 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
600 range = Fcons (Qnil, Qnil); 617 offset = XINT (AREF (subset_info, 3));
601 from = to = -2; 618 from -= offset;
602 for (i = code_space[12]; i <= code_space[13]; i++) 619 if (from < XFASTINT (AREF (subset_info, 1)))
603 for (j = code_space[8]; j <= code_space[9]; j++) 620 from = XFASTINT (AREF (subset_info, 1));
604 for (k = code_space[4]; k <= code_space[5]; k++) 621 to -= offset;
605 for (l = code_space[0]; l <= code_space[1]; l++) 622 if (to > XFASTINT (AREF (subset_info, 2)))
606 { 623 to = XFASTINT (AREF (subset_info, 2));
607 code = (i << 24) | (j << 16) | (k << 8) | l; 624 map_charset_chars (c_function, function, arg, charset, from, to);
608 c = DECODE_CHAR (charset, code); 625 }
609 if (c == to + 1) 626 else /* i.e. CHARSET_METHOD_SUPERSET */
610 { 627 {
611 to++; 628 Lisp_Object parents;
612 continue; 629
613 } 630 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
614 if (from >= 0) 631 parents = XCDR (parents))
615 { 632 {
616 if (from < to) 633 int offset;
617 { 634 unsigned this_from, this_to;
618 XSETCAR (range, make_number (from)); 635
619 XSETCDR (range, make_number (to)); 636 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
620 val = range; 637 offset = XINT (XCDR (XCAR (parents)));
621 } 638 this_from = from - offset;
622 else 639 this_to = to - offset;
623 val = make_number (from); 640 if (this_from < CHARSET_MIN_CODE (charset))
624 if (NILP (function)) 641 this_from = CHARSET_MIN_CODE (charset);
625 (*c_function) (arg, val, Qnil); 642 if (this_to > CHARSET_MAX_CODE (charset))
626 else 643 this_to = CHARSET_MAX_CODE (charset);
627 call2 (function, val, arg); 644 map_charset_chars (c_function, function, arg, charset, from, to);
628 }
629 from = to = (c < 0 ? -2 : c);
630 }
631 if (from >= 0)
632 {
633 if (from < to)
634 {
635 XSETCAR (range, make_number (from));
636 XSETCDR (range, make_number (to));
637 val = range;
638 }
639 else
640 val = make_number (from);
641 if (NILP (function))
642 (*c_function) (arg, val, Qnil);
643 else
644 call2 (function, val, arg);
645 } 645 }
646 } 646 }
647 } 647 }
648 648
649 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0, 649
650 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
650 doc: /* Call FUNCTION for all characters in CHARSET. 651 doc: /* Call FUNCTION for all characters in CHARSET.
651 FUNCTION is called with an argument RANGE and optional 2nd 652 FUNCTION is called with an argument RANGE and the optional 3rd
652 argument ARG. 653 argument ARG.
653 654
654 RANGE is either a cons (FROM . TO), where FROM and TO indicate a range of 655 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
655 characters contained in CHARSET or a single character in the case that 656 characters contained in CHARSET.
656 FROM and TO would be equal. (The charset mapping may have gaps.)*/) 657
657 (function, charset, arg) 658 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
658 Lisp_Object function, charset, arg; 659 range of code points of target characters. */)
659 { 660 (function, charset, arg, from_code, to_code)
660 map_charset_chars (NULL, function, charset, arg); 661 Lisp_Object function, charset, arg, from_code, to_code;
662 {
663 struct charset *cs;
664
665 CHECK_CHARSET_GET_CHARSET (charset, cs);
666 if (NILP (from_code))
667 from_code = 0;
668 if (from_code < CHARSET_MIN_CODE (cs))
669 from_code = CHARSET_MIN_CODE (cs);
670 if (NILP (to_code))
671 to_code = 0xFFFFFFFF;
672 if (to_code > CHARSET_MAX_CODE (cs))
673 to_code = CHARSET_MAX_CODE (cs);
674
675 map_charset_chars (NULL, function, arg, cs, from_code, to_code);
661 return Qnil; 676 return Qnil;
662 } 677 }
663 678
664 679
665 /* Define a charset according to the arguments. The Nth argument is 680 /* Define a charset according to the arguments. The Nth argument is
891 load_charset_map_from_file (&charset, val, 0); 906 load_charset_map_from_file (&charset, val, 0);
892 else 907 else
893 load_charset_map_from_vector (&charset, val, 0); 908 load_charset_map_from_vector (&charset, val, 0);
894 charset.method = CHARSET_METHOD_MAP_DEFERRED; 909 charset.method = CHARSET_METHOD_MAP_DEFERRED;
895 } 910 }
896 else if (! NILP (args[charset_arg_parents])) 911 else if (! NILP (args[charset_arg_subset]))
897 { 912 {
898 val = args[charset_arg_parents]; 913 Lisp_Object parent;
899 CHECK_LIST (val); 914 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
900 charset.method = CHARSET_METHOD_INHERIT; 915 struct charset *parent_charset;
916
917 val = args[charset_arg_subset];
918 parent = Fcar (val);
919 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
920 parent_min_code = Fnth (make_number (1), val);
921 CHECK_NATNUM (parent_min_code);
922 parent_max_code = Fnth (make_number (2), val);
923 CHECK_NATNUM (parent_max_code);
924 parent_code_offset = Fnth (make_number (3), val);
925 CHECK_NUMBER (parent_code_offset);
926 val = Fmake_vector (make_number (4), Qnil);
927 ASET (val, 0, make_number (parent_charset->id));
928 ASET (val, 1, parent_min_code);
929 ASET (val, 2, parent_max_code);
930 ASET (val, 3, parent_code_offset);
931 ASET (attrs, charset_subset, val);
932
933 charset.method = CHARSET_METHOD_SUBSET;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i = 0; i < 190; i++)
938 charset.fast_map[i] = parent_charset->fast_map[i];
939
940 /* We also copy these for parents. */
941 charset.min_char = parent_charset->min_char;
942 charset.max_char = parent_charset->max_char;
943 }
944 else if (! NILP (args[charset_arg_superset]))
945 {
946 val = args[charset_arg_superset];
947 charset.method = CHARSET_METHOD_SUPERSET;
901 val = Fcopy_sequence (val); 948 val = Fcopy_sequence (val);
902 ASET (attrs, charset_parents, val); 949 ASET (attrs, charset_superset, val);
903 950
904 charset.min_char = MAX_CHAR; 951 charset.min_char = MAX_CHAR;
905 charset.max_char = 0; 952 charset.max_char = 0;
906 for (; ! NILP (val); val = Fcdr (val)) 953 for (; ! NILP (val); val = Fcdr (val))
907 { 954 {
1349 { 1396 {
1350 load_charset (charset); 1397 load_charset (charset);
1351 method = CHARSET_METHOD (charset); 1398 method = CHARSET_METHOD (charset);
1352 } 1399 }
1353 1400
1354 if (method == CHARSET_METHOD_INHERIT) 1401 if (method == CHARSET_METHOD_SUBSET)
1402 {
1403 Lisp_Object subset_info;
1404
1405 subset_info = CHARSET_SUBSET (charset);
1406 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1407 code -= XINT (AREF (subset_info, 3));
1408 if (code < XFASTINT (AREF (subset_info, 1))
1409 || code > XFASTINT (AREF (subset_info, 2)))
1410 c = -1;
1411 else
1412 c = DECODE_CHAR (charset, code);
1413 }
1414 else if (method == CHARSET_METHOD_SUPERSET)
1355 { 1415 {
1356 Lisp_Object parents; 1416 Lisp_Object parents;
1357 1417
1358 parents = CHARSET_PARENTS (charset); 1418 parents = CHARSET_SUPERSET (charset);
1359 c = -1; 1419 c = -1;
1360 for (; CONSP (parents); parents = XCDR (parents)) 1420 for (; CONSP (parents); parents = XCDR (parents))
1361 { 1421 {
1362 int id = XINT (XCAR (XCAR (parents))); 1422 int id = XINT (XCAR (XCAR (parents)));
1363 int code_offset = XINT (XCDR (XCAR (parents))); 1423 int code_offset = XINT (XCDR (XCAR (parents)));
1364 unsigned this_code = code + code_offset; 1424 unsigned this_code = code - code_offset;
1365 1425
1366 charset = CHARSET_FROM_ID (id); 1426 charset = CHARSET_FROM_ID (id);
1367 if ((c = DECODE_CHAR (charset, this_code)) >= 0) 1427 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1368 break; 1428 break;
1369 } 1429 }
1396 } 1456 }
1397 1457
1398 return c; 1458 return c;
1399 } 1459 }
1400 1460
1461 /* Variable used temporarily by the macro ENCODE_CHAR. */
1462 Lisp_Object charset_work;
1401 1463
1402 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to 1464 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1403 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */ 1465 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1404 1466
1405 unsigned 1467 unsigned
1410 unsigned code; 1472 unsigned code;
1411 enum charset_method method = CHARSET_METHOD (charset); 1473 enum charset_method method = CHARSET_METHOD (charset);
1412 1474
1413 if (CHARSET_UNIFIED_P (charset)) 1475 if (CHARSET_UNIFIED_P (charset))
1414 { 1476 {
1415 Lisp_Object deunifier; 1477 Lisp_Object deunifier, deunified;
1416 int deunified;
1417 1478
1418 deunifier = CHARSET_DEUNIFIER (charset); 1479 deunifier = CHARSET_DEUNIFIER (charset);
1419 if (! CHAR_TABLE_P (deunifier)) 1480 if (! CHAR_TABLE_P (deunifier))
1420 { 1481 {
1421 Funify_charset (CHARSET_NAME (charset), Qnil); 1482 Funify_charset (CHARSET_NAME (charset), Qnil);
1422 deunifier = CHARSET_DEUNIFIER (charset); 1483 deunifier = CHARSET_DEUNIFIER (charset);
1423 } 1484 }
1424 deunified = XINT (CHAR_TABLE_REF (deunifier, c)); 1485 deunified = CHAR_TABLE_REF (deunifier, c);
1425 if (deunified > 0) 1486 if (! NILP (deunified))
1426 c = deunified; 1487 c = XINT (deunified);
1427 } 1488 }
1428 1489
1429 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map) 1490 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1430 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset)) 1491 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1431 return CHARSET_INVALID_CODE (charset); 1492 return CHARSET_INVALID_CODE (charset);
1432 1493
1433 if (method == CHARSET_METHOD_INHERIT) 1494 if (method == CHARSET_METHOD_SUBSET)
1495 {
1496 Lisp_Object subset_info;
1497 struct charset *this_charset;
1498
1499 subset_info = CHARSET_SUBSET (charset);
1500 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1501 code = ENCODE_CHAR (this_charset, c);
1502 if (code == CHARSET_INVALID_CODE (this_charset)
1503 || code < XFASTINT (AREF (subset_info, 1))
1504 || code > XFASTINT (AREF (subset_info, 2)))
1505 return CHARSET_INVALID_CODE (charset);
1506 code += XINT (AREF (subset_info, 3));
1507 return code;
1508 }
1509
1510 if (method == CHARSET_METHOD_SUPERSET)
1434 { 1511 {
1435 Lisp_Object parents; 1512 Lisp_Object parents;
1436 1513
1437 parents = CHARSET_PARENTS (charset); 1514 parents = CHARSET_SUPERSET (charset);
1438 for (; CONSP (parents); parents = XCDR (parents)) 1515 for (; CONSP (parents); parents = XCDR (parents))
1439 { 1516 {
1440 int id = XINT (XCAR (XCAR (parents))); 1517 int id = XINT (XCAR (XCAR (parents)));
1441 int code_offset = XINT (XCDR (XCAR (parents))); 1518 int code_offset = XINT (XCDR (XCAR (parents)));
1442 struct charset *this_charset = CHARSET_FROM_ID (id); 1519 struct charset *this_charset = CHARSET_FROM_ID (id);
1443 1520
1444 code = ENCODE_CHAR (this_charset, c); 1521 code = ENCODE_CHAR (this_charset, c);
1445 if (code != CHARSET_INVALID_CODE (this_charset) 1522 if (code != CHARSET_INVALID_CODE (this_charset)
1446 && (code_offset < 0 || code >= code_offset)) 1523 && (code_offset < 0 || code >= code_offset))
1447 { 1524 {
1448 code -= code_offset; 1525 code += code_offset;
1449 if (code >= charset->min_code && code <= charset->max_code 1526 if (code >= charset->min_code && code <= charset->max_code
1450 && CODE_POINT_TO_INDEX (charset, code) >= 0) 1527 && CODE_POINT_TO_INDEX (charset, code) >= 0)
1451 return code; 1528 return code;
1452 } 1529 }
1453 } 1530 }
1467 1544
1468 encoder = CHARSET_ENCODER (charset); 1545 encoder = CHARSET_ENCODER (charset);
1469 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) 1546 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1470 return CHARSET_INVALID_CODE (charset); 1547 return CHARSET_INVALID_CODE (charset);
1471 val = CHAR_TABLE_REF (encoder, c); 1548 val = CHAR_TABLE_REF (encoder, c);
1549 if (NILP (val))
1550 return CHARSET_INVALID_CODE (charset);
1472 code = XINT (val); 1551 code = XINT (val);
1473 if (! CHARSET_COMPACT_CODES_P (charset)) 1552 if (! CHARSET_COMPACT_CODES_P (charset))
1474 code = INDEX_TO_CODE_POINT (charset, code); 1553 code = INDEX_TO_CODE_POINT (charset, code);
1475 } 1554 }
1476 else /* method == CHARSET_METHOD_OFFSET */ 1555 else /* method == CHARSET_METHOD_OFFSET */
1960 args[charset_arg_ascii_compatible_p] = Qt; 2039 args[charset_arg_ascii_compatible_p] = Qt;
1961 args[charset_arg_supplementary_p] = Qnil; 2040 args[charset_arg_supplementary_p] = Qnil;
1962 args[charset_arg_invalid_code] = Qnil; 2041 args[charset_arg_invalid_code] = Qnil;
1963 args[charset_arg_code_offset] = make_number (0); 2042 args[charset_arg_code_offset] = make_number (0);
1964 args[charset_arg_map] = Qnil; 2043 args[charset_arg_map] = Qnil;
1965 args[charset_arg_parents] = Qnil; 2044 args[charset_arg_subset] = Qnil;
2045 args[charset_arg_superset] = Qnil;
1966 args[charset_arg_unify_map] = Qnil; 2046 args[charset_arg_unify_map] = Qnil;
1967 /* The actual plist is set by mule-conf.el. */ 2047 /* The actual plist is set by mule-conf.el. */
1968 plist[1] = args[charset_arg_name]; 2048 plist[1] = args[charset_arg_name];
1969 plist[3] = args[charset_arg_dimension]; 2049 plist[3] = args[charset_arg_dimension];
1970 plist[5] = args[charset_arg_code_space]; 2050 plist[5] = args[charset_arg_code_space];
1991 args[charset_arg_ascii_compatible_p] = Qt; 2071 args[charset_arg_ascii_compatible_p] = Qt;
1992 args[charset_arg_supplementary_p] = Qnil; 2072 args[charset_arg_supplementary_p] = Qnil;
1993 args[charset_arg_invalid_code] = Qnil; 2073 args[charset_arg_invalid_code] = Qnil;
1994 args[charset_arg_code_offset] = make_number (0); 2074 args[charset_arg_code_offset] = make_number (0);
1995 args[charset_arg_map] = Qnil; 2075 args[charset_arg_map] = Qnil;
1996 args[charset_arg_parents] = Qnil; 2076 args[charset_arg_subset] = Qnil;
2077 args[charset_arg_superset] = Qnil;
1997 args[charset_arg_unify_map] = Qnil; 2078 args[charset_arg_unify_map] = Qnil;
1998 /* The actual plist is set by mule-conf.el. */ 2079 /* The actual plist is set by mule-conf.el. */
1999 plist[1] = args[charset_arg_name]; 2080 plist[1] = args[charset_arg_name];
2000 plist[3] = args[charset_arg_dimension]; 2081 plist[3] = args[charset_arg_dimension];
2001 plist[5] = args[charset_arg_code_space]; 2082 plist[5] = args[charset_arg_code_space];