Mercurial > emacs
comparison src/charset.c @ 20614:a4800f9842e4
(multibyte_chars_in_text): New function.
(Fstring): Use make_multibyte_string.
(Fcompose_string): Likewise.
(Ffind_charset_string): Handle bytes vs chars in string.
Special case for single-byte strings.
(Fchars_in_string): Function deleted.
(Fstring): Renamed from Fconcat_chars.
Handle bytes vs chars in string.
(syms_of_charset): Corresponding changes.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Fri, 09 Jan 1998 23:03:25 +0000 |
parents | 460e5621e8c9 |
children | 5ab6701a2b4b |
comparison legend: equal | deleted | inserted | replaced
20613:e0ed7fc921fa | 20614:a4800f9842e4 |
---|---|
699 int charsets[MAX_CHARSET + 1]; | 699 int charsets[MAX_CHARSET + 1]; |
700 int i; | 700 int i; |
701 Lisp_Object val; | 701 Lisp_Object val; |
702 | 702 |
703 CHECK_STRING (str, 0); | 703 CHECK_STRING (str, 0); |
704 | |
705 if (! STRING_MULTIBYTE (str)) | |
706 return Qnil; | |
707 | |
704 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); | 708 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); |
705 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, | 709 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size_byte, |
706 charsets, table); | 710 charsets, table); |
707 val = Qnil; | 711 val = Qnil; |
708 for (i = MAX_CHARSET; i >= 0; i--) | 712 for (i = MAX_CHARSET; i >= 0; i--) |
709 if (charsets[i]) | 713 if (charsets[i]) |
710 val = Fcons (CHARSET_SYMBOL (i), val); | 714 val = Fcons (CHARSET_SYMBOL (i), val); |
976 if (!CHARSET_DEFINED_P (charset)) | 980 if (!CHARSET_DEFINED_P (charset)) |
977 error ("Invalid character: %d", XINT (ch)); | 981 error ("Invalid character: %d", XINT (ch)); |
978 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); | 982 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); |
979 } | 983 } |
980 | 984 |
981 DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0, | |
982 "Return number of characters in STRING.\n\ | |
983 When using multibyte characters, this is not the necessarily same as\n\ | |
984 the length of STRING; the length counts a multibyte characters as\n\ | |
985 several bytes, but this function counts a multibyte character as one\n\ | |
986 character.") | |
987 (str) | |
988 Lisp_Object str; | |
989 { | |
990 Lisp_Object val; | |
991 unsigned char *p, *endp; | |
992 int chars; | |
993 | |
994 CHECK_STRING (str, 0); | |
995 | |
996 if (NILP (current_buffer->enable_multibyte_characters)) | |
997 return make_number (XSTRING (str)->size); | |
998 | |
999 p = XSTRING (str)->data; endp = p + XSTRING (str)->size; | |
1000 chars = 0; | |
1001 while (p < endp) | |
1002 { | |
1003 if (*p == LEADING_CODE_COMPOSITION) | |
1004 { | |
1005 p++; | |
1006 while (p < endp && ! CHAR_HEAD_P (*p)) p++; | |
1007 } | |
1008 else | |
1009 p += BYTES_BY_CHAR_HEAD (*p); | |
1010 chars++; | |
1011 } | |
1012 | |
1013 XSETFASTINT (val, chars); | |
1014 return val; | |
1015 } | |
1016 | |
1017 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, | 985 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, |
1018 "Return number of characters between BEG and END.") | 986 "Return number of characters between BEG and END.") |
1019 (beg, end) | 987 (beg, end) |
1020 Lisp_Object beg, end; | 988 Lisp_Object beg, end; |
1021 { | 989 { |
1025 to = max (XFASTINT (beg), XFASTINT (end)); | 993 to = max (XFASTINT (beg), XFASTINT (end)); |
1026 | 994 |
1027 return to - from; | 995 return to - from; |
1028 } | 996 } |
1029 | 997 |
998 /* Return the number of characters in the NBYTES bytes at PTR. | |
999 This works by looking at the contents and checking for multibyte sequences. | |
1000 However, if the current buffer has enable-multibyte-characters = nil, | |
1001 we treat each byte as a character. */ | |
1002 | |
1030 int | 1003 int |
1031 chars_in_text (ptr, nbytes) | 1004 chars_in_text (ptr, nbytes) |
1032 unsigned char *ptr; | 1005 unsigned char *ptr; |
1033 int nbytes; | 1006 int nbytes; |
1034 { | 1007 { |
1035 unsigned char *endp; | 1008 unsigned char *endp; |
1036 int chars; | 1009 int chars; |
1037 | 1010 |
1038 if (NILP (current_buffer->enable_multibyte_characters)) | 1011 /* current_buffer is null at early stages of Emacs initialization. */ |
1012 if (current_buffer == 0 | |
1013 || NILP (current_buffer->enable_multibyte_characters)) | |
1039 return nbytes; | 1014 return nbytes; |
1040 | 1015 |
1041 endp = ptr + nbytes; | 1016 endp = ptr + nbytes; |
1042 chars = 0; | 1017 chars = 0; |
1043 | 1018 |
1054 } | 1029 } |
1055 | 1030 |
1056 return chars; | 1031 return chars; |
1057 } | 1032 } |
1058 | 1033 |
1059 DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0, | 1034 /* Return the number of characters in the NBYTES bytes at PTR. |
1035 This works by looking at the contents and checking for multibyte sequences. | |
1036 It ignores enable-multibyte-characters. */ | |
1037 | |
1038 int | |
1039 multibyte_chars_in_text (ptr, nbytes) | |
1040 unsigned char *ptr; | |
1041 int nbytes; | |
1042 { | |
1043 unsigned char *endp; | |
1044 int chars; | |
1045 | |
1046 endp = ptr + nbytes; | |
1047 chars = 0; | |
1048 | |
1049 while (ptr < endp) | |
1050 { | |
1051 if (*ptr == LEADING_CODE_COMPOSITION) | |
1052 { | |
1053 ptr++; | |
1054 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++; | |
1055 } | |
1056 else | |
1057 ptr += BYTES_BY_CHAR_HEAD (*ptr); | |
1058 chars++; | |
1059 } | |
1060 | |
1061 return chars; | |
1062 } | |
1063 | |
1064 DEFUN ("string", Fstring, Sstring, 1, MANY, 0, | |
1060 "Concatenate all the argument characters and make the result a string.") | 1065 "Concatenate all the argument characters and make the result a string.") |
1061 (n, args) | 1066 (n, args) |
1062 int n; | 1067 int n; |
1063 Lisp_Object *args; | 1068 Lisp_Object *args; |
1064 { | 1069 { |
1084 /* C is a composite character. */ | 1089 /* C is a composite character. */ |
1085 bcopy (str, p, len); | 1090 bcopy (str, p, len); |
1086 p += len; | 1091 p += len; |
1087 } | 1092 } |
1088 | 1093 |
1089 val = make_string (buf, p - buf); | 1094 val = make_multibyte_string (buf, n, p - buf); |
1090 return val; | 1095 return val; |
1091 } | 1096 } |
1092 | 1097 |
1093 #endif /* emacs */ | 1098 #endif /* emacs */ |
1094 | 1099 |
1457 | 1462 |
1458 CHECK_STRING (str, 0); | 1463 CHECK_STRING (str, 0); |
1459 | 1464 |
1460 buf[0] = LEADING_CODE_COMPOSITION; | 1465 buf[0] = LEADING_CODE_COMPOSITION; |
1461 p = XSTRING (str)->data; | 1466 p = XSTRING (str)->data; |
1462 pend = p + XSTRING (str)->size; | 1467 pend = p + XSTRING (str)->size_byte; |
1463 i = 1; | 1468 i = 1; |
1464 while (p < pend) | 1469 while (p < pend) |
1465 { | 1470 { |
1466 if (*p < 0x20 || *p == 127) /* control code */ | 1471 if (*p < 0x20 || *p == 127) /* control code */ |
1467 error ("Invalid component character: %d", *p); | 1472 error ("Invalid component character: %d", *p); |
1502 | 1507 |
1503 if (i < 5) | 1508 if (i < 5) |
1504 /* STR contains only one character, which can't be composed. */ | 1509 /* STR contains only one character, which can't be composed. */ |
1505 error ("Too short string to be composed: %s", XSTRING (str)->data); | 1510 error ("Too short string to be composed: %s", XSTRING (str)->data); |
1506 | 1511 |
1507 return make_string (buf, i); | 1512 return make_multibyte_string (buf, 1, i); |
1508 } | 1513 } |
1509 | 1514 |
1510 | 1515 |
1511 charset_id_internal (charset_name) | 1516 charset_id_internal (charset_name) |
1512 char *charset_name; | 1517 char *charset_name; |
1624 defsubr (&Schar_valid_p); | 1629 defsubr (&Schar_valid_p); |
1625 defsubr (&Schar_bytes); | 1630 defsubr (&Schar_bytes); |
1626 defsubr (&Schar_width); | 1631 defsubr (&Schar_width); |
1627 defsubr (&Sstring_width); | 1632 defsubr (&Sstring_width); |
1628 defsubr (&Schar_direction); | 1633 defsubr (&Schar_direction); |
1629 defsubr (&Schars_in_string); | |
1630 defsubr (&Schars_in_region); | 1634 defsubr (&Schars_in_region); |
1631 defsubr (&Sconcat_chars); | 1635 defsubr (&Sstring); |
1632 defsubr (&Scmpcharp); | 1636 defsubr (&Scmpcharp); |
1633 defsubr (&Scmpchar_component); | 1637 defsubr (&Scmpchar_component); |
1634 defsubr (&Scmpchar_cmp_rule); | 1638 defsubr (&Scmpchar_cmp_rule); |
1635 defsubr (&Scmpchar_cmp_rule_p); | 1639 defsubr (&Scmpchar_cmp_rule_p); |
1636 defsubr (&Scmpchar_cmp_count); | 1640 defsubr (&Scmpchar_cmp_count); |