comparison src/charset.c @ 20614:a4800f9842e4

(multibyte_chars_in_text): New function. (Fstring): Use make_multibyte_string. (Fcompose_string): Likewise. (Ffind_charset_string): Handle bytes vs chars in string. Special case for single-byte strings. (Fchars_in_string): Function deleted. (Fstring): Renamed from Fconcat_chars. Handle bytes vs chars in string. (syms_of_charset): Corresponding changes.
author Richard M. Stallman <rms@gnu.org>
date Fri, 09 Jan 1998 23:03:25 +0000
parents 460e5621e8c9
children 5ab6701a2b4b
comparison
Legend: equal | deleted | inserted | replaced
Left column (old): 20613:e0ed7fc921fa | Right column (new): 20614:a4800f9842e4
699 int charsets[MAX_CHARSET + 1]; 699 int charsets[MAX_CHARSET + 1];
700 int i; 700 int i;
701 Lisp_Object val; 701 Lisp_Object val;
702 702
703 CHECK_STRING (str, 0); 703 CHECK_STRING (str, 0);
704
705 if (! STRING_MULTIBYTE (str))
706 return Qnil;
707
704 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); 708 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
705 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, 709 find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size_byte,
706 charsets, table); 710 charsets, table);
707 val = Qnil; 711 val = Qnil;
708 for (i = MAX_CHARSET; i >= 0; i--) 712 for (i = MAX_CHARSET; i >= 0; i--)
709 if (charsets[i]) 713 if (charsets[i])
710 val = Fcons (CHARSET_SYMBOL (i), val); 714 val = Fcons (CHARSET_SYMBOL (i), val);
976 if (!CHARSET_DEFINED_P (charset)) 980 if (!CHARSET_DEFINED_P (charset))
977 error ("Invalid character: %d", XINT (ch)); 981 error ("Invalid character: %d", XINT (ch));
978 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX); 982 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
979 } 983 }
980 984
981 DEFUN ("chars-in-string", Fchars_in_string, Schars_in_string, 1, 1, 0,
982 "Return number of characters in STRING.\n\
983 When using multibyte characters, this is not the necessarily same as\n\
984 the length of STRING; the length counts a multibyte characters as\n\
985 several bytes, but this function counts a multibyte character as one\n\
986 character.")
987 (str)
988 Lisp_Object str;
989 {
990 Lisp_Object val;
991 unsigned char *p, *endp;
992 int chars;
993
994 CHECK_STRING (str, 0);
995
996 if (NILP (current_buffer->enable_multibyte_characters))
997 return make_number (XSTRING (str)->size);
998
999 p = XSTRING (str)->data; endp = p + XSTRING (str)->size;
1000 chars = 0;
1001 while (p < endp)
1002 {
1003 if (*p == LEADING_CODE_COMPOSITION)
1004 {
1005 p++;
1006 while (p < endp && ! CHAR_HEAD_P (*p)) p++;
1007 }
1008 else
1009 p += BYTES_BY_CHAR_HEAD (*p);
1010 chars++;
1011 }
1012
1013 XSETFASTINT (val, chars);
1014 return val;
1015 }
1016
1017 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, 985 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1018 "Return number of characters between BEG and END.") 986 "Return number of characters between BEG and END.")
1019 (beg, end) 987 (beg, end)
1020 Lisp_Object beg, end; 988 Lisp_Object beg, end;
1021 { 989 {
1025 to = max (XFASTINT (beg), XFASTINT (end)); 993 to = max (XFASTINT (beg), XFASTINT (end));
1026 994
1027 return to - from; 995 return to - from;
1028 } 996 }
1029 997
998 /* Return the number of characters in the NBYTES bytes at PTR.
999 This works by looking at the contents and checking for multibyte sequences.
1000 However, if the current buffer has enable-multibyte-characters = nil,
1001 we treat each byte as a character. */
1002
1030 int 1003 int
1031 chars_in_text (ptr, nbytes) 1004 chars_in_text (ptr, nbytes)
1032 unsigned char *ptr; 1005 unsigned char *ptr;
1033 int nbytes; 1006 int nbytes;
1034 { 1007 {
1035 unsigned char *endp; 1008 unsigned char *endp;
1036 int chars; 1009 int chars;
1037 1010
1038 if (NILP (current_buffer->enable_multibyte_characters)) 1011 /* current_buffer is null at early stages of Emacs initialization. */
1012 if (current_buffer == 0
1013 || NILP (current_buffer->enable_multibyte_characters))
1039 return nbytes; 1014 return nbytes;
1040 1015
1041 endp = ptr + nbytes; 1016 endp = ptr + nbytes;
1042 chars = 0; 1017 chars = 0;
1043 1018
1054 } 1029 }
1055 1030
1056 return chars; 1031 return chars;
1057 } 1032 }
1058 1033
1059 DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0, 1034 /* Return the number of characters in the NBYTES bytes at PTR.
1035 This works by looking at the contents and checking for multibyte sequences.
1036 It ignores enable-multibyte-characters. */
1037
1038 int
1039 multibyte_chars_in_text (ptr, nbytes)
1040 unsigned char *ptr;
1041 int nbytes;
1042 {
1043 unsigned char *endp;
1044 int chars;
1045
1046 endp = ptr + nbytes;
1047 chars = 0;
1048
1049 while (ptr < endp)
1050 {
1051 if (*ptr == LEADING_CODE_COMPOSITION)
1052 {
1053 ptr++;
1054 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1055 }
1056 else
1057 ptr += BYTES_BY_CHAR_HEAD (*ptr);
1058 chars++;
1059 }
1060
1061 return chars;
1062 }
1063
1064 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1060 "Concatenate all the argument characters and make the result a string.") 1065 "Concatenate all the argument characters and make the result a string.")
1061 (n, args) 1066 (n, args)
1062 int n; 1067 int n;
1063 Lisp_Object *args; 1068 Lisp_Object *args;
1064 { 1069 {
1084 /* C is a composite character. */ 1089 /* C is a composite character. */
1085 bcopy (str, p, len); 1090 bcopy (str, p, len);
1086 p += len; 1091 p += len;
1087 } 1092 }
1088 1093
1089 val = make_string (buf, p - buf); 1094 val = make_multibyte_string (buf, n, p - buf);
1090 return val; 1095 return val;
1091 } 1096 }
1092 1097
1093 #endif /* emacs */ 1098 #endif /* emacs */
1094 1099
1457 1462
1458 CHECK_STRING (str, 0); 1463 CHECK_STRING (str, 0);
1459 1464
1460 buf[0] = LEADING_CODE_COMPOSITION; 1465 buf[0] = LEADING_CODE_COMPOSITION;
1461 p = XSTRING (str)->data; 1466 p = XSTRING (str)->data;
1462 pend = p + XSTRING (str)->size; 1467 pend = p + XSTRING (str)->size_byte;
1463 i = 1; 1468 i = 1;
1464 while (p < pend) 1469 while (p < pend)
1465 { 1470 {
1466 if (*p < 0x20 || *p == 127) /* control code */ 1471 if (*p < 0x20 || *p == 127) /* control code */
1467 error ("Invalid component character: %d", *p); 1472 error ("Invalid component character: %d", *p);
1502 1507
1503 if (i < 5) 1508 if (i < 5)
1504 /* STR contains only one character, which can't be composed. */ 1509 /* STR contains only one character, which can't be composed. */
1505 error ("Too short string to be composed: %s", XSTRING (str)->data); 1510 error ("Too short string to be composed: %s", XSTRING (str)->data);
1506 1511
1507 return make_string (buf, i); 1512 return make_multibyte_string (buf, 1, i);
1508 } 1513 }
1509 1514
1510 1515
1511 charset_id_internal (charset_name) 1516 charset_id_internal (charset_name)
1512 char *charset_name; 1517 char *charset_name;
1624 defsubr (&Schar_valid_p); 1629 defsubr (&Schar_valid_p);
1625 defsubr (&Schar_bytes); 1630 defsubr (&Schar_bytes);
1626 defsubr (&Schar_width); 1631 defsubr (&Schar_width);
1627 defsubr (&Sstring_width); 1632 defsubr (&Sstring_width);
1628 defsubr (&Schar_direction); 1633 defsubr (&Schar_direction);
1629 defsubr (&Schars_in_string);
1630 defsubr (&Schars_in_region); 1634 defsubr (&Schars_in_region);
1631 defsubr (&Sconcat_chars); 1635 defsubr (&Sstring);
1632 defsubr (&Scmpcharp); 1636 defsubr (&Scmpcharp);
1633 defsubr (&Scmpchar_component); 1637 defsubr (&Scmpchar_component);
1634 defsubr (&Scmpchar_cmp_rule); 1638 defsubr (&Scmpchar_cmp_rule);
1635 defsubr (&Scmpchar_cmp_rule_p); 1639 defsubr (&Scmpchar_cmp_rule_p);
1636 defsubr (&Scmpchar_cmp_count); 1640 defsubr (&Scmpchar_cmp_count);