Mercurial > emacs
comparison src/charset.c @ 21445:4c0b4a1025cd
(string_to_non_ascii_char): Include garbage bytes (if
any) following a multibyte character in *ACTUAL_LEN.
(Fcharset_after): New function.
(syms_of_charset): Defsubr it.
(multibyte_form_length): Modified to be consistent with
string_to_non_ascii_char.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 09 Apr 1998 05:40:23 +0000 |
parents | 95aae2ff5fcd |
children | fa9ff387d260 |
comparison
equal
deleted
inserted
replaced
21444:0cba6f211d7c | 21445:4c0b4a1025cd |
---|---|
177 const unsigned char *str; | 177 const unsigned char *str; |
178 int len, *actual_len; | 178 int len, *actual_len; |
179 { | 179 { |
180 int charset; | 180 int charset; |
181 unsigned char c1, c2; | 181 unsigned char c1, c2; |
182 register int c; | 182 register int c, bytes; |
183 | 183 |
184 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII) | 184 c = *str; |
185 { | 185 bytes = 1; |
186 if (actual_len) | 186 |
187 *actual_len = 1; | 187 if (BASE_LEADING_CODE_P (c)) |
188 return (int) *str; | 188 { |
189 } | 189 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++; |
190 | 190 |
191 c = MAKE_NON_ASCII_CHAR (charset, c1, c2); | 191 if (c == LEADING_CODE_COMPOSITION) |
192 { | |
193 int cmpchar_id = str_cmpchar_id (str, bytes); | |
194 | |
195 if (cmpchar_id >= 0) | |
196 c = MAKE_COMPOSITE_CHAR (cmpchar_id); | |
197 } | |
198 else | |
199 { | |
200 int charset = c, c1, c2 = 0; | |
201 | |
202 str++; | |
203 if (c >= LEADING_CODE_PRIVATE_11) | |
204 charset = *str++; | |
205 if (BYTES_BY_CHAR_HEAD (c) <= bytes && CHARSET_DEFINED_P (charset)) | |
206 { | |
207 c1 = *str++ & 0x7f; | |
208 if (CHARSET_DIMENSION (charset) == 2) | |
209 c2 = *str & 0x7F; | |
210 c = MAKE_NON_ASCII_CHAR (charset, c1, c2); | |
211 } | |
212 } | |
213 } | |
192 | 214 |
193 if (actual_len) | 215 if (actual_len) |
194 *actual_len = (charset == CHARSET_COMPOSITION | 216 *actual_len = bytes; |
195 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len | |
196 : BYTES_BY_CHAR_HEAD (*str)); | |
197 return c; | 217 return c; |
198 } | 218 } |
199 | 219 |
200 /* Return the length of the multi-byte form at string STR of length LEN. */ | 220 /* Return the length of the multi-byte form at string STR of length LEN. */ |
201 int | 221 int |
202 multibyte_form_length (str, len) | 222 multibyte_form_length (str, len) |
203 const unsigned char *str; | 223 const unsigned char *str; |
204 int len; | 224 int len; |
205 { | 225 { |
206 int charset; | 226 int bytes = 1; |
207 unsigned char c1, c2; | 227 |
208 register int c; | 228 if (BASE_LEADING_CODE_P (*str)) |
209 | 229 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++; |
210 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII) | 230 |
211 return 1; | 231 return bytes; |
212 | |
213 return (charset == CHARSET_COMPOSITION | |
214 ? cmpchar_table[(c1 << 7) | c2]->len | |
215 : BYTES_BY_CHAR_HEAD (*str)); | |
216 } | 232 } |
217 | 233 |
218 /* Check if string STR of length LEN contains valid multi-byte form of | 234 /* Check if string STR of length LEN contains valid multi-byte form of |
219 a character. If valid, charset and position codes of the character | 235 a character. If valid, charset and position codes of the character |
220 are set at *CHARSET, *C1, and *C2, and return 0. If not valid, | 236 are set at *CHARSET, *C1, and *C2, and return 0. If not valid, |
801 Lisp_Object ch; | 817 Lisp_Object ch; |
802 { | 818 { |
803 CHECK_NUMBER (ch, 0); | 819 CHECK_NUMBER (ch, 0); |
804 | 820 |
805 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch))); | 821 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch))); |
822 } | |
823 | |
824 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, | |
825 "Return charset of a character in current buffer at position POS.\n\ | |
826 If POS is nil, it defaults to the current point.") | |
827 (pos) | |
828 Lisp_Object pos; | |
829 { | |
830 register int pos_byte, c, charset; | |
831 register unsigned char *p; | |
832 | |
833 if (NILP (pos)) | |
834 pos_byte = PT_BYTE; | |
835 else if (MARKERP (pos)) | |
836 pos_byte = marker_byte_position (pos); | |
837 else | |
838 { | |
839 CHECK_NUMBER (pos, 0); | |
840 pos_byte = CHAR_TO_BYTE (XINT (pos)); | |
841 } | |
842 p = BYTE_POS_ADDR (pos_byte); | |
843 c = STRING_CHAR (p, Z_BYTE - pos_byte); | |
844 charset = CHAR_CHARSET (c); | |
845 return CHARSET_SYMBOL (charset); | |
806 } | 846 } |
807 | 847 |
808 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0, | 848 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0, |
809 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\ | 849 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\ |
810 \n\ | 850 \n\ |
1680 defsubr (&Sfind_charset_region); | 1720 defsubr (&Sfind_charset_region); |
1681 defsubr (&Sfind_charset_string); | 1721 defsubr (&Sfind_charset_string); |
1682 defsubr (&Smake_char_internal); | 1722 defsubr (&Smake_char_internal); |
1683 defsubr (&Ssplit_char); | 1723 defsubr (&Ssplit_char); |
1684 defsubr (&Schar_charset); | 1724 defsubr (&Schar_charset); |
1725 defsubr (&Scharset_after); | |
1685 defsubr (&Siso_charset); | 1726 defsubr (&Siso_charset); |
1686 defsubr (&Schar_valid_p); | 1727 defsubr (&Schar_valid_p); |
1687 defsubr (&Sunibyte_char_to_multibyte); | 1728 defsubr (&Sunibyte_char_to_multibyte); |
1688 defsubr (&Schar_bytes); | 1729 defsubr (&Schar_bytes); |
1689 defsubr (&Schar_width); | 1730 defsubr (&Schar_width); |