comparison src/charset.c @ 21445:4c0b4a1025cd

(string_to_non_ascii_char): Include garbage bytes (if any) following a multibyte character in *ACTUAL_LEN. (Fcharset_after): New function. (syms_of_charset): Defsubr it. (multibyte_form_length): Modified to be consistent with string_to_non_ascii_char.
author Kenichi Handa <handa@m17n.org>
date Thu, 09 Apr 1998 05:40:23 +0000
parents 95aae2ff5fcd
children fa9ff387d260
comparison
equal deleted inserted replaced
21444:0cba6f211d7c 21445:4c0b4a1025cd
177 const unsigned char *str; 177 const unsigned char *str;
178 int len, *actual_len; 178 int len, *actual_len;
179 { 179 {
180 int charset; 180 int charset;
181 unsigned char c1, c2; 181 unsigned char c1, c2;
182 register int c; 182 register int c, bytes;
183 183
184 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII) 184 c = *str;
185 { 185 bytes = 1;
186 if (actual_len) 186
187 *actual_len = 1; 187 if (BASE_LEADING_CODE_P (c))
188 return (int) *str; 188 {
189 } 189 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
190 190
191 c = MAKE_NON_ASCII_CHAR (charset, c1, c2); 191 if (c == LEADING_CODE_COMPOSITION)
192 {
193 int cmpchar_id = str_cmpchar_id (str, bytes);
194
195 if (cmpchar_id >= 0)
196 c = MAKE_COMPOSITE_CHAR (cmpchar_id);
197 }
198 else
199 {
200 int charset = c, c1, c2 = 0;
201
202 str++;
203 if (c >= LEADING_CODE_PRIVATE_11)
204 charset = *str++;
205 if (BYTES_BY_CHAR_HEAD (c) <= bytes && CHARSET_DEFINED_P (charset))
206 {
207 c1 = *str++ & 0x7f;
208 if (CHARSET_DIMENSION (charset) == 2)
209 c2 = *str & 0x7F;
210 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
211 }
212 }
213 }
192 214
193 if (actual_len) 215 if (actual_len)
194 *actual_len = (charset == CHARSET_COMPOSITION 216 *actual_len = bytes;
195 ? cmpchar_table[COMPOSITE_CHAR_ID (c)]->len
196 : BYTES_BY_CHAR_HEAD (*str));
197 return c; 217 return c;
198 } 218 }
199 219
200 /* Return the length of the multi-byte form at string STR of length LEN. */ 220 /* Return the length of the multi-byte form at string STR of length LEN. */
201 int 221 int
202 multibyte_form_length (str, len) 222 multibyte_form_length (str, len)
203 const unsigned char *str; 223 const unsigned char *str;
204 int len; 224 int len;
205 { 225 {
206 int charset; 226 int bytes = 1;
207 unsigned char c1, c2; 227
208 register int c; 228 if (BASE_LEADING_CODE_P (*str))
209 229 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
210 if (SPLIT_STRING (str, len, charset, c1, c2) == CHARSET_ASCII) 230
211 return 1; 231 return bytes;
212
213 return (charset == CHARSET_COMPOSITION
214 ? cmpchar_table[(c1 << 7) | c2]->len
215 : BYTES_BY_CHAR_HEAD (*str));
216 } 232 }
217 233
218 /* Check if string STR of length LEN contains valid multi-byte form of 234 /* Check if string STR of length LEN contains valid multi-byte form of
219 a character. If valid, charset and position codes of the character 235 a character. If valid, charset and position codes of the character
220 is set at *CHARSET, *C1, and *C2, and return 0. If not valid, 236 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
801 Lisp_Object ch; 817 Lisp_Object ch;
802 { 818 {
803 CHECK_NUMBER (ch, 0); 819 CHECK_NUMBER (ch, 0);
804 820
805 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch))); 821 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
822 }
823
824 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
825 "Return charset of a character in current buffer at position POS.\n\
826 If POS is nil, it defauls to the current point.")
827 (pos)
828 Lisp_Object pos;
829 {
830 register int pos_byte, c, charset;
831 register unsigned char *p;
832
833 if (NILP (pos))
834 pos_byte = PT_BYTE;
835 else if (MARKERP (pos))
836 pos_byte = marker_byte_position (pos);
837 else
838 {
839 CHECK_NUMBER (pos, 0);
840 pos_byte = CHAR_TO_BYTE (XINT (pos));
841 }
842 p = BYTE_POS_ADDR (pos_byte);
843 c = STRING_CHAR (p, Z_BYTE - pos_byte);
844 charset = CHAR_CHARSET (c);
845 return CHARSET_SYMBOL (charset);
806 } 846 }
807 847
808 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0, 848 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
809 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\ 849 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
810 \n\ 850 \n\
1680 defsubr (&Sfind_charset_region); 1720 defsubr (&Sfind_charset_region);
1681 defsubr (&Sfind_charset_string); 1721 defsubr (&Sfind_charset_string);
1682 defsubr (&Smake_char_internal); 1722 defsubr (&Smake_char_internal);
1683 defsubr (&Ssplit_char); 1723 defsubr (&Ssplit_char);
1684 defsubr (&Schar_charset); 1724 defsubr (&Schar_charset);
1725 defsubr (&Scharset_after);
1685 defsubr (&Siso_charset); 1726 defsubr (&Siso_charset);
1686 defsubr (&Schar_valid_p); 1727 defsubr (&Schar_valid_p);
1687 defsubr (&Sunibyte_char_to_multibyte); 1728 defsubr (&Sunibyte_char_to_multibyte);
1688 defsubr (&Schar_bytes); 1729 defsubr (&Schar_bytes);
1689 defsubr (&Schar_width); 1730 defsubr (&Schar_width);