Mercurial > emacs
comparison src/coding.c @ 102422:82f82b92314e
(CODING_ISO_CMP_STATUS): New macro.
(CODING_ISO_EXTSEGMENT_LEN, CODING_ISO_EMBEDDED_UTF_8): New macros.
(MAX_ANNOTATION_LENGTH): Defined to 5.
(ADD_COMPOSITION_DATA): New arg nbytes.
(emacs_mule_char): New arg cmp_status.
(DECODE_EMACS_MULE_COMPOSITION_CHAR): Delete it.
(DECODE_EMACS_MULE_COMPOSITION_RULE_20): New arg c.
(DECODE_EMACS_MULE_COMPOSITION_RULE_21): New arg c.
(DECODE_EMACS_MULE_21_COMPOSITION): Delete the arg c.
(DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION): Likewise.
(DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION): Likewise.
(DECODE_EMACS_MULE_COMPOSITION_START): New macro.
(EMACS_MULE_COMPOSITION_END): New macro.
(emacs_mule_finish_composition): New function.
(EMACS_MULE_MAYBE_FINISH_COMPOSITION): New macro.
(decode_coding_emacs_mule): Avoid long looking ahead while
handling composition.
(DECODE_COMPOSITION_RULE): Argument changed to rule and nbytes.
(ENCODE_COMPOSITION_RULE): New macro.
(finish_composition): New function.
(MAYBE_FINISH_COMPOSITION): Call finish_composition.
(DECODE_COMPOSITION_START): New implementation.
(DECODE_COMPOSITION_END): Likewise.
(STORE_COMPOSITION_RULE): New macro.
(decode_coding_iso_2022): Avoid long looking ahead while handling
composition, CTEXT extended segment, and embedded UTF-8.
(setup_coding_system): For a coding of type iso-2022, reset
CODING_ISO_EXTSEGMENT_LEN (coding) and
CODING_ISO_EMBEDDED_UTF_8 (coding).
(get_translation): Delete arguments last_block, from_nchars,
to_nchars. Callers changed.
(produce_chars): Don't modify charbuf. Adjusted for the change of
get_translation.
(produce_composition): Adjusted for the new annotation sequence.
(handle_composition_annotation): Likewise.
(consume_chars): Adjusted for the change of get_translation.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 06 Mar 2009 07:51:52 +0000 |
parents | 7baaea85626e |
children | f556415c326b |
comparison
equal
deleted
inserted
replaced
102421:62a954f574fa | 102422:82f82b92314e |
---|---|
450 ((coding)->spec.iso_2022.single_shifting) | 450 ((coding)->spec.iso_2022.single_shifting) |
451 #define CODING_ISO_BOL(coding) \ | 451 #define CODING_ISO_BOL(coding) \ |
452 ((coding)->spec.iso_2022.bol) | 452 ((coding)->spec.iso_2022.bol) |
453 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \ | 453 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \ |
454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane))) | 454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane))) |
455 #define CODING_ISO_CMP_STATUS(coding) \ | |
456 (&(coding)->spec.iso_2022.cmp_status) | |
457 #define CODING_ISO_EXTSEGMENT_LEN(coding) \ | |
458 ((coding)->spec.iso_2022.ctext_extended_segment_len) | |
459 #define CODING_ISO_EMBEDDED_UTF_8(coding) \ | |
460 ((coding)->spec.iso_2022.embedded_utf_8) | |
455 | 461 |
456 /* Control characters of ISO2022. */ | 462 /* Control characters of ISO2022. */ |
457 /* code */ /* function */ | 463 /* code */ /* function */ |
458 #define ISO_CODE_LF 0x0A /* line-feed */ | 464 #define ISO_CODE_LF 0x0A /* line-feed */ |
459 #define ISO_CODE_CR 0x0D /* carriage-return */ | 465 #define ISO_CODE_CR 0x0D /* carriage-return */ |
943 static int detect_eol P_ ((const unsigned char *, | 949 static int detect_eol P_ ((const unsigned char *, |
944 EMACS_INT, enum coding_category)); | 950 EMACS_INT, enum coding_category)); |
945 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); | 951 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); |
946 static void decode_eol P_ ((struct coding_system *)); | 952 static void decode_eol P_ ((struct coding_system *)); |
947 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *)); | 953 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *)); |
948 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *, | 954 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *)); |
949 int, int *, int *)); | |
950 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int)); | 955 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int)); |
951 static INLINE void produce_composition P_ ((struct coding_system *, int *, | |
952 EMACS_INT)); | |
953 static INLINE void produce_charset P_ ((struct coding_system *, int *, | 956 static INLINE void produce_charset P_ ((struct coding_system *, int *, |
954 EMACS_INT)); | 957 EMACS_INT)); |
955 static void produce_annotation P_ ((struct coding_system *, EMACS_INT)); | 958 static void produce_annotation P_ ((struct coding_system *, EMACS_INT)); |
956 static int decode_coding P_ ((struct coding_system *)); | 959 static int decode_coding P_ ((struct coding_system *)); |
957 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT, | 960 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT, |
1206 return dst; | 1209 return dst; |
1207 } | 1210 } |
1208 | 1211 |
1209 /** Macros for annotations. */ | 1212 /** Macros for annotations. */ |
1210 | 1213 |
1211 /* Maximum length of annotation data (sum of annotations for | |
1212 composition and charset). */ | |
1213 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4) | |
1214 | |
1215 /* An annotation data is stored in the array coding->charbuf in this | 1214 /* An annotation data is stored in the array coding->charbuf in this |
1216 format: | 1215 format: |
1217 [ -LENGTH ANNOTATION_MASK NCHARS ... ] | 1216 [ -LENGTH ANNOTATION_MASK NCHARS ... ] |
1218 LENGTH is the number of elements in the annotation. | 1217 LENGTH is the number of elements in the annotation. |
1219 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK. | 1218 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK. |
1221 | 1220 |
1222 The format of the following elements depends on ANNOTATION_MASK. | 1221 The format of the following elements depends on ANNOTATION_MASK. |
1223 | 1222 |
1224 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements | 1223 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements |
1225 follow: | 1224 follow: |
1226 ... METHOD [ COMPOSITION-COMPONENTS ... ] | 1225 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ] |
1226 | |
1227 NBYTES is the number of bytes specified in the header part of | |
1228 old-style emacs-mule encoding, or 0 for the other kind of | |
1229 composition. | |
1230 | |
1227 METHOD is one of enum composition_method. | 1231 METHOD is one of enum composition_method. |
1232 | |
1228 Optional COMPOSITION-COMPONENTS are characters and composition | 1233 Optional COMPOSITION-COMPONENTS are characters and composition |
1229 rules. | 1234 rules. |
1230 | 1235 |
1231 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID | 1236 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID |
1232 follows. */ | 1237 follows. |
1238 | |
1239 If ANNOTATION_MASK is 0, this annotation is just a space holder to | |
1240 recover from an invalid annotation, and should be skipped by | |
1241 produce_annotation. */ | |
1242 | |
1243 /* Maximum length of the header of annotation data. */ | |
1244 #define MAX_ANNOTATION_LENGTH 5 | |
1233 | 1245 |
1234 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \ | 1246 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \ |
1235 do { \ | 1247 do { \ |
1236 *(buf)++ = -(len); \ | 1248 *(buf)++ = -(len); \ |
1237 *(buf)++ = (mask); \ | 1249 *(buf)++ = (mask); \ |
1238 *(buf)++ = (nchars); \ | 1250 *(buf)++ = (nchars); \ |
1239 coding->annotated = 1; \ | 1251 coding->annotated = 1; \ |
1240 } while (0); | 1252 } while (0); |
1241 | 1253 |
1242 #define ADD_COMPOSITION_DATA(buf, nchars, method) \ | 1254 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method) \ |
1243 do { \ | 1255 do { \ |
1244 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \ | 1256 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \ |
1257 *buf++ = nbytes; \ | |
1245 *buf++ = method; \ | 1258 *buf++ = method; \ |
1246 } while (0) | 1259 } while (0) |
1247 | 1260 |
1248 | 1261 |
1249 #define ADD_CHARSET_DATA(buf, nchars, id) \ | 1262 #define ADD_CHARSET_DATA(buf, nchars, id) \ |
1918 one-byte sequences which are their 8-bit code. | 1931 one-byte sequences which are their 8-bit code. |
1919 | 1932 |
1920 Next, character composition data are represented by the byte | 1933 Next, character composition data are represented by the byte |
1921 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., | 1934 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., |
1922 where, | 1935 where, |
1923 METHOD is 0xF0 plus one of composition method (enum | 1936 METHOD is 0xF2 plus one of composition method (enum |
1924 composition_method), | 1937 composition_method), |
1925 | 1938 |
1926 BYTES is 0xA0 plus a byte length of this composition data, | 1939 BYTES is 0xA0 plus a byte length of this composition data, |
1927 | 1940 |
1928 CHARS is 0x20 plus a number of characters composed by this | 1941 CHARS is 0xA0 plus a number of characters composed by this |
1929 data, | 1942 data, |
1930 | 1943 |
1931 COMPONENTs are characters of multibyte form or composition | 1944 COMPONENTs are characters of multibyte form or composition |
1932 rules encoded by two-byte of ASCII codes. | 1945 rules encoded by two-byte of ASCII codes. |
1933 | 1946 |
1945 represents a composition rule. | 1958 represents a composition rule. |
1946 */ | 1959 */ |
1947 | 1960 |
1948 char emacs_mule_bytes[256]; | 1961 char emacs_mule_bytes[256]; |
1949 | 1962 |
1963 | |
1964 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | |
1965 Check if a text is encoded in `emacs-mule'. If it is, return 1, | |
1966 else return 0. */ | |
1967 | |
1968 static int | |
1969 detect_coding_emacs_mule (coding, detect_info) | |
1970 struct coding_system *coding; | |
1971 struct coding_detection_info *detect_info; | |
1972 { | |
1973 const unsigned char *src = coding->source, *src_base; | |
1974 const unsigned char *src_end = coding->source + coding->src_bytes; | |
1975 int multibytep = coding->src_multibyte; | |
1976 int consumed_chars = 0; | |
1977 int c; | |
1978 int found = 0; | |
1979 | |
1980 detect_info->checked |= CATEGORY_MASK_EMACS_MULE; | |
1981 /* A coding system of this category is always ASCII compatible. */ | |
1982 src += coding->head_ascii; | |
1983 | |
1984 while (1) | |
1985 { | |
1986 src_base = src; | |
1987 ONE_MORE_BYTE (c); | |
1988 if (c < 0) | |
1989 continue; | |
1990 if (c == 0x80) | |
1991 { | |
1992 /* Perhaps the start of composite character. We simply skip | |
1993 it because analyzing it is too heavy for detecting. But, | |
1994 at least, we check that the composite character | |
1995 consists of more than 4 bytes. */ | |
1996 const unsigned char *src_base; | |
1997 | |
1998 repeat: | |
1999 src_base = src; | |
2000 do | |
2001 { | |
2002 ONE_MORE_BYTE (c); | |
2003 } | |
2004 while (c >= 0xA0); | |
2005 | |
2006 if (src - src_base <= 4) | |
2007 break; | |
2008 found = CATEGORY_MASK_EMACS_MULE; | |
2009 if (c == 0x80) | |
2010 goto repeat; | |
2011 } | |
2012 | |
2013 if (c < 0x80) | |
2014 { | |
2015 if (c < 0x20 | |
2016 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)) | |
2017 break; | |
2018 } | |
2019 else | |
2020 { | |
2021 int more_bytes = emacs_mule_bytes[*src_base] - 1; | |
2022 | |
2023 while (more_bytes > 0) | |
2024 { | |
2025 ONE_MORE_BYTE (c); | |
2026 if (c < 0xA0) | |
2027 { | |
2028 src--; /* Unread the last byte. */ | |
2029 break; | |
2030 } | |
2031 more_bytes--; | |
2032 } | |
2033 if (more_bytes != 0) | |
2034 break; | |
2035 found = CATEGORY_MASK_EMACS_MULE; | |
2036 } | |
2037 } | |
2038 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; | |
2039 return 0; | |
2040 | |
2041 no_more_source: | |
2042 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) | |
2043 { | |
2044 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; | |
2045 return 0; | |
2046 } | |
2047 detect_info->found |= found; | |
2048 return 1; | |
2049 } | |
2050 | |
2051 | |
2052 /* Parse emacs-mule multibyte sequence at SRC and return the decoded | |
2053 character. If CMP_STATUS indicates that we must expect MSEQ or | |
2054 RULE described above, decode it and return the negative value of | |
2055 the decoded character or rule. If an invalid byte is found, return | |
2056 -1. If SRC is too short, return -2. */ | |
2057 | |
1950 int | 2058 int |
1951 emacs_mule_char (coding, src, nbytes, nchars, id) | 2059 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status) |
1952 struct coding_system *coding; | 2060 struct coding_system *coding; |
1953 const unsigned char *src; | 2061 const unsigned char *src; |
1954 int *nbytes, *nchars, *id; | 2062 int *nbytes, *nchars, *id; |
2063 struct composition_status *cmp_status; | |
1955 { | 2064 { |
1956 const unsigned char *src_end = coding->source + coding->src_bytes; | 2065 const unsigned char *src_end = coding->source + coding->src_bytes; |
1957 const unsigned char *src_base = src; | 2066 const unsigned char *src_base = src; |
1958 int multibytep = coding->src_multibyte; | 2067 int multibytep = coding->src_multibyte; |
1959 struct charset *charset; | 2068 struct charset *charset; |
1960 unsigned code; | 2069 unsigned code; |
1961 int c; | 2070 int c; |
1962 int consumed_chars = 0; | 2071 int consumed_chars = 0; |
2072 int mseq_found = 0; | |
1963 | 2073 |
1964 ONE_MORE_BYTE (c); | 2074 ONE_MORE_BYTE (c); |
1965 if (c < 0) | 2075 if (c < 0) |
1966 { | 2076 { |
1967 c = -c; | 2077 c = -c; |
1969 } | 2079 } |
1970 else | 2080 else |
1971 { | 2081 { |
1972 if (c >= 0xA0) | 2082 if (c >= 0xA0) |
1973 { | 2083 { |
1974 /* Old style component character of a composition. */ | 2084 if (cmp_status->state != COMPOSING_NO |
1975 if (c == 0xA0) | 2085 && cmp_status->old_form) |
1976 { | 2086 { |
1977 ONE_MORE_BYTE (c); | 2087 if (cmp_status->state == COMPOSING_CHAR) |
1978 c -= 0x80; | 2088 { |
2089 if (c == 0xA0) | |
2090 { | |
2091 ONE_MORE_BYTE (c); | |
2092 c -= 0x80; | |
2093 if (c < 0) | |
2094 goto invalid_code; | |
2095 } | |
2096 else | |
2097 c -= 0x20; | |
2098 mseq_found = 1; | |
2099 } | |
2100 else | |
2101 { | |
2102 *nbytes = src - src_base; | |
2103 *nchars = consumed_chars; | |
2104 return -c; | |
2105 } | |
1979 } | 2106 } |
1980 else | 2107 else |
1981 c -= 0x20; | 2108 goto invalid_code; |
1982 } | 2109 } |
1983 | 2110 |
1984 switch (emacs_mule_bytes[c]) | 2111 switch (emacs_mule_bytes[c]) |
1985 { | 2112 { |
1986 case 2: | 2113 case 2: |
2048 } | 2175 } |
2049 *nbytes = src - src_base; | 2176 *nbytes = src - src_base; |
2050 *nchars = consumed_chars; | 2177 *nchars = consumed_chars; |
2051 if (id) | 2178 if (id) |
2052 *id = charset->id; | 2179 *id = charset->id; |
2053 return c; | 2180 return (mseq_found ? -c : c); |
2054 | 2181 |
2055 no_more_source: | 2182 no_more_source: |
2056 return -2; | 2183 return -2; |
2057 | 2184 |
2058 invalid_code: | 2185 invalid_code: |
2059 return -1; | 2186 return -1; |
2060 } | 2187 } |
2061 | 2188 |
2062 | 2189 |
2063 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". | |
2064 Check if a text is encoded in `emacs-mule'. If it is, return 1, | |
2065 else return 0. */ | |
2066 | |
2067 static int | |
2068 detect_coding_emacs_mule (coding, detect_info) | |
2069 struct coding_system *coding; | |
2070 struct coding_detection_info *detect_info; | |
2071 { | |
2072 const unsigned char *src = coding->source, *src_base; | |
2073 const unsigned char *src_end = coding->source + coding->src_bytes; | |
2074 int multibytep = coding->src_multibyte; | |
2075 int consumed_chars = 0; | |
2076 int c; | |
2077 int found = 0; | |
2078 | |
2079 detect_info->checked |= CATEGORY_MASK_EMACS_MULE; | |
2080 /* A coding system of this category is always ASCII compatible. */ | |
2081 src += coding->head_ascii; | |
2082 | |
2083 while (1) | |
2084 { | |
2085 src_base = src; | |
2086 ONE_MORE_BYTE (c); | |
2087 if (c < 0) | |
2088 continue; | |
2089 if (c == 0x80) | |
2090 { | |
2091 /* Perhaps the start of composite character. We simple skip | |
2092 it because analyzing it is too heavy for detecting. But, | |
2093 at least, we check that the composite character | |
2094 constitutes of more than 4 bytes. */ | |
2095 const unsigned char *src_base; | |
2096 | |
2097 repeat: | |
2098 src_base = src; | |
2099 do | |
2100 { | |
2101 ONE_MORE_BYTE (c); | |
2102 } | |
2103 while (c >= 0xA0); | |
2104 | |
2105 if (src - src_base <= 4) | |
2106 break; | |
2107 found = CATEGORY_MASK_EMACS_MULE; | |
2108 if (c == 0x80) | |
2109 goto repeat; | |
2110 } | |
2111 | |
2112 if (c < 0x80) | |
2113 { | |
2114 if (c < 0x20 | |
2115 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)) | |
2116 break; | |
2117 } | |
2118 else | |
2119 { | |
2120 int more_bytes = emacs_mule_bytes[*src_base] - 1; | |
2121 | |
2122 while (more_bytes > 0) | |
2123 { | |
2124 ONE_MORE_BYTE (c); | |
2125 if (c < 0xA0) | |
2126 { | |
2127 src--; /* Unread the last byte. */ | |
2128 break; | |
2129 } | |
2130 more_bytes--; | |
2131 } | |
2132 if (more_bytes != 0) | |
2133 break; | |
2134 found = CATEGORY_MASK_EMACS_MULE; | |
2135 } | |
2136 } | |
2137 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; | |
2138 return 0; | |
2139 | |
2140 no_more_source: | |
2141 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) | |
2142 { | |
2143 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; | |
2144 return 0; | |
2145 } | |
2146 detect_info->found |= found; | |
2147 return 1; | |
2148 } | |
2149 | |
2150 | |
2151 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 2190 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
2152 | 2191 |
2153 /* Decode a character represented as a component of composition | 2192 /* Handle these composition sequences ('|': the end of header elements, |
2154 sequence of Emacs 20/21 style at SRC. Set C to that character and | 2193 BYTES and CHARS >= 0xA0): |
2155 update SRC to the head of next character (or an encoded composition | 2194 |
2156 rule). If SRC doesn't points a composition component, set C to -1. | 2195 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ... |
2157 If SRC points an invalid byte sequence, global exit by a return | 2196 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ... |
2158 value 0. */ | 2197 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ... |
2159 | 2198 |
2160 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \ | 2199 and these old forms: |
2161 do \ | 2200 |
2162 { \ | 2201 (4) relative composition: 0x80 | MSEQ ... MSEQ |
2163 int c; \ | 2202 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ |
2164 int nbytes, nchars; \ | 2203 |
2165 \ | 2204 When the starter 0x80 and the following header elements are found, |
2166 if (src == src_end) \ | 2205 this annotation header is produced. |
2167 break; \ | 2206 |
2168 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\ | 2207 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ] |
2169 if (c < 0) \ | 2208 |
2170 { \ | 2209 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5). |
2171 if (c == -2) \ | 2210 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5). |
2172 break; \ | 2211 |
2173 goto invalid_code; \ | 2212 Then, upon reading the following elements, these codes are produced |
2174 } \ | 2213 until the composition end is found: |
2175 *buf++ = c; \ | 2214 |
2176 src += nbytes; \ | 2215 (1) CHAR ... CHAR |
2177 consumed_chars += nchars; \ | 2216 (2) ALT ... ALT CHAR ... CHAR |
2178 } \ | 2217 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR |
2179 while (0) | 2218 (4) CHAR ... CHAR |
2180 | 2219 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR |
2181 | 2220 |
2182 /* Decode a composition rule represented as a component of composition | 2221 When the composition end is found, LENGTH and NCHARS in the |
2183 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF, | 2222 annotation header are updated as below: |
2184 and increment BUF. If SRC points an invalid byte sequence, set C | 2223 |
2185 to -1. */ | 2224 (1) LENGTH: unchanged, NCHARS: unchanged |
2186 | 2225 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged |
2187 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \ | 2226 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged |
2227 (4) LENGTH: unchanged, NCHARS: number of CHARs | |
2228 (5) LENGTH: unchanged, NCHARS: number of CHARs | |
2229 | |
2230 If an error is found while composing, the annotation header is | |
2231 changed to the original composition header (plus filler -1s) as | |
2232 below: | |
2233 | |
2234 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ] | |
2235 (5) [ 0x80 0xFF -1 -1 -1 ] | |
2236 | |
2237 and the sequence [ -2 DECODED-RULE ] is changed to the original | |
2238 byte sequence as below: | |
2239 o the original byte sequence is B: [ B -1 ] | |
2240 o the original byte sequence is B1 B2: [ B1 B2 ] | |
2241 | |
2242 Most of the routines are implemented by macros because many | |
2243 variables and labels in the caller decode_coding_emacs_mule must be | |
2244 accessible, and they are usually called just once (thus doesn't | |
2245 increase the size of compiled object). */ | |
2246 | |
2247 /* Decode a composition rule represented by C as a component of | |
2248 composition sequence of Emacs 20 style. Set RULE to the decoded | |
2249 rule. */ | |
2250 | |
2251 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule) \ | |
2188 do { \ | 2252 do { \ |
2189 int c, gref, nref; \ | 2253 int gref, nref; \ |
2190 \ | 2254 \ |
2191 if (src >= src_end) \ | |
2192 goto invalid_code; \ | |
2193 ONE_MORE_BYTE_NO_CHECK (c); \ | |
2194 c -= 0xA0; \ | 2255 c -= 0xA0; \ |
2195 if (c < 0 || c >= 81) \ | 2256 if (c < 0 || c >= 81) \ |
2196 goto invalid_code; \ | 2257 goto invalid_code; \ |
2197 \ | |
2198 gref = c / 9, nref = c % 9; \ | 2258 gref = c / 9, nref = c % 9; \ |
2199 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ | 2259 if (gref == 4) gref = 10; \ |
2260 if (nref == 4) nref = 10; \ | |
2261 rule = COMPOSITION_ENCODE_RULE (gref, nref); \ | |
2200 } while (0) | 2262 } while (0) |
2201 | 2263 |
2202 | 2264 |
2203 /* Decode a composition rule represented as a component of composition | 2265 /* Decode a composition rule represented by C and the following byte |
2204 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF, | 2266 at SRC as a component of composition sequence of Emacs 21 style. |
2205 and increment BUF. If SRC points an invalid byte sequence, set C | 2267 Set RULE to the decoded rule. */ |
2206 to -1. */ | 2268 |
2207 | 2269 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule) \ |
2208 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \ | |
2209 do { \ | 2270 do { \ |
2210 int gref, nref; \ | 2271 int gref, nref; \ |
2211 \ | 2272 \ |
2212 if (src + 1>= src_end) \ | 2273 gref = c - 0x20; \ |
2274 if (gref < 0 || gref >= 81) \ | |
2213 goto invalid_code; \ | 2275 goto invalid_code; \ |
2214 ONE_MORE_BYTE_NO_CHECK (gref); \ | 2276 ONE_MORE_BYTE (c); \ |
2215 gref -= 0x20; \ | 2277 nref = c - 0x20; \ |
2216 ONE_MORE_BYTE_NO_CHECK (nref); \ | 2278 if (nref < 0 || nref >= 81) \ |
2217 nref -= 0x20; \ | |
2218 if (gref < 0 || gref >= 81 \ | |
2219 || nref < 0 || nref >= 81) \ | |
2220 goto invalid_code; \ | 2279 goto invalid_code; \ |
2221 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ | 2280 rule = COMPOSITION_ENCODE_RULE (gref, nref); \ |
2222 } while (0) | 2281 } while (0) |
2223 | 2282 |
2224 | 2283 |
2225 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ | 2284 /* Start of Emacs 21 style format. The first three bytes at SRC are |
2285 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the | |
2286 byte length of this composition information, CHARS is the number of | |
2287 characters composed by this composition. */ | |
2288 | |
2289 #define DECODE_EMACS_MULE_21_COMPOSITION() \ | |
2226 do { \ | 2290 do { \ |
2227 /* Emacs 21 style format. The first three bytes at SRC are \ | |
2228 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ | |
2229 the byte length of this composition information, CHARS is the \ | |
2230 number of characters composed by this composition. */ \ | |
2231 enum composition_method method = c - 0xF2; \ | 2291 enum composition_method method = c - 0xF2; \ |
2232 int *charbuf_base = charbuf; \ | 2292 int *charbuf_base = charbuf; \ |
2233 int consumed_chars_limit; \ | |
2234 int nbytes, nchars; \ | 2293 int nbytes, nchars; \ |
2235 \ | 2294 \ |
2236 ONE_MORE_BYTE (c); \ | 2295 ONE_MORE_BYTE (c); \ |
2237 if (c < 0) \ | 2296 if (c < 0) \ |
2238 goto invalid_code; \ | 2297 goto invalid_code; \ |
2239 nbytes = c - 0xA0; \ | 2298 nbytes = c - 0xA0; \ |
2240 if (nbytes < 3) \ | 2299 if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4)) \ |
2241 goto invalid_code; \ | 2300 goto invalid_code; \ |
2242 ONE_MORE_BYTE (c); \ | 2301 ONE_MORE_BYTE (c); \ |
2243 if (c < 0) \ | 2302 nchars = c - 0xA0; \ |
2303 if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS) \ | |
2244 goto invalid_code; \ | 2304 goto invalid_code; \ |
2245 nchars = c - 0xA0; \ | 2305 cmp_status->old_form = 0; \ |
2246 ADD_COMPOSITION_DATA (charbuf, nchars, method); \ | 2306 cmp_status->method = method; \ |
2247 consumed_chars_limit = consumed_chars_base + nbytes; \ | 2307 if (method == COMPOSITION_RELATIVE) \ |
2248 if (method != COMPOSITION_RELATIVE) \ | 2308 cmp_status->state = COMPOSING_CHAR; \ |
2249 { \ | 2309 else \ |
2250 int i = 0; \ | 2310 cmp_status->state = COMPOSING_COMPONENT_CHAR; \ |
2251 while (consumed_chars < consumed_chars_limit) \ | 2311 cmp_status->length = MAX_ANNOTATION_LENGTH; \ |
2252 { \ | 2312 cmp_status->nchars = nchars; \ |
2253 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ | 2313 cmp_status->ncomps = nbytes - 4; \ |
2254 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \ | 2314 ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method); \ |
2255 else \ | |
2256 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ | |
2257 i++; \ | |
2258 } \ | |
2259 if (consumed_chars < consumed_chars_limit) \ | |
2260 goto invalid_code; \ | |
2261 charbuf_base[0] -= i; \ | |
2262 } \ | |
2263 } while (0) | 2315 } while (0) |
2264 | 2316 |
2265 | 2317 |
2266 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ | 2318 /* Start of Emacs 20 style format for relative composition. */ |
2267 do { \ | 2319 |
2268 /* Emacs 20 style format for relative composition. */ \ | 2320 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION() \ |
2269 /* Store multibyte form of characters to be composed. */ \ | 2321 do { \ |
2270 enum composition_method method = COMPOSITION_RELATIVE; \ | 2322 cmp_status->old_form = 1; \ |
2271 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ | 2323 cmp_status->method = COMPOSITION_RELATIVE; \ |
2272 int *buf = components; \ | 2324 cmp_status->state = COMPOSING_CHAR; \ |
2273 int i, j; \ | 2325 cmp_status->length = MAX_ANNOTATION_LENGTH; \ |
2274 \ | 2326 cmp_status->nchars = cmp_status->ncomps = 0; \ |
2275 src = src_base; \ | 2327 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \ |
2276 ONE_MORE_BYTE (c); /* skip 0x80 */ \ | |
2277 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \ | |
2278 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | |
2279 if (i < 2) \ | |
2280 goto invalid_code; \ | |
2281 ADD_COMPOSITION_DATA (charbuf, i, method); \ | |
2282 for (j = 0; j < i; j++) \ | |
2283 *charbuf++ = components[j]; \ | |
2284 } while (0) | 2328 } while (0) |
2285 | 2329 |
2286 | 2330 |
2287 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \ | 2331 /* Start of Emacs 20 style format for rule-base composition. */ |
2332 | |
2333 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION() \ | |
2288 do { \ | 2334 do { \ |
2289 /* Emacs 20 style format for rule-base composition. */ \ | 2335 cmp_status->old_form = 1; \ |
2290 /* Store multibyte form of characters to be composed. */ \ | 2336 cmp_status->method = COMPOSITION_WITH_RULE; \ |
2291 enum composition_method method = COMPOSITION_WITH_RULE; \ | 2337 cmp_status->state = COMPOSING_CHAR; \ |
2292 int *charbuf_base = charbuf; \ | 2338 cmp_status->length = MAX_ANNOTATION_LENGTH; \ |
2293 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ | 2339 cmp_status->nchars = cmp_status->ncomps = 0; \ |
2294 int *buf = components; \ | 2340 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \ |
2295 int i, j; \ | 2341 } while (0) |
2342 | |
2343 | |
2344 #define DECODE_EMACS_MULE_COMPOSITION_START() \ | |
2345 do { \ | |
2346 const unsigned char *current_src = src; \ | |
2347 \ | |
2348 ONE_MORE_BYTE (c); \ | |
2349 if (c < 0) \ | |
2350 goto invalid_code; \ | |
2351 if (c - 0xF2 >= COMPOSITION_RELATIVE \ | |
2352 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) \ | |
2353 DECODE_EMACS_MULE_21_COMPOSITION (); \ | |
2354 else if (c < 0xA0) \ | |
2355 goto invalid_code; \ | |
2356 else if (c < 0xC0) \ | |
2357 { \ | |
2358 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (); \ | |
2359 /* Re-read C as a composition component. */ \ | |
2360 src = current_src; \ | |
2361 } \ | |
2362 else if (c == 0xFF) \ | |
2363 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (); \ | |
2364 else \ | |
2365 goto invalid_code; \ | |
2366 } while (0) | |
2367 | |
2368 #define EMACS_MULE_COMPOSITION_END() \ | |
2369 do { \ | |
2370 int idx = - cmp_status->length; \ | |
2296 \ | 2371 \ |
2297 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 2372 if (cmp_status->old_form) \ |
2298 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \ | 2373 charbuf[idx + 2] = cmp_status->nchars; \ |
2299 { \ | 2374 else if (cmp_status->method > COMPOSITION_RELATIVE) \ |
2300 if (*src < 0xA0) \ | 2375 charbuf[idx] = charbuf[idx + 2] - cmp_status->length; \ |
2301 break; \ | 2376 cmp_status->state = COMPOSING_NO; \ |
2302 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ | 2377 } while (0) |
2303 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ | 2378 |
2304 } \ | 2379 |
2305 if (i <= 1 || (buf - components) % 2 == 0) \ | 2380 static int |
2306 goto invalid_code; \ | 2381 emacs_mule_finish_composition (charbuf, cmp_status) |
2307 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \ | 2382 int *charbuf; |
2308 goto no_more_source; \ | 2383 struct composition_status *cmp_status; |
2309 ADD_COMPOSITION_DATA (charbuf, i, method); \ | 2384 { |
2310 i = i * 2 - 1; \ | 2385 int idx = - cmp_status->length; |
2311 for (j = 0; j < i; j++) \ | 2386 int new_chars; |
2312 *charbuf++ = components[j]; \ | 2387 |
2313 charbuf_base[0] -= i; \ | 2388 if (cmp_status->old_form && cmp_status->nchars > 0) |
2314 for (j = 0; j < i; j += 2) \ | 2389 { |
2315 *charbuf++ = components[j]; \ | 2390 charbuf[idx + 2] = cmp_status->nchars; |
2391 new_chars = 0; | |
2392 if (cmp_status->method == COMPOSITION_WITH_RULE | |
2393 && cmp_status->state == COMPOSING_CHAR) | |
2394 { | |
2395 /* The last rule was invalid. */ | |
2396 int rule = charbuf[-1] + 0xA0; | |
2397 | |
2398 charbuf[-2] = BYTE8_TO_CHAR (rule); | |
2399 charbuf[-1] = -1; | |
2400 new_chars = 1; | |
2401 } | |
2402 } | |
2403 else | |
2404 { | |
2405 charbuf[idx++] = BYTE8_TO_CHAR (0x80); | |
2406 | |
2407 if (cmp_status->method == COMPOSITION_WITH_RULE) | |
2408 { | |
2409 charbuf[idx++] = BYTE8_TO_CHAR (0xFF); | |
2410 charbuf[idx++] = -3; | |
2411 charbuf[idx++] = 0; | |
2412 new_chars = 1; | |
2413 } | |
2414 else | |
2415 { | |
2416 int nchars = charbuf[idx + 1] + 0xA0; | |
2417 int nbytes = charbuf[idx + 2] + 0xA0; | |
2418 | |
2419 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method); | |
2420 charbuf[idx++] = BYTE8_TO_CHAR (nbytes); | |
2421 charbuf[idx++] = BYTE8_TO_CHAR (nchars); | |
2422 charbuf[idx++] = -1; | |
2423 new_chars = 4; | |
2424 } | |
2425 } | |
2426 cmp_status->state = COMPOSING_NO; | |
2427 return new_chars; | |
2428 } | |
2429 | |
2430 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION() \ | |
2431 do { \ | |
2432 if (cmp_status->state != COMPOSING_NO) \ | |
2433 char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \ | |
2316 } while (0) | 2434 } while (0) |
2317 | 2435 |
2318 | 2436 |
2319 static void | 2437 static void |
2320 decode_coding_emacs_mule (coding) | 2438 decode_coding_emacs_mule (coding) |
2333 int last_offset = char_offset; | 2451 int last_offset = char_offset; |
2334 int last_id = charset_ascii; | 2452 int last_id = charset_ascii; |
2335 int eol_crlf = | 2453 int eol_crlf = |
2336 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 2454 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
2337 int byte_after_cr = -1; | 2455 int byte_after_cr = -1; |
2456 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status; | |
2338 | 2457 |
2339 CODING_GET_INFO (coding, attrs, charset_list); | 2458 CODING_GET_INFO (coding, attrs, charset_list); |
2340 | 2459 |
2460 if (cmp_status->state != COMPOSING_NO) | |
2461 { | |
2462 int i; | |
2463 | |
2464 for (i = 0; i < cmp_status->length; i++) | |
2465 *charbuf++ = cmp_status->carryover[i]; | |
2466 coding->annotated = 1; | |
2467 } | |
2468 | |
2341 while (1) | 2469 while (1) |
2342 { | 2470 { |
2343 int c; | 2471 int c, id; |
2344 | 2472 |
2345 src_base = src; | 2473 src_base = src; |
2346 consumed_chars_base = consumed_chars; | 2474 consumed_chars_base = consumed_chars; |
2347 | 2475 |
2348 if (charbuf >= charbuf_end) | 2476 if (charbuf >= charbuf_end) |
2354 | 2482 |
2355 if (byte_after_cr >= 0) | 2483 if (byte_after_cr >= 0) |
2356 c = byte_after_cr, byte_after_cr = -1; | 2484 c = byte_after_cr, byte_after_cr = -1; |
2357 else | 2485 else |
2358 ONE_MORE_BYTE (c); | 2486 ONE_MORE_BYTE (c); |
2359 if (c < 0) | 2487 |
2360 { | 2488 if (c < 0 || c == 0x80) |
2361 *charbuf++ = -c; | 2489 { |
2362 char_offset++; | 2490 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); |
2363 } | 2491 if (c < 0) |
2364 else if (c < 0x80) | 2492 { |
2493 *charbuf++ = -c; | |
2494 char_offset++; | |
2495 } | |
2496 else | |
2497 DECODE_EMACS_MULE_COMPOSITION_START (); | |
2498 continue; | |
2499 } | |
2500 | |
2501 if (c < 0x80) | |
2365 { | 2502 { |
2366 if (eol_crlf && c == '\r') | 2503 if (eol_crlf && c == '\r') |
2367 ONE_MORE_BYTE (byte_after_cr); | 2504 ONE_MORE_BYTE (byte_after_cr); |
2368 *charbuf++ = c; | 2505 id = charset_ascii; |
2369 char_offset++; | 2506 if (cmp_status->state != COMPOSING_NO) |
2370 } | 2507 { |
2371 else if (c == 0x80) | 2508 if (cmp_status->old_form) |
2372 { | 2509 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); |
2373 ONE_MORE_BYTE (c); | 2510 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR) |
2374 if (c < 0) | 2511 cmp_status->ncomps--; |
2375 goto invalid_code; | 2512 } |
2376 if (c - 0xF2 >= COMPOSITION_RELATIVE | 2513 } |
2377 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) | 2514 else |
2378 DECODE_EMACS_MULE_21_COMPOSITION (c); | 2515 { |
2379 else if (c < 0xC0) | 2516 int nchars, nbytes; |
2380 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); | 2517 |
2381 else if (c == 0xFF) | 2518 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id, |
2382 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); | 2519 cmp_status); |
2383 else | |
2384 goto invalid_code; | |
2385 } | |
2386 else if (c < 0xA0 && emacs_mule_bytes[c] > 1) | |
2387 { | |
2388 int nbytes, nchars; | |
2389 int id; | |
2390 | |
2391 src = src_base; | |
2392 consumed_chars = consumed_chars_base; | |
2393 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id); | |
2394 if (c < 0) | 2520 if (c < 0) |
2395 { | 2521 { |
2522 if (c == -1) | |
2523 goto invalid_code; | |
2396 if (c == -2) | 2524 if (c == -2) |
2397 break; | 2525 break; |
2398 goto invalid_code; | |
2399 } | 2526 } |
2527 src = src_base + nbytes; | |
2528 consumed_chars = consumed_chars_base + nchars; | |
2529 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR) | |
2530 cmp_status->ncomps -= nchars; | |
2531 } | |
2532 | |
2533 /* Now if C >= 0, we found a normally encoded characer, if C < | |
2534 0, we found an old-style composition component character or | |
2535 rule. */ | |
2536 | |
2537 if (cmp_status->state == COMPOSING_NO) | |
2538 { | |
2400 if (last_id != id) | 2539 if (last_id != id) |
2401 { | 2540 { |
2402 if (last_id != charset_ascii) | 2541 if (last_id != charset_ascii) |
2403 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); | 2542 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, |
2543 last_id); | |
2404 last_id = id; | 2544 last_id = id; |
2405 last_offset = char_offset; | 2545 last_offset = char_offset; |
2406 } | 2546 } |
2407 *charbuf++ = c; | 2547 *charbuf++ = c; |
2408 src += nbytes; | |
2409 consumed_chars += nchars; | |
2410 char_offset++; | 2548 char_offset++; |
2411 } | 2549 } |
2412 else | 2550 else if (cmp_status->state == COMPOSING_CHAR) |
2413 goto invalid_code; | 2551 { |
2552 if (cmp_status->old_form) | |
2553 { | |
2554 if (c >= 0) | |
2555 { | |
2556 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); | |
2557 *charbuf++ = c; | |
2558 char_offset++; | |
2559 } | |
2560 else | |
2561 { | |
2562 *charbuf++ = -c; | |
2563 cmp_status->nchars++; | |
2564 cmp_status->length++; | |
2565 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS) | |
2566 EMACS_MULE_COMPOSITION_END (); | |
2567 else if (cmp_status->method == COMPOSITION_WITH_RULE) | |
2568 cmp_status->state = COMPOSING_RULE; | |
2569 } | |
2570 } | |
2571 else | |
2572 { | |
2573 *charbuf++ = c; | |
2574 cmp_status->length++; | |
2575 cmp_status->nchars--; | |
2576 if (cmp_status->nchars == 0) | |
2577 EMACS_MULE_COMPOSITION_END (); | |
2578 } | |
2579 } | |
2580 else if (cmp_status->state == COMPOSING_RULE) | |
2581 { | |
2582 int rule; | |
2583 | |
2584 if (c >= 0) | |
2585 { | |
2586 EMACS_MULE_COMPOSITION_END (); | |
2587 *charbuf++ = c; | |
2588 char_offset++; | |
2589 } | |
2590 else | |
2591 { | |
2592 c = -c; | |
2593 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule); | |
2594 if (rule < 0) | |
2595 goto invalid_code; | |
2596 *charbuf++ = -2; | |
2597 *charbuf++ = rule; | |
2598 cmp_status->length += 2; | |
2599 cmp_status->state = COMPOSING_CHAR; | |
2600 } | |
2601 } | |
2602 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR) | |
2603 { | |
2604 *charbuf++ = c; | |
2605 cmp_status->length++; | |
2606 if (cmp_status->ncomps == 0) | |
2607 cmp_status->state = COMPOSING_CHAR; | |
2608 else if (cmp_status->ncomps > 0) | |
2609 { | |
2610 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) | |
2611 cmp_status->state = COMPOSING_COMPONENT_RULE; | |
2612 } | |
2613 else | |
2614 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); | |
2615 } | |
2616 else /* COMPOSING_COMPONENT_RULE */ | |
2617 { | |
2618 int rule; | |
2619 | |
2620 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule); | |
2621 if (rule < 0) | |
2622 goto invalid_code; | |
2623 *charbuf++ = -2; | |
2624 *charbuf++ = rule; | |
2625 cmp_status->length += 2; | |
2626 cmp_status->ncomps--; | |
2627 if (cmp_status->ncomps > 0) | |
2628 cmp_status->state = COMPOSING_COMPONENT_CHAR; | |
2629 else | |
2630 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); | |
2631 } | |
2414 continue; | 2632 continue; |
2415 | 2633 |
2634 retry: | |
2635 src = src_base; | |
2636 consumed_chars = consumed_chars_base; | |
2637 continue; | |
2638 | |
2416 invalid_code: | 2639 invalid_code: |
2640 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); | |
2417 src = src_base; | 2641 src = src_base; |
2418 consumed_chars = consumed_chars_base; | 2642 consumed_chars = consumed_chars_base; |
2419 ONE_MORE_BYTE (c); | 2643 ONE_MORE_BYTE (c); |
2420 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); | 2644 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); |
2421 char_offset++; | 2645 char_offset++; |
2422 coding->errors++; | 2646 coding->errors++; |
2423 } | 2647 } |
2424 | 2648 |
2425 no_more_source: | 2649 no_more_source: |
2650 if (cmp_status->state != COMPOSING_NO) | |
2651 { | |
2652 if (coding->mode & CODING_MODE_LAST_BLOCK) | |
2653 EMACS_MULE_MAYBE_FINISH_COMPOSITION (); | |
2654 else | |
2655 { | |
2656 int i; | |
2657 | |
2658 charbuf -= cmp_status->length; | |
2659 for (i = 0; i < cmp_status->length; i++) | |
2660 cmp_status->carryover[i] = charbuf[i]; | |
2661 } | |
2662 } | |
2426 if (last_id != charset_ascii) | 2663 if (last_id != charset_ascii) |
2427 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); | 2664 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); |
2428 coding->consumed_char += consumed_chars_base; | 2665 coding->consumed_char += consumed_chars_base; |
2429 coding->consumed = src_base - coding->source; | 2666 coding->consumed = src_base - coding->source; |
2430 coding->charbuf_used = charbuf - coding->charbuf; | 2667 coding->charbuf_used = charbuf - coding->charbuf; |
3075 if (prev == -2 && id == charset_ascii) \ | 3312 if (prev == -2 && id == charset_ascii) \ |
3076 chars_96 = -1; \ | 3313 chars_96 = -1; \ |
3077 } while (0) | 3314 } while (0) |
3078 | 3315 |
3079 | 3316 |
3317 /* Handle these composition sequence (ALT: alternate char): | |
3318 | |
3319 (1) relative composition: ESC 0 CHAR ... ESC 1 | |
3320 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 | |
3321 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1 | |
3322 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1 | |
3323 | |
3324 When the start sequence (ESC 0/2/3/4) is found, this annotation | |
3325 header is produced. | |
3326 | |
3327 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ] | |
3328 | |
3329 Then, upon reading CHAR or RULE (one or two bytes), these codes are | |
3330 produced until the end sequence (ESC 1) is found: | |
3331 | |
3332 (1) CHAR ... CHAR | |
3333 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR | |
3334 (3) ALT ... ALT -1 -1 CHAR ... CHAR | |
3335 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR | |
3336 | |
3337 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the | |
3338 annotation header is updated as below: | |
3339 | |
3340 (1) LENGTH: unchanged, NCHARS: number of CHARs | |
3341 (2) LENGTH: unchanged, NCHARS: number of CHARs | |
3342 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs | |
3343 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs | |
3344 | |
3345 If an error is found while composing, the annotation header is | |
3346 changed to: | |
3347 | |
3348 [ ESC '0'/'2'/'3'/'4' -2 0 ] | |
3349 | |
3350 and the sequence [ -2 DECODED-RULE ] is changed to the original | |
3351 byte sequence as below: | |
3352 o the original byte sequence is B: [ B -1 ] | |
3353 o the original byte sequence is B1 B2: [ B1 B2 ] | |
3354 and the sequence [ -1 -1 ] is changed to the original byte | |
3355 sequence: | |
3356 [ ESC '0' ] | |
3357 */ | |
3358 | |
3359 /* Decode a composition rule C1 and maybe one more byte from the | |
3360 source, and set RULE to the encoded composition rule, NBYTES to the | |
3361 length of the composition rule. If the rule is invalid, set RULE | |
3362 to some negative value. */ | |
3363 | |
3364 #define DECODE_COMPOSITION_RULE(rule, nbytes) \ | |
3365 do { \ | |
3366 rule = c1 - 32; \ | |
3367 if (rule < 0) \ | |
3368 break; \ | |
3369 if (rule < 81) /* old format (before ver.21) */ \ | |
3370 { \ | |
3371 int gref = (rule) / 9; \ | |
3372 int nref = (rule) % 9; \ | |
3373 if (gref == 4) gref = 10; \ | |
3374 if (nref == 4) nref = 10; \ | |
3375 rule = COMPOSITION_ENCODE_RULE (gref, nref); \ | |
3376 nbytes = 1; \ | |
3377 } \ | |
3378 else /* new format (after ver.21) */ \ | |
3379 { \ | |
3380 int c; \ | |
3381 \ | |
3382 ONE_MORE_BYTE (c); \ | |
3383 rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \ | |
3384 if (rule >= 0) \ | |
3385 rule += 0x100; /* to destinguish it from the old format */ \ | |
3386 nbytes = 2; \ | |
3387 } \ | |
3388 } while (0) | |
3389 | |
3390 #define ENCODE_COMPOSITION_RULE(rule) \ | |
3391 do { \ | |
3392 int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \ | |
3393 \ | |
3394 if (rule < 0x100) /* old format */ \ | |
3395 { \ | |
3396 if (gref == 10) gref = 4; \ | |
3397 if (nref == 10) nref = 4; \ | |
3398 charbuf[idx] = 32 + gref * 9 + nref; \ | |
3399 charbuf[idx + 1] = -1; \ | |
3400 new_chars++; \ | |
3401 } \ | |
3402 else /* new format */ \ | |
3403 { \ | |
3404 charbuf[idx] = 32 + 81 + gref; \ | |
3405 charbuf[idx + 1] = 32 + nref; \ | |
3406 new_chars += 2; \ | |
3407 } \ | |
3408 } while (0) | |
3409 | |
3410 /* Finish the current composition as invalid. */ | |
3411 | |
3412 static int finish_composition P_ ((int *, struct composition_status *)); | |
3413 | |
3414 static int | |
3415 finish_composition (charbuf, cmp_status) | |
3416 int *charbuf; | |
3417 struct composition_status *cmp_status; | |
3418 { | |
3419 int idx = - cmp_status->length; | |
3420 int new_chars; | |
3421 | |
3422 /* Recover the original ESC sequence */ | |
3423 charbuf[idx++] = ISO_CODE_ESC; | |
3424 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0' | |
3425 : cmp_status->method == COMPOSITION_WITH_RULE ? '2' | |
3426 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3' | |
3427 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */ | |
3428 : '4'); | |
3429 charbuf[idx++] = -2; | |
3430 charbuf[idx++] = 0; | |
3431 charbuf[idx++] = -1; | |
3432 new_chars = cmp_status->nchars; | |
3433 if (cmp_status->method >= COMPOSITION_WITH_RULE) | |
3434 for (; idx < 0; idx++) | |
3435 { | |
3436 int elt = charbuf[idx]; | |
3437 | |
3438 if (elt == -2) | |
3439 { | |
3440 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]); | |
3441 idx++; | |
3442 } | |
3443 else if (elt == -1) | |
3444 { | |
3445 charbuf[idx++] = ISO_CODE_ESC; | |
3446 charbuf[idx] = '0'; | |
3447 new_chars += 2; | |
3448 } | |
3449 } | |
3450 cmp_status->state = COMPOSING_NO; | |
3451 return new_chars; | |
3452 } | |
3453 | |
3454 /* If characers are under composition, finish the composition. */ | |
3080 #define MAYBE_FINISH_COMPOSITION() \ | 3455 #define MAYBE_FINISH_COMPOSITION() \ |
3081 do { \ | 3456 do { \ |
3082 int i; \ | 3457 if (cmp_status->state != COMPOSING_NO) \ |
3083 if (composition_state == COMPOSING_NO) \ | 3458 char_offset += finish_composition (charbuf, cmp_status); \ |
3084 break; \ | |
3085 /* It is assured that we have enough room for producing \ | |
3086 characters stored in the table `components'. */ \ | |
3087 if (charbuf + component_idx > charbuf_end) \ | |
3088 goto no_more_source; \ | |
3089 composition_state = COMPOSING_NO; \ | |
3090 if (method == COMPOSITION_RELATIVE \ | |
3091 || method == COMPOSITION_WITH_ALTCHARS) \ | |
3092 { \ | |
3093 for (i = 0; i < component_idx; i++) \ | |
3094 *charbuf++ = components[i]; \ | |
3095 char_offset += component_idx; \ | |
3096 } \ | |
3097 else \ | |
3098 { \ | |
3099 for (i = 0; i < component_idx; i += 2) \ | |
3100 *charbuf++ = components[i]; \ | |
3101 char_offset += (component_idx / 2) + 1; \ | |
3102 } \ | |
3103 } while (0) | 3459 } while (0) |
3104 | 3460 |
3105 | |
3106 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. | 3461 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. |
3462 | |
3107 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 | 3463 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 |
3108 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 | 3464 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 |
3109 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1 | 3465 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1 |
3110 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1 | 3466 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1 |
3111 */ | 3467 |
3112 | 3468 Produce this annotation sequence now: |
3113 #define DECODE_COMPOSITION_START(c1) \ | 3469 |
3114 do { \ | 3470 [ -LENGTH(==-4) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) METHOD ] |
3115 if (c1 == '0' \ | 3471 */ |
3116 && composition_state == COMPOSING_COMPONENT_RULE) \ | 3472 |
3117 { \ | 3473 #define DECODE_COMPOSITION_START(c1) \ |
3118 component_len = component_idx; \ | 3474 do { \ |
3119 composition_state = COMPOSING_CHAR; \ | 3475 if (c1 == '0' \ |
3120 } \ | 3476 && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \ |
3121 else \ | 3477 && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \ |
3122 { \ | 3478 || (cmp_status->state == COMPOSING_COMPONENT_RULE \ |
3123 const unsigned char *p; \ | 3479 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \ |
3124 \ | 3480 { \ |
3125 MAYBE_FINISH_COMPOSITION (); \ | 3481 *charbuf++ = -1; \ |
3126 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \ | 3482 *charbuf++= -1; \ |
3127 goto no_more_source; \ | 3483 cmp_status->state = COMPOSING_CHAR; \ |
3128 for (p = src; p < src_end - 1; p++) \ | 3484 cmp_status->length += 2; \ |
3129 if (*p == ISO_CODE_ESC && p[1] == '1') \ | 3485 } \ |
3130 break; \ | 3486 else \ |
3131 if (p == src_end - 1) \ | 3487 { \ |
3132 { \ | 3488 MAYBE_FINISH_COMPOSITION (); \ |
3133 if (coding->mode & CODING_MODE_LAST_BLOCK) \ | 3489 cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE \ |
3134 goto invalid_code; \ | 3490 : c1 == '2' ? COMPOSITION_WITH_RULE \ |
3135 /* The current composition doesn't end in the current \ | 3491 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \ |
3136 source. */ \ | 3492 : COMPOSITION_WITH_RULE_ALTCHARS); \ |
3137 record_conversion_result \ | 3493 cmp_status->state \ |
3138 (coding, CODING_RESULT_INSUFFICIENT_SRC); \ | 3494 = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR); \ |
3139 goto no_more_source; \ | 3495 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \ |
3140 } \ | 3496 cmp_status->length = MAX_ANNOTATION_LENGTH; \ |
3141 \ | 3497 cmp_status->nchars = cmp_status->ncomps = 0; \ |
3142 /* This is surely the start of a composition. */ \ | 3498 coding->annotated = 1; \ |
3143 method = (c1 == '0' ? COMPOSITION_RELATIVE \ | 3499 } \ |
3144 : c1 == '2' ? COMPOSITION_WITH_RULE \ | |
3145 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \ | |
3146 : COMPOSITION_WITH_RULE_ALTCHARS); \ | |
3147 composition_state = (c1 <= '2' ? COMPOSING_CHAR \ | |
3148 : COMPOSING_COMPONENT_CHAR); \ | |
3149 component_idx = component_len = 0; \ | |
3150 } \ | |
3151 } while (0) | 3500 } while (0) |
3152 | 3501 |
3153 | 3502 |
3154 /* Handle compositoin end sequence ESC 1. */ | 3503 /* Handle composition end sequence ESC 1. */ |
3155 | 3504 |
3156 #define DECODE_COMPOSITION_END() \ | 3505 #define DECODE_COMPOSITION_END() \ |
3157 do { \ | 3506 do { \ |
3158 int nchars = (component_len > 0 ? component_idx - component_len \ | 3507 if (cmp_status->nchars == 0 \ |
3159 : method == COMPOSITION_RELATIVE ? component_idx \ | 3508 || ((cmp_status->state == COMPOSING_CHAR) \ |
3160 : (component_idx + 1) / 2); \ | 3509 == (cmp_status->method == COMPOSITION_WITH_RULE))) \ |
3161 int i; \ | |
3162 int *saved_charbuf = charbuf; \ | |
3163 \ | |
3164 ADD_COMPOSITION_DATA (charbuf, nchars, method); \ | |
3165 if (method != COMPOSITION_RELATIVE) \ | |
3166 { \ | 3510 { \ |
3167 if (component_len == 0) \ | 3511 MAYBE_FINISH_COMPOSITION (); \ |
3168 for (i = 0; i < component_idx; i++) \ | 3512 goto invalid_code; \ |
3169 *charbuf++ = components[i]; \ | |
3170 else \ | |
3171 for (i = 0; i < component_len; i++) \ | |
3172 *charbuf++ = components[i]; \ | |
3173 *saved_charbuf = saved_charbuf - charbuf; \ | |
3174 } \ | 3513 } \ |
3175 if (method == COMPOSITION_WITH_RULE) \ | 3514 if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) \ |
3176 for (i = 0; i < component_idx; i += 2, char_offset++) \ | 3515 charbuf[- cmp_status->length] -= cmp_status->ncomps + 2; \ |
3177 *charbuf++ = components[i]; \ | 3516 else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) \ |
3517 charbuf[- cmp_status->length] -= cmp_status->ncomps * 3; \ | |
3518 charbuf[- cmp_status->length + 2] = cmp_status->nchars; \ | |
3519 char_offset += cmp_status->nchars; \ | |
3520 cmp_status->state = COMPOSING_NO; \ | |
3521 } while (0) | |
3522 | |
3523 /* Store a composition rule RULE in charbuf, and update cmp_status. */ | |
3524 | |
3525 #define STORE_COMPOSITION_RULE(rule) \ | |
3526 do { \ | |
3527 *charbuf++ = -2; \ | |
3528 *charbuf++ = rule; \ | |
3529 cmp_status->length += 2; \ | |
3530 cmp_status->state--; \ | |
3531 } while (0) | |
3532 | |
3533 /* Store a composed char or a component char C in charbuf, and update | |
3534 cmp_status. */ | |
3535 | |
3536 #define STORE_COMPOSITION_CHAR(c) \ | |
3537 do { \ | |
3538 *charbuf++ = (c); \ | |
3539 cmp_status->length++; \ | |
3540 if (cmp_status->state == COMPOSING_CHAR) \ | |
3541 cmp_status->nchars++; \ | |
3178 else \ | 3542 else \ |
3179 for (i = component_len; i < component_idx; i++, char_offset++) \ | 3543 cmp_status->ncomps++; \ |
3180 *charbuf++ = components[i]; \ | 3544 if (cmp_status->method == COMPOSITION_WITH_RULE \ |
3181 coding->annotated = 1; \ | 3545 || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS \ |
3182 composition_state = COMPOSING_NO; \ | 3546 && cmp_status->state == COMPOSING_COMPONENT_CHAR)) \ |
3183 } while (0) | 3547 cmp_status->state++; \ |
3184 | |
3185 | |
3186 /* Decode a composition rule from the byte C1 (and maybe one more byte | |
3187 from SRC) and store one encoded composition rule in | |
3188 coding->cmp_data. */ | |
3189 | |
3190 #define DECODE_COMPOSITION_RULE(c1) \ | |
3191 do { \ | |
3192 (c1) -= 32; \ | |
3193 if (c1 < 81) /* old format (before ver.21) */ \ | |
3194 { \ | |
3195 int gref = (c1) / 9; \ | |
3196 int nref = (c1) % 9; \ | |
3197 if (gref == 4) gref = 10; \ | |
3198 if (nref == 4) nref = 10; \ | |
3199 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \ | |
3200 } \ | |
3201 else if (c1 < 93) /* new format (after ver.21) */ \ | |
3202 { \ | |
3203 ONE_MORE_BYTE (c2); \ | |
3204 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \ | |
3205 } \ | |
3206 else \ | |
3207 c1 = 0; \ | |
3208 } while (0) | 3548 } while (0) |
3209 | 3549 |
3210 | 3550 |
3211 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ | 3551 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ |
3212 | 3552 |
3217 const unsigned char *src = coding->source + coding->consumed; | 3557 const unsigned char *src = coding->source + coding->consumed; |
3218 const unsigned char *src_end = coding->source + coding->src_bytes; | 3558 const unsigned char *src_end = coding->source + coding->src_bytes; |
3219 const unsigned char *src_base; | 3559 const unsigned char *src_base; |
3220 int *charbuf = coding->charbuf + coding->charbuf_used; | 3560 int *charbuf = coding->charbuf + coding->charbuf_used; |
3221 int *charbuf_end | 3561 int *charbuf_end |
3222 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH; | 3562 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; |
3223 int consumed_chars = 0, consumed_chars_base; | 3563 int consumed_chars = 0, consumed_chars_base; |
3224 int multibytep = coding->src_multibyte; | 3564 int multibytep = coding->src_multibyte; |
3225 /* Charsets invoked to graphic plane 0 and 1 respectively. */ | 3565 /* Charsets invoked to graphic plane 0 and 1 respectively. */ |
3226 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); | 3566 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); |
3227 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); | 3567 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); |
3228 int charset_id_2, charset_id_3; | 3568 int charset_id_2, charset_id_3; |
3229 struct charset *charset; | 3569 struct charset *charset; |
3230 int c; | 3570 int c; |
3231 /* For handling composition sequence. */ | 3571 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding); |
3232 #define COMPOSING_NO 0 | |
3233 #define COMPOSING_CHAR 1 | |
3234 #define COMPOSING_RULE 2 | |
3235 #define COMPOSING_COMPONENT_CHAR 3 | |
3236 #define COMPOSING_COMPONENT_RULE 4 | |
3237 | |
3238 int composition_state = COMPOSING_NO; | |
3239 enum composition_method method; | |
3240 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; | |
3241 int component_idx; | |
3242 int component_len; | |
3243 Lisp_Object attrs, charset_list; | 3572 Lisp_Object attrs, charset_list; |
3244 int char_offset = coding->produced_char; | 3573 int char_offset = coding->produced_char; |
3245 int last_offset = char_offset; | 3574 int last_offset = char_offset; |
3246 int last_id = charset_ascii; | 3575 int last_id = charset_ascii; |
3247 int eol_crlf = | 3576 int eol_crlf = |
3248 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); | 3577 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); |
3249 int byte_after_cr = -1; | 3578 int byte_after_cr = -1; |
3579 int i; | |
3250 | 3580 |
3251 CODING_GET_INFO (coding, attrs, charset_list); | 3581 CODING_GET_INFO (coding, attrs, charset_list); |
3252 setup_iso_safe_charsets (attrs); | 3582 setup_iso_safe_charsets (attrs); |
3253 /* Charset list may have been changed. */ | 3583 /* Charset list may have been changed. */ |
3254 charset_list = CODING_ATTR_CHARSET_LIST (attrs); | 3584 charset_list = CODING_ATTR_CHARSET_LIST (attrs); |
3255 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); | 3585 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); |
3586 | |
3587 if (cmp_status->state != COMPOSING_NO) | |
3588 { | |
3589 for (i = 0; i < cmp_status->length; i++) | |
3590 *charbuf++ = cmp_status->carryover[i]; | |
3591 coding->annotated = 1; | |
3592 } | |
3256 | 3593 |
3257 while (1) | 3594 while (1) |
3258 { | 3595 { |
3259 int c1, c2; | 3596 int c1, c2; |
3260 | 3597 |
3273 else | 3610 else |
3274 ONE_MORE_BYTE (c1); | 3611 ONE_MORE_BYTE (c1); |
3275 if (c1 < 0) | 3612 if (c1 < 0) |
3276 goto invalid_code; | 3613 goto invalid_code; |
3277 | 3614 |
3615 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0) | |
3616 { | |
3617 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); | |
3618 char_offset++; | |
3619 CODING_ISO_EXTSEGMENT_LEN (coding)--; | |
3620 continue; | |
3621 } | |
3622 | |
3623 if (CODING_ISO_EMBEDDED_UTF_8 (coding)) | |
3624 { | |
3625 if (c1 == ISO_CODE_ESC) | |
3626 { | |
3627 if (src + 1 >= src_end) | |
3628 goto no_more_source; | |
3629 *charbuf++ = ISO_CODE_ESC; | |
3630 char_offset++; | |
3631 if (src[0] == '%' && src[1] == '@') | |
3632 { | |
3633 src += 2; | |
3634 consumed_chars += 2; | |
3635 char_offset += 2; | |
3636 /* We are sure charbuf can contain two more chars. */ | |
3637 *charbuf++ = '%'; | |
3638 *charbuf++ = '@'; | |
3639 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0; | |
3640 } | |
3641 } | |
3642 else | |
3643 { | |
3644 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); | |
3645 char_offset++; | |
3646 } | |
3647 continue; | |
3648 } | |
3649 | |
3650 if ((cmp_status->state == COMPOSING_RULE | |
3651 || cmp_status->state == COMPOSING_COMPONENT_RULE) | |
3652 && c1 != ISO_CODE_ESC) | |
3653 { | |
3654 int rule, nbytes; | |
3655 | |
3656 DECODE_COMPOSITION_RULE (rule, nbytes); | |
3657 if (rule < 0) | |
3658 goto invalid_code; | |
3659 STORE_COMPOSITION_RULE (rule); | |
3660 continue; | |
3661 } | |
3662 | |
3278 /* We produce at most one character. */ | 3663 /* We produce at most one character. */ |
3279 switch (iso_code_class [c1]) | 3664 switch (iso_code_class [c1]) |
3280 { | 3665 { |
3281 case ISO_0x20_or_0x7F: | 3666 case ISO_0x20_or_0x7F: |
3282 if (composition_state != COMPOSING_NO) | |
3283 { | |
3284 if (composition_state == COMPOSING_RULE | |
3285 || composition_state == COMPOSING_COMPONENT_RULE) | |
3286 { | |
3287 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) | |
3288 { | |
3289 DECODE_COMPOSITION_RULE (c1); | |
3290 components[component_idx++] = c1; | |
3291 composition_state--; | |
3292 continue; | |
3293 } | |
3294 /* Too long composition. */ | |
3295 MAYBE_FINISH_COMPOSITION (); | |
3296 } | |
3297 } | |
3298 if (charset_id_0 < 0 | 3667 if (charset_id_0 < 0 |
3299 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) | 3668 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) |
3300 /* This is SPACE or DEL. */ | 3669 /* This is SPACE or DEL. */ |
3301 charset = CHARSET_FROM_ID (charset_ascii); | 3670 charset = CHARSET_FROM_ID (charset_ascii); |
3302 else | 3671 else |
3303 charset = CHARSET_FROM_ID (charset_id_0); | 3672 charset = CHARSET_FROM_ID (charset_id_0); |
3304 break; | 3673 break; |
3305 | 3674 |
3306 case ISO_graphic_plane_0: | 3675 case ISO_graphic_plane_0: |
3307 if (composition_state != COMPOSING_NO) | |
3308 { | |
3309 if (composition_state == COMPOSING_RULE | |
3310 || composition_state == COMPOSING_COMPONENT_RULE) | |
3311 { | |
3312 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) | |
3313 { | |
3314 DECODE_COMPOSITION_RULE (c1); | |
3315 components[component_idx++] = c1; | |
3316 composition_state--; | |
3317 continue; | |
3318 } | |
3319 MAYBE_FINISH_COMPOSITION (); | |
3320 } | |
3321 } | |
3322 if (charset_id_0 < 0) | 3676 if (charset_id_0 < 0) |
3323 charset = CHARSET_FROM_ID (charset_ascii); | 3677 charset = CHARSET_FROM_ID (charset_ascii); |
3324 else | 3678 else |
3325 charset = CHARSET_FROM_ID (charset_id_0); | 3679 charset = CHARSET_FROM_ID (charset_id_0); |
3326 break; | 3680 break; |
3344 MAYBE_FINISH_COMPOSITION (); | 3698 MAYBE_FINISH_COMPOSITION (); |
3345 charset = CHARSET_FROM_ID (charset_ascii); | 3699 charset = CHARSET_FROM_ID (charset_ascii); |
3346 break; | 3700 break; |
3347 | 3701 |
3348 case ISO_control_1: | 3702 case ISO_control_1: |
3349 MAYBE_FINISH_COMPOSITION (); | |
3350 goto invalid_code; | 3703 goto invalid_code; |
3351 | 3704 |
3352 case ISO_shift_out: | 3705 case ISO_shift_out: |
3353 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT) | 3706 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT) |
3354 || CODING_ISO_DESIGNATION (coding, 1) < 0) | 3707 || CODING_ISO_DESIGNATION (coding, 1) < 0) |
3482 break; | 3835 break; |
3483 | 3836 |
3484 case '0': case '2': case '3': case '4': /* start composition */ | 3837 case '0': case '2': case '3': case '4': /* start composition */ |
3485 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)) | 3838 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)) |
3486 goto invalid_code; | 3839 goto invalid_code; |
3840 if (last_id != charset_ascii) | |
3841 { | |
3842 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id); | |
3843 last_id = charset_ascii; | |
3844 last_offset = char_offset; | |
3845 } | |
3487 DECODE_COMPOSITION_START (c1); | 3846 DECODE_COMPOSITION_START (c1); |
3488 continue; | 3847 continue; |
3489 | 3848 |
3490 case '1': /* end composition */ | 3849 case '1': /* end composition */ |
3491 if (composition_state == COMPOSING_NO) | 3850 if (cmp_status->state == COMPOSING_NO) |
3492 goto invalid_code; | 3851 goto invalid_code; |
3493 DECODE_COMPOSITION_END (); | 3852 DECODE_COMPOSITION_END (); |
3494 continue; | 3853 continue; |
3495 | 3854 |
3496 case '[': /* specification of direction */ | 3855 case '[': /* specification of direction */ |
3537 They may be decoded by post-read-conversion. */ | 3896 They may be decoded by post-read-conversion. */ |
3538 int dim, M, L; | 3897 int dim, M, L; |
3539 int size; | 3898 int size; |
3540 | 3899 |
3541 ONE_MORE_BYTE (dim); | 3900 ONE_MORE_BYTE (dim); |
3901 if (dim < 0 || dim > 4) | |
3902 goto invalid_code; | |
3542 ONE_MORE_BYTE (M); | 3903 ONE_MORE_BYTE (M); |
3904 if (M < 128) | |
3905 goto invalid_code; | |
3543 ONE_MORE_BYTE (L); | 3906 ONE_MORE_BYTE (L); |
3907 if (L < 128) | |
3908 goto invalid_code; | |
3544 size = ((M - 128) * 128) + (L - 128); | 3909 size = ((M - 128) * 128) + (L - 128); |
3545 if (charbuf + 8 + size > charbuf_end) | 3910 if (charbuf + 6 > charbuf_end) |
3546 goto break_loop; | 3911 goto break_loop; |
3547 *charbuf++ = ISO_CODE_ESC; | 3912 *charbuf++ = ISO_CODE_ESC; |
3548 *charbuf++ = '%'; | 3913 *charbuf++ = '%'; |
3549 *charbuf++ = '/'; | 3914 *charbuf++ = '/'; |
3550 *charbuf++ = dim; | 3915 *charbuf++ = dim; |
3551 *charbuf++ = BYTE8_TO_CHAR (M); | 3916 *charbuf++ = BYTE8_TO_CHAR (M); |
3552 *charbuf++ = BYTE8_TO_CHAR (L); | 3917 *charbuf++ = BYTE8_TO_CHAR (L); |
3553 while (size-- > 0) | 3918 CODING_ISO_EXTSEGMENT_LEN (coding) = size; |
3554 { | |
3555 ONE_MORE_BYTE (c1); | |
3556 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); | |
3557 } | |
3558 } | 3919 } |
3559 else if (c1 == 'G') | 3920 else if (c1 == 'G') |
3560 { | 3921 { |
3561 /* XFree86 extension for embedding UTF-8 in CTEXT: | 3922 /* XFree86 extension for embedding UTF-8 in CTEXT: |
3562 ESC % G --UTF-8-BYTES-- ESC % @ | 3923 ESC % G --UTF-8-BYTES-- ESC % @ |
3563 We keep these bytes as is for the moment. | 3924 We keep these bytes as is for the moment. |
3564 They may be decoded by post-read-conversion. */ | 3925 They may be decoded by post-read-conversion. */ |
3565 int *p = charbuf; | 3926 if (charbuf + 3 > charbuf_end) |
3566 | |
3567 if (p + 6 > charbuf_end) | |
3568 goto break_loop; | 3927 goto break_loop; |
3569 *p++ = ISO_CODE_ESC; | 3928 *charbuf++ = ISO_CODE_ESC; |
3570 *p++ = '%'; | 3929 *charbuf++ = '%'; |
3571 *p++ = 'G'; | 3930 *charbuf++ = 'G'; |
3572 while (p < charbuf_end) | 3931 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1; |
3573 { | |
3574 ONE_MORE_BYTE (c1); | |
3575 if (c1 == ISO_CODE_ESC | |
3576 && src + 1 < src_end | |
3577 && src[0] == '%' | |
3578 && src[1] == '@') | |
3579 { | |
3580 src += 2; | |
3581 break; | |
3582 } | |
3583 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); | |
3584 } | |
3585 if (p + 3 > charbuf_end) | |
3586 goto break_loop; | |
3587 *p++ = ISO_CODE_ESC; | |
3588 *p++ = '%'; | |
3589 *p++ = '@'; | |
3590 charbuf = p; | |
3591 } | 3932 } |
3592 else | 3933 else |
3593 goto invalid_code; | 3934 goto invalid_code; |
3594 continue; | 3935 continue; |
3595 break; | 3936 break; |
3623 } | 3964 } |
3624 continue; | 3965 continue; |
3625 } | 3966 } |
3626 } | 3967 } |
3627 | 3968 |
3628 if (charset->id != charset_ascii | 3969 if (cmp_status->state == COMPOSING_NO |
3970 && charset->id != charset_ascii | |
3629 && last_id != charset->id) | 3971 && last_id != charset->id) |
3630 { | 3972 { |
3631 if (last_id != charset_ascii) | 3973 if (last_id != charset_ascii) |
3632 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); | 3974 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); |
3633 last_id = charset->id; | 3975 last_id = charset->id; |
3665 *charbuf++ = *src_base; | 4007 *charbuf++ = *src_base; |
3666 else | 4008 else |
3667 *charbuf++ = BYTE8_TO_CHAR (*src_base); | 4009 *charbuf++ = BYTE8_TO_CHAR (*src_base); |
3668 } | 4010 } |
3669 } | 4011 } |
3670 else if (composition_state == COMPOSING_NO) | 4012 else if (cmp_status->state == COMPOSING_NO) |
3671 { | 4013 { |
3672 *charbuf++ = c; | 4014 *charbuf++ = c; |
3673 char_offset++; | 4015 char_offset++; |
3674 } | 4016 } |
4017 else if ((cmp_status->state == COMPOSING_CHAR | |
4018 ? cmp_status->nchars | |
4019 : cmp_status->ncomps) | |
4020 >= MAX_COMPOSITION_COMPONENTS) | |
4021 { | |
4022 /* Too long composition. */ | |
4023 MAYBE_FINISH_COMPOSITION (); | |
4024 *charbuf++ = c; | |
4025 char_offset++; | |
4026 } | |
3675 else | 4027 else |
3676 { | 4028 STORE_COMPOSITION_CHAR (c); |
3677 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) | |
3678 { | |
3679 components[component_idx++] = c; | |
3680 if (method == COMPOSITION_WITH_RULE | |
3681 || (method == COMPOSITION_WITH_RULE_ALTCHARS | |
3682 && composition_state == COMPOSING_COMPONENT_CHAR)) | |
3683 composition_state++; | |
3684 } | |
3685 else | |
3686 { | |
3687 MAYBE_FINISH_COMPOSITION (); | |
3688 *charbuf++ = c; | |
3689 char_offset++; | |
3690 } | |
3691 } | |
3692 continue; | 4029 continue; |
3693 | 4030 |
3694 invalid_code: | 4031 invalid_code: |
3695 MAYBE_FINISH_COMPOSITION (); | 4032 MAYBE_FINISH_COMPOSITION (); |
3696 src = src_base; | 4033 src = src_base; |
3704 break_loop: | 4041 break_loop: |
3705 break; | 4042 break; |
3706 } | 4043 } |
3707 | 4044 |
3708 no_more_source: | 4045 no_more_source: |
3709 if (last_id != charset_ascii) | 4046 if (cmp_status->state != COMPOSING_NO) |
4047 { | |
4048 if (coding->mode & CODING_MODE_LAST_BLOCK) | |
4049 MAYBE_FINISH_COMPOSITION (); | |
4050 else | |
4051 { | |
4052 charbuf -= cmp_status->length; | |
4053 for (i = 0; i < cmp_status->length; i++) | |
4054 cmp_status->carryover[i] = charbuf[i]; | |
4055 } | |
4056 } | |
4057 else if (last_id != charset_ascii) | |
3710 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); | 4058 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); |
3711 coding->consumed_char += consumed_chars_base; | 4059 coding->consumed_char += consumed_chars_base; |
3712 coding->consumed = src_base - coding->source; | 4060 coding->consumed = src_base - coding->source; |
3713 coding->charbuf_used = charbuf - coding->charbuf; | 4061 coding->charbuf_used = charbuf - coding->charbuf; |
3714 } | 4062 } |
5474 val = CODING_ATTR_SAFE_CHARSETS (attrs); | 5822 val = CODING_ATTR_SAFE_CHARSETS (attrs); |
5475 coding->max_charset_id = SCHARS (val) - 1; | 5823 coding->max_charset_id = SCHARS (val) - 1; |
5476 coding->safe_charsets = SDATA (val); | 5824 coding->safe_charsets = SDATA (val); |
5477 } | 5825 } |
5478 CODING_ISO_FLAGS (coding) = flags; | 5826 CODING_ISO_FLAGS (coding) = flags; |
5827 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO; | |
5828 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO; | |
5829 CODING_ISO_EXTSEGMENT_LEN (coding) = 0; | |
5830 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0; | |
5479 } | 5831 } |
5480 else if (EQ (coding_type, Qcharset)) | 5832 else if (EQ (coding_type, Qcharset)) |
5481 { | 5833 { |
5482 coding->detector = detect_coding_charset; | 5834 coding->detector = detect_coding_charset; |
5483 coding->decoder = decode_coding_charset; | 5835 coding->decoder = decode_coding_charset; |
5531 coding->detector = detect_coding_emacs_mule; | 5883 coding->detector = detect_coding_emacs_mule; |
5532 coding->decoder = decode_coding_emacs_mule; | 5884 coding->decoder = decode_coding_emacs_mule; |
5533 coding->encoder = encode_coding_emacs_mule; | 5885 coding->encoder = encode_coding_emacs_mule; |
5534 coding->common_flags | 5886 coding->common_flags |
5535 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); | 5887 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); |
5888 coding->spec.emacs_mule.full_support = 1; | |
5536 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) | 5889 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) |
5537 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) | 5890 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) |
5538 { | 5891 { |
5539 Lisp_Object tail, safe_charsets; | 5892 Lisp_Object tail, safe_charsets; |
5540 int max_charset_id = 0; | 5893 int max_charset_id = 0; |
5548 for (tail = Vemacs_mule_charset_list; CONSP (tail); | 5901 for (tail = Vemacs_mule_charset_list; CONSP (tail); |
5549 tail = XCDR (tail)) | 5902 tail = XCDR (tail)) |
5550 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); | 5903 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); |
5551 coding->max_charset_id = max_charset_id; | 5904 coding->max_charset_id = max_charset_id; |
5552 coding->safe_charsets = SDATA (safe_charsets); | 5905 coding->safe_charsets = SDATA (safe_charsets); |
5553 } | 5906 coding->spec.emacs_mule.full_support = 1; |
5907 } | |
5908 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO; | |
5909 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO; | |
5554 } | 5910 } |
5555 else if (EQ (coding_type, Qshift_jis)) | 5911 else if (EQ (coding_type, Qshift_jis)) |
5556 { | 5912 { |
5557 coding->detector = detect_coding_sjis; | 5913 coding->detector = detect_coding_sjis; |
5558 coding->decoder = decode_coding_sjis; | 5914 coding->decoder = decode_coding_sjis; |
6336 } \ | 6692 } \ |
6337 } \ | 6693 } \ |
6338 } while (0) | 6694 } while (0) |
6339 | 6695 |
6340 | 6696 |
6697 /* Return a translation of character(s) at BUF according to TRANS. | |
6698 TRANS is TO-CHAR or ((FROM . TO) ...) where | |
6699 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...]. | |
6700 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a | |
6701 translation is found, and Qnil if not found. | |
6702 If BUF is too short to look up characters in FROM, return Qt. */ | |
6703 | |
6341 static Lisp_Object | 6704 static Lisp_Object |
6342 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars) | 6705 get_translation (trans, buf, buf_end) |
6343 Lisp_Object val; | 6706 Lisp_Object trans; |
6344 int *buf, *buf_end; | 6707 int *buf, *buf_end; |
6345 int last_block; | |
6346 int *from_nchars, *to_nchars; | |
6347 { | 6708 { |
6348 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or | 6709 |
6349 [TO-CHAR ...]. */ | 6710 if (INTEGERP (trans)) |
6350 if (CONSP (val)) | 6711 return trans; |
6351 { | 6712 for (; CONSP (trans); trans = XCDR (trans)) |
6352 Lisp_Object from, tail; | 6713 { |
6353 int i, len; | 6714 Lisp_Object val = XCAR (trans); |
6354 | 6715 Lisp_Object from = XCAR (val); |
6355 for (tail = val; CONSP (tail); tail = XCDR (tail)) | 6716 int len = ASIZE (from); |
6356 { | 6717 int i; |
6357 val = XCAR (tail); | 6718 |
6358 from = XCAR (val); | 6719 for (i = 0; i < len; i++) |
6359 len = ASIZE (from); | 6720 { |
6360 for (i = 0; i < len; i++) | 6721 if (buf + i == buf_end) |
6361 { | 6722 return Qt; |
6362 if (buf + i == buf_end) | 6723 if (XINT (AREF (from, i)) != buf[i]) |
6363 { | 6724 break; |
6364 if (! last_block) | 6725 } |
6365 return Qt; | 6726 if (i == len) |
6366 break; | 6727 return val; |
6367 } | 6728 } |
6368 if (XINT (AREF (from, i)) != buf[i]) | 6729 return Qnil; |
6369 break; | |
6370 } | |
6371 if (i == len) | |
6372 { | |
6373 val = XCDR (val); | |
6374 *from_nchars = len; | |
6375 break; | |
6376 } | |
6377 } | |
6378 if (! CONSP (tail)) | |
6379 return Qnil; | |
6380 } | |
6381 if (VECTORP (val)) | |
6382 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val); | |
6383 else | |
6384 *buf = XINT (val); | |
6385 return val; | |
6386 } | 6730 } |
6387 | 6731 |
6388 | 6732 |
6389 static int | 6733 static int |
6390 produce_chars (coding, translation_table, last_block) | 6734 produce_chars (coding, translation_table, last_block) |
6420 Lisp_Object trans = Qnil; | 6764 Lisp_Object trans = Qnil; |
6421 | 6765 |
6422 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); | 6766 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); |
6423 if (! NILP (trans)) | 6767 if (! NILP (trans)) |
6424 { | 6768 { |
6425 trans = get_translation (trans, buf, buf_end, last_block, | 6769 trans = get_translation (trans, buf, buf_end); |
6426 &from_nchars, &to_nchars); | 6770 if (INTEGERP (trans)) |
6427 if (EQ (trans, Qt)) | 6771 c = XINT (trans); |
6772 else if (CONSP (trans)) | |
6773 { | |
6774 from_nchars = ASIZE (XCAR (trans)); | |
6775 trans = XCDR (trans); | |
6776 if (INTEGERP (trans)) | |
6777 c = XINT (trans); | |
6778 else | |
6779 { | |
6780 to_nchars = ASIZE (trans); | |
6781 c = XINT (AREF (trans, 0)); | |
6782 } | |
6783 } | |
6784 else if (EQ (trans, Qt) && ! last_block) | |
6428 break; | 6785 break; |
6429 c = *buf; | |
6430 } | 6786 } |
6431 | 6787 |
6432 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end) | 6788 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end) |
6433 { | 6789 { |
6434 dst = alloc_destination (coding, | 6790 dst = alloc_destination (coding, |
6436 + MAX_MULTIBYTE_LENGTH * to_nchars, | 6792 + MAX_MULTIBYTE_LENGTH * to_nchars, |
6437 dst); | 6793 dst); |
6438 if (EQ (coding->src_object, coding->dst_object)) | 6794 if (EQ (coding->src_object, coding->dst_object)) |
6439 { | 6795 { |
6440 coding_set_source (coding); | 6796 coding_set_source (coding); |
6441 dst_end = ((unsigned char *) coding->source) + coding->consumed; | 6797 dst_end = (((unsigned char *) coding->source) |
6798 + coding->consumed); | |
6442 } | 6799 } |
6443 else | 6800 else |
6444 dst_end = coding->destination + coding->dst_bytes; | 6801 dst_end = coding->destination + coding->dst_bytes; |
6445 } | 6802 } |
6446 | 6803 |
6453 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); | 6810 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); |
6454 else | 6811 else |
6455 *dst++ = CHAR_TO_BYTE8 (c); | 6812 *dst++ = CHAR_TO_BYTE8 (c); |
6456 } | 6813 } |
6457 produced_chars += to_nchars; | 6814 produced_chars += to_nchars; |
6458 *buf++ = to_nchars; | 6815 buf += from_nchars; |
6459 while (--from_nchars > 0) | |
6460 *buf++ = 0; | |
6461 } | 6816 } |
6462 else | 6817 else |
6463 /* This is an annotation datum. (-C) is the length. */ | 6818 /* This is an annotation datum. (-C) is the length. */ |
6464 buf += -c; | 6819 buf += -c; |
6465 } | 6820 } |
6571 return carryover; | 6926 return carryover; |
6572 } | 6927 } |
6573 | 6928 |
6574 /* Compose text in CODING->object according to the annotation data at | 6929 /* Compose text in CODING->object according to the annotation data at |
6575 CHARBUF. CHARBUF is an array: | 6930 CHARBUF. CHARBUF is an array: |
6576 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ] | 6931 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ] |
6577 */ | 6932 */ |
6578 | 6933 |
6579 static INLINE void | 6934 static INLINE void |
6580 produce_composition (coding, charbuf, pos) | 6935 produce_composition (coding, charbuf, pos) |
6581 struct coding_system *coding; | 6936 struct coding_system *coding; |
6585 int len; | 6940 int len; |
6586 EMACS_INT to; | 6941 EMACS_INT to; |
6587 enum composition_method method; | 6942 enum composition_method method; |
6588 Lisp_Object components; | 6943 Lisp_Object components; |
6589 | 6944 |
6590 len = -charbuf[0]; | 6945 len = -charbuf[0] - MAX_ANNOTATION_LENGTH; |
6591 to = pos + charbuf[2]; | 6946 to = pos + charbuf[2]; |
6592 if (to <= pos) | 6947 method = (enum composition_method) (charbuf[4]); |
6593 return; | |
6594 method = (enum composition_method) (charbuf[3]); | |
6595 | 6948 |
6596 if (method == COMPOSITION_RELATIVE) | 6949 if (method == COMPOSITION_RELATIVE) |
6597 components = Qnil; | 6950 components = Qnil; |
6598 else if (method >= COMPOSITION_WITH_RULE | 6951 else |
6599 && method <= COMPOSITION_WITH_RULE_ALTCHARS) | |
6600 { | 6952 { |
6601 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; | 6953 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; |
6602 int i; | 6954 int i, j; |
6603 | 6955 |
6604 len -= 4; | 6956 if (method == COMPOSITION_WITH_RULE) |
6605 charbuf += 4; | 6957 len = charbuf[2] * 3 - 2; |
6606 for (i = 0; i < len; i++) | 6958 charbuf += MAX_ANNOTATION_LENGTH; |
6607 { | 6959 /* charbuf = [ CHAR ... CHAR ] or [ CHAR -2 RULE ... CHAR ] */ |
6608 args[i] = make_number (charbuf[i]); | 6960 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++) |
6609 if (charbuf[i] < 0) | 6961 { |
6610 return; | 6962 if (charbuf[i] >= 0) |
6611 } | 6963 args[j] = make_number (charbuf[i]); |
6612 components = (method == COMPOSITION_WITH_ALTCHARS | 6964 else |
6613 ? Fstring (len, args) : Fvector (len, args)); | 6965 { |
6614 } | 6966 i++; |
6615 else | 6967 args[j] = make_number (charbuf[i] % 0x100); |
6616 return; | 6968 } |
6969 } | |
6970 components = (i == j ? Fstring (j, args) : Fvector (j, args)); | |
6971 } | |
6617 compose_text (pos, to, components, Qnil, coding->dst_object); | 6972 compose_text (pos, to, components, Qnil, coding->dst_object); |
6618 } | 6973 } |
6619 | 6974 |
6620 | 6975 |
6621 /* Put `charset' property on text in CODING->object according to | 6976 /* Put `charset' property on text in CODING->object according to |
6673 return; | 7028 return; |
6674 | 7029 |
6675 while (charbuf < charbuf_end) | 7030 while (charbuf < charbuf_end) |
6676 { | 7031 { |
6677 if (*charbuf >= 0) | 7032 if (*charbuf >= 0) |
6678 pos += *charbuf++; | 7033 pos++, charbuf++; |
6679 else | 7034 else |
6680 { | 7035 { |
6681 int len = -*charbuf; | 7036 int len = -*charbuf; |
6682 switch (charbuf[1]) | 7037 |
6683 { | 7038 if (len > 2) |
6684 case CODING_ANNOTATE_COMPOSITION_MASK: | 7039 switch (charbuf[1]) |
6685 produce_composition (coding, charbuf, pos); | 7040 { |
6686 break; | 7041 case CODING_ANNOTATE_COMPOSITION_MASK: |
6687 case CODING_ANNOTATE_CHARSET_MASK: | 7042 produce_composition (coding, charbuf, pos); |
6688 produce_charset (coding, charbuf, pos); | 7043 break; |
6689 break; | 7044 case CODING_ANNOTATE_CHARSET_MASK: |
6690 default: | 7045 produce_charset (coding, charbuf, pos); |
6691 abort (); | 7046 break; |
6692 } | 7047 } |
6693 charbuf += len; | 7048 charbuf += len; |
6694 } | 7049 } |
6695 } | 7050 } |
6696 } | 7051 } |
6697 | 7052 |
6873 annotation data in BUF. */ | 7228 annotation data in BUF. */ |
6874 int *head = buf; | 7229 int *head = buf; |
6875 enum composition_method method = COMPOSITION_METHOD (prop); | 7230 enum composition_method method = COMPOSITION_METHOD (prop); |
6876 int nchars = COMPOSITION_LENGTH (prop); | 7231 int nchars = COMPOSITION_LENGTH (prop); |
6877 | 7232 |
6878 ADD_COMPOSITION_DATA (buf, nchars, method); | 7233 ADD_COMPOSITION_DATA (buf, nchars, 0, method); |
6879 if (method != COMPOSITION_RELATIVE) | 7234 if (method != COMPOSITION_RELATIVE) |
6880 { | 7235 { |
6881 Lisp_Object components; | 7236 Lisp_Object components; |
6882 int len, i, i_byte; | 7237 int len, i, i_byte; |
6883 | 7238 |
7060 | 7415 |
7061 lookup_buf[0] = c; | 7416 lookup_buf[0] = c; |
7062 for (i = 1; i < max_lookup && p < src_end; i++) | 7417 for (i = 1; i < max_lookup && p < src_end; i++) |
7063 lookup_buf[i] = STRING_CHAR_ADVANCE (p); | 7418 lookup_buf[i] = STRING_CHAR_ADVANCE (p); |
7064 lookup_buf_end = lookup_buf + i; | 7419 lookup_buf_end = lookup_buf + i; |
7065 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1, | 7420 trans = get_translation (trans, lookup_buf, lookup_buf_end); |
7066 &from_nchars, &to_nchars); | 7421 if (INTEGERP (trans)) |
7067 if (EQ (trans, Qt) | 7422 c = XINT (trans); |
7068 || buf + to_nchars > buf_end) | 7423 else if (CONSP (trans)) |
7424 { | |
7425 from_nchars = ASIZE (XCAR (trans)); | |
7426 trans = XCDR (trans); | |
7427 if (INTEGERP (trans)) | |
7428 c = XINT (trans); | |
7429 else | |
7430 { | |
7431 to_nchars = ASIZE (trans); | |
7432 if (buf + to_nchars > buf_end) | |
7433 break; | |
7434 c = XINT (AREF (trans, 0)); | |
7435 } | |
7436 } | |
7437 else | |
7069 break; | 7438 break; |
7070 *buf++ = *lookup_buf; | 7439 *buf++ = c; |
7071 for (i = 1; i < to_nchars; i++) | 7440 for (i = 1; i < to_nchars; i++) |
7072 *buf++ = XINT (AREF (trans, i)); | 7441 *buf++ = XINT (AREF (trans, i)); |
7073 for (i = 1; i < from_nchars; i++, pos++) | 7442 for (i = 1; i < from_nchars; i++, pos++) |
7074 src += MULTIBYTE_LENGTH_NO_CHECK (src); | 7443 src += MULTIBYTE_LENGTH_NO_CHECK (src); |
7075 } | 7444 } |