comparison src/coding.c @ 102422:82f82b92314e

(CODING_ISO_CMP_STATUS): New macro. (CODING_ISO_EXTSEGMENT_LEN, CODING_ISO_EMBEDDED_UTF_8): New macros. (MAX_ANNOTATION_LENGTH): Defined to 5. (ADD_COMPOSITION_DATA): New arg nbytes. (emacs_mule_char): New arg cmp_status. (DECODE_EMACS_MULE_COMPOSITION_CHAR): Delete it. (DECODE_EMACS_MULE_COMPOSITION_RULE_20): New arg c. (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New arg c. (DECODE_EMACS_MULE_21_COMPOSITION): Delete the arg c. (DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION): Likewise. (DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION): Likewise. (DECODE_EMACS_MULE_COMPOSITION_START): New macro. (EMACS_MULE_COMPOSITION_END): New macro. (emacs_mule_finish_composition): New function. (EMACS_MULE_MAYBE_FINISH_COMPOSITION): New macro. (decode_coding_emacs_mule): Avoid long looking ahead while handling composition. (DECODE_COMPOSITION_RULE): Argument changed to rule and nbytes. (ENCODE_COMPOSITION_RULE): New macro. (finish_composition): New function. (MAYBE_FINISH_COMPOSITION): Call finish_composition. (DECODE_COMPOSITION_START): New implementation. (DECODE_COMPOSITION_END): Likewise. (STORE_COMPOSITION_RULE): New macro. (decode_coding_iso_2022): Avoid long looking ahead while handling composition, CTEXT extended segment, and embedded UTF-8. (setup_coding_system): For a coding of type iso-2022, reset CODING_ISO_EXTSEGMENT_LEN (coding) and CODING_ISO_EMBEDDED_UTF_8 (coding). (get_translation): Delete arguments last_block, from_nchars, to_nchars. Callers changed. (produce_chars): Don't modify charbuf. Adjusted for the change of get_translation. (produce_composition): Adjusted for the new annotation sequence. (handle_composition_annotation): Likewise. (consume_chars): Adjusted for the change of get_translation.
author Kenichi Handa <handa@m17n.org>
date Fri, 06 Mar 2009 07:51:52 +0000
parents 7baaea85626e
children f556415c326b
comparison
equal deleted inserted replaced
102421:62a954f574fa 102422:82f82b92314e
450 ((coding)->spec.iso_2022.single_shifting) 450 ((coding)->spec.iso_2022.single_shifting)
451 #define CODING_ISO_BOL(coding) \ 451 #define CODING_ISO_BOL(coding) \
452 ((coding)->spec.iso_2022.bol) 452 ((coding)->spec.iso_2022.bol)
453 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \ 453 #define CODING_ISO_INVOKED_CHARSET(coding, plane) \
454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane))) 454 CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane)))
455 #define CODING_ISO_CMP_STATUS(coding) \
456 (&(coding)->spec.iso_2022.cmp_status)
457 #define CODING_ISO_EXTSEGMENT_LEN(coding) \
458 ((coding)->spec.iso_2022.ctext_extended_segment_len)
459 #define CODING_ISO_EMBEDDED_UTF_8(coding) \
460 ((coding)->spec.iso_2022.embedded_utf_8)
455 461
456 /* Control characters of ISO2022. */ 462 /* Control characters of ISO2022. */
457 /* code */ /* function */ 463 /* code */ /* function */
458 #define ISO_CODE_LF 0x0A /* line-feed */ 464 #define ISO_CODE_LF 0x0A /* line-feed */
459 #define ISO_CODE_CR 0x0D /* carriage-return */ 465 #define ISO_CODE_CR 0x0D /* carriage-return */
943 static int detect_eol P_ ((const unsigned char *, 949 static int detect_eol P_ ((const unsigned char *,
944 EMACS_INT, enum coding_category)); 950 EMACS_INT, enum coding_category));
945 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); 951 static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int));
946 static void decode_eol P_ ((struct coding_system *)); 952 static void decode_eol P_ ((struct coding_system *));
947 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *)); 953 static Lisp_Object get_translation_table P_ ((Lisp_Object, int, int *));
948 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *, 954 static Lisp_Object get_translation P_ ((Lisp_Object, int *, int *));
949 int, int *, int *));
950 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int)); 955 static int produce_chars P_ ((struct coding_system *, Lisp_Object, int));
951 static INLINE void produce_composition P_ ((struct coding_system *, int *,
952 EMACS_INT));
953 static INLINE void produce_charset P_ ((struct coding_system *, int *, 956 static INLINE void produce_charset P_ ((struct coding_system *, int *,
954 EMACS_INT)); 957 EMACS_INT));
955 static void produce_annotation P_ ((struct coding_system *, EMACS_INT)); 958 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
956 static int decode_coding P_ ((struct coding_system *)); 959 static int decode_coding P_ ((struct coding_system *));
957 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT, 960 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
1206 return dst; 1209 return dst;
1207 } 1210 }
1208 1211
1209 /** Macros for annotations. */ 1212 /** Macros for annotations. */
1210 1213
1211 /* Maximum length of annotation data (sum of annotations for
1212 composition and charset). */
1213 #define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
1214
1215 /* An annotation data is stored in the array coding->charbuf in this 1214 /* An annotation data is stored in the array coding->charbuf in this
1216 format: 1215 format:
1217 [ -LENGTH ANNOTATION_MASK NCHARS ... ] 1216 [ -LENGTH ANNOTATION_MASK NCHARS ... ]
1218 LENGTH is the number of elements in the annotation. 1217 LENGTH is the number of elements in the annotation.
1219 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK. 1218 ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
1221 1220
1222 The format of the following elements depends on ANNOTATION_MASK. 1221 The format of the following elements depends on ANNOTATION_MASK.
1223 1222
1224 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements 1223 In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements
1225 follow: 1224 follow:
1226 ... METHOD [ COMPOSITION-COMPONENTS ... ] 1225 ... NBYTES METHOD [ COMPOSITION-COMPONENTS ... ]
1226
1227 NBYTES is the number of bytes specified in the header part of
1228 old-style emacs-mule encoding, or 0 for the other kind of
1229 composition.
1230
1227 METHOD is one of enum composition_method. 1231 METHOD is one of enum composition_method.
1232
1228 Optional COMPOSITION-COMPONENTS are characters and composition 1233 Optional COMPOSITION-COMPONENTS are characters and composition
1229 rules. 1234 rules.
1230 1235
1231 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID 1236 In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
1232 follows. */ 1237 follows.
1238
1239 If ANNOTATION_MASK is 0, this annotation is just a space holder to
1240 recover from an invalid annotation, and should be skipped by
1241 produce_annotation. */
1242
1243 /* Maximum length of the header of annotation data. */
1244 #define MAX_ANNOTATION_LENGTH 5
1233 1245
1234 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \ 1246 #define ADD_ANNOTATION_DATA(buf, len, mask, nchars) \
1235 do { \ 1247 do { \
1236 *(buf)++ = -(len); \ 1248 *(buf)++ = -(len); \
1237 *(buf)++ = (mask); \ 1249 *(buf)++ = (mask); \
1238 *(buf)++ = (nchars); \ 1250 *(buf)++ = (nchars); \
1239 coding->annotated = 1; \ 1251 coding->annotated = 1; \
1240 } while (0); 1252 } while (0);
1241 1253
1242 #define ADD_COMPOSITION_DATA(buf, nchars, method) \ 1254 #define ADD_COMPOSITION_DATA(buf, nchars, nbytes, method) \
1243 do { \ 1255 do { \
1244 ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \ 1256 ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
1257 *buf++ = nbytes; \
1245 *buf++ = method; \ 1258 *buf++ = method; \
1246 } while (0) 1259 } while (0)
1247 1260
1248 1261
1249 #define ADD_CHARSET_DATA(buf, nchars, id) \ 1262 #define ADD_CHARSET_DATA(buf, nchars, id) \
1918 one-byte sequences which are their 8-bit code. 1931 one-byte sequences which are their 8-bit code.
1919 1932
1920 Next, character composition data are represented by the byte 1933 Next, character composition data are represented by the byte
1921 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., 1934 sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ...,
1922 where, 1935 where,
1923 METHOD is 0xF0 plus one of composition method (enum 1936 METHOD is 0xF2 plus one of composition method (enum
1924 composition_method), 1937 composition_method),
1925 1938
1926 BYTES is 0xA0 plus a byte length of this composition data, 1939 BYTES is 0xA0 plus a byte length of this composition data,
1927 1940
1928 CHARS is 0x20 plus a number of characters composed by this 1941 CHARS is 0xA0 plus a number of characters composed by this
1929 data, 1942 data,
1930 1943
1931 COMPONENTs are characters of multibyte form or composition 1944 COMPONENTs are characters of multibyte form or composition
1932 rules encoded by two bytes of ASCII codes. 1945 rules encoded by two bytes of ASCII codes.
1933 1946
1945 represents a composition rule. 1958 represents a composition rule.
1946 */ 1959 */
1947 1960
1948 char emacs_mule_bytes[256]; 1961 char emacs_mule_bytes[256];
1949 1962
1963
1964 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
1965 Check if a text is encoded in `emacs-mule'. If it is, return 1,
1966 else return 0. */
1967
1968 static int
1969 detect_coding_emacs_mule (coding, detect_info)
1970 struct coding_system *coding;
1971 struct coding_detection_info *detect_info;
1972 {
1973 const unsigned char *src = coding->source, *src_base;
1974 const unsigned char *src_end = coding->source + coding->src_bytes;
1975 int multibytep = coding->src_multibyte;
1976 int consumed_chars = 0;
1977 int c;
1978 int found = 0;
1979
1980 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
1981 /* A coding system of this category is always ASCII compatible. */
1982 src += coding->head_ascii;
1983
1984 while (1)
1985 {
1986 src_base = src;
1987 ONE_MORE_BYTE (c);
1988 if (c < 0)
1989 continue;
1990 if (c == 0x80)
1991 {
1992 /* Perhaps the start of composite character. We simply skip
1993 it because analyzing it is too heavy for detecting. But,
1994 at least, we check that the composite character
1995 consists of more than 4 bytes. */
1996 const unsigned char *src_base;
1997
1998 repeat:
1999 src_base = src;
2000 do
2001 {
2002 ONE_MORE_BYTE (c);
2003 }
2004 while (c >= 0xA0);
2005
2006 if (src - src_base <= 4)
2007 break;
2008 found = CATEGORY_MASK_EMACS_MULE;
2009 if (c == 0x80)
2010 goto repeat;
2011 }
2012
2013 if (c < 0x80)
2014 {
2015 if (c < 0x20
2016 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2017 break;
2018 }
2019 else
2020 {
2021 int more_bytes = emacs_mule_bytes[*src_base] - 1;
2022
2023 while (more_bytes > 0)
2024 {
2025 ONE_MORE_BYTE (c);
2026 if (c < 0xA0)
2027 {
2028 src--; /* Unread the last byte. */
2029 break;
2030 }
2031 more_bytes--;
2032 }
2033 if (more_bytes != 0)
2034 break;
2035 found = CATEGORY_MASK_EMACS_MULE;
2036 }
2037 }
2038 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2039 return 0;
2040
2041 no_more_source:
2042 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2043 {
2044 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2045 return 0;
2046 }
2047 detect_info->found |= found;
2048 return 1;
2049 }
2050
2051
2052 /* Parse emacs-mule multibyte sequence at SRC and return the decoded
2053 character. If CMP_STATUS indicates that we must expect MSEQ or
2054 RULE described above, decode it and return the negative value of
2055 the decoded character or rule. If an invalid byte is found, return
2056 -1. If SRC is too short, return -2. */
2057
1950 int 2058 int
1951 emacs_mule_char (coding, src, nbytes, nchars, id) 2059 emacs_mule_char (coding, src, nbytes, nchars, id, cmp_status)
1952 struct coding_system *coding; 2060 struct coding_system *coding;
1953 const unsigned char *src; 2061 const unsigned char *src;
1954 int *nbytes, *nchars, *id; 2062 int *nbytes, *nchars, *id;
2063 struct composition_status *cmp_status;
1955 { 2064 {
1956 const unsigned char *src_end = coding->source + coding->src_bytes; 2065 const unsigned char *src_end = coding->source + coding->src_bytes;
1957 const unsigned char *src_base = src; 2066 const unsigned char *src_base = src;
1958 int multibytep = coding->src_multibyte; 2067 int multibytep = coding->src_multibyte;
1959 struct charset *charset; 2068 struct charset *charset;
1960 unsigned code; 2069 unsigned code;
1961 int c; 2070 int c;
1962 int consumed_chars = 0; 2071 int consumed_chars = 0;
2072 int mseq_found = 0;
1963 2073
1964 ONE_MORE_BYTE (c); 2074 ONE_MORE_BYTE (c);
1965 if (c < 0) 2075 if (c < 0)
1966 { 2076 {
1967 c = -c; 2077 c = -c;
1969 } 2079 }
1970 else 2080 else
1971 { 2081 {
1972 if (c >= 0xA0) 2082 if (c >= 0xA0)
1973 { 2083 {
1974 /* Old style component character of a composition. */ 2084 if (cmp_status->state != COMPOSING_NO
1975 if (c == 0xA0) 2085 && cmp_status->old_form)
1976 { 2086 {
1977 ONE_MORE_BYTE (c); 2087 if (cmp_status->state == COMPOSING_CHAR)
1978 c -= 0x80; 2088 {
2089 if (c == 0xA0)
2090 {
2091 ONE_MORE_BYTE (c);
2092 c -= 0x80;
2093 if (c < 0)
2094 goto invalid_code;
2095 }
2096 else
2097 c -= 0x20;
2098 mseq_found = 1;
2099 }
2100 else
2101 {
2102 *nbytes = src - src_base;
2103 *nchars = consumed_chars;
2104 return -c;
2105 }
1979 } 2106 }
1980 else 2107 else
1981 c -= 0x20; 2108 goto invalid_code;
1982 } 2109 }
1983 2110
1984 switch (emacs_mule_bytes[c]) 2111 switch (emacs_mule_bytes[c])
1985 { 2112 {
1986 case 2: 2113 case 2:
2048 } 2175 }
2049 *nbytes = src - src_base; 2176 *nbytes = src - src_base;
2050 *nchars = consumed_chars; 2177 *nchars = consumed_chars;
2051 if (id) 2178 if (id)
2052 *id = charset->id; 2179 *id = charset->id;
2053 return c; 2180 return (mseq_found ? -c : c);
2054 2181
2055 no_more_source: 2182 no_more_source:
2056 return -2; 2183 return -2;
2057 2184
2058 invalid_code: 2185 invalid_code:
2059 return -1; 2186 return -1;
2060 } 2187 }
2061 2188
2062 2189
2063 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2064 Check if a text is encoded in `emacs-mule'. If it is, return 1,
2065 else return 0. */
2066
2067 static int
2068 detect_coding_emacs_mule (coding, detect_info)
2069 struct coding_system *coding;
2070 struct coding_detection_info *detect_info;
2071 {
2072 const unsigned char *src = coding->source, *src_base;
2073 const unsigned char *src_end = coding->source + coding->src_bytes;
2074 int multibytep = coding->src_multibyte;
2075 int consumed_chars = 0;
2076 int c;
2077 int found = 0;
2078
2079 detect_info->checked |= CATEGORY_MASK_EMACS_MULE;
2080 /* A coding system of this category is always ASCII compatible. */
2081 src += coding->head_ascii;
2082
2083 while (1)
2084 {
2085 src_base = src;
2086 ONE_MORE_BYTE (c);
2087 if (c < 0)
2088 continue;
2089 if (c == 0x80)
2090 {
2091 /* Perhaps the start of composite character. We simple skip
2092 it because analyzing it is too heavy for detecting. But,
2093 at least, we check that the composite character
2094 constitutes of more than 4 bytes. */
2095 const unsigned char *src_base;
2096
2097 repeat:
2098 src_base = src;
2099 do
2100 {
2101 ONE_MORE_BYTE (c);
2102 }
2103 while (c >= 0xA0);
2104
2105 if (src - src_base <= 4)
2106 break;
2107 found = CATEGORY_MASK_EMACS_MULE;
2108 if (c == 0x80)
2109 goto repeat;
2110 }
2111
2112 if (c < 0x80)
2113 {
2114 if (c < 0x20
2115 && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO))
2116 break;
2117 }
2118 else
2119 {
2120 int more_bytes = emacs_mule_bytes[*src_base] - 1;
2121
2122 while (more_bytes > 0)
2123 {
2124 ONE_MORE_BYTE (c);
2125 if (c < 0xA0)
2126 {
2127 src--; /* Unread the last byte. */
2128 break;
2129 }
2130 more_bytes--;
2131 }
2132 if (more_bytes != 0)
2133 break;
2134 found = CATEGORY_MASK_EMACS_MULE;
2135 }
2136 }
2137 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2138 return 0;
2139
2140 no_more_source:
2141 if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK)
2142 {
2143 detect_info->rejected |= CATEGORY_MASK_EMACS_MULE;
2144 return 0;
2145 }
2146 detect_info->found |= found;
2147 return 1;
2148 }
2149
2150
2151 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 2190 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
2152 2191
2153 /* Decode a character represented as a component of composition 2192 /* Handle these composition sequences ('|': the end of header elements,
2154 sequence of Emacs 20/21 style at SRC. Set C to that character and 2193 BYTES and CHARS >= 0xA0):
2155 update SRC to the head of next character (or an encoded composition 2194
2156 rule). If SRC doesn't points a composition component, set C to -1. 2195 (1) relative composition: 0x80 0xF2 BYTES CHARS | CHAR ...
2157 If SRC points an invalid byte sequence, global exit by a return 2196 (2) altchar composition: 0x80 0xF4 BYTES CHARS | ALT ... ALT CHAR ...
2158 value 0. */ 2197 (3) alt&rule composition: 0x80 0xF5 BYTES CHARS | ALT RULE ... ALT CHAR ...
2159 2198
2160 #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \ 2199 and these old form:
2161 do \ 2200
2162 { \ 2201 (4) relative composition: 0x80 | MSEQ ... MSEQ
2163 int c; \ 2202 (5) rulebase composition: 0x80 0xFF | MSEQ MRULE ... MSEQ
2164 int nbytes, nchars; \ 2203
2165 \ 2204 When the starter 0x80 and the following header elements are found,
2166 if (src == src_end) \ 2205 this annotation header is produced.
2167 break; \ 2206
2168 c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\ 2207 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS NBYTES METHOD ]
2169 if (c < 0) \ 2208
2170 { \ 2209 NCHARS is CHARS - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2171 if (c == -2) \ 2210 NBYTES is BYTES - 0xA0 for (1), (2), (3), and 0 for (4), (5).
2172 break; \ 2211
2173 goto invalid_code; \ 2212 Then, upon reading the following elements, these codes are produced
2174 } \ 2213 until the composition end is found:
2175 *buf++ = c; \ 2214
2176 src += nbytes; \ 2215 (1) CHAR ... CHAR
2177 consumed_chars += nchars; \ 2216 (2) ALT ... ALT CHAR ... CHAR
2178 } \ 2217 (3) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT CHAR ... CHAR
2179 while (0) 2218 (4) CHAR ... CHAR
2180 2219 (5) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
2181 2220
2182 /* Decode a composition rule represented as a component of composition 2221 When the composition end is found, LENGTH and NCHARS in the
2183 sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF, 2222 annotation header are updated as below:
2184 and increment BUF. If SRC points an invalid byte sequence, set C 2223
2185 to -1. */ 2224 (1) LENGTH: unchanged, NCHARS: unchanged
2186 2225 (2) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2187 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \ 2226 (3) LENGTH: length of the whole sequence minus NCHARS, NCHARS: unchanged
2227 (4) LENGTH: unchanged, NCHARS: number of CHARs
2228 (5) LENGTH: unchanged, NCHARS: number of CHARs
2229
2230 If an error is found while composing, the annotation header is
2231 changed to the original composition header (plus filler -1s) as
2232 below:
2233
2234 (1),(2),(3) [ 0x80 0xF2+METHOD BYTES CHARS -1 ]
2235 (5) [ 0x80 0xFF -1 -1- -1 ]
2236
2237 and the sequence [ -2 DECODED-RULE ] is changed to the original
2238 byte sequence as below:
2239 o the original byte sequence is B: [ B -1 ]
2240 o the original byte sequence is B1 B2: [ B1 B2 ]
2241
2242 Most of the routines are implemented by macros because many
2243 variables and labels in the caller decode_coding_emacs_mule must be
2244 accessible, and they are usually called just once (thus doesn't
2245 increase the size of compiled object). */
2246
2247 /* Decode a composition rule represented by C as a component of
2248 composition sequence of Emacs 20 style. Set RULE to the decoded
2249 rule. */
2250
2251 #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(c, rule) \
2188 do { \ 2252 do { \
2189 int c, gref, nref; \ 2253 int gref, nref; \
2190 \ 2254 \
2191 if (src >= src_end) \
2192 goto invalid_code; \
2193 ONE_MORE_BYTE_NO_CHECK (c); \
2194 c -= 0xA0; \ 2255 c -= 0xA0; \
2195 if (c < 0 || c >= 81) \ 2256 if (c < 0 || c >= 81) \
2196 goto invalid_code; \ 2257 goto invalid_code; \
2197 \
2198 gref = c / 9, nref = c % 9; \ 2258 gref = c / 9, nref = c % 9; \
2199 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ 2259 if (gref == 4) gref = 10; \
2260 if (nref == 4) nref = 10; \
2261 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
2200 } while (0) 2262 } while (0)
2201 2263
2202 2264
2203 /* Decode a composition rule represented as a component of composition 2265 /* Decode a composition rule represented by C and the following byte
2204 sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF, 2266 at SRC as a component of composition sequence of Emacs 21 style.
2205 and increment BUF. If SRC points an invalid byte sequence, set C 2267 Set RULE to the decoded rule. */
2206 to -1. */ 2268
2207 2269 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(c, rule) \
2208 #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \
2209 do { \ 2270 do { \
2210 int gref, nref; \ 2271 int gref, nref; \
2211 \ 2272 \
2212 if (src + 1>= src_end) \ 2273 gref = c - 0x20; \
2274 if (gref < 0 || gref >= 81) \
2213 goto invalid_code; \ 2275 goto invalid_code; \
2214 ONE_MORE_BYTE_NO_CHECK (gref); \ 2276 ONE_MORE_BYTE (c); \
2215 gref -= 0x20; \ 2277 nref = c - 0x20; \
2216 ONE_MORE_BYTE_NO_CHECK (nref); \ 2278 if (nref < 0 || nref >= 81) \
2217 nref -= 0x20; \
2218 if (gref < 0 || gref >= 81 \
2219 || nref < 0 || nref >= 81) \
2220 goto invalid_code; \ 2279 goto invalid_code; \
2221 *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ 2280 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
2222 } while (0) 2281 } while (0)
2223 2282
2224 2283
2225 #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ 2284 /* Start of Emacs 21 style format. The first three bytes at SRC are
2285 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is the
2286 byte length of this composition information, CHARS is the number of
2287 characters composed by this composition. */
2288
2289 #define DECODE_EMACS_MULE_21_COMPOSITION() \
2226 do { \ 2290 do { \
2227 /* Emacs 21 style format. The first three bytes at SRC are \
2228 (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \
2229 the byte length of this composition information, CHARS is the \
2230 number of characters composed by this composition. */ \
2231 enum composition_method method = c - 0xF2; \ 2291 enum composition_method method = c - 0xF2; \
2232 int *charbuf_base = charbuf; \ 2292 int *charbuf_base = charbuf; \
2233 int consumed_chars_limit; \
2234 int nbytes, nchars; \ 2293 int nbytes, nchars; \
2235 \ 2294 \
2236 ONE_MORE_BYTE (c); \ 2295 ONE_MORE_BYTE (c); \
2237 if (c < 0) \ 2296 if (c < 0) \
2238 goto invalid_code; \ 2297 goto invalid_code; \
2239 nbytes = c - 0xA0; \ 2298 nbytes = c - 0xA0; \
2240 if (nbytes < 3) \ 2299 if (nbytes < 3 || (method == COMPOSITION_RELATIVE && nbytes != 4)) \
2241 goto invalid_code; \ 2300 goto invalid_code; \
2242 ONE_MORE_BYTE (c); \ 2301 ONE_MORE_BYTE (c); \
2243 if (c < 0) \ 2302 nchars = c - 0xA0; \
2303 if (nchars <= 0 || nchars >= MAX_COMPOSITION_COMPONENTS) \
2244 goto invalid_code; \ 2304 goto invalid_code; \
2245 nchars = c - 0xA0; \ 2305 cmp_status->old_form = 0; \
2246 ADD_COMPOSITION_DATA (charbuf, nchars, method); \ 2306 cmp_status->method = method; \
2247 consumed_chars_limit = consumed_chars_base + nbytes; \ 2307 if (method == COMPOSITION_RELATIVE) \
2248 if (method != COMPOSITION_RELATIVE) \ 2308 cmp_status->state = COMPOSING_CHAR; \
2249 { \ 2309 else \
2250 int i = 0; \ 2310 cmp_status->state = COMPOSING_COMPONENT_CHAR; \
2251 while (consumed_chars < consumed_chars_limit) \ 2311 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2252 { \ 2312 cmp_status->nchars = nchars; \
2253 if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ 2313 cmp_status->ncomps = nbytes - 4; \
2254 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \ 2314 ADD_COMPOSITION_DATA (charbuf, nchars, nbytes, method); \
2255 else \
2256 DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \
2257 i++; \
2258 } \
2259 if (consumed_chars < consumed_chars_limit) \
2260 goto invalid_code; \
2261 charbuf_base[0] -= i; \
2262 } \
2263 } while (0) 2315 } while (0)
2264 2316
2265 2317
2266 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ 2318 /* Start of Emacs 20 style format for relative composition. */
2267 do { \ 2319
2268 /* Emacs 20 style format for relative composition. */ \ 2320 #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION() \
2269 /* Store multibyte form of characters to be composed. */ \ 2321 do { \
2270 enum composition_method method = COMPOSITION_RELATIVE; \ 2322 cmp_status->old_form = 1; \
2271 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ 2323 cmp_status->method = COMPOSITION_RELATIVE; \
2272 int *buf = components; \ 2324 cmp_status->state = COMPOSING_CHAR; \
2273 int i, j; \ 2325 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2274 \ 2326 cmp_status->nchars = cmp_status->ncomps = 0; \
2275 src = src_base; \ 2327 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
2276 ONE_MORE_BYTE (c); /* skip 0x80 */ \
2277 for (i = 0; *src >= 0xA0 && i < MAX_COMPOSITION_COMPONENTS; i++) \
2278 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \
2279 if (i < 2) \
2280 goto invalid_code; \
2281 ADD_COMPOSITION_DATA (charbuf, i, method); \
2282 for (j = 0; j < i; j++) \
2283 *charbuf++ = components[j]; \
2284 } while (0) 2328 } while (0)
2285 2329
2286 2330
2287 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \ 2331 /* Start of Emacs 20 style format for rule-base composition. */
2332
2333 #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION() \
2288 do { \ 2334 do { \
2289 /* Emacs 20 style format for rule-base composition. */ \ 2335 cmp_status->old_form = 1; \
2290 /* Store multibyte form of characters to be composed. */ \ 2336 cmp_status->method = COMPOSITION_WITH_RULE; \
2291 enum composition_method method = COMPOSITION_WITH_RULE; \ 2337 cmp_status->state = COMPOSING_CHAR; \
2292 int *charbuf_base = charbuf; \ 2338 cmp_status->length = MAX_ANNOTATION_LENGTH; \
2293 int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ 2339 cmp_status->nchars = cmp_status->ncomps = 0; \
2294 int *buf = components; \ 2340 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
2295 int i, j; \ 2341 } while (0)
2342
2343
2344 #define DECODE_EMACS_MULE_COMPOSITION_START() \
2345 do { \
2346 const unsigned char *current_src = src; \
2347 \
2348 ONE_MORE_BYTE (c); \
2349 if (c < 0) \
2350 goto invalid_code; \
2351 if (c - 0xF2 >= COMPOSITION_RELATIVE \
2352 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) \
2353 DECODE_EMACS_MULE_21_COMPOSITION (); \
2354 else if (c < 0xA0) \
2355 goto invalid_code; \
2356 else if (c < 0xC0) \
2357 { \
2358 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (); \
2359 /* Re-read C as a composition component. */ \
2360 src = current_src; \
2361 } \
2362 else if (c == 0xFF) \
2363 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (); \
2364 else \
2365 goto invalid_code; \
2366 } while (0)
2367
2368 #define EMACS_MULE_COMPOSITION_END() \
2369 do { \
2370 int idx = - cmp_status->length; \
2296 \ 2371 \
2297 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 2372 if (cmp_status->old_form) \
2298 for (i = 1; i < MAX_COMPOSITION_COMPONENTS; i++) \ 2373 charbuf[idx + 2] = cmp_status->nchars; \
2299 { \ 2374 else if (cmp_status->method > COMPOSITION_RELATIVE) \
2300 if (*src < 0xA0) \ 2375 charbuf[idx] = charbuf[idx + 2] - cmp_status->length; \
2301 break; \ 2376 cmp_status->state = COMPOSING_NO; \
2302 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ 2377 } while (0)
2303 DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ 2378
2304 } \ 2379
2305 if (i <= 1 || (buf - components) % 2 == 0) \ 2380 static int
2306 goto invalid_code; \ 2381 emacs_mule_finish_composition (charbuf, cmp_status)
2307 if (charbuf + i + (i / 2) + 1 >= charbuf_end) \ 2382 int *charbuf;
2308 goto no_more_source; \ 2383 struct composition_status *cmp_status;
2309 ADD_COMPOSITION_DATA (charbuf, i, method); \ 2384 {
2310 i = i * 2 - 1; \ 2385 int idx = - cmp_status->length;
2311 for (j = 0; j < i; j++) \ 2386 int new_chars;
2312 *charbuf++ = components[j]; \ 2387
2313 charbuf_base[0] -= i; \ 2388 if (cmp_status->old_form && cmp_status->nchars > 0)
2314 for (j = 0; j < i; j += 2) \ 2389 {
2315 *charbuf++ = components[j]; \ 2390 charbuf[idx + 2] = cmp_status->nchars;
2391 new_chars = 0;
2392 if (cmp_status->method == COMPOSITION_WITH_RULE
2393 && cmp_status->state == COMPOSING_CHAR)
2394 {
2395 /* The last rule was invalid. */
2396 int rule = charbuf[-1] + 0xA0;
2397
2398 charbuf[-2] = BYTE8_TO_CHAR (rule);
2399 charbuf[-1] = -1;
2400 new_chars = 1;
2401 }
2402 }
2403 else
2404 {
2405 charbuf[idx++] = BYTE8_TO_CHAR (0x80);
2406
2407 if (cmp_status->method == COMPOSITION_WITH_RULE)
2408 {
2409 charbuf[idx++] = BYTE8_TO_CHAR (0xFF);
2410 charbuf[idx++] = -3;
2411 charbuf[idx++] = 0;
2412 new_chars = 1;
2413 }
2414 else
2415 {
2416 int nchars = charbuf[idx + 1] + 0xA0;
2417 int nbytes = charbuf[idx + 2] + 0xA0;
2418
2419 charbuf[idx++] = BYTE8_TO_CHAR (0xF2 + cmp_status->method);
2420 charbuf[idx++] = BYTE8_TO_CHAR (nbytes);
2421 charbuf[idx++] = BYTE8_TO_CHAR (nchars);
2422 charbuf[idx++] = -1;
2423 new_chars = 4;
2424 }
2425 }
2426 cmp_status->state = COMPOSING_NO;
2427 return new_chars;
2428 }
2429
2430 #define EMACS_MULE_MAYBE_FINISH_COMPOSITION() \
2431 do { \
2432 if (cmp_status->state != COMPOSING_NO) \
2433 char_offset += emacs_mule_finish_composition (charbuf, cmp_status); \
2316 } while (0) 2434 } while (0)
2317 2435
2318 2436
2319 static void 2437 static void
2320 decode_coding_emacs_mule (coding) 2438 decode_coding_emacs_mule (coding)
2333 int last_offset = char_offset; 2451 int last_offset = char_offset;
2334 int last_id = charset_ascii; 2452 int last_id = charset_ascii;
2335 int eol_crlf = 2453 int eol_crlf =
2336 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 2454 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
2337 int byte_after_cr = -1; 2455 int byte_after_cr = -1;
2456 struct composition_status *cmp_status = &coding->spec.emacs_mule.cmp_status;
2338 2457
2339 CODING_GET_INFO (coding, attrs, charset_list); 2458 CODING_GET_INFO (coding, attrs, charset_list);
2340 2459
2460 if (cmp_status->state != COMPOSING_NO)
2461 {
2462 int i;
2463
2464 for (i = 0; i < cmp_status->length; i++)
2465 *charbuf++ = cmp_status->carryover[i];
2466 coding->annotated = 1;
2467 }
2468
2341 while (1) 2469 while (1)
2342 { 2470 {
2343 int c; 2471 int c, id;
2344 2472
2345 src_base = src; 2473 src_base = src;
2346 consumed_chars_base = consumed_chars; 2474 consumed_chars_base = consumed_chars;
2347 2475
2348 if (charbuf >= charbuf_end) 2476 if (charbuf >= charbuf_end)
2354 2482
2355 if (byte_after_cr >= 0) 2483 if (byte_after_cr >= 0)
2356 c = byte_after_cr, byte_after_cr = -1; 2484 c = byte_after_cr, byte_after_cr = -1;
2357 else 2485 else
2358 ONE_MORE_BYTE (c); 2486 ONE_MORE_BYTE (c);
2359 if (c < 0) 2487
2360 { 2488 if (c < 0 || c == 0x80)
2361 *charbuf++ = -c; 2489 {
2362 char_offset++; 2490 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2363 } 2491 if (c < 0)
2364 else if (c < 0x80) 2492 {
2493 *charbuf++ = -c;
2494 char_offset++;
2495 }
2496 else
2497 DECODE_EMACS_MULE_COMPOSITION_START ();
2498 continue;
2499 }
2500
2501 if (c < 0x80)
2365 { 2502 {
2366 if (eol_crlf && c == '\r') 2503 if (eol_crlf && c == '\r')
2367 ONE_MORE_BYTE (byte_after_cr); 2504 ONE_MORE_BYTE (byte_after_cr);
2368 *charbuf++ = c; 2505 id = charset_ascii;
2369 char_offset++; 2506 if (cmp_status->state != COMPOSING_NO)
2370 } 2507 {
2371 else if (c == 0x80) 2508 if (cmp_status->old_form)
2372 { 2509 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2373 ONE_MORE_BYTE (c); 2510 else if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2374 if (c < 0) 2511 cmp_status->ncomps--;
2375 goto invalid_code; 2512 }
2376 if (c - 0xF2 >= COMPOSITION_RELATIVE 2513 }
2377 && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) 2514 else
2378 DECODE_EMACS_MULE_21_COMPOSITION (c); 2515 {
2379 else if (c < 0xC0) 2516 int nchars, nbytes;
2380 DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); 2517
2381 else if (c == 0xFF) 2518 c = emacs_mule_char (coding, src_base, &nbytes, &nchars, &id,
2382 DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); 2519 cmp_status);
2383 else
2384 goto invalid_code;
2385 }
2386 else if (c < 0xA0 && emacs_mule_bytes[c] > 1)
2387 {
2388 int nbytes, nchars;
2389 int id;
2390
2391 src = src_base;
2392 consumed_chars = consumed_chars_base;
2393 c = emacs_mule_char (coding, src, &nbytes, &nchars, &id);
2394 if (c < 0) 2520 if (c < 0)
2395 { 2521 {
2522 if (c == -1)
2523 goto invalid_code;
2396 if (c == -2) 2524 if (c == -2)
2397 break; 2525 break;
2398 goto invalid_code;
2399 } 2526 }
2527 src = src_base + nbytes;
2528 consumed_chars = consumed_chars_base + nchars;
2529 if (cmp_status->state >= COMPOSING_COMPONENT_CHAR)
2530 cmp_status->ncomps -= nchars;
2531 }
2532
2533 /* Now if C >= 0, we found a normally encoded character, if C <
2534 0, we found an old-style composition component character or
2535 rule. */
2536
2537 if (cmp_status->state == COMPOSING_NO)
2538 {
2400 if (last_id != id) 2539 if (last_id != id)
2401 { 2540 {
2402 if (last_id != charset_ascii) 2541 if (last_id != charset_ascii)
2403 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 2542 ADD_CHARSET_DATA (charbuf, char_offset - last_offset,
2543 last_id);
2404 last_id = id; 2544 last_id = id;
2405 last_offset = char_offset; 2545 last_offset = char_offset;
2406 } 2546 }
2407 *charbuf++ = c; 2547 *charbuf++ = c;
2408 src += nbytes;
2409 consumed_chars += nchars;
2410 char_offset++; 2548 char_offset++;
2411 } 2549 }
2412 else 2550 else if (cmp_status->state == COMPOSING_CHAR)
2413 goto invalid_code; 2551 {
2552 if (cmp_status->old_form)
2553 {
2554 if (c >= 0)
2555 {
2556 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2557 *charbuf++ = c;
2558 char_offset++;
2559 }
2560 else
2561 {
2562 *charbuf++ = -c;
2563 cmp_status->nchars++;
2564 cmp_status->length++;
2565 if (cmp_status->nchars == MAX_COMPOSITION_COMPONENTS)
2566 EMACS_MULE_COMPOSITION_END ();
2567 else if (cmp_status->method == COMPOSITION_WITH_RULE)
2568 cmp_status->state = COMPOSING_RULE;
2569 }
2570 }
2571 else
2572 {
2573 *charbuf++ = c;
2574 cmp_status->length++;
2575 cmp_status->nchars--;
2576 if (cmp_status->nchars == 0)
2577 EMACS_MULE_COMPOSITION_END ();
2578 }
2579 }
2580 else if (cmp_status->state == COMPOSING_RULE)
2581 {
2582 int rule;
2583
2584 if (c >= 0)
2585 {
2586 EMACS_MULE_COMPOSITION_END ();
2587 *charbuf++ = c;
2588 char_offset++;
2589 }
2590 else
2591 {
2592 c = -c;
2593 DECODE_EMACS_MULE_COMPOSITION_RULE_20 (c, rule);
2594 if (rule < 0)
2595 goto invalid_code;
2596 *charbuf++ = -2;
2597 *charbuf++ = rule;
2598 cmp_status->length += 2;
2599 cmp_status->state = COMPOSING_CHAR;
2600 }
2601 }
2602 else if (cmp_status->state == COMPOSING_COMPONENT_CHAR)
2603 {
2604 *charbuf++ = c;
2605 cmp_status->length++;
2606 if (cmp_status->ncomps == 0)
2607 cmp_status->state = COMPOSING_CHAR;
2608 else if (cmp_status->ncomps > 0)
2609 {
2610 if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS)
2611 cmp_status->state = COMPOSING_COMPONENT_RULE;
2612 }
2613 else
2614 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2615 }
2616 else /* COMPOSING_COMPONENT_RULE */
2617 {
2618 int rule;
2619
2620 DECODE_EMACS_MULE_COMPOSITION_RULE_21 (c, rule);
2621 if (rule < 0)
2622 goto invalid_code;
2623 *charbuf++ = -2;
2624 *charbuf++ = rule;
2625 cmp_status->length += 2;
2626 cmp_status->ncomps--;
2627 if (cmp_status->ncomps > 0)
2628 cmp_status->state = COMPOSING_COMPONENT_CHAR;
2629 else
2630 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2631 }
2414 continue; 2632 continue;
2415 2633
2634 retry:
2635 src = src_base;
2636 consumed_chars = consumed_chars_base;
2637 continue;
2638
2416 invalid_code: 2639 invalid_code:
2640 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2417 src = src_base; 2641 src = src_base;
2418 consumed_chars = consumed_chars_base; 2642 consumed_chars = consumed_chars_base;
2419 ONE_MORE_BYTE (c); 2643 ONE_MORE_BYTE (c);
2420 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); 2644 *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
2421 char_offset++; 2645 char_offset++;
2422 coding->errors++; 2646 coding->errors++;
2423 } 2647 }
2424 2648
2425 no_more_source: 2649 no_more_source:
2650 if (cmp_status->state != COMPOSING_NO)
2651 {
2652 if (coding->mode & CODING_MODE_LAST_BLOCK)
2653 EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
2654 else
2655 {
2656 int i;
2657
2658 charbuf -= cmp_status->length;
2659 for (i = 0; i < cmp_status->length; i++)
2660 cmp_status->carryover[i] = charbuf[i];
2661 }
2662 }
2426 if (last_id != charset_ascii) 2663 if (last_id != charset_ascii)
2427 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 2664 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
2428 coding->consumed_char += consumed_chars_base; 2665 coding->consumed_char += consumed_chars_base;
2429 coding->consumed = src_base - coding->source; 2666 coding->consumed = src_base - coding->source;
2430 coding->charbuf_used = charbuf - coding->charbuf; 2667 coding->charbuf_used = charbuf - coding->charbuf;
3075 if (prev == -2 && id == charset_ascii) \ 3312 if (prev == -2 && id == charset_ascii) \
3076 chars_96 = -1; \ 3313 chars_96 = -1; \
3077 } while (0) 3314 } while (0)
3078 3315
3079 3316
3317 /* Handle these composition sequences (ALT: alternate char):
3318
3319 (1) relative composition: ESC 0 CHAR ... ESC 1
3320 (2) rulebase composition: ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3321 (3) altchar composition: ESC 3 ALT ... ALT ESC 0 CHAR ... ESC 1
3322 (4) alt&rule composition: ESC 4 ALT RULE ... ALT ESC 0 CHAR ... ESC 1
3323
3324 When the start sequence (ESC 0/2/3/4) is found, this annotation
3325 header is produced.
3326
3327 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3328
3329 Then, upon reading CHAR or RULE (one or two bytes), these codes are
3330 produced until the end sequence (ESC 1) is found:
3331
3332 (1) CHAR ... CHAR
3333 (2) CHAR -2 DECODED-RULE CHAR -2 DECODED-RULE ... CHAR
3334 (3) ALT ... ALT -1 -1 CHAR ... CHAR
3335 (4) ALT -2 DECODED-RULE ALT -2 DECODED-RULE ... ALT -1 -1 CHAR ... CHAR
3336
3337 When the end sequence (ESC 1) is found, LENGTH and NCHARS in the
3338 annotation header are updated as below:
3339
3340 (1) LENGTH: unchanged, NCHARS: number of CHARs
3341 (2) LENGTH: unchanged, NCHARS: number of CHARs
3342 (3) LENGTH: += number of ALTs + 2, NCHARS: number of CHARs
3343 (4) LENGTH: += number of ALTs * 3, NCHARS: number of CHARs
3344
3345 If an error is found while composing, the annotation header is
3346 changed to:
3347
3348 [ ESC '0'/'2'/'3'/'4' -2 0 ]
3349
3350 and the sequence [ -2 DECODED-RULE ] is changed to the original
3351 byte sequence as below:
3352 o the original byte sequence is B: [ B -1 ]
3353 o the original byte sequence is B1 B2: [ B1 B2 ]
3354 and the sequence [ -1 -1 ] is changed to the original byte
3355 sequence:
3356 [ ESC '0' ]
3357 */
3358
3359 /* Decode a composition rule C1 and maybe one more byte from the
3360 source, and set RULE to the encoded composition rule, NBYTES to the
3361 length of the composition rule. If the rule is invalid, set RULE
3362 to some negative value. */
3363
3364 #define DECODE_COMPOSITION_RULE(rule, nbytes) \
3365 do { \
3366 rule = c1 - 32; \
3367 if (rule < 0) \
3368 break; \
3369 if (rule < 81) /* old format (before ver.21) */ \
3370 { \
3371 int gref = (rule) / 9; \
3372 int nref = (rule) % 9; \
3373 if (gref == 4) gref = 10; \
3374 if (nref == 4) nref = 10; \
3375 rule = COMPOSITION_ENCODE_RULE (gref, nref); \
3376 nbytes = 1; \
3377 } \
3378 else /* new format (after ver.21) */ \
3379 { \
3380 int c; \
3381 \
3382 ONE_MORE_BYTE (c); \
3383 rule = COMPOSITION_ENCODE_RULE (rule - 81, c - 32); \
3384 if (rule >= 0) \
3385 rule += 0x100; /* to distinguish it from the old format */ \
3386 nbytes = 2; \
3387 } \
3388 } while (0)
3389
3390 #define ENCODE_COMPOSITION_RULE(rule) \
3391 do { \
3392 int gref = (rule % 0x100) / 12, nref = (rule % 0x100) % 12; \
3393 \
3394 if (rule < 0x100) /* old format */ \
3395 { \
3396 if (gref == 10) gref = 4; \
3397 if (nref == 10) nref = 4; \
3398 charbuf[idx] = 32 + gref * 9 + nref; \
3399 charbuf[idx + 1] = -1; \
3400 new_chars++; \
3401 } \
3402 else /* new format */ \
3403 { \
3404 charbuf[idx] = 32 + 81 + gref; \
3405 charbuf[idx + 1] = 32 + nref; \
3406 new_chars += 2; \
3407 } \
3408 } while (0)
3409
3410 /* Finish the current composition as invalid. */
3411
3412 static int finish_composition P_ ((int *, struct composition_status *));
3413
3414 static int
3415 finish_composition (charbuf, cmp_status)
3416 int *charbuf;
3417 struct composition_status *cmp_status;
3418 {
3419 int idx = - cmp_status->length;
3420 int new_chars;
3421
3422 /* Recover the original ESC sequence */
3423 charbuf[idx++] = ISO_CODE_ESC;
3424 charbuf[idx++] = (cmp_status->method == COMPOSITION_RELATIVE ? '0'
3425 : cmp_status->method == COMPOSITION_WITH_RULE ? '2'
3426 : cmp_status->method == COMPOSITION_WITH_ALTCHARS ? '3'
3427 /* cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS */
3428 : '4');
3429 charbuf[idx++] = -2;
3430 charbuf[idx++] = 0;
3431 charbuf[idx++] = -1;
3432 new_chars = cmp_status->nchars;
3433 if (cmp_status->method >= COMPOSITION_WITH_RULE)
3434 for (; idx < 0; idx++)
3435 {
3436 int elt = charbuf[idx];
3437
3438 if (elt == -2)
3439 {
3440 ENCODE_COMPOSITION_RULE (charbuf[idx + 1]);
3441 idx++;
3442 }
3443 else if (elt == -1)
3444 {
3445 charbuf[idx++] = ISO_CODE_ESC;
3446 charbuf[idx] = '0';
3447 new_chars += 2;
3448 }
3449 }
3450 cmp_status->state = COMPOSING_NO;
3451 return new_chars;
3452 }
3453
3454 /* If characters are under composition, finish the composition. */
3080 #define MAYBE_FINISH_COMPOSITION() \ 3455 #define MAYBE_FINISH_COMPOSITION() \
3081 do { \ 3456 do { \
3082 int i; \ 3457 if (cmp_status->state != COMPOSING_NO) \
3083 if (composition_state == COMPOSING_NO) \ 3458 char_offset += finish_composition (charbuf, cmp_status); \
3084 break; \
3085 /* It is assured that we have enough room for producing \
3086 characters stored in the table `components'. */ \
3087 if (charbuf + component_idx > charbuf_end) \
3088 goto no_more_source; \
3089 composition_state = COMPOSING_NO; \
3090 if (method == COMPOSITION_RELATIVE \
3091 || method == COMPOSITION_WITH_ALTCHARS) \
3092 { \
3093 for (i = 0; i < component_idx; i++) \
3094 *charbuf++ = components[i]; \
3095 char_offset += component_idx; \
3096 } \
3097 else \
3098 { \
3099 for (i = 0; i < component_idx; i += 2) \
3100 *charbuf++ = components[i]; \
3101 char_offset += (component_idx / 2) + 1; \
3102 } \
3103 } while (0) 3459 } while (0)
3104 3460
3105
3106 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. 3461 /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4.
3462
3107 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 3463 ESC 0 : relative composition : ESC 0 CHAR ... ESC 1
3108 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 3464 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1
3109 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1 3465 ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1
3110 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1 3466 ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1
3111 */ 3467
3112 3468 Produce this annotation sequence now:
3113 #define DECODE_COMPOSITION_START(c1) \ 3469
3114 do { \ 3470 [ -LENGTH(==-5) CODING_ANNOTATE_COMPOSITION_MASK NCHARS(==0) 0 METHOD ]
3115 if (c1 == '0' \ 3471 */
3116 && composition_state == COMPOSING_COMPONENT_RULE) \ 3472
3117 { \ 3473 #define DECODE_COMPOSITION_START(c1) \
3118 component_len = component_idx; \ 3474 do { \
3119 composition_state = COMPOSING_CHAR; \ 3475 if (c1 == '0' \
3120 } \ 3476 && ((cmp_status->state == COMPOSING_COMPONENT_CHAR \
3121 else \ 3477 && cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
3122 { \ 3478 || (cmp_status->state == COMPOSING_COMPONENT_RULE \
3123 const unsigned char *p; \ 3479 && cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS))) \
3124 \ 3480 { \
3125 MAYBE_FINISH_COMPOSITION (); \ 3481 *charbuf++ = -1; \
3126 if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \ 3482 *charbuf++= -1; \
3127 goto no_more_source; \ 3483 cmp_status->state = COMPOSING_CHAR; \
3128 for (p = src; p < src_end - 1; p++) \ 3484 cmp_status->length += 2; \
3129 if (*p == ISO_CODE_ESC && p[1] == '1') \ 3485 } \
3130 break; \ 3486 else \
3131 if (p == src_end - 1) \ 3487 { \
3132 { \ 3488 MAYBE_FINISH_COMPOSITION (); \
3133 if (coding->mode & CODING_MODE_LAST_BLOCK) \ 3489 cmp_status->method = (c1 == '0' ? COMPOSITION_RELATIVE \
3134 goto invalid_code; \ 3490 : c1 == '2' ? COMPOSITION_WITH_RULE \
3135 /* The current composition doesn't end in the current \ 3491 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
3136 source. */ \ 3492 : COMPOSITION_WITH_RULE_ALTCHARS); \
3137 record_conversion_result \ 3493 cmp_status->state \
3138 (coding, CODING_RESULT_INSUFFICIENT_SRC); \ 3494 = (c1 <= '2' ? COMPOSING_CHAR : COMPOSING_COMPONENT_CHAR); \
3139 goto no_more_source; \ 3495 ADD_COMPOSITION_DATA (charbuf, 0, 0, cmp_status->method); \
3140 } \ 3496 cmp_status->length = MAX_ANNOTATION_LENGTH; \
3141 \ 3497 cmp_status->nchars = cmp_status->ncomps = 0; \
3142 /* This is surely the start of a composition. */ \ 3498 coding->annotated = 1; \
3143 method = (c1 == '0' ? COMPOSITION_RELATIVE \ 3499 } \
3144 : c1 == '2' ? COMPOSITION_WITH_RULE \
3145 : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
3146 : COMPOSITION_WITH_RULE_ALTCHARS); \
3147 composition_state = (c1 <= '2' ? COMPOSING_CHAR \
3148 : COMPOSING_COMPONENT_CHAR); \
3149 component_idx = component_len = 0; \
3150 } \
3151 } while (0) 3500 } while (0)
3152 3501
3153 3502
3154 /* Handle compositoin end sequence ESC 1. */ 3503 /* Handle composition end sequence ESC 1. */
3155 3504
3156 #define DECODE_COMPOSITION_END() \ 3505 #define DECODE_COMPOSITION_END() \
3157 do { \ 3506 do { \
3158 int nchars = (component_len > 0 ? component_idx - component_len \ 3507 if (cmp_status->nchars == 0 \
3159 : method == COMPOSITION_RELATIVE ? component_idx \ 3508 || ((cmp_status->state == COMPOSING_CHAR) \
3160 : (component_idx + 1) / 2); \ 3509 == (cmp_status->method == COMPOSITION_WITH_RULE))) \
3161 int i; \
3162 int *saved_charbuf = charbuf; \
3163 \
3164 ADD_COMPOSITION_DATA (charbuf, nchars, method); \
3165 if (method != COMPOSITION_RELATIVE) \
3166 { \ 3510 { \
3167 if (component_len == 0) \ 3511 MAYBE_FINISH_COMPOSITION (); \
3168 for (i = 0; i < component_idx; i++) \ 3512 goto invalid_code; \
3169 *charbuf++ = components[i]; \
3170 else \
3171 for (i = 0; i < component_len; i++) \
3172 *charbuf++ = components[i]; \
3173 *saved_charbuf = saved_charbuf - charbuf; \
3174 } \ 3513 } \
3175 if (method == COMPOSITION_WITH_RULE) \ 3514 if (cmp_status->method == COMPOSITION_WITH_ALTCHARS) \
3176 for (i = 0; i < component_idx; i += 2, char_offset++) \ 3515 charbuf[- cmp_status->length] -= cmp_status->ncomps + 2; \
3177 *charbuf++ = components[i]; \ 3516 else if (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS) \
3517 charbuf[- cmp_status->length] -= cmp_status->ncomps * 3; \
3518 charbuf[- cmp_status->length + 2] = cmp_status->nchars; \
3519 char_offset += cmp_status->nchars; \
3520 cmp_status->state = COMPOSING_NO; \
3521 } while (0)
3522
3523 /* Store a composition rule RULE in charbuf, and update cmp_status. */
3524
3525 #define STORE_COMPOSITION_RULE(rule) \
3526 do { \
3527 *charbuf++ = -2; \
3528 *charbuf++ = rule; \
3529 cmp_status->length += 2; \
3530 cmp_status->state--; \
3531 } while (0)
3532
3533 /* Store a composed char or a component char C in charbuf, and update
3534 cmp_status. */
3535
3536 #define STORE_COMPOSITION_CHAR(c) \
3537 do { \
3538 *charbuf++ = (c); \
3539 cmp_status->length++; \
3540 if (cmp_status->state == COMPOSING_CHAR) \
3541 cmp_status->nchars++; \
3178 else \ 3542 else \
3179 for (i = component_len; i < component_idx; i++, char_offset++) \ 3543 cmp_status->ncomps++; \
3180 *charbuf++ = components[i]; \ 3544 if (cmp_status->method == COMPOSITION_WITH_RULE \
3181 coding->annotated = 1; \ 3545 || (cmp_status->method == COMPOSITION_WITH_RULE_ALTCHARS \
3182 composition_state = COMPOSING_NO; \ 3546 && cmp_status->state == COMPOSING_COMPONENT_CHAR)) \
3183 } while (0) 3547 cmp_status->state++; \
3184
3185
3186 /* Decode a composition rule from the byte C1 (and maybe one more byte
3187 from SRC) and store one encoded composition rule in
3188 coding->cmp_data. */
3189
3190 #define DECODE_COMPOSITION_RULE(c1) \
3191 do { \
3192 (c1) -= 32; \
3193 if (c1 < 81) /* old format (before ver.21) */ \
3194 { \
3195 int gref = (c1) / 9; \
3196 int nref = (c1) % 9; \
3197 if (gref == 4) gref = 10; \
3198 if (nref == 4) nref = 10; \
3199 c1 = COMPOSITION_ENCODE_RULE (gref, nref); \
3200 } \
3201 else if (c1 < 93) /* new format (after ver.21) */ \
3202 { \
3203 ONE_MORE_BYTE (c2); \
3204 c1 = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
3205 } \
3206 else \
3207 c1 = 0; \
3208 } while (0) 3548 } while (0)
3209 3549
3210 3550
3211 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ 3551 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
3212 3552
3217 const unsigned char *src = coding->source + coding->consumed; 3557 const unsigned char *src = coding->source + coding->consumed;
3218 const unsigned char *src_end = coding->source + coding->src_bytes; 3558 const unsigned char *src_end = coding->source + coding->src_bytes;
3219 const unsigned char *src_base; 3559 const unsigned char *src_base;
3220 int *charbuf = coding->charbuf + coding->charbuf_used; 3560 int *charbuf = coding->charbuf + coding->charbuf_used;
3221 int *charbuf_end 3561 int *charbuf_end
3222 = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH; 3562 = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
3223 int consumed_chars = 0, consumed_chars_base; 3563 int consumed_chars = 0, consumed_chars_base;
3224 int multibytep = coding->src_multibyte; 3564 int multibytep = coding->src_multibyte;
3225 /* Charsets invoked to graphic plane 0 and 1 respectively. */ 3565 /* Charsets invoked to graphic plane 0 and 1 respectively. */
3226 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); 3566 int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0);
3227 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); 3567 int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1);
3228 int charset_id_2, charset_id_3; 3568 int charset_id_2, charset_id_3;
3229 struct charset *charset; 3569 struct charset *charset;
3230 int c; 3570 int c;
3231 /* For handling composition sequence. */ 3571 struct composition_status *cmp_status = CODING_ISO_CMP_STATUS (coding);
3232 #define COMPOSING_NO 0
3233 #define COMPOSING_CHAR 1
3234 #define COMPOSING_RULE 2
3235 #define COMPOSING_COMPONENT_CHAR 3
3236 #define COMPOSING_COMPONENT_RULE 4
3237
3238 int composition_state = COMPOSING_NO;
3239 enum composition_method method;
3240 int components[MAX_COMPOSITION_COMPONENTS * 2 + 1];
3241 int component_idx;
3242 int component_len;
3243 Lisp_Object attrs, charset_list; 3572 Lisp_Object attrs, charset_list;
3244 int char_offset = coding->produced_char; 3573 int char_offset = coding->produced_char;
3245 int last_offset = char_offset; 3574 int last_offset = char_offset;
3246 int last_id = charset_ascii; 3575 int last_id = charset_ascii;
3247 int eol_crlf = 3576 int eol_crlf =
3248 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos); 3577 !inhibit_eol_conversion && EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
3249 int byte_after_cr = -1; 3578 int byte_after_cr = -1;
3579 int i;
3250 3580
3251 CODING_GET_INFO (coding, attrs, charset_list); 3581 CODING_GET_INFO (coding, attrs, charset_list);
3252 setup_iso_safe_charsets (attrs); 3582 setup_iso_safe_charsets (attrs);
3253 /* Charset list may have been changed. */ 3583 /* Charset list may have been changed. */
3254 charset_list = CODING_ATTR_CHARSET_LIST (attrs); 3584 charset_list = CODING_ATTR_CHARSET_LIST (attrs);
3255 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs)); 3585 coding->safe_charsets = SDATA (CODING_ATTR_SAFE_CHARSETS (attrs));
3586
3587 if (cmp_status->state != COMPOSING_NO)
3588 {
3589 for (i = 0; i < cmp_status->length; i++)
3590 *charbuf++ = cmp_status->carryover[i];
3591 coding->annotated = 1;
3592 }
3256 3593
3257 while (1) 3594 while (1)
3258 { 3595 {
3259 int c1, c2; 3596 int c1, c2;
3260 3597
3273 else 3610 else
3274 ONE_MORE_BYTE (c1); 3611 ONE_MORE_BYTE (c1);
3275 if (c1 < 0) 3612 if (c1 < 0)
3276 goto invalid_code; 3613 goto invalid_code;
3277 3614
3615 if (CODING_ISO_EXTSEGMENT_LEN (coding) > 0)
3616 {
3617 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3618 char_offset++;
3619 CODING_ISO_EXTSEGMENT_LEN (coding)--;
3620 continue;
3621 }
3622
3623 if (CODING_ISO_EMBEDDED_UTF_8 (coding))
3624 {
3625 if (c1 == ISO_CODE_ESC)
3626 {
3627 if (src + 1 >= src_end)
3628 goto no_more_source;
3629 *charbuf++ = ISO_CODE_ESC;
3630 char_offset++;
3631 if (src[0] == '%' && src[1] == '@')
3632 {
3633 src += 2;
3634 consumed_chars += 2;
3635 char_offset += 2;
3636 /* We are sure charbuf can contain two more chars. */
3637 *charbuf++ = '%';
3638 *charbuf++ = '@';
3639 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
3640 }
3641 }
3642 else
3643 {
3644 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3645 char_offset++;
3646 }
3647 continue;
3648 }
3649
3650 if ((cmp_status->state == COMPOSING_RULE
3651 || cmp_status->state == COMPOSING_COMPONENT_RULE)
3652 && c1 != ISO_CODE_ESC)
3653 {
3654 int rule, nbytes;
3655
3656 DECODE_COMPOSITION_RULE (rule, nbytes);
3657 if (rule < 0)
3658 goto invalid_code;
3659 STORE_COMPOSITION_RULE (rule);
3660 continue;
3661 }
3662
3278 /* We produce at most one character. */ 3663 /* We produce at most one character. */
3279 switch (iso_code_class [c1]) 3664 switch (iso_code_class [c1])
3280 { 3665 {
3281 case ISO_0x20_or_0x7F: 3666 case ISO_0x20_or_0x7F:
3282 if (composition_state != COMPOSING_NO)
3283 {
3284 if (composition_state == COMPOSING_RULE
3285 || composition_state == COMPOSING_COMPONENT_RULE)
3286 {
3287 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3288 {
3289 DECODE_COMPOSITION_RULE (c1);
3290 components[component_idx++] = c1;
3291 composition_state--;
3292 continue;
3293 }
3294 /* Too long composition. */
3295 MAYBE_FINISH_COMPOSITION ();
3296 }
3297 }
3298 if (charset_id_0 < 0 3667 if (charset_id_0 < 0
3299 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0))) 3668 || ! CHARSET_ISO_CHARS_96 (CHARSET_FROM_ID (charset_id_0)))
3300 /* This is SPACE or DEL. */ 3669 /* This is SPACE or DEL. */
3301 charset = CHARSET_FROM_ID (charset_ascii); 3670 charset = CHARSET_FROM_ID (charset_ascii);
3302 else 3671 else
3303 charset = CHARSET_FROM_ID (charset_id_0); 3672 charset = CHARSET_FROM_ID (charset_id_0);
3304 break; 3673 break;
3305 3674
3306 case ISO_graphic_plane_0: 3675 case ISO_graphic_plane_0:
3307 if (composition_state != COMPOSING_NO)
3308 {
3309 if (composition_state == COMPOSING_RULE
3310 || composition_state == COMPOSING_COMPONENT_RULE)
3311 {
3312 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3313 {
3314 DECODE_COMPOSITION_RULE (c1);
3315 components[component_idx++] = c1;
3316 composition_state--;
3317 continue;
3318 }
3319 MAYBE_FINISH_COMPOSITION ();
3320 }
3321 }
3322 if (charset_id_0 < 0) 3676 if (charset_id_0 < 0)
3323 charset = CHARSET_FROM_ID (charset_ascii); 3677 charset = CHARSET_FROM_ID (charset_ascii);
3324 else 3678 else
3325 charset = CHARSET_FROM_ID (charset_id_0); 3679 charset = CHARSET_FROM_ID (charset_id_0);
3326 break; 3680 break;
3344 MAYBE_FINISH_COMPOSITION (); 3698 MAYBE_FINISH_COMPOSITION ();
3345 charset = CHARSET_FROM_ID (charset_ascii); 3699 charset = CHARSET_FROM_ID (charset_ascii);
3346 break; 3700 break;
3347 3701
3348 case ISO_control_1: 3702 case ISO_control_1:
3349 MAYBE_FINISH_COMPOSITION ();
3350 goto invalid_code; 3703 goto invalid_code;
3351 3704
3352 case ISO_shift_out: 3705 case ISO_shift_out:
3353 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT) 3706 if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT)
3354 || CODING_ISO_DESIGNATION (coding, 1) < 0) 3707 || CODING_ISO_DESIGNATION (coding, 1) < 0)
3482 break; 3835 break;
3483 3836
3484 case '0': case '2': case '3': case '4': /* start composition */ 3837 case '0': case '2': case '3': case '4': /* start composition */
3485 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK)) 3838 if (! (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK))
3486 goto invalid_code; 3839 goto invalid_code;
3840 if (last_id != charset_ascii)
3841 {
3842 ADD_CHARSET_DATA (charbuf, char_offset- last_offset, last_id);
3843 last_id = charset_ascii;
3844 last_offset = char_offset;
3845 }
3487 DECODE_COMPOSITION_START (c1); 3846 DECODE_COMPOSITION_START (c1);
3488 continue; 3847 continue;
3489 3848
3490 case '1': /* end composition */ 3849 case '1': /* end composition */
3491 if (composition_state == COMPOSING_NO) 3850 if (cmp_status->state == COMPOSING_NO)
3492 goto invalid_code; 3851 goto invalid_code;
3493 DECODE_COMPOSITION_END (); 3852 DECODE_COMPOSITION_END ();
3494 continue; 3853 continue;
3495 3854
3496 case '[': /* specification of direction */ 3855 case '[': /* specification of direction */
3537 They may be decoded by post-read-conversion. */ 3896 They may be decoded by post-read-conversion. */
3538 int dim, M, L; 3897 int dim, M, L;
3539 int size; 3898 int size;
3540 3899
3541 ONE_MORE_BYTE (dim); 3900 ONE_MORE_BYTE (dim);
3901 if (dim < 0 || dim > 4)
3902 goto invalid_code;
3542 ONE_MORE_BYTE (M); 3903 ONE_MORE_BYTE (M);
3904 if (M < 128)
3905 goto invalid_code;
3543 ONE_MORE_BYTE (L); 3906 ONE_MORE_BYTE (L);
3907 if (L < 128)
3908 goto invalid_code;
3544 size = ((M - 128) * 128) + (L - 128); 3909 size = ((M - 128) * 128) + (L - 128);
3545 if (charbuf + 8 + size > charbuf_end) 3910 if (charbuf + 6 > charbuf_end)
3546 goto break_loop; 3911 goto break_loop;
3547 *charbuf++ = ISO_CODE_ESC; 3912 *charbuf++ = ISO_CODE_ESC;
3548 *charbuf++ = '%'; 3913 *charbuf++ = '%';
3549 *charbuf++ = '/'; 3914 *charbuf++ = '/';
3550 *charbuf++ = dim; 3915 *charbuf++ = dim;
3551 *charbuf++ = BYTE8_TO_CHAR (M); 3916 *charbuf++ = BYTE8_TO_CHAR (M);
3552 *charbuf++ = BYTE8_TO_CHAR (L); 3917 *charbuf++ = BYTE8_TO_CHAR (L);
3553 while (size-- > 0) 3918 CODING_ISO_EXTSEGMENT_LEN (coding) = size;
3554 {
3555 ONE_MORE_BYTE (c1);
3556 *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3557 }
3558 } 3919 }
3559 else if (c1 == 'G') 3920 else if (c1 == 'G')
3560 { 3921 {
3561 /* XFree86 extension for embedding UTF-8 in CTEXT: 3922 /* XFree86 extension for embedding UTF-8 in CTEXT:
3562 ESC % G --UTF-8-BYTES-- ESC % @ 3923 ESC % G --UTF-8-BYTES-- ESC % @
3563 We keep these bytes as is for the moment. 3924 We keep these bytes as is for the moment.
3564 They may be decoded by post-read-conversion. */ 3925 They may be decoded by post-read-conversion. */
3565 int *p = charbuf; 3926 if (charbuf + 3 > charbuf_end)
3566
3567 if (p + 6 > charbuf_end)
3568 goto break_loop; 3927 goto break_loop;
3569 *p++ = ISO_CODE_ESC; 3928 *charbuf++ = ISO_CODE_ESC;
3570 *p++ = '%'; 3929 *charbuf++ = '%';
3571 *p++ = 'G'; 3930 *charbuf++ = 'G';
3572 while (p < charbuf_end) 3931 CODING_ISO_EMBEDDED_UTF_8 (coding) = 1;
3573 {
3574 ONE_MORE_BYTE (c1);
3575 if (c1 == ISO_CODE_ESC
3576 && src + 1 < src_end
3577 && src[0] == '%'
3578 && src[1] == '@')
3579 {
3580 src += 2;
3581 break;
3582 }
3583 *p++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
3584 }
3585 if (p + 3 > charbuf_end)
3586 goto break_loop;
3587 *p++ = ISO_CODE_ESC;
3588 *p++ = '%';
3589 *p++ = '@';
3590 charbuf = p;
3591 } 3932 }
3592 else 3933 else
3593 goto invalid_code; 3934 goto invalid_code;
3594 continue; 3935 continue;
3595 break; 3936 break;
3623 } 3964 }
3624 continue; 3965 continue;
3625 } 3966 }
3626 } 3967 }
3627 3968
3628 if (charset->id != charset_ascii 3969 if (cmp_status->state == COMPOSING_NO
3970 && charset->id != charset_ascii
3629 && last_id != charset->id) 3971 && last_id != charset->id)
3630 { 3972 {
3631 if (last_id != charset_ascii) 3973 if (last_id != charset_ascii)
3632 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 3974 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3633 last_id = charset->id; 3975 last_id = charset->id;
3665 *charbuf++ = *src_base; 4007 *charbuf++ = *src_base;
3666 else 4008 else
3667 *charbuf++ = BYTE8_TO_CHAR (*src_base); 4009 *charbuf++ = BYTE8_TO_CHAR (*src_base);
3668 } 4010 }
3669 } 4011 }
3670 else if (composition_state == COMPOSING_NO) 4012 else if (cmp_status->state == COMPOSING_NO)
3671 { 4013 {
3672 *charbuf++ = c; 4014 *charbuf++ = c;
3673 char_offset++; 4015 char_offset++;
3674 } 4016 }
4017 else if ((cmp_status->state == COMPOSING_CHAR
4018 ? cmp_status->nchars
4019 : cmp_status->ncomps)
4020 >= MAX_COMPOSITION_COMPONENTS)
4021 {
4022 /* Too long composition. */
4023 MAYBE_FINISH_COMPOSITION ();
4024 *charbuf++ = c;
4025 char_offset++;
4026 }
3675 else 4027 else
3676 { 4028 STORE_COMPOSITION_CHAR (c);
3677 if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
3678 {
3679 components[component_idx++] = c;
3680 if (method == COMPOSITION_WITH_RULE
3681 || (method == COMPOSITION_WITH_RULE_ALTCHARS
3682 && composition_state == COMPOSING_COMPONENT_CHAR))
3683 composition_state++;
3684 }
3685 else
3686 {
3687 MAYBE_FINISH_COMPOSITION ();
3688 *charbuf++ = c;
3689 char_offset++;
3690 }
3691 }
3692 continue; 4029 continue;
3693 4030
3694 invalid_code: 4031 invalid_code:
3695 MAYBE_FINISH_COMPOSITION (); 4032 MAYBE_FINISH_COMPOSITION ();
3696 src = src_base; 4033 src = src_base;
3704 break_loop: 4041 break_loop:
3705 break; 4042 break;
3706 } 4043 }
3707 4044
3708 no_more_source: 4045 no_more_source:
3709 if (last_id != charset_ascii) 4046 if (cmp_status->state != COMPOSING_NO)
4047 {
4048 if (coding->mode & CODING_MODE_LAST_BLOCK)
4049 MAYBE_FINISH_COMPOSITION ();
4050 else
4051 {
4052 charbuf -= cmp_status->length;
4053 for (i = 0; i < cmp_status->length; i++)
4054 cmp_status->carryover[i] = charbuf[i];
4055 }
4056 }
4057 else if (last_id != charset_ascii)
3710 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id); 4058 ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
3711 coding->consumed_char += consumed_chars_base; 4059 coding->consumed_char += consumed_chars_base;
3712 coding->consumed = src_base - coding->source; 4060 coding->consumed = src_base - coding->source;
3713 coding->charbuf_used = charbuf - coding->charbuf; 4061 coding->charbuf_used = charbuf - coding->charbuf;
3714 } 4062 }
5474 val = CODING_ATTR_SAFE_CHARSETS (attrs); 5822 val = CODING_ATTR_SAFE_CHARSETS (attrs);
5475 coding->max_charset_id = SCHARS (val) - 1; 5823 coding->max_charset_id = SCHARS (val) - 1;
5476 coding->safe_charsets = SDATA (val); 5824 coding->safe_charsets = SDATA (val);
5477 } 5825 }
5478 CODING_ISO_FLAGS (coding) = flags; 5826 CODING_ISO_FLAGS (coding) = flags;
5827 CODING_ISO_CMP_STATUS (coding)->state = COMPOSING_NO;
5828 CODING_ISO_CMP_STATUS (coding)->method = COMPOSITION_NO;
5829 CODING_ISO_EXTSEGMENT_LEN (coding) = 0;
5830 CODING_ISO_EMBEDDED_UTF_8 (coding) = 0;
5479 } 5831 }
5480 else if (EQ (coding_type, Qcharset)) 5832 else if (EQ (coding_type, Qcharset))
5481 { 5833 {
5482 coding->detector = detect_coding_charset; 5834 coding->detector = detect_coding_charset;
5483 coding->decoder = decode_coding_charset; 5835 coding->decoder = decode_coding_charset;
5531 coding->detector = detect_coding_emacs_mule; 5883 coding->detector = detect_coding_emacs_mule;
5532 coding->decoder = decode_coding_emacs_mule; 5884 coding->decoder = decode_coding_emacs_mule;
5533 coding->encoder = encode_coding_emacs_mule; 5885 coding->encoder = encode_coding_emacs_mule;
5534 coding->common_flags 5886 coding->common_flags
5535 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); 5887 |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
5888 coding->spec.emacs_mule.full_support = 1;
5536 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) 5889 if (! NILP (AREF (attrs, coding_attr_emacs_mule_full))
5537 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) 5890 && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list))
5538 { 5891 {
5539 Lisp_Object tail, safe_charsets; 5892 Lisp_Object tail, safe_charsets;
5540 int max_charset_id = 0; 5893 int max_charset_id = 0;
5548 for (tail = Vemacs_mule_charset_list; CONSP (tail); 5901 for (tail = Vemacs_mule_charset_list; CONSP (tail);
5549 tail = XCDR (tail)) 5902 tail = XCDR (tail))
5550 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); 5903 SSET (safe_charsets, XFASTINT (XCAR (tail)), 0);
5551 coding->max_charset_id = max_charset_id; 5904 coding->max_charset_id = max_charset_id;
5552 coding->safe_charsets = SDATA (safe_charsets); 5905 coding->safe_charsets = SDATA (safe_charsets);
5553 } 5906 coding->spec.emacs_mule.full_support = 1;
5907 }
5908 coding->spec.emacs_mule.cmp_status.state = COMPOSING_NO;
5909 coding->spec.emacs_mule.cmp_status.method = COMPOSITION_NO;
5554 } 5910 }
5555 else if (EQ (coding_type, Qshift_jis)) 5911 else if (EQ (coding_type, Qshift_jis))
5556 { 5912 {
5557 coding->detector = detect_coding_sjis; 5913 coding->detector = detect_coding_sjis;
5558 coding->decoder = decode_coding_sjis; 5914 coding->decoder = decode_coding_sjis;
6336 } \ 6692 } \
6337 } \ 6693 } \
6338 } while (0) 6694 } while (0)
6339 6695
6340 6696
6697 /* Return a translation of character(s) at BUF according to TRANS.
6698 TRANS is TO-CHAR or ((FROM . TO) ...) where
6699 FROM = [FROM-CHAR ...], TO is TO-CHAR or [TO-CHAR ...].
6700 The return value is TO-CHAR or ([FROM-CHAR ...] . TO) if a
6701 translation is found, and Qnil if not found.
6702 If BUF is too short to look up characters in FROM, return Qt. */
6703
6341 static Lisp_Object 6704 static Lisp_Object
6342 get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars) 6705 get_translation (trans, buf, buf_end)
6343 Lisp_Object val; 6706 Lisp_Object trans;
6344 int *buf, *buf_end; 6707 int *buf, *buf_end;
6345 int last_block;
6346 int *from_nchars, *to_nchars;
6347 { 6708 {
6348 /* VAL is TO or (([FROM-CHAR ...] . TO) ...) where TO is TO-CHAR or 6709
6349 [TO-CHAR ...]. */ 6710 if (INTEGERP (trans))
6350 if (CONSP (val)) 6711 return trans;
6351 { 6712 for (; CONSP (trans); trans = XCDR (trans))
6352 Lisp_Object from, tail; 6713 {
6353 int i, len; 6714 Lisp_Object val = XCAR (trans);
6354 6715 Lisp_Object from = XCAR (val);
6355 for (tail = val; CONSP (tail); tail = XCDR (tail)) 6716 int len = ASIZE (from);
6356 { 6717 int i;
6357 val = XCAR (tail); 6718
6358 from = XCAR (val); 6719 for (i = 0; i < len; i++)
6359 len = ASIZE (from); 6720 {
6360 for (i = 0; i < len; i++) 6721 if (buf + i == buf_end)
6361 { 6722 return Qt;
6362 if (buf + i == buf_end) 6723 if (XINT (AREF (from, i)) != buf[i])
6363 { 6724 break;
6364 if (! last_block) 6725 }
6365 return Qt; 6726 if (i == len)
6366 break; 6727 return val;
6367 } 6728 }
6368 if (XINT (AREF (from, i)) != buf[i]) 6729 return Qnil;
6369 break;
6370 }
6371 if (i == len)
6372 {
6373 val = XCDR (val);
6374 *from_nchars = len;
6375 break;
6376 }
6377 }
6378 if (! CONSP (tail))
6379 return Qnil;
6380 }
6381 if (VECTORP (val))
6382 *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
6383 else
6384 *buf = XINT (val);
6385 return val;
6386 } 6730 }
6387 6731
6388 6732
6389 static int 6733 static int
6390 produce_chars (coding, translation_table, last_block) 6734 produce_chars (coding, translation_table, last_block)
6420 Lisp_Object trans = Qnil; 6764 Lisp_Object trans = Qnil;
6421 6765
6422 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans); 6766 LOOKUP_TRANSLATION_TABLE (translation_table, c, trans);
6423 if (! NILP (trans)) 6767 if (! NILP (trans))
6424 { 6768 {
6425 trans = get_translation (trans, buf, buf_end, last_block, 6769 trans = get_translation (trans, buf, buf_end);
6426 &from_nchars, &to_nchars); 6770 if (INTEGERP (trans))
6427 if (EQ (trans, Qt)) 6771 c = XINT (trans);
6772 else if (CONSP (trans))
6773 {
6774 from_nchars = ASIZE (XCAR (trans));
6775 trans = XCDR (trans);
6776 if (INTEGERP (trans))
6777 c = XINT (trans);
6778 else
6779 {
6780 to_nchars = ASIZE (trans);
6781 c = XINT (AREF (trans, 0));
6782 }
6783 }
6784 else if (EQ (trans, Qt) && ! last_block)
6428 break; 6785 break;
6429 c = *buf;
6430 } 6786 }
6431 6787
6432 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end) 6788 if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
6433 { 6789 {
6434 dst = alloc_destination (coding, 6790 dst = alloc_destination (coding,
6436 + MAX_MULTIBYTE_LENGTH * to_nchars, 6792 + MAX_MULTIBYTE_LENGTH * to_nchars,
6437 dst); 6793 dst);
6438 if (EQ (coding->src_object, coding->dst_object)) 6794 if (EQ (coding->src_object, coding->dst_object))
6439 { 6795 {
6440 coding_set_source (coding); 6796 coding_set_source (coding);
6441 dst_end = ((unsigned char *) coding->source) + coding->consumed; 6797 dst_end = (((unsigned char *) coding->source)
6798 + coding->consumed);
6442 } 6799 }
6443 else 6800 else
6444 dst_end = coding->destination + coding->dst_bytes; 6801 dst_end = coding->destination + coding->dst_bytes;
6445 } 6802 }
6446 6803
6453 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst); 6810 CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
6454 else 6811 else
6455 *dst++ = CHAR_TO_BYTE8 (c); 6812 *dst++ = CHAR_TO_BYTE8 (c);
6456 } 6813 }
6457 produced_chars += to_nchars; 6814 produced_chars += to_nchars;
6458 *buf++ = to_nchars; 6815 buf += from_nchars;
6459 while (--from_nchars > 0)
6460 *buf++ = 0;
6461 } 6816 }
6462 else 6817 else
6463 /* This is an annotation datum. (-C) is the length. */ 6818 /* This is an annotation datum. (-C) is the length. */
6464 buf += -c; 6819 buf += -c;
6465 } 6820 }
6571 return carryover; 6926 return carryover;
6572 } 6927 }
6573 6928
6574 /* Compose text in CODING->object according to the annotation data at 6929 /* Compose text in CODING->object according to the annotation data at
6575 CHARBUF. CHARBUF is an array: 6930 CHARBUF. CHARBUF is an array:
6576 [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ] 6931 [ -LENGTH ANNOTATION_MASK NCHARS NBYTES METHOD [ COMPONENTS... ] ]
6577 */ 6932 */
6578 6933
6579 static INLINE void 6934 static INLINE void
6580 produce_composition (coding, charbuf, pos) 6935 produce_composition (coding, charbuf, pos)
6581 struct coding_system *coding; 6936 struct coding_system *coding;
6585 int len; 6940 int len;
6586 EMACS_INT to; 6941 EMACS_INT to;
6587 enum composition_method method; 6942 enum composition_method method;
6588 Lisp_Object components; 6943 Lisp_Object components;
6589 6944
6590 len = -charbuf[0]; 6945 len = -charbuf[0] - MAX_ANNOTATION_LENGTH;
6591 to = pos + charbuf[2]; 6946 to = pos + charbuf[2];
6592 if (to <= pos) 6947 method = (enum composition_method) (charbuf[4]);
6593 return;
6594 method = (enum composition_method) (charbuf[3]);
6595 6948
6596 if (method == COMPOSITION_RELATIVE) 6949 if (method == COMPOSITION_RELATIVE)
6597 components = Qnil; 6950 components = Qnil;
6598 else if (method >= COMPOSITION_WITH_RULE 6951 else
6599 && method <= COMPOSITION_WITH_RULE_ALTCHARS)
6600 { 6952 {
6601 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; 6953 Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
6602 int i; 6954 int i, j;
6603 6955
6604 len -= 4; 6956 if (method == COMPOSITION_WITH_RULE)
6605 charbuf += 4; 6957 len = charbuf[2] * 3 - 2;
6606 for (i = 0; i < len; i++) 6958 charbuf += MAX_ANNOTATION_LENGTH;
6607 { 6959 /* charbuf = [ CHAR ... CHAR] or [ CHAR -2 RULE ... CHAR ] */
6608 args[i] = make_number (charbuf[i]); 6960 for (i = j = 0; i < len && charbuf[i] != -1; i++, j++)
6609 if (charbuf[i] < 0) 6961 {
6610 return; 6962 if (charbuf[i] >= 0)
6611 } 6963 args[j] = make_number (charbuf[i]);
6612 components = (method == COMPOSITION_WITH_ALTCHARS 6964 else
6613 ? Fstring (len, args) : Fvector (len, args)); 6965 {
6614 } 6966 i++;
6615 else 6967 args[j] = make_number (charbuf[i] % 0x100);
6616 return; 6968 }
6969 }
6970 components = (i == j ? Fstring (j, args) : Fvector (j, args));
6971 }
6617 compose_text (pos, to, components, Qnil, coding->dst_object); 6972 compose_text (pos, to, components, Qnil, coding->dst_object);
6618 } 6973 }
6619 6974
6620 6975
6621 /* Put `charset' property on text in CODING->object according to 6976 /* Put `charset' property on text in CODING->object according to
6673 return; 7028 return;
6674 7029
6675 while (charbuf < charbuf_end) 7030 while (charbuf < charbuf_end)
6676 { 7031 {
6677 if (*charbuf >= 0) 7032 if (*charbuf >= 0)
6678 pos += *charbuf++; 7033 pos++, charbuf++;
6679 else 7034 else
6680 { 7035 {
6681 int len = -*charbuf; 7036 int len = -*charbuf;
6682 switch (charbuf[1]) 7037
6683 { 7038 if (len > 2)
6684 case CODING_ANNOTATE_COMPOSITION_MASK: 7039 switch (charbuf[1])
6685 produce_composition (coding, charbuf, pos); 7040 {
6686 break; 7041 case CODING_ANNOTATE_COMPOSITION_MASK:
6687 case CODING_ANNOTATE_CHARSET_MASK: 7042 produce_composition (coding, charbuf, pos);
6688 produce_charset (coding, charbuf, pos); 7043 break;
6689 break; 7044 case CODING_ANNOTATE_CHARSET_MASK:
6690 default: 7045 produce_charset (coding, charbuf, pos);
6691 abort (); 7046 break;
6692 } 7047 }
6693 charbuf += len; 7048 charbuf += len;
6694 } 7049 }
6695 } 7050 }
6696 } 7051 }
6697 7052
6873 annotation data in BUF. */ 7228 annotation data in BUF. */
6874 int *head = buf; 7229 int *head = buf;
6875 enum composition_method method = COMPOSITION_METHOD (prop); 7230 enum composition_method method = COMPOSITION_METHOD (prop);
6876 int nchars = COMPOSITION_LENGTH (prop); 7231 int nchars = COMPOSITION_LENGTH (prop);
6877 7232
6878 ADD_COMPOSITION_DATA (buf, nchars, method); 7233 ADD_COMPOSITION_DATA (buf, nchars, 0, method);
6879 if (method != COMPOSITION_RELATIVE) 7234 if (method != COMPOSITION_RELATIVE)
6880 { 7235 {
6881 Lisp_Object components; 7236 Lisp_Object components;
6882 int len, i, i_byte; 7237 int len, i, i_byte;
6883 7238
7060 7415
7061 lookup_buf[0] = c; 7416 lookup_buf[0] = c;
7062 for (i = 1; i < max_lookup && p < src_end; i++) 7417 for (i = 1; i < max_lookup && p < src_end; i++)
7063 lookup_buf[i] = STRING_CHAR_ADVANCE (p); 7418 lookup_buf[i] = STRING_CHAR_ADVANCE (p);
7064 lookup_buf_end = lookup_buf + i; 7419 lookup_buf_end = lookup_buf + i;
7065 trans = get_translation (trans, lookup_buf, lookup_buf_end, 1, 7420 trans = get_translation (trans, lookup_buf, lookup_buf_end);
7066 &from_nchars, &to_nchars); 7421 if (INTEGERP (trans))
7067 if (EQ (trans, Qt) 7422 c = XINT (trans);
7068 || buf + to_nchars > buf_end) 7423 else if (CONSP (trans))
7424 {
7425 from_nchars = ASIZE (XCAR (trans));
7426 trans = XCDR (trans);
7427 if (INTEGERP (trans))
7428 c = XINT (trans);
7429 else
7430 {
7431 to_nchars = ASIZE (trans);
7432 if (buf + to_nchars > buf_end)
7433 break;
7434 c = XINT (AREF (trans, 0));
7435 }
7436 }
7437 else
7069 break; 7438 break;
7070 *buf++ = *lookup_buf; 7439 *buf++ = c;
7071 for (i = 1; i < to_nchars; i++) 7440 for (i = 1; i < to_nchars; i++)
7072 *buf++ = XINT (AREF (trans, i)); 7441 *buf++ = XINT (AREF (trans, i));
7073 for (i = 1; i < from_nchars; i++, pos++) 7442 for (i = 1; i < from_nchars; i++, pos++)
7074 src += MULTIBYTE_LENGTH_NO_CHECK (src); 7443 src += MULTIBYTE_LENGTH_NO_CHECK (src);
7075 } 7444 }