17052
|
1 /* Header for coding system handler.
|
75227
|
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
|
100951
|
3 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
|
74605
|
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
100951
|
5 2005, 2006, 2007, 2008, 2009
|
67658
|
6 National Institute of Advanced Industrial Science and Technology (AIST)
|
|
7 Registration Number H14PRO021
|
89483
|
8 Copyright (C) 2003
|
88365
|
9 National Institute of Advanced Industrial Science and Technology (AIST)
|
|
10 Registration Number H13PRO009
|
17052
|
11
|
17071
|
12 This file is part of GNU Emacs.
|
|
13
|
94994
|
14 GNU Emacs is free software: you can redistribute it and/or modify
|
17071
|
15 it under the terms of the GNU General Public License as published by
|
94994
|
16 the Free Software Foundation, either version 3 of the License, or
|
|
17 (at your option) any later version.
|
17052
|
18
|
17071
|
19 GNU Emacs is distributed in the hope that it will be useful,
|
|
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
22 GNU General Public License for more details.
|
17052
|
23
|
17071
|
24 You should have received a copy of the GNU General Public License
|
94994
|
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
|
17052
|
26
|
29571
|
27 #ifndef EMACS_CODING_H
|
|
28 #define EMACS_CODING_H
|
17052
|
29
|
88365
|
/* Index to arguments of Fdefine_coding_system_internal.  The first
   enum lists the arguments common to every coding system type; the
   type-specific enums that follow continue numbering from
   coding_arg_max, so they share the same index range.  */

enum define_coding_system_arg_index
  {
    coding_arg_name,
    coding_arg_mnemonic,
    coding_arg_coding_type,
    coding_arg_charset_list,
    coding_arg_ascii_compatible_p,
    coding_arg_decode_translation_table,
    coding_arg_encode_translation_table,
    coding_arg_post_read_conversion,
    coding_arg_pre_write_conversion,
    coding_arg_default_char,
    coding_arg_for_unibyte,
    coding_arg_plist,
    coding_arg_eol_type,
    coding_arg_max
  };

/* Extra arguments following the common ones (ISO-2022 variant).  */
enum define_coding_iso2022_arg_index
  {
    coding_arg_iso2022_initial = coding_arg_max,
    coding_arg_iso2022_reg_usage,
    coding_arg_iso2022_request,
    coding_arg_iso2022_flags,
    coding_arg_iso2022_max
  };

/* Extra arguments following the common ones (UTF-8 variant).  */
enum define_coding_utf8_arg_index
  {
    coding_arg_utf8_bom = coding_arg_max,
    coding_arg_utf8_max
  };

/* Extra arguments following the common ones (UTF-16 variant).  */
enum define_coding_utf16_arg_index
  {
    coding_arg_utf16_bom = coding_arg_max,
    coding_arg_utf16_endian,
    coding_arg_utf16_max
  };

/* Extra arguments following the common ones (CCL variant).  */
enum define_coding_ccl_arg_index
  {
    coding_arg_ccl_decoder = coding_arg_max,
    coding_arg_ccl_encoder,
    coding_arg_ccl_valids,
    coding_arg_ccl_max
  };
|
17052
|
79
|
89886
|
80 /* Hash table for all coding systems. Keys are coding system symbols
|
|
81 and values are spec vectors of the corresponding coding system. A
|
|
82 spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
|
|
83 vector of attribute of the coding system. ALIASES is a list of
|
|
84 aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
|
|
85 `mac' or a vector of coding systems (symbols). */
|
|
86
|
88365
|
87 extern Lisp_Object Vcoding_system_hash_table;
|
|
88
|
89886
|
89
|
88365
|
/* Enumeration of coding system type.  coding_type_max is not a type;
   it is the number of enumerators preceding it.  */

enum coding_system_type
  {
    coding_type_charset,
    coding_type_utf_8,
    coding_type_utf_16,
    coding_type_iso_2022,
    coding_type_emacs_mule,
    coding_type_sjis,
    coding_type_ccl,
    coding_type_raw_text,
    coding_type_undecided,
    coding_type_max
  };
|
|
105
|
|
106
|
|
/* Enumeration of end-of-line format type.  */

enum end_of_line_type
  {
    eol_lf,		/* Line-feed only, same as Emacs' internal
			   format.  */
    eol_crlf,		/* Sequence of carriage-return and
			   line-feed.  */
    eol_cr,		/* Carriage-return only.  */
    eol_any,		/* Accept any of above.  Produce line-feed
			   only.  */
    eol_undecided,	/* This value is used to denote that the
			   eol-type is not yet decided.  */
    eol_type_max
  };
|
|
122
|
88365
|
/* Enumeration of index to an attribute vector of a coding system.
   coding_attr_last_index is the number of slots preceding it.  */

enum coding_attr_index
  {
    coding_attr_base_name,
    coding_attr_docstring,
    coding_attr_mnemonic,
    coding_attr_type,
    coding_attr_charset_list,
    coding_attr_ascii_compat,
    coding_attr_decode_tbl,
    coding_attr_encode_tbl,
    coding_attr_trans_tbl,
    coding_attr_post_read,
    coding_attr_pre_write,
    coding_attr_default_char,
    coding_attr_for_unibyte,
    coding_attr_plist,

    coding_attr_category,
    coding_attr_safe_charsets,

    /* The following are extra attributes for each type.  */
    coding_attr_charset_valids,

    coding_attr_ccl_decoder,
    coding_attr_ccl_encoder,
    coding_attr_ccl_valids,

    coding_attr_iso_initial,
    coding_attr_iso_usage,
    coding_attr_iso_request,
    coding_attr_iso_flags,

    coding_attr_utf_bom,
    coding_attr_utf_16_endian,

    coding_attr_emacs_mule_full,

    coding_attr_last_index
  };
|
|
164
|
|
165
|
89886
|
/* Macros to access an element of an attribute vector.  Each expands
   to AREF on ATTRS with the matching `enum coding_attr_index' slot.  */

#define CODING_ATTR_BASE_NAME(attrs)	AREF (attrs, coding_attr_base_name)
#define CODING_ATTR_TYPE(attrs)		AREF (attrs, coding_attr_type)
#define CODING_ATTR_CHARSET_LIST(attrs)	AREF (attrs, coding_attr_charset_list)
#define CODING_ATTR_MNEMONIC(attrs)	AREF (attrs, coding_attr_mnemonic)
#define CODING_ATTR_DOCSTRING(attrs)	AREF (attrs, coding_attr_docstring)
#define CODING_ATTR_ASCII_COMPAT(attrs)	AREF (attrs, coding_attr_ascii_compat)
#define CODING_ATTR_DECODE_TBL(attrs)	AREF (attrs, coding_attr_decode_tbl)
#define CODING_ATTR_ENCODE_TBL(attrs)	AREF (attrs, coding_attr_encode_tbl)
#define CODING_ATTR_TRANS_TBL(attrs)	AREF (attrs, coding_attr_trans_tbl)
#define CODING_ATTR_POST_READ(attrs)	AREF (attrs, coding_attr_post_read)
#define CODING_ATTR_PRE_WRITE(attrs)	AREF (attrs, coding_attr_pre_write)
#define CODING_ATTR_DEFAULT_CHAR(attrs)	AREF (attrs, coding_attr_default_char)
#define CODING_ATTR_FOR_UNIBYTE(attrs)	AREF (attrs, coding_attr_for_unibyte)
/* NOTE(review): `coding_attr_flushing' is not an enumerator of
   `enum coding_attr_index' as declared in this header, so any
   expansion of this macro will not compile unless that name is
   supplied elsewhere.  Kept for interface compatibility -- confirm
   whether it can be retired.  */
#define CODING_ATTR_FLUSHING(attrs)	AREF (attrs, coding_attr_flushing)
#define CODING_ATTR_PLIST(attrs)	AREF (attrs, coding_attr_plist)
#define CODING_ATTR_CATEGORY(attrs)	AREF (attrs, coding_attr_category)
#define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
|
17052
|
185
|
|
186
|
89886
|
/* ID in the macros below is used as an index into
   Vcoding_system_hash_table (passed to HASH_KEY / HASH_VALUE).  */

/* Return the name of a coding system specified by ID.  */
#define CODING_ID_NAME(id) \
  (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), (id)))

/* Return the attribute vector of a coding system specified by ID
   (element 0 of its spec vector).  */

#define CODING_ID_ATTRS(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 0))

/* Return the list of aliases of a coding system specified by ID
   (element 1 of its spec vector).  */

#define CODING_ID_ALIASES(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 1))

/* Return the eol-type of a coding system specified by ID
   (element 2 of its spec vector).  */

#define CODING_ID_EOL_TYPE(id) \
  (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), (id)), 2))
|
|
205
|
89886
|
206
|
|
/* Return the spec vector of CODING_SYSTEM_SYMBOL, looked up in
   Vcoding_system_hash_table with Fgethash; Qnil is passed as the
   default, so Qnil is what comes back when there is no entry.  */

#define CODING_SYSTEM_SPEC(coding_system_symbol) \
  (Fgethash ((coding_system_symbol), Vcoding_system_hash_table, Qnil))
|
|
211
|
89886
|
212
|
|
/* Return the ID of CODING_SYSTEM_SYMBOL, i.e. the result of
   hash_lookup on Vcoding_system_hash_table.  Callers in this header
   treat a negative result as "not found".  */

#define CODING_SYSTEM_ID(coding_system_symbol) \
  (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
		(coding_system_symbol), NULL))
|
|
218
|
91005
|
/* Return 1 if CODING_SYSTEM_SYMBOL is a coding system, else 0.  The
   hash table is consulted first; only when that lookup fails (and the
   symbol is non-nil) is Fcoding_system_p called.
   CODING_SYSTEM_SYMBOL may be evaluated more than once.  */

#define CODING_SYSTEM_P(coding_system_symbol) \
  (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
   || (! NILP (coding_system_symbol) \
       && ! NILP (Fcoding_system_p (coding_system_symbol))))
|
88365
|
225
|
89886
|
/* Check if X is a coding system or not.  If X has no ID in the hash
   table and Fcheck_coding_system (X) returns nil, call
   wrong_type_argument with Qcoding_system_p.  X may be evaluated
   more than once.  */

#define CHECK_CODING_SYSTEM(x) \
  do { \
    if (CODING_SYSTEM_ID (x) < 0 \
	&& NILP (Fcheck_coding_system (x))) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
|
|
234
|
17052
|
235
|
89886
|
/* Check if X is a coding system or not.  If it is, set SPEC to the
   spec vector of the coding system.  When the first lookup yields
   nil, Fcheck_coding_system (X) is called and the lookup is retried;
   if SPEC is still nil, wrong_type_argument is called with
   Qcoding_system_p.  SPEC must be an lvalue; X may be evaluated more
   than once.  */

#define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
  do { \
    (spec) = CODING_SYSTEM_SPEC (x); \
    if (NILP (spec)) \
      { \
	Fcheck_coding_system (x); \
	(spec) = CODING_SYSTEM_SPEC (x); \
      } \
    if (NILP (spec)) \
      wrong_type_argument (Qcoding_system_p, (x)); \
  } while (0)
|
|
250
|
18001
|
251
|
89886
|
/* Check if X is a coding system or not.  If it is, set ID to the
   ID of the coding system.  When the first lookup is negative,
   Fcheck_coding_system (X) is called and the lookup is retried; if ID
   is still negative, wrong_type_argument is called with
   Qcoding_system_p.  ID must be an lvalue; X may be evaluated more
   than once.  */

#define CHECK_CODING_SYSTEM_GET_ID(x, id) \
  do \
    { \
      (id) = CODING_SYSTEM_ID (x); \
      if ((id) < 0) \
	{ \
	  Fcheck_coding_system (x); \
	  (id) = CODING_SYSTEM_ID (x); \
	} \
      if ((id) < 0) \
	wrong_type_argument (Qcoding_system_p, (x)); \
    } while (0)
|
17052
|
267
|
|
268
|
|
/*** GENERAL section ***/

/* Enumeration of result code of code conversion.  */
enum coding_result_code
  {
    CODING_RESULT_SUCCESS,
    CODING_RESULT_INSUFFICIENT_SRC,
    CODING_RESULT_INSUFFICIENT_DST,
    CODING_RESULT_INCONSISTENT_EOL,
    CODING_RESULT_INVALID_SRC,
    CODING_RESULT_INTERRUPT,
    CODING_RESULT_INSUFFICIENT_MEM
  };
|
|
282
|
20717
|
283
|
21031
|
/* Macros used for the member `mode' of the struct coding_system.
   Each value is a distinct single bit, so they can be OR-ed.  */

/* If set, recover the original CR or LF of the already decoded text
   when the decoding routine encounters an inconsistent eol format.  */
#define CODING_MODE_INHIBIT_INCONSISTENT_EOL	0x01

/* If set, the decoding/encoding routines treat the current data as
   the last block of the whole text to be converted, and do the
   appropriate finishing job.  */
#define CODING_MODE_LAST_BLOCK			0x02

/* If set, it means that the current source text is in a buffer which
   enables selective display.  */
#define CODING_MODE_SELECTIVE_DISPLAY		0x04

/* This flag is used by the decoding/encoding routines on the fly.  If
   set, it means that right-to-left text is being processed.  */
#define CODING_MODE_DIRECTION			0x08

/* NOTE(review): the meaning of this bit is not documented in this
   header -- see its users before relying on it.  */
#define CODING_MODE_FIXED_DESTINATION		0x10

/* If set, it means that the encoding routines produce some safe
   ASCII characters (usually '?') for unsupported characters.  */
#define CODING_MODE_SAFE_ENCODING		0x20
|
19364
|
308
|
102423
|
309 /* For handling composition sequence. */
|
|
310 #include "composite.h"
|
|
311
|
|
/* State recorded in the `state' member of struct composition_status
   while a composition sequence is being handled.  */
enum composition_state
  {
    COMPOSING_NO,
    COMPOSING_CHAR,
    COMPOSING_RULE,
    COMPOSING_COMPONENT_CHAR,
    COMPOSING_COMPONENT_RULE
  };
|
|
320
|
|
321 /* Structure for the current composition status. */
|
|
322 struct composition_status
|
|
323 {
|
|
324 enum composition_state state;
|
|
325 enum composition_method method;
|
|
326 int old_form; /* 0:pre-21 form, 1:post-21 form */
|
|
327 int length; /* number of elements produced in charbuf */
|
|
328 int nchars; /* number of characters composed */
|
|
329 int ncomps; /* number of composition components */
|
|
330 /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
|
|
331 See the comment in coding.c. */
|
|
332 int carryover[4 /* annotation header */
|
|
333 + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
|
|
334 + 2 /* intermediate -1 -1 */
|
|
335 + MAX_COMPOSITION_COMPONENTS /* CHARs */
|
|
336 ];
|
|
337 };
|
|
338
|
|
339
|
88365
|
340 /* Structure of the field `spec.iso_2022' in the structure
|
|
341 `coding_system'. */
|
|
342 struct iso_2022_spec
|
|
343 {
|
88688
|
344 /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
|
88365
|
345 unsigned flags;
|
20717
|
346
|
17052
|
347 /* The current graphic register invoked to each graphic plane. */
|
|
348 int current_invocation[2];
|
|
349
|
88365
|
350 /* The current charset designated to each graphic register. The
|
|
351 value -1 means that not charset is designated, -2 means that
|
|
352 there was an invalid designation previously. */
|
17052
|
353 int current_designation[4];
|
|
354
|
|
355 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
|
|
356 by single-shift while encoding. */
|
|
357 int single_shifting;
|
17118
|
358
|
|
359 /* Set to 1 temporarily only when processing at beginning of line. */
|
|
360 int bol;
|
102423
|
361
|
|
362 /* If positive, we are now scanning CTEXT extended segment. */
|
|
363 int ctext_extended_segment_len;
|
|
364
|
|
365 /* If nonzero, we are now scanning embedded UTF-8 sequence. */
|
|
366 int embedded_utf_8;
|
|
367
|
|
368 /* The current composition. */
|
|
369 struct composition_status cmp_status;
|
|
370 };
|
|
371
|
|
372 struct emacs_mule_spec
|
|
373 {
|
|
374 int full_support;
|
|
375 struct composition_status cmp_status;
|
17052
|
376 };
|
|
377
|
88365
|
/* Forward declaration only; struct coding_system holds a pointer to
   this type, and the member comment there says it is defined in
   ccl.h.  */
struct ccl_spec;
|
17052
|
379
|
95396
|
/* Byte-order-mark handling for UTF coding systems; used for both the
   `utf_8_bom' member of struct coding_system and the `bom' member of
   struct utf_16_spec.  */
enum utf_bom_type
  {
    utf_detect_bom,
    utf_without_bom,
    utf_with_bom
  };
|
|
386
|
|
/* Byte order of a UTF-16 coding system.  */
enum utf_16_endian_type
  {
    utf_16_big_endian,
    utf_16_little_endian
  };
|
|
392
|
88365
|
393 struct utf_16_spec
|
26846
|
394 {
|
95396
|
395 enum utf_bom_type bom;
|
88365
|
396 enum utf_16_endian_type endian;
|
|
397 int surrogate;
|
26846
|
398 };
|
17052
|
399
|
89330
|
/* Bookkeeping passed to the `detector' function of a coding
   system.  */
struct coding_detection_info
{
  /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */
  /* Which categories are already checked.  */
  int checked;
  /* Which categories are strongly found.  */
  int found;
  /* Which categories are rejected.  */
  int rejected;
};
|
20717
|
410
|
|
411
|
17052
|
412 struct coding_system
|
|
413 {
|
88365
|
414 /* ID number of the coding system. This is an index to
|
|
415 Vcoding_system_hash_table. This value is set by
|
|
416 setup_coding_system. At the early stage of building time, this
|
|
417 value is -1 in the array coding_categories to indicate that no
|
|
418 coding-system of that category is yet defined. */
|
|
419 int id;
|
20717
|
420
|
20226
|
421 /* Flag bits of the coding system. The meaning of each bit is common
|
20717
|
422 to all types of coding systems. */
|
88365
|
423 int common_flags;
|
17052
|
424
|
20717
|
425 /* Mode bits of the coding system. See the comments of the macros
|
|
426 CODING_MODE_XXX. */
|
|
427 unsigned int mode;
|
17052
|
428
|
|
429 /* Detailed information specific to each type of coding system. */
|
88365
|
430 union
|
17052
|
431 {
|
88365
|
432 struct iso_2022_spec iso_2022;
|
|
433 struct ccl_spec *ccl; /* Defined in ccl.h. */
|
|
434 struct utf_16_spec utf_16;
|
95396
|
435 enum utf_bom_type utf_8_bom;
|
102423
|
436 struct emacs_mule_spec emacs_mule;
|
17052
|
437 } spec;
|
|
438
|
88365
|
439 int max_charset_id;
|
102186
a12d39ca6870
* coding.h (struct coding_system): Make safe_charsets a pointer to
Andreas Schwab <schwab@suse.de>
diff
changeset
|
440 unsigned char *safe_charsets;
|
20717
|
441
|
88365
|
442 /* The following two members specify how binary 8-bit code 128..255
|
|
443 are represented in source and destination text respectively. 1
|
|
444 means they are represented by 2-byte sequence, 0 means they are
|
|
445 represented by 1-byte as is (see the comment in character.h). */
|
29006
|
446 unsigned src_multibyte : 1;
|
|
447 unsigned dst_multibyte : 1;
|
|
448
|
21320
|
449 /* How may heading bytes we can skip for decoding. This is set to
|
|
450 -1 in setup_coding_system, and updated by detect_coding. So,
|
|
451 when this is equal to the byte length of the text being
|
|
452 converted, we can skip the actual conversion process. */
|
88365
|
453 int head_ascii;
|
20717
|
454
|
|
455 /* The following members are set by encoding/decoding routine. */
|
88365
|
456 EMACS_INT produced, produced_char, consumed, consumed_char;
|
20717
|
457
|
29006
|
458 /* Number of error source data found in a decoding routine. */
|
|
459 int errors;
|
|
460
|
88365
|
461 /* Store the positions of error source data. */
|
|
462 EMACS_INT *error_positions;
|
20930
|
463
|
88365
|
464 /* Finish status of code conversion. */
|
|
465 enum coding_result_code result;
|
35530
|
466
|
88365
|
467 EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
|
|
468 Lisp_Object src_object;
|
89483
|
469 const unsigned char *source;
|
20930
|
470
|
88365
|
471 EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
|
|
472 Lisp_Object dst_object;
|
|
473 unsigned char *destination;
|
35530
|
474
|
91005
|
475 /* Set to 1 if the source of conversion is not in the member
|
89886
|
476 `charbuf', but at `src_object'. */
|
88365
|
477 int chars_at_source;
|
|
478
|
|
479 /* If an element is non-negative, it is a character code.
|
|
480
|
|
481 If it is in the range -128..-1, it is a 8-bit character code
|
|
482 minus 256.
|
20717
|
483
|
88365
|
484 If it is less than -128, it specifies the start of an annotation
|
|
485 chunk. The length of the chunk is -128 minus the value of the
|
|
486 element. The following elements are OFFSET, ANNOTATION-TYPE, and
|
|
487 a sequence of actual data for the annotation. OFFSET is a
|
|
488 character position offset from dst_pos or src_pos,
|
|
489 ANNOTATION-TYPE specfies the meaning of the annotation and how to
|
|
490 handle the following data.. */
|
|
491 int *charbuf;
|
|
492 int charbuf_size, charbuf_used;
|
17052
|
493
|
88365
|
494 /* Set to 1 if charbuf contains an annotation. */
|
|
495 int annotated;
|
|
496
|
|
497 unsigned char carryover[64];
|
|
498 int carryover_bytes;
|
17052
|
499
|
88365
|
500 int default_char;
|
|
501
|
89330
|
502 int (*detector) P_ ((struct coding_system *,
|
|
503 struct coding_detection_info *));
|
88365
|
504 void (*decoder) P_ ((struct coding_system *));
|
|
505 int (*encoder) P_ ((struct coding_system *));
|
17052
|
506 };
|
|
507
|
88365
|
/* Meanings of bits in the member `common_flags' of the structure
   coding_system.  The lowest 8 bits are reserved for various kinds of
   annotations (three masks are defined below).
   NOTE(review): CODING_ANNOTATE_CHARSET_MASK (0x0003) is not a
   distinct bit; it equals CODING_ANNOTATE_COMPOSITION_MASK
   | CODING_ANNOTATE_DIRECTION_MASK.  Value left unchanged -- confirm
   whether a separate bit was intended.  */
#define CODING_ANNOTATION_MASK			0x00FF
#define CODING_ANNOTATE_COMPOSITION_MASK	0x0001
#define CODING_ANNOTATE_DIRECTION_MASK		0x0002
#define CODING_ANNOTATE_CHARSET_MASK		0x0003
#define CODING_FOR_UNIBYTE_MASK			0x0100
#define CODING_REQUIRE_FLUSHING_MASK		0x0200
#define CODING_REQUIRE_DECODING_MASK		0x0400
#define CODING_REQUIRE_ENCODING_MASK		0x0800
#define CODING_REQUIRE_DETECTION_MASK		0x1000
#define CODING_RESET_AT_BOL_MASK		0x2000

/* The predicates below take a pointer to a struct coding_system.
   Those that are a bare `&' yield the masked bits themselves, so
   test them for non-zero rather than comparing against 1.  */

/* Return non-zero if the coding context CODING requires annotation
   handling.  */
#define CODING_REQUIRE_ANNOTATION(coding) \
  ((coding)->common_flags & CODING_ANNOTATION_MASK)

/* Return non-zero if the coding context CODING prefers decoding into
   unibyte.  */
#define CODING_FOR_UNIBYTE(coding) \
  ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

/* Return non-zero if the coding context CODING requires specific code
   to be attached at the tail of converted text.  */
#define CODING_REQUIRE_FLUSHING(coding) \
  ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding: either the destination is multibyte or the
   CODING_REQUIRE_DECODING_MASK bit is set.  */
#define CODING_REQUIRE_DECODING(coding)	\
  ((coding)->dst_multibyte		\
   || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))


/* Return 1 if the coding context CODING requires code conversion on
   encoding.
   The non-multibyte part of the condition is to support encoding of
   unibyte strings/buffers generated by string-as-unibyte or
   (set-buffer-multibyte nil) from multibyte strings/buffers.  */
#define CODING_REQUIRE_ENCODING(coding)				\
  ((coding)->src_multibyte					\
   || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)	\
   || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))


/* Return non-zero if the coding context CODING requires some kind of
   code detection.  */
#define CODING_REQUIRE_DETECTION(coding) \
  ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

/* Return 1 if the coding context CODING requires code conversion on
   decoding or some kind of code detection.  */
#define CODING_MAY_REQUIRE_DECODING(coding)	\
  (CODING_REQUIRE_DECODING (coding)		\
   || CODING_REQUIRE_DETECTION (coding))
|
17052
|
564
|
|
/* Macros to decode or encode a character of JISX0208 in SJIS.  CODE
   is a 16-bit value holding the first position code in bits 8..15
   and the second in bits 0..7; it must be a modifiable lvalue and is
   converted in place.  CODE is evaluated several times, so it must
   have no side effects.  */

/* Convert CODE from SJIS to JIS in place.  The temporaries carry a
   trailing underscore so that an argument named `s1', `j1', etc. is
   not shadowed by them.  */
#define SJIS_TO_JIS(code) \
  do { \
    int sj_hi_ = (code) >> 8; \
    int sj_lo_ = (code) & 0xFF; \
    int jis_hi_, jis_lo_; \
    \
    if (sj_lo_ >= 0x9F) \
      { \
	jis_hi_ = sj_hi_ * 2 - (sj_hi_ >= 0xE0 ? 0x160 : 0xE0); \
	jis_lo_ = sj_lo_ - 0x7E; \
      } \
    else \
      { \
	jis_hi_ = sj_hi_ * 2 - (sj_hi_ >= 0xE0 ? 0x161 : 0xE1); \
	jis_lo_ = sj_lo_ - (sj_lo_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (jis_hi_ << 8) | jis_lo_; \
  } while (0)
|
|
584
|
89765
|
/* Convert CODE in place from its SJIS form to a two-byte JIS-style
   code, using a table-like mapping for lead bytes starting at 0xF0.
   CODE holds the lead byte in bits 8..15 and the trail byte in bits
   0..7; it must be a modifiable lvalue without side effects.
   NOTE(review): presumably this is the mapping for the second plane
   (JIS X 0213 plane 2) -- confirm against coding.c.  The temporaries
   carry a trailing underscore so that an argument named `s1', `j1',
   etc. is not shadowed by them.  */
#define SJIS_TO_JIS2(code) \
  do { \
    int sj_hi_ = (code) >> 8; \
    int sj_lo_ = (code) & 0xFF; \
    int jis_hi_, jis_lo_; \
    \
    if (sj_lo_ >= 0x9F) \
      { \
	jis_hi_ = (sj_hi_ == 0xF0 ? 0x28 \
		   : sj_hi_ == 0xF1 ? 0x24 \
		   : sj_hi_ == 0xF2 ? 0x2C \
		   : sj_hi_ == 0xF3 ? 0x2E \
		   : 0x6E + (sj_hi_ - 0xF4) * 2); \
	jis_lo_ = sj_lo_ - 0x7E; \
      } \
    else \
      { \
	jis_hi_ = (sj_hi_ <= 0xF2 ? 0x21 + (sj_hi_ - 0xF0) * 2 \
		   : sj_hi_ <= 0xF4 ? 0x2D + (sj_hi_ - 0xF3) * 2 \
		   : 0x6F + (sj_hi_ - 0xF5) * 2); \
	jis_lo_ = sj_lo_ - (sj_lo_ >= 0x7F ? 0x20 : 0x1F); \
      } \
    (code) = (jis_hi_ << 8) | jis_lo_; \
  } while (0)
|
|
609
|
88365
|
610
|
|
/* Convert CODE from JIS to SJIS in place (inverse of SJIS_TO_JIS).
   CODE holds the first position code in bits 8..15 and the second in
   bits 0..7; it must be a modifiable lvalue without side effects.
   The temporaries carry a trailing underscore so that an argument
   named `s1', `j1', etc. is not shadowed by them.  */
#define JIS_TO_SJIS(code) \
  do { \
    int jis_hi_ = (code) >> 8; \
    int jis_lo_ = (code) & 0xFF; \
    int sj_hi_, sj_lo_; \
    \
    if (jis_hi_ & 1) \
      { \
	sj_hi_ = jis_hi_ / 2 + (jis_hi_ < 0x5F ? 0x71 : 0xB1); \
	sj_lo_ = jis_lo_ + (jis_lo_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
	sj_hi_ = jis_hi_ / 2 + (jis_hi_ < 0x5F ? 0x70 : 0xB0); \
	sj_lo_ = jis_lo_ + 0x7E; \
      } \
    (code) = (sj_hi_ << 8) | sj_lo_; \
  } while (0)
|
|
624
|
89765
|
/* Convert CODE in place from a two-byte JIS-style code to its SJIS
   form with lead bytes starting at 0xF0 (inverse of SJIS_TO_JIS2).
   CODE holds the first byte in bits 8..15 and the second in bits
   0..7; it must be a modifiable lvalue without side effects.  The
   temporaries carry a trailing underscore so that an argument named
   `s1', `j1', etc. is not shadowed by them.  */
#define JIS_TO_SJIS2(code) \
  do { \
    int jis_hi_ = (code) >> 8; \
    int jis_lo_ = (code) & 0xFF; \
    int sj_hi_, sj_lo_; \
    \
    if (jis_hi_ & 1) \
      { \
	sj_hi_ = (jis_hi_ <= 0x25 ? 0xF0 + (jis_hi_ - 0x21) / 2 \
		  : jis_hi_ <= 0x2F ? 0xF3 + (jis_hi_ - 0x2D) / 2 \
		  : 0xF5 + (jis_hi_ - 0x6F) / 2); \
	sj_lo_ = jis_lo_ + (jis_lo_ >= 0x60 ? 0x20 : 0x1F); \
      } \
    else \
      { \
	sj_hi_ = (jis_hi_ == 0x28 ? 0xF0 \
		  : jis_hi_ == 0x24 ? 0xF1 \
		  : jis_hi_ == 0x2C ? 0xF2 \
		  : jis_hi_ == 0x2E ? 0xF3 \
		  : 0xF4 + (jis_hi_ - 0x6E) / 2); \
	sj_lo_ = jis_lo_ + 0x7E; \
      } \
    (code) = (sj_hi_ << 8) | sj_lo_; \
  } while (0)
|
|
648
|
21051
|
/* Encode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is used when it
   is neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is tried under the same test;
   otherwise NAME is returned unchanged.  NAME appears in every arm of
   the conditional but only one arm is evaluated.  */
#define ENCODE_FILE(name)						   \
  (! NILP (Vfile_name_coding_system)					   \
   && !EQ (Vfile_name_coding_system, make_number (0))			   \
   ? code_convert_string_norecord ((name), Vfile_name_coding_system, 1)   \
   : (! NILP (Vdefault_file_name_coding_system)				   \
      && !EQ (Vdefault_file_name_coding_system, make_number (0))	   \
      ? code_convert_string_norecord ((name),				   \
				      Vdefault_file_name_coding_system, 1) \
      : (name)))
|
|
659
|
88365
|
660
|
21051
|
/* Decode the file name NAME using the specified coding system
   for file names, if any.  Vfile_name_coding_system is used when it
   is neither nil nor the integer 0; otherwise
   Vdefault_file_name_coding_system is tried under the same test;
   otherwise NAME is returned unchanged.  NAME appears in every arm of
   the conditional but only one arm is evaluated.  */
#define DECODE_FILE(name)						   \
  (! NILP (Vfile_name_coding_system)					   \
   && !EQ (Vfile_name_coding_system, make_number (0))			   \
   ? code_convert_string_norecord ((name), Vfile_name_coding_system, 0)   \
   : (! NILP (Vdefault_file_name_coding_system)				   \
      && !EQ (Vdefault_file_name_coding_system, make_number (0))	   \
      ? code_convert_string_norecord ((name),				   \
				      Vdefault_file_name_coding_system, 0) \
      : (name)))
|
|
671
|
88365
|
672
|
29310
|
/* Encode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise STR is returned
   unchanged.  */
#define ENCODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   && !EQ (Vlocale_coding_system, make_number (0))			   \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, 1)	   \
   : (str))
|
|
680
|
|
/* Decode the string STR using the specified coding system
   for system functions, if any.  Vlocale_coding_system is used when
   it is neither nil nor the integer 0; otherwise STR is returned
   unchanged.  */
#define DECODE_SYSTEM(str)						   \
  (! NILP (Vlocale_coding_system)					   \
   && !EQ (Vlocale_coding_system, make_number (0))			   \
   ? code_convert_string_norecord ((str), Vlocale_coding_system, 0)	   \
   : (str))
|
39574
|
688
|
89506
|
/* Used by the gtk menu code.  Note that this encodes utf-8, not
   utf-8-emacs, so it's not a no-op.  */
#define ENCODE_UTF_8(str) code_convert_string_norecord ((str), Qutf_8, 1)
|
|
692
|
17052
|
693 /* Extern declarations. */
|
89666
|
694 extern Lisp_Object code_conversion_save P_ ((int, int));
|
20308
|
695 extern int decoding_buffer_size P_ ((struct coding_system *, int));
|
|
696 extern int encoding_buffer_size P_ ((struct coding_system *, int));
|
88365
|
697 extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *));
|
90060
|
698 extern Lisp_Object coding_charset_list P_ ((struct coding_system *));
|
101777
|
699 extern Lisp_Object coding_system_charset_list P_ ((Lisp_Object));
|
88365
|
700 extern void detect_coding P_ ((struct coding_system *));
|
88848
|
701 extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object,
|
88365
|
702 Lisp_Object, Lisp_Object,
|
|
703 int, int));
|
|
704 extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object,
|
|
705 Lisp_Object, int, int, int));
|
29438
|
706 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
|
|
707 int));
|
88365
|
708 extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object));
|
|
709 extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object));
|
|
710
|
|
711 extern int decode_coding_gap P_ ((struct coding_system *,
|
|
712 EMACS_INT, EMACS_INT));
|
|
713 extern int encode_coding_gap P_ ((struct coding_system *,
|
|
714 EMACS_INT, EMACS_INT));
|
|
715 extern void decode_coding_object P_ ((struct coding_system *,
|
|
716 Lisp_Object, EMACS_INT, EMACS_INT,
|
|
717 EMACS_INT, EMACS_INT, Lisp_Object));
|
|
718 extern void encode_coding_object P_ ((struct coding_system *,
|
|
719 Lisp_Object, EMACS_INT, EMACS_INT,
|
|
720 EMACS_INT, EMACS_INT, Lisp_Object));
|
|
721
|
89886
|
/* Macros for backward compatibility.  They forward to
   decode_coding_object / encode_coding_object.  Arguments may be
   evaluated more than once.  */

/* Decode the region FROM..TO of the current buffer, with the current
   buffer also passed as the destination object.  */
#define decode_coding_region(coding, from, to)		\
  decode_coding_object ((coding), Fcurrent_buffer (),	\
			(from), CHAR_TO_BYTE (from),	\
			(to), CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Encode the region FROM..TO of the current buffer, with the current
   buffer also passed as the destination object.  */
#define encode_coding_region(coding, from, to)		\
  encode_coding_object ((coding), Fcurrent_buffer (),	\
			(from), CHAR_TO_BYTE (from),	\
			(to), CHAR_TO_BYTE (to), Fcurrent_buffer ())


/* Decode the whole of STRING, passing Qt as the destination object.
   NOCOPY is accepted for compatibility and ignored.  */
#define decode_coding_string(coding, string, nocopy)			\
  decode_coding_object ((coding), (string), 0, 0,			\
			XSTRING (string)->size,				\
			STRING_BYTES (XSTRING (string)), Qt)

/* Encode the whole of STRING, passing Qt as the destination object,
   and yield CODING->dst_object as the value of the expression.
   NOCOPY is accepted for compatibility and ignored.  */
#define encode_coding_string(coding, string, nocopy)			\
  (encode_coding_object ((coding), (string), 0, 0,			\
			 XSTRING (string)->size,			\
			 STRING_BYTES (XSTRING (string)), Qt),		\
   (coding)->dst_object)


/* Decode BYTES bytes at the C string SRC into DST_OBJECT.  The
   source pointer and lengths are stored in CODING first, and Qnil is
   passed as the source object.  */
#define decode_coding_c_string(coding, src, bytes, dst_object)		\
  do {									\
    (coding)->source = (src);						\
    (coding)->src_chars = (coding)->src_bytes = (bytes);		\
    decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes),	\
			  (dst_object));				\
  } while (0)
|
|
753
|
|
754
|
|
755 extern Lisp_Object preferred_coding_system P_ (());
|
|
756
|
|
757
|
89483
|
758 extern Lisp_Object Qutf_8, Qutf_8_emacs;
|
|
759
|
17052
|
760 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
|
88365
|
761 extern Lisp_Object Qcoding_system_p;
|
|
762 extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
|
|
763 extern Lisp_Object Qiso_2022;
|
17052
|
764 extern Lisp_Object Qbuffer_file_coding_system;
|
88365
|
765
|
|
766 extern Lisp_Object Qunix, Qdos, Qmac;
|
17052
|
767
|
22186
|
768 extern Lisp_Object Qtranslation_table;
|
|
769 extern Lisp_Object Qtranslation_table_id;
|
22118
|
770
|
24201
|
771 /* Mnemonic strings to indicate each type of end-of-line. */
|
|
772 extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
|
|
773 /* Mnemonic string to indicate type of end-of-line is not yet decided. */
|
|
774 extern Lisp_Object eol_mnemonic_undecided;
|
17052
|
775
|
|
#ifdef emacs
extern Lisp_Object Qfile_coding_system;
extern Lisp_Object Qcall_process, Qcall_process_region;
extern Lisp_Object Qstart_process, Qopen_network_stream;
extern Lisp_Object Qwrite_region;

extern char *emacs_strerror P_ ((int));

/* Coding-system for reading files and receiving data from process.  */
extern Lisp_Object Vcoding_system_for_read;
/* Coding-system for writing files and sending data to process.  */
extern Lisp_Object Vcoding_system_for_write;
/* Coding-system actually used in the latest I/O.  */
extern Lisp_Object Vlast_coding_system_used;
/* Coding-system to use with system messages (e.g. strerror).  */
extern Lisp_Object Vlocale_coding_system;

/* If non-zero, process buffer inherits the coding system used to decode
   the subprocess output.  */
extern int inherit_process_coding_system;

/* Coding system to be used to encode text for terminal display when
   terminal coding system is nil.  */
extern struct coding_system safe_terminal_coding;

/* Default coding systems used for process I/O.  */
extern Lisp_Object Vdefault_process_coding_system;

/* Function to call to force a user to select a proper coding
   system.  */
extern Lisp_Object Vselect_safe_coding_system_function;

/* If nonzero, on writing a file, Vselect_safe_coding_system_function
   is called even if Vcoding_system_for_write is non-nil.  */
extern int coding_system_require_warning;

/* Coding system for file names, or nil if none.  */
extern Lisp_Object Vfile_name_coding_system;

/* Coding system for file names used only when
   Vfile_name_coding_system is nil.  */
extern Lisp_Object Vdefault_file_name_coding_system;

#endif
|
|
820
|
34107
|
821 /* Error signaled when there's a problem with detecting coding system */
|
|
822 extern Lisp_Object Qcoding_system_error;
|
|
823
|
88365
|
824 extern char emacs_mule_bytes[256];
|
|
825 extern int emacs_mule_string_char P_ ((unsigned char *));
|
|
826
|
29571
|
827 #endif /* EMACS_CODING_H */
|
52401
|
828
|
|
829 /* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
|
|
830 (do not change this comment) */
|