17052
|
1 /* Header for coding system handler.
|
75227
|
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
|
|
3 2006, 2007 Free Software Foundation, Inc.
|
74605
|
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
75364
|
5 2005, 2006, 2007
|
67658
|
6 National Institute of Advanced Industrial Science and Technology (AIST)
|
|
7 Registration Number H14PRO021
|
17052
|
8
|
17071
|
9 This file is part of GNU Emacs.
|
|
10
|
|
11 GNU Emacs is free software; you can redistribute it and/or modify
|
|
12 it under the terms of the GNU General Public License as published by
|
78313
|
13 the Free Software Foundation; either version 3, or (at your option)
|
17071
|
14 any later version.
|
17052
|
15
|
17071
|
16 GNU Emacs is distributed in the hope that it will be useful,
|
|
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 GNU General Public License for more details.
|
17052
|
20
|
17071
|
21 You should have received a copy of the GNU General Public License
|
|
22 along with GNU Emacs; see the file COPYING. If not, write to
|
64084
|
23 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
24 Boston, MA 02110-1301, USA. */
|
17052
|
25
|
29571
|
26 #ifndef EMACS_CODING_H
|
|
27 #define EMACS_CODING_H
|
17052
|
28
|
25378
|
29 #include "ccl.h"
|
17052
|
30
|
17835
|
31 /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
|
17052
|
32
|
|
33 /* All code (1-byte) of Emacs' internal format is classified into one
|
|
34 of the following classes. See also `charset.h'. */
|
|
35 enum emacs_code_class_type
|
|
36 {
|
|
37 EMACS_control_code, /* Control codes in the range
|
|
38 0x00..0x1F and 0x7F except for the
|
|
39 following two codes. */
|
|
40 EMACS_linefeed_code, /* 0x0A (linefeed) to denote
|
|
41 end-of-line. */
|
|
42 EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
|
|
43 in selective display mode. */
|
|
44 EMACS_ascii_code, /* ASCII characters. */
|
|
45 EMACS_leading_code_2, /* Base leading code of official
|
|
46 TYPE9N character. */
|
|
47 EMACS_leading_code_3, /* Base leading code of private TYPE9N
|
|
48 or official TYPE9Nx9N character. */
|
|
49 EMACS_leading_code_4, /* Base leading code of private
|
|
50 TYPE9Nx9N character. */
|
|
51 EMACS_invalid_code /* Invalid code, i.e. a base leading
|
|
52 code not yet assigned to any
|
|
53 charset, or a code of the range
|
|
54 0xA0..0xFF. */
|
|
55 };
|
|
56
|
|
57 extern enum emacs_code_class_type emacs_code_class[256];
|
|
58
|
|
59 /*** ISO2022 section ***/
|
|
60
|
|
61 /* Macros to define code of control characters for ISO2022's functions. */
|
|
62 /* code */ /* function */
|
|
63 #define ISO_CODE_LF 0x0A /* line-feed */
|
|
64 #define ISO_CODE_CR 0x0D /* carriage-return */
|
|
65 #define ISO_CODE_SO 0x0E /* shift-out */
|
|
66 #define ISO_CODE_SI 0x0F /* shift-in */
|
|
67 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
|
|
68 #define ISO_CODE_ESC 0x1B /* escape */
|
|
69 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
|
|
70 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
|
|
71 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
|
|
72
|
|
73 /* All code (1-byte) of ISO2022 is classified into one of the
|
|
74 following classes. */
|
|
75 enum iso_code_class_type
|
|
76 {
|
29006
|
77 ISO_control_0, /* Control codes in the range
|
|
78 0x00..0x1F and 0x7F, except for the
|
|
79 following 5 codes. */
|
17052
|
80 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
|
|
81 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
|
|
82 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
|
|
83 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
|
|
84 ISO_escape, /* ISO_CODE_ESC (0x1B) */
|
29006
|
85 ISO_control_1, /* Control codes in the range
|
|
86 0x80..0x9F, except for the
|
|
87 following 3 codes. */
|
17052
|
88 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
|
|
89 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
|
|
90 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
|
|
91 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
|
|
92 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
|
|
93 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
|
|
94 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
|
|
95 };
|
|
96
|
|
97 /** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
|
|
98 element in the structure `coding_system'. This information is used
|
|
99 while encoding a text to ISO2022. **/
|
|
100
|
|
101 /* If set, produce short-form designation sequence (e.g. ESC $ A)
|
|
102 instead of long-form sequence (e.g. ESC $ ( A). */
|
|
103 #define CODING_FLAG_ISO_SHORT_FORM 0x0001
|
|
104
|
|
105 /* If set, reset graphic planes and registers at end-of-line to the
|
|
106 initial state. */
|
|
107 #define CODING_FLAG_ISO_RESET_AT_EOL 0x0002
|
|
108
|
|
109 /* If set, reset graphic planes and registers before any control
|
|
110 characters to the initial state. */
|
|
111 #define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004
|
|
112
|
|
113 /* If set, encode by 7-bit environment. */
|
|
114 #define CODING_FLAG_ISO_SEVEN_BITS 0x0008
|
|
115
|
|
116 /* If set, use locking-shift function. */
|
|
117 #define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010
|
|
118
|
|
119 /* If set, use single-shift function. Overwrite
|
|
120 CODING_FLAG_ISO_LOCKING_SHIFT. */
|
|
121 #define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020
|
|
122
|
|
123 /* If set, designate JISX0201-Roman instead of ASCII. */
|
|
124 #define CODING_FLAG_ISO_USE_ROMAN 0x0040
|
|
125
|
|
126 /* If set, designate JISX0208-1978 instead of JISX0208-1983. */
|
|
127 #define CODING_FLAG_ISO_USE_OLDJIS 0x0080
|
|
128
|
|
129 /* If set, do not produce ISO6429's direction specifying sequence. */
|
|
130 #define CODING_FLAG_ISO_NO_DIRECTION 0x0100
|
|
131
|
17118
|
132 /* If set, assume designation states are reset at beginning of line on
|
|
133 output. */
|
|
134 #define CODING_FLAG_ISO_INIT_AT_BOL 0x0200
|
|
135
|
|
136 /* If set, designation sequence should be placed at beginning of line
|
|
137 on output. */
|
|
138 #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
|
|
139
|
36088
|
140 /* If set, do not encode unsafe characters on output. */
|
19279
|
141 #define CODING_FLAG_ISO_SAFE 0x0800
|
|
142
|
19364
|
143 /* If set, extra latin codes (128..159) are accepted as a valid code
|
|
144 on input. */
|
|
145 #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
|
|
146
|
20717
|
147 /* If set, use designation escape sequence. */
|
|
148 #define CODING_FLAG_ISO_DESIGNATION 0x10000
|
|
149
|
19279
|
150 /* A character to be produced on output if encoding of the original
|
51139
|
151 character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR.
|
|
152 It must be an ASCII character. */
|
|
153 #define CODING_REPLACEMENT_CHARACTER '?'
|
19279
|
154
|
17052
|
155 /* Structure of the field `spec.iso2022' in the structure `coding_system'. */
|
|
156 struct iso2022_spec
|
|
157 {
|
|
158 /* The current graphic register invoked to each graphic plane. */
|
|
159 int current_invocation[2];
|
|
160
|
|
161 /* The current charset designated to each graphic register. */
|
|
162 int current_designation[4];
|
|
163
|
|
164 /* A charset initially designated to each graphic register. */
|
|
165 int initial_designation[4];
|
|
166
|
20717
|
167 /* If not -1, it is a graphic register specified in an invalid
|
|
168 designation sequence. */
|
|
169 int last_invalid_designation_register;
|
|
170
|
17052
|
171 /* A graphic register to which each charset should be designated. */
|
18001
|
172 unsigned char requested_designation[MAX_CHARSET + 1];
|
17052
|
173
|
20149
|
174 /* A revision number to be specified for each charset on encoding.
|
|
175 The value 255 means no revision number for the corresponding
|
|
176 charset. */
|
|
177 unsigned char charset_revision_number[MAX_CHARSET + 1];
|
19284
|
178
|
17052
|
179 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
|
|
180 by single-shift while encoding. */
|
|
181 int single_shifting;
|
17118
|
182
|
|
183 /* Set to 1 temporarily only when processing at beginning of line. */
|
|
184 int bol;
|
17052
|
185 };
|
|
186
|
|
187 /* Macros to access each field in the structure `spec.iso2022'. */
|
|
188 #define CODING_SPEC_ISO_INVOCATION(coding, plane) \
|
20717
|
189 (coding)->spec.iso2022.current_invocation[plane]
|
17052
|
190 #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
|
20717
|
191 (coding)->spec.iso2022.current_designation[reg]
|
17052
|
192 #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
|
20717
|
193 (coding)->spec.iso2022.initial_designation[reg]
|
17052
|
194 #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
|
20717
|
195 (coding)->spec.iso2022.requested_designation[charset]
|
20149
|
196 #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
|
20717
|
197 (coding)->spec.iso2022.charset_revision_number[charset]
|
17052
|
198 #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
|
20717
|
199 (coding)->spec.iso2022.single_shifting
|
17118
|
200 #define CODING_SPEC_ISO_BOL(coding) \
|
20717
|
201 (coding)->spec.iso2022.bol
|
17052
|
202
|
18001
|
203 /* A value which may appear in
|
|
204 coding->spec.iso2022.requested_designation indicating that the
|
|
205 corresponding charset does not request any graphic register to be
|
|
206 designated. */
|
|
207 #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
|
|
208
|
17052
|
209 /* Return a charset which is currently designated to the graphic plane
|
|
210 PLANE in the coding-system CODING. */
|
17724
|
211 #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
|
|
212 ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
|
|
213 ? -1 \
|
|
214 : CODING_SPEC_ISO_DESIGNATION (coding, \
|
|
215 CODING_SPEC_ISO_INVOCATION (coding, plane)))
|
17052
|
216
|
|
217 /*** BIG5 section ***/
|
|
218
|
|
219 /* Macros to denote each type of BIG5 coding system. */
|
|
220 #define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
|
|
221 BIG5 developed by Hong Kong
|
|
222 University. */
|
|
223 #define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
|
|
224 of BIG5 developed by the
|
|
225 company ETen in Taiwan. */
|
|
226
|
|
227 /*** GENERAL section ***/
|
|
228
|
|
229 /* Types of coding system. */
|
|
230 enum coding_type
|
|
231 {
|
|
232 coding_type_no_conversion, /* A coding system which requires no
|
|
233 conversion for reading and writing
|
|
234 including end-of-line format. */
|
17835
|
235 coding_type_emacs_mule, /* A coding system used in Emacs'
|
17052
|
236 buffer and string. Requires no
|
|
237 conversion for reading and writing
|
|
238 except for end-of-line format. */
|
17835
|
239 coding_type_undecided, /* A coding system which requires
|
17052
|
240 automatic detection of a real
|
|
241 coding system. */
|
|
242 coding_type_sjis, /* SJIS coding system for Japanese. */
|
|
243 coding_type_iso2022, /* Any coding system of ISO2022
|
|
244 variants. */
|
|
245 coding_type_big5, /* BIG5 coding system for Chinese. */
|
19611
|
246 coding_type_ccl, /* The coding system of which decoder
|
17052
|
247 and encoder are written in CCL. */
|
19611
|
248 coding_type_raw_text /* A coding system for a text
|
36088
|
249 containing random 8-bit code which
|
19611
|
250 does not require code conversion
|
|
251 except for end-of-line format. */
|
17052
|
252 };
|
|
253
|
|
254 /* Formats of end-of-line. */
|
|
255 #define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
|
|
256 internal format. */
|
|
257 #define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
|
|
258 line-feed. */
|
|
259 #define CODING_EOL_CR 2 /* Carriage-return only. */
|
17835
|
260 #define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
|
17052
|
261 eol-type is not yet decided. */
|
19611
|
262 #define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
|
|
263 eol-type is not consistent
|
|
264 through the file. */
|
17052
|
265
|
78501
|
/* 1 if CODING is composing, i.e. its `composing' member compares
   greater than COMPOSITION_NO; 0 otherwise.  CODING is a pointer to a
   struct coding_system and may be any pointer-valued expression.  */
#define COMPOSING_P(coding) \
  ((int) (coding)->composing > (int) COMPOSITION_NO)
|
|
268
|
|
269 #define COMPOSITION_DATA_SIZE 4080
|
|
270 #define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
|
|
271
|
|
272 /* Data structure to hold information about compositions of text that
|
|
273 is being decoded or encoded. ISO 2022 base code conversion routines
|
|
274 handle special ESC sequences for composition specification. But,
|
|
275 they can't get/put such information directly from/to a buffer in
|
|
276 the deepest place. So, they store or retrieve the information
|
|
277 through this structure.
|
|
278
|
|
279 The decoder stores the information in this structure when it meets
|
|
280 ESC sequences for composition while decoding codes, then, after all
|
|
281 text codes are decoded, puts `composition' properties on the text
|
36088
|
282 by referring to the structure.
|
26846
|
283
|
|
284 The encoder at first stores the information of a text to be
|
|
285 encoded, then, while encoding codes, generates ESC sequences for
|
36088
|
286 composition at proper places by referring to the structure. */
|
17052
|
287
|
26846
|
288 struct composition_data
|
|
289 {
|
|
290 /* The character position of the first character to be encoded or
|
|
291 decoded. START and END (see below) are relative to this
|
|
292 position. */
|
|
293 int char_offset;
|
|
294
|
|
295 /* The composition data. These elements are repeated for each
|
|
296 composition:
|
|
297 LENGTH START END METHOD [ COMPONENT ... ]
|
|
298 where,
|
|
299 LENGTH is the number of elements for this composition.
|
|
300
|
|
301 START and END are starting and ending character positions of
|
|
302 the composition relative to `char_offset'.
|
|
303
|
36088
|
304 METHOD is one of `enum composing_status' specifying the way of
|
26846
|
305 composition.
|
|
306
|
|
307 COMPONENT is a character or an encoded composition rule. */
|
|
308 int data[COMPOSITION_DATA_SIZE];
|
|
309
|
|
310 /* The number of elements in `data' currently used. */
|
|
311 int used;
|
|
312
|
|
313 /* Pointers to the previous and next structures. When `data' is
|
|
314 filled up, another structure is allocated and linked in `next'.
|
36088
|
315 The new structure has backward link to this structure in `prev'.
|
|
316 The number of chained structures depends on how many compositions
|
26846
|
317 the text being encoded or decoded contains. */
|
|
318 struct composition_data *prev, *next;
|
|
319 };
|
17052
|
320
|
30832
|
321 /* Macros used for the member `result' of the struct
|
20717
|
322 coding_system. */
|
|
323 #define CODING_FINISH_NORMAL 0
|
|
324 #define CODING_FINISH_INSUFFICIENT_SRC 1
|
|
325 #define CODING_FINISH_INSUFFICIENT_DST 2
|
|
326 #define CODING_FINISH_INCONSISTENT_EOL 3
|
26846
|
327 #define CODING_FINISH_INSUFFICIENT_CMP 4
|
|
328 #define CODING_FINISH_INTERRUPT 5
|
20717
|
329
|
21031
|
330 /* Macros used for the member `mode' of the struct coding_system. */
|
20717
|
331
|
|
332 /* If set, recover the original CR or LF of the already decoded text
|
|
333 when the decoding routine encounters an inconsistent eol format. */
|
|
334 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
|
|
335
|
|
336 /* If set, the decoding/encoding routines treat the current data as
|
|
337 the last block of the whole text to be converted, and do
|
36088
|
338 appropriate finishing job. */
|
20717
|
339 #define CODING_MODE_LAST_BLOCK 0x02
|
|
340
|
|
341 /* If set, it means that the current source text is in a buffer which
|
|
342 enables selective display. */
|
|
343 #define CODING_MODE_SELECTIVE_DISPLAY 0x04
|
|
344
|
51139
|
345 /* If set, replace unencodable characters by `?' on encoding. */
|
|
346 #define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08
|
|
347
|
20717
|
348 /* This flag is used by the decoding/encoding routines on the fly. If
|
|
349 set, it means that right-to-left text is being processed. */
|
51139
|
350 #define CODING_MODE_DIRECTION 0x10
|
20717
|
351
|
17052
|
352 struct coding_system
|
|
353 {
|
|
354 /* Type of the coding system. */
|
|
355 enum coding_type type;
|
|
356
|
20717
|
357 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
|
|
358 int eol_type;
|
|
359
|
20226
|
360 /* Flag bits of the coding system. The meaning of each bit is common
|
20717
|
361 to all types of coding systems. */
|
20226
|
362 unsigned int common_flags;
|
17052
|
363
|
|
364 /* Flag bits of the coding system. The meaning of each bit depends
|
|
365 on the type of the coding system. */
|
|
366 unsigned int flags;
|
|
367
|
20717
|
368 /* Mode bits of the coding system. See the comments of the macros
|
|
369 CODING_MODE_XXX. */
|
|
370 unsigned int mode;
|
17052
|
371
|
26846
|
372 /* The current status of composition handling. */
|
17052
|
373 int composing;
|
|
374
|
78501
|
375 /* 1 if the next character is a composition rule. */
|
26846
|
376 int composition_rule_follows;
|
|
377
|
|
378 /* Information of compositions are stored here on decoding and set
|
|
379 in advance on encoding. */
|
|
380 struct composition_data *cmp_data;
|
|
381
|
|
382 /* Index to cmp_data->data for the first element for the current
|
|
383 composition. */
|
|
384 int cmp_data_start;
|
|
385
|
|
386 /* Index to cmp_data->data for the current element for the current
|
|
387 composition. */
|
|
388 int cmp_data_index;
|
23324
|
389
|
17052
|
390 /* Detailed information specific to each type of coding system. */
|
|
391 union spec
|
|
392 {
|
|
393 struct iso2022_spec iso2022;
|
|
394 struct ccl_spec ccl; /* Defined in ccl.h. */
|
|
395 } spec;
|
|
396
|
20717
|
397 /* Index number of coding category of the coding system. */
|
|
398 int category_idx;
|
|
399
|
29106
|
400 /* The following two members specify how characters 128..159 are
|
|
401 represented in source and destination text respectively. 1 means
|
|
402 they are represented by 2-byte sequence, 0 means they are
|
|
403 represented by 1-byte as is (see the comment in charset.h). */
|
29006
|
404 unsigned src_multibyte : 1;
|
|
405 unsigned dst_multibyte : 1;
|
|
406
|
21320
|
407 /* How many heading bytes we can skip for decoding. This is set to
|
|
408 -1 in setup_coding_system, and updated by detect_coding. So,
|
|
409 when this is equal to the byte length of the text being
|
|
410 converted, we can skip the actual conversion process. */
|
20717
|
411 int heading_ascii;
|
|
412
|
|
413 /* The following members are set by encoding/decoding routine. */
|
|
414 int produced, produced_char, consumed, consumed_char;
|
|
415
|
29006
|
416 /* Number of error source data found in a decoding routine. */
|
|
417 int errors;
|
|
418
|
|
419 /* Finish status of code conversion. It should be one of macros
|
|
420 CODING_FINISH_XXXX. */
|
|
421 int result;
|
20930
|
422
|
35530
|
423 /* If nonzero, suppress error notification. */
|
|
424 int suppress_error;
|
|
425
|
20717
|
426 /* The following members are all Lisp symbols. We don't have to
|
|
427 protect them from GC because the current garbage collection
|
|
428 doesn't relocate Lisp symbols. But, when it is changed, we must
|
|
429 find a way to protect them. */
|
|
430
|
17052
|
431 /* Backward pointer to the Lisp symbol of the coding system. */
|
|
432 Lisp_Object symbol;
|
|
433
|
|
434 /* Lisp function (symbol) to be called after decoding to do
|
20717
|
435 additional conversion, or nil. */
|
17052
|
436 Lisp_Object post_read_conversion;
|
|
437
|
|
438 /* Lisp function (symbol) to be called before encoding to do
|
20717
|
439 additional conversion, or nil. */
|
17052
|
440 Lisp_Object pre_write_conversion;
|
|
441
|
22118
|
442 /* Character translation tables to look up, or nil. */
|
22186
|
443 Lisp_Object translation_table_for_decode;
|
|
444 Lisp_Object translation_table_for_encode;
|
17052
|
445 };
|
|
446
|
70703
|
447 /* Mask bits for (struct coding_system *)->common_flags. */
|
|
448 #define CODING_REQUIRE_FLUSHING_MASK 0x01
|
|
449 #define CODING_REQUIRE_DECODING_MASK 0x02
|
|
450 #define CODING_REQUIRE_ENCODING_MASK 0x04
|
|
451 #define CODING_REQUIRE_DETECTION_MASK 0x08
|
|
452 #define CODING_ASCII_INCOMPATIBLE_MASK 0x10
|
20226
|
453
|
|
454 /* Return 1 if the coding system CODING requires specific code to be
|
|
455 attached at the tail of converted text. */
|
|
456 #define CODING_REQUIRE_FLUSHING(coding) \
|
|
457 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
|
17052
|
458
|
20226
|
459 /* Return 1 if the coding system CODING requires code conversion on
|
|
460 decoding. */
|
|
461 #define CODING_REQUIRE_DECODING(coding) \
|
29006
|
462 ((coding)->dst_multibyte \
|
|
463 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
|
20226
|
464
|
|
465 /* Return 1 if the coding system CODING requires code conversion on
|
70783
|
466 encoding.
|
|
467 The non-multibyte part of the condition is to support encoding of
|
|
468 unibyte strings/buffers generated by string-as-unibyte or
|
|
469 (set-buffer-multibyte nil) from multibyte strings/buffers. */
|
20226
|
470 #define CODING_REQUIRE_ENCODING(coding) \
|
29006
|
471 ((coding)->src_multibyte \
|
|
472 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
|
20226
|
473
|
|
474 /* Return 1 if the coding system CODING requires some kind of code
|
|
475 detection. */
|
|
476 #define CODING_REQUIRE_DETECTION(coding) \
|
|
477 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
|
|
478
|
29006
|
479 /* Return 1 if the coding system CODING requires code conversion on
|
|
480 decoding or some kind of code detection. */
|
20717
|
481 #define CODING_MAY_REQUIRE_DECODING(coding) \
|
29006
|
482 (CODING_REQUIRE_DECODING (coding) \
|
|
483 || CODING_REQUIRE_DETECTION (coding))
|
17052
|
484
|
|
485 /* Index for each coding category in `coding_category_table' */
|
17835
|
486 #define CODING_CATEGORY_IDX_EMACS_MULE 0
|
17052
|
487 #define CODING_CATEGORY_IDX_SJIS 1
|
|
488 #define CODING_CATEGORY_IDX_ISO_7 2
|
20717
|
489 #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
|
|
490 #define CODING_CATEGORY_IDX_ISO_8_1 4
|
|
491 #define CODING_CATEGORY_IDX_ISO_8_2 5
|
|
492 #define CODING_CATEGORY_IDX_ISO_7_ELSE 6
|
|
493 #define CODING_CATEGORY_IDX_ISO_8_ELSE 7
|
22873
|
494 #define CODING_CATEGORY_IDX_CCL 8
|
|
495 #define CODING_CATEGORY_IDX_BIG5 9
|
28021
|
496 #define CODING_CATEGORY_IDX_UTF_8 10
|
|
497 #define CODING_CATEGORY_IDX_UTF_16_BE 11
|
|
498 #define CODING_CATEGORY_IDX_UTF_16_LE 12
|
|
499 #define CODING_CATEGORY_IDX_RAW_TEXT 13
|
|
500 #define CODING_CATEGORY_IDX_BINARY 14
|
|
501 #define CODING_CATEGORY_IDX_MAX 15
|
17052
|
502
|
|
503 /* Definitions of flag bits returned by the function
|
|
504 detect_coding_mask (). */
|
17835
|
505 #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
|
17052
|
506 #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
|
|
507 #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
|
20717
|
508 #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
|
17052
|
509 #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
|
|
510 #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
|
18786
|
511 #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
|
|
512 #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
|
22873
|
513 #define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL)
|
19689
|
514 #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
|
28021
|
515 #define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8)
|
|
516 #define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE)
|
|
517 #define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE)
|
19611
|
518 #define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
|
19096
|
519 #define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
|
17052
|
520
|
|
521 /* This value is returned if detect_coding_mask () finds nothing other
|
|
522 than ASCII characters. */
|
|
523 #define CODING_CATEGORY_MASK_ANY \
|
17835
|
524 ( CODING_CATEGORY_MASK_EMACS_MULE \
|
17052
|
525 | CODING_CATEGORY_MASK_SJIS \
|
|
526 | CODING_CATEGORY_MASK_ISO_7 \
|
20717
|
527 | CODING_CATEGORY_MASK_ISO_7_TIGHT \
|
17052
|
528 | CODING_CATEGORY_MASK_ISO_8_1 \
|
|
529 | CODING_CATEGORY_MASK_ISO_8_2 \
|
18786
|
530 | CODING_CATEGORY_MASK_ISO_7_ELSE \
|
|
531 | CODING_CATEGORY_MASK_ISO_8_ELSE \
|
22873
|
532 | CODING_CATEGORY_MASK_CCL \
|
28021
|
533 | CODING_CATEGORY_MASK_BIG5 \
|
|
534 | CODING_CATEGORY_MASK_UTF_8 \
|
|
535 | CODING_CATEGORY_MASK_UTF_16_BE \
|
|
536 | CODING_CATEGORY_MASK_UTF_16_LE)
|
17052
|
537
|
20717
|
538 #define CODING_CATEGORY_MASK_ISO_7BIT \
|
|
539 (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
|
|
540
|
|
541 #define CODING_CATEGORY_MASK_ISO_8BIT \
|
|
542 (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
|
|
543
|
|
544 #define CODING_CATEGORY_MASK_ISO_SHIFT \
|
|
545 (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
|
|
546
|
|
547 #define CODING_CATEGORY_MASK_ISO \
|
|
548 ( CODING_CATEGORY_MASK_ISO_7BIT \
|
|
549 | CODING_CATEGORY_MASK_ISO_SHIFT \
|
|
550 | CODING_CATEGORY_MASK_ISO_8BIT)
|
|
551
|
28021
|
552 #define CODING_CATEGORY_MASK_UTF_16_BE_LE \
|
|
553 (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
|
|
554
|
17052
|
555 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
|
|
556 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
|
|
557 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
|
|
558 internal format. */
|
|
559
|
|
/* Decode the SJIS position-codes S1 and S2 of a JISX0208 character
   into C1 and C2, the 1st and 2nd position codes of Emacs' internal
   format.

   S1 and S2 may be arbitrary integer expressions; each is evaluated
   more than once, so they must not have side effects.  C1 and C2 must
   be assignable lvalues.  C1 is assigned before C2, as before, and
   each result depends only on its own source byte, so passing the
   same variables for (S1, C1) and (S2, C2) still works.  */
#define DECODE_SJIS(s1, s2, c1, c2)				\
  do {								\
    if ((s2) >= 0x9F)						\
      {								\
	(c1) = (s1) * 2 - (((s1) >= 0xE0) ? 0x160 : 0xE0);	\
	(c2) = (s2) - 0x7E;					\
      }								\
    else							\
      {								\
	(c1) = (s1) * 2 - (((s1) >= 0xE0) ? 0x161 : 0xE1);	\
	(c2) = (s2) - (((s2) >= 0x7F) ? 0x20 : 0x1F);		\
      }								\
  } while (0)
|
|
569
|
|
/* Encode C1 and C2, the 1st and 2nd position codes of a JISX0208
   character in Emacs' internal format, into the SJIS position-codes
   S1 and S2.

   C1 and C2 may be arbitrary integer expressions; each is evaluated
   more than once, so they must not have side effects.  S1 and S2 must
   be assignable lvalues.  S1 is assigned before S2, as before.  */
#define ENCODE_SJIS(c1, c2, s1, s2)				\
  do {								\
    if ((c1) & 1)						\
      {								\
	(s1) = (c1) / 2 + (((c1) < 0x5F) ? 0x71 : 0xB1);	\
	(s2) = (c2) + (((c2) >= 0x60) ? 0x20 : 0x1F);		\
      }								\
    else							\
      {								\
	(s1) = (c1) / 2 + (((c1) < 0x5F) ? 0x70 : 0xB0);	\
	(s2) = (c2) + 0x7E;					\
      }								\
  } while (0)
|
|
579
|
21051
|
580 /* Encode the file name NAME using the specified coding system
|
|
581 for file names, if any. */
|
|
582 #define ENCODE_FILE(name) \
|
|
583 (! NILP (Vfile_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
584 && !EQ (Vfile_name_coding_system, make_number (0)) \
|
22342
|
585 ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
|
21051
|
586 : (! NILP (Vdefault_file_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
587 && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
|
22342
|
588 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
|
21051
|
589 : name))
|
|
590
|
|
591 /* Decode the file name NAME using the specified coding system
|
|
592 for file names, if any. */
|
|
593 #define DECODE_FILE(name) \
|
|
594 (! NILP (Vfile_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
595 && !EQ (Vfile_name_coding_system, make_number (0)) \
|
22342
|
596 ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
|
21051
|
597 : (! NILP (Vdefault_file_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
598 && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
|
22342
|
599 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
|
21051
|
600 : name))
|
|
601
|
29310
|
602 /* Encode the string STR using the specified coding system
|
60657
|
603 for system functions, if any. */
|
29310
|
604 #define ENCODE_SYSTEM(str) \
|
41323
|
605 (! NILP (Vlocale_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
606 && !EQ (Vlocale_coding_system, make_number (0)) \
|
41323
|
607 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
|
29310
|
608 : str)
|
|
609
|
|
610 /* Decode the string STR using the specified coding system
|
60657
|
611 for system functions, if any. */
|
66463
|
612 #define DECODE_SYSTEM(str) \
|
41323
|
613 (! NILP (Vlocale_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
614 && !EQ (Vlocale_coding_system, make_number (0)) \
|
41323
|
615 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
|
29310
|
616 : str)
|
39574
|
617
|
51407
|
618 #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
|
|
619
|
17052
|
620 /* Extern declarations. */
|
46549
|
621 extern int decode_coding P_ ((struct coding_system *, const unsigned char *,
|
20717
|
622 unsigned char *, int, int));
|
46549
|
623 extern int encode_coding P_ ((struct coding_system *, const unsigned char *,
|
20717
|
624 unsigned char *, int, int));
|
26846
|
625 extern void coding_save_composition P_ ((struct coding_system *, int, int,
|
|
626 Lisp_Object));
|
|
627 extern void coding_free_composition_data P_ ((struct coding_system *));
|
|
628 extern void coding_adjust_composition_offset P_ ((struct coding_system *,
|
|
629 int));
|
29274
|
630 extern void coding_allocate_composition_data P_ ((struct coding_system *,
|
|
631 int));
|
|
632 extern void coding_restore_composition P_ ((struct coding_system *,
|
|
633 Lisp_Object));
|
21515
|
634 extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
|
|
635 int, int));
|
43486
|
636 extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object,
|
|
637 struct coding_system *,
|
|
638 int));
|
64251
|
639 extern void run_pre_write_conversin_on_c_str P_ ((unsigned char **, int *,
|
58636
|
640 int, int,
|
|
641 struct coding_system *));
|
|
642
|
20308
|
643 extern int decoding_buffer_size P_ ((struct coding_system *, int));
|
|
644 extern int encoding_buffer_size P_ ((struct coding_system *, int));
|
46549
|
645 extern void detect_coding P_ ((struct coding_system *, const unsigned char *,
|
|
646 int));
|
|
647 extern void detect_eol P_ ((struct coding_system *, const unsigned char *,
|
|
648 int));
|
20308
|
649 extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
|
28512
|
650 extern Lisp_Object code_convert_string P_ ((Lisp_Object,
|
|
651 struct coding_system *, int, int));
|
34152
|
652 extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object,
|
|
653 Lisp_Object, int));
|
29438
|
654 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
|
|
655 int));
|
22615
|
656 extern void setup_raw_text_coding_system P_ ((struct coding_system *));
|
29717
|
657 extern Lisp_Object encode_coding_string P_ ((Lisp_Object,
|
|
658 struct coding_system *, int));
|
30680
|
659 extern Lisp_Object decode_coding_string P_ ((Lisp_Object,
|
|
660 struct coding_system *, int));
|
17052
|
661 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
|
22620
|
662 extern Lisp_Object Qraw_text, Qemacs_mule;
|
17052
|
663 extern Lisp_Object Qbuffer_file_coding_system;
|
|
664 extern Lisp_Object Vcoding_category_list;
|
51407
|
665 extern Lisp_Object Qutf_8;
|
17052
|
666
|
22186
|
667 extern Lisp_Object Qtranslation_table;
|
|
668 extern Lisp_Object Qtranslation_table_id;
|
22118
|
669
|
24201
|
670 /* Mnemonic strings to indicate each type of end-of-line. */
|
|
671 extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
|
|
672 /* Mnemonic string to indicate type of end-of-line is not yet decided. */
|
|
673 extern Lisp_Object eol_mnemonic_undecided;
|
17052
|
674
|
70519
|
675 /* Format of end-of-line decided by system. */
|
|
676 extern int system_eol_type;
|
|
677
|
17052
|
678 #ifdef emacs
|
|
679 extern Lisp_Object Qfile_coding_system;
|
64251
|
680 extern Lisp_Object Qcall_process, Qcall_process_region;
|
17052
|
681 extern Lisp_Object Qstart_process, Qopen_network_stream;
|
34107
|
682 extern Lisp_Object Qwrite_region;
|
17052
|
683
|
26088
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
684 extern char *emacs_strerror P_ ((int));
|
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
685
|
17052
|
686 /* Coding-system for reading files and receiving data from process. */
|
|
687 extern Lisp_Object Vcoding_system_for_read;
|
|
688 /* Coding-system for writing files and sending data to process. */
|
|
689 extern Lisp_Object Vcoding_system_for_write;
|
|
690 /* Coding-system actually used in the latest I/O. */
|
|
691 extern Lisp_Object Vlast_coding_system_used;
|
26088
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
692 /* Coding-system to use with system messages (e.g. strerror). */
|
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
693 extern Lisp_Object Vlocale_coding_system;
|
17052
|
694
|
21573
|
695 /* If non-zero, process buffer inherits the coding system used to decode
|
|
696 the subprocess output. */
|
|
697 extern int inherit_process_coding_system;
|
|
698
|
19279
|
699 /* Coding system to be used to encode text for terminal display when
|
|
700 terminal coding system is nil. */
|
|
701 extern struct coding_system safe_terminal_coding;
|
|
702
|
22978
|
703 /* Default coding system to be used to write a file. */
|
|
704 extern struct coding_system default_buffer_file_coding;
|
|
705
|
18181
|
706 /* Default coding systems used for process I/O. */
|
|
707 extern Lisp_Object Vdefault_process_coding_system;
|
17052
|
708
|
36088
|
709 /* Function to call to force a user to select a proper coding
|
20717
|
710 system. */
|
|
711 extern Lisp_Object Vselect_safe_coding_system_function;
|
|
712
|
48875
|
713 /* If nonzero, on writing a file, Vselect_safe_coding_system_function
|
|
714 is called even if Vcoding_system_for_write is non-nil. */
|
|
715 extern int coding_system_require_warning;
|
|
716
|
21901
|
717 /* Coding system for file names, or nil if none. */
|
|
718 extern Lisp_Object Vfile_name_coding_system;
|
|
719
|
|
720 /* Coding system for file names used only when
|
|
721 Vfile_name_coding_system is nil. */
|
|
722 extern Lisp_Object Vdefault_file_name_coding_system;
|
29310
|
723
|
17052
|
724 #endif
|
|
725
|
34107
|
726 /* Error signaled when there's a problem with detecting coding system */
|
|
727 extern Lisp_Object Qcoding_system_error;
|
|
728
|
29571
|
729 #endif /* EMACS_CODING_H */
|
52401
|
730
|
|
731 /* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
|
|
732 (do not change this comment) */
|