17052
|
1 /* Header for coding system handler.
|
68651
|
2 Copyright (C) 2002, 2003, 2004, 2005,
|
|
3 2006 Free Software Foundation, Inc.
|
67658
|
4 Copyright (C) 1995, 1997, 1998, 2000
|
|
5 National Institute of Advanced Industrial Science and Technology (AIST)
|
|
6 Registration Number H14PRO021
|
17052
|
7
|
17071
|
8 This file is part of GNU Emacs.
|
|
9
|
|
10 GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 it under the terms of the GNU General Public License as published by
|
|
12 the Free Software Foundation; either version 2, or (at your option)
|
|
13 any later version.
|
17052
|
14
|
17071
|
15 GNU Emacs is distributed in the hope that it will be useful,
|
|
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 GNU General Public License for more details.
|
17052
|
19
|
17071
|
20 You should have received a copy of the GNU General Public License
|
|
21 along with GNU Emacs; see the file COPYING. If not, write to
|
64084
|
22 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
23 Boston, MA 02110-1301, USA. */
|
17052
|
24
|
29571
|
25 #ifndef EMACS_CODING_H
|
|
26 #define EMACS_CODING_H
|
17052
|
27
|
25378
|
28 #include "ccl.h"
|
17052
|
29
|
17835
|
30 /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/
|
17052
|
31
|
|
32 /* All code (1-byte) of Emacs' internal format is classified into one
|
|
33 of the following. See also `charset.h'. */
|
|
34 enum emacs_code_class_type
|
|
35 {
|
|
36 EMACS_control_code, /* Control codes in the range
|
|
37 0x00..0x1F and 0x7F except for the
|
|
38 following two codes. */
|
|
39 EMACS_linefeed_code, /* 0x0A (linefeed) to denote
|
|
40 end-of-line. */
|
|
41 EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used
|
|
42 in selective display mode. */
|
|
43 EMACS_ascii_code, /* ASCII characters. */
|
|
44 EMACS_leading_code_2, /* Base leading code of official
|
|
45 TYPE9N character. */
|
|
46 EMACS_leading_code_3, /* Base leading code of private TYPE9N
|
|
47 or official TYPE9Nx9N character. */
|
|
48 EMACS_leading_code_4, /* Base leading code of private
|
|
49 TYPE9Nx9N character. */
|
|
50 EMACS_invalid_code /* Invalid code, i.e. a base leading
|
|
51 code not yet assigned to any
|
|
52 charset, or a code of the range
|
|
53 0xA0..0xFF. */
|
|
54 };
|
|
55
|
|
56 extern enum emacs_code_class_type emacs_code_class[256];
|
|
57
|
|
58 /*** ISO2022 section ***/
|
|
59
|
|
60 /* Macros to define code of control characters for ISO2022's functions. */
|
|
61 /* code */ /* function */
|
|
62 #define ISO_CODE_LF 0x0A /* line-feed */
|
|
63 #define ISO_CODE_CR 0x0D /* carriage-return */
|
|
64 #define ISO_CODE_SO 0x0E /* shift-out */
|
|
65 #define ISO_CODE_SI 0x0F /* shift-in */
|
|
66 #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */
|
|
67 #define ISO_CODE_ESC 0x1B /* escape */
|
|
68 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
|
|
69 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
|
|
70 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
|
|
71
|
|
72 /* All code (1-byte) of ISO2022 is classified into one of the
|
|
73 followings. */
|
|
74 enum iso_code_class_type
|
|
75 {
|
29006
|
76 ISO_control_0, /* Control codes in the range
|
|
77 0x00..0x1F and 0x7F, except for the
|
|
78 following 5 codes. */
|
17052
|
79 ISO_carriage_return, /* ISO_CODE_CR (0x0D) */
|
|
80 ISO_shift_out, /* ISO_CODE_SO (0x0E) */
|
|
81 ISO_shift_in, /* ISO_CODE_SI (0x0F) */
|
|
82 ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */
|
|
83 ISO_escape, /* ISO_CODE_ESC (0x1B) */
|
29006
|
84 ISO_control_1, /* Control codes in the range
|
|
85 0x80..0x9F, except for the
|
|
86 following 3 codes. */
|
17052
|
87 ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */
|
|
88 ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */
|
|
89 ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */
|
|
90 ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */
|
|
91 ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */
|
|
92 ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */
|
|
93 ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */
|
|
94 };
|
|
95
|
|
96 /** Each macro CODING_FLAG_ISO_XXX defines a flag bit of the `flags'
|
|
97 element in the structure `coding_system'. This information is used
|
|
98 while encoding a text to ISO2022. **/
|
|
99
|
|
100 /* If set, produce short-form designation sequence (e.g. ESC $ A)
|
|
101 instead of long-form sequence (e.g. ESC $ ( A). */
|
|
102 #define CODING_FLAG_ISO_SHORT_FORM 0x0001
|
|
103
|
|
104 /* If set, reset graphic planes and registers at end-of-line to the
|
|
105 initial state. */
|
|
106 #define CODING_FLAG_ISO_RESET_AT_EOL 0x0002
|
|
107
|
|
108 /* If set, reset graphic planes and registers before any control
|
|
109 characters to the initial state. */
|
|
110 #define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004
|
|
111
|
|
112 /* If set, encode by 7-bit environment. */
|
|
113 #define CODING_FLAG_ISO_SEVEN_BITS 0x0008
|
|
114
|
|
115 /* If set, use locking-shift function. */
|
|
116 #define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010
|
|
117
|
|
118 /* If set, use single-shift function. Overrides
|
|
119 CODING_FLAG_ISO_LOCKING_SHIFT. */
|
|
120 #define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020
|
|
121
|
|
122 /* If set, designate JISX0201-Roman instead of ASCII. */
|
|
123 #define CODING_FLAG_ISO_USE_ROMAN 0x0040
|
|
124
|
|
125 /* If set, designate JISX0208-1978 instead of JISX0208-1983. */
|
|
126 #define CODING_FLAG_ISO_USE_OLDJIS 0x0080
|
|
127
|
|
128 /* If set, do not produce ISO6429's direction specifying sequence. */
|
|
129 #define CODING_FLAG_ISO_NO_DIRECTION 0x0100
|
|
130
|
17118
|
131 /* If set, assume designation states are reset at beginning of line on
|
|
132 output. */
|
|
133 #define CODING_FLAG_ISO_INIT_AT_BOL 0x0200
|
|
134
|
|
135 /* If set, designation sequence should be placed at beginning of line
|
|
136 on output. */
|
|
137 #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400
|
|
138
|
36088
|
139 /* If set, do not encode unsafe characters on output. */
|
19279
|
140 #define CODING_FLAG_ISO_SAFE 0x0800
|
|
141
|
19364
|
142 /* If set, extra latin codes (128..159) are accepted as a valid code
|
|
143 on input. */
|
|
144 #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
|
|
145
|
20717
|
146 /* If set, use designation escape sequence. */
|
|
147 #define CODING_FLAG_ISO_DESIGNATION 0x10000
|
|
148
|
19279
|
149 /* A character to be produced on output if encoding of the original
|
51139
|
150 character is inhibited by CODING_MODE_INHIBIT_UNENCODABLE_CHAR.
|
|
151 It must be an ASCII character. */
|
|
152 #define CODING_REPLACEMENT_CHARACTER '?'
|
19279
|
153
|
17052
|
154 /* Structure of the field `spec.iso2022' in the structure `coding_system'. */
|
|
155 struct iso2022_spec
|
|
156 {
|
|
157 /* The current graphic register invoked to each graphic plane. */
|
|
158 int current_invocation[2];
|
|
159
|
|
160 /* The current charset designated to each graphic register. */
|
|
161 int current_designation[4];
|
|
162
|
|
163 /* A charset initially designated to each graphic register. */
|
|
164 int initial_designation[4];
|
|
165
|
20717
|
166 /* If not -1, it is a graphic register specified in an invalid
|
|
167 designation sequence. */
|
|
168 int last_invalid_designation_register;
|
|
169
|
17052
|
170 /* A graphic register to which each charset should be designated. */
|
18001
|
171 unsigned char requested_designation[MAX_CHARSET + 1];
|
17052
|
172
|
20149
|
173 /* A revision number to be specified for each charset on encoding.
|
|
174 The value 255 means no revision number for the corresponding
|
|
175 charset. */
|
|
176 unsigned char charset_revision_number[MAX_CHARSET + 1];
|
19284
|
177
|
17052
|
178 /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
|
|
179 by single-shift while encoding. */
|
|
180 int single_shifting;
|
17118
|
181
|
|
182 /* Set to 1 temporarily only when processing at beginning of line. */
|
|
183 int bol;
|
17052
|
184 };
|
|
185
|
|
186 /* Macros to access each field in the structure `spec.iso2022'. */
|
|
187 #define CODING_SPEC_ISO_INVOCATION(coding, plane) \
|
20717
|
188 (coding)->spec.iso2022.current_invocation[plane]
|
17052
|
189 #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
|
20717
|
190 (coding)->spec.iso2022.current_designation[reg]
|
17052
|
191 #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
|
20717
|
192 (coding)->spec.iso2022.initial_designation[reg]
|
17052
|
193 #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
|
20717
|
194 (coding)->spec.iso2022.requested_designation[charset]
|
20149
|
195 #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
|
20717
|
196 (coding)->spec.iso2022.charset_revision_number[charset]
|
17052
|
197 #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
|
20717
|
198 (coding)->spec.iso2022.single_shifting
|
17118
|
199 #define CODING_SPEC_ISO_BOL(coding) \
|
20717
|
200 (coding)->spec.iso2022.bol
|
17052
|
201
|
18001
|
202 /* A value which may appear in
|
|
203 coding->spec.iso2022.requested_designation indicating that the
|
|
204 corresponding charset does not request any graphic register to be
|
|
205 designated. */
|
|
206 #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4
|
|
207
|
17052
|
208 /* Return a charset which is currently designated to the graphic plane
|
|
209 PLANE in the coding-system CODING. */
|
17724
|
210 #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \
|
|
211 ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \
|
|
212 ? -1 \
|
|
213 : CODING_SPEC_ISO_DESIGNATION (coding, \
|
|
214 CODING_SPEC_ISO_INVOCATION (coding, plane)))
|
17052
|
215
|
|
216 /*** BIG5 section ***/
|
|
217
|
|
218 /* Macros to denote each type of BIG5 coding system. */
|
|
219 #define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of
|
|
220 BIG5 developed by Hong Kong
|
|
221 University. */
|
|
222 #define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants
|
|
223 of BIG5 developed by the
|
|
224 company ETen in Taiwan. */
|
|
225
|
|
226 /*** GENERAL section ***/
|
|
227
|
|
228 /* Types of coding system. */
|
|
229 enum coding_type
|
|
230 {
|
|
231 coding_type_no_conversion, /* A coding system which requires no
|
|
232 conversion for reading and writing
|
|
233 including end-of-line format. */
|
17835
|
234 coding_type_emacs_mule, /* A coding system used in Emacs'
|
17052
|
235 buffer and string. Requires no
|
|
236 conversion for reading and writing
|
|
237 except for end-of-line format. */
|
17835
|
238 coding_type_undecided, /* A coding system which requires
|
17052
|
239 automatic detection of a real
|
|
240 coding system. */
|
|
241 coding_type_sjis, /* SJIS coding system for Japanese. */
|
|
242 coding_type_iso2022, /* Any coding system of ISO2022
|
|
243 variants. */
|
|
244 coding_type_big5, /* BIG5 coding system for Chinese. */
|
19611
|
245 coding_type_ccl, /* The coding system of which decoder
|
17052
|
246 and encoder are written in CCL. */
|
19611
|
247 coding_type_raw_text /* A coding system for a text
|
36088
|
248 containing random 8-bit code which
|
19611
|
249 does not require code conversion
|
|
250 except for end-of-line format. */
|
17052
|
251 };
|
|
252
|
|
253 /* Formats of end-of-line. */
|
|
254 #define CODING_EOL_LF 0 /* Line-feed only, same as Emacs'
|
|
255 internal format. */
|
|
256 #define CODING_EOL_CRLF 1 /* Sequence of carriage-return and
|
|
257 line-feed. */
|
|
258 #define CODING_EOL_CR 2 /* Carriage-return only. */
|
17835
|
259 #define CODING_EOL_UNDECIDED 3 /* This value is used to denote the
|
17052
|
260 eol-type is not yet decided. */
|
19611
|
261 #define CODING_EOL_INCONSISTENT 4 /* This value is used to denote the
|
|
262 eol-type is not consistent
|
|
263 through the file. */
|
17052
|
264
|
26846
|
265 /* 1 iff composing. */
|
|
266 #define COMPOSING_P(coding) ((int) coding->composing > (int) COMPOSITION_NO)
|
|
267
|
|
268 #define COMPOSITION_DATA_SIZE 4080
|
|
269 #define COMPOSITION_DATA_MAX_BUNCH_LENGTH (4 + MAX_COMPOSITION_COMPONENTS*2)
|
|
270
|
|
271 /* Data structure to hold information about compositions of text that
|
|
272 is being decoded or encoded. ISO 2022 based code conversion routines
|
|
273 handle special ESC sequences for composition specification. But,
|
|
274 they can't get/put such information directly from/to a buffer in
|
|
275 the deepest place. So, they store or retrieve the information
|
|
276 through this structure.
|
|
277
|
|
278 The decoder stores the information in this structure when it meets
|
|
279 ESC sequences for composition while decoding codes, then, after all
|
|
280 text codes are decoded, puts `composition' properties on the text
|
36088
|
281 by referring to the structure.
|
26846
|
282
|
|
283 The encoder at first stores the information of a text to be
|
|
284 encoded, then, while encoding codes, generates ESC sequences for
|
36088
|
285 composition at proper places by referring to the structure. */
|
17052
|
286
|
26846
|
287 struct composition_data
|
|
288 {
|
|
289 /* The character position of the first character to be encoded or
|
|
290 decoded. START and END (see below) are relative to this
|
|
291 position. */
|
|
292 int char_offset;
|
|
293
|
|
294 /* The composition data. These elements are repeated for each
|
|
295 composition:
|
|
296 LENGTH START END METHOD [ COMPONENT ... ]
|
|
297 where,
|
|
298 LENGTH is the number of elements for this composition.
|
|
299
|
|
300 START and END are starting and ending character positions of
|
|
301 the composition relative to `char_offset'.
|
|
302
|
36088
|
303 METHOD is one of `enum composing_status' specifying the way of
|
26846
|
304 composition.
|
|
305
|
|
306 COMPONENT is a character or an encoded composition rule. */
|
|
307 int data[COMPOSITION_DATA_SIZE];
|
|
308
|
|
309 /* The number of elements in `data' currently used. */
|
|
310 int used;
|
|
311
|
|
312 /* Pointers to the previous and next structures. When `data' is
|
|
313 filled up, another structure is allocated and linked in `next'.
|
36088
|
314 The new structure has backward link to this structure in `prev'.
|
|
315 The number of chained structures depends on how many compositions
|
26846
|
316 the text being encoded or decoded contains. */
|
|
317 struct composition_data *prev, *next;
|
|
318 };
|
17052
|
319
|
30832
|
320 /* Macros used for the member `result' of the struct
|
20717
|
321 coding_system. */
|
|
322 #define CODING_FINISH_NORMAL 0
|
|
323 #define CODING_FINISH_INSUFFICIENT_SRC 1
|
|
324 #define CODING_FINISH_INSUFFICIENT_DST 2
|
|
325 #define CODING_FINISH_INCONSISTENT_EOL 3
|
26846
|
326 #define CODING_FINISH_INSUFFICIENT_CMP 4
|
|
327 #define CODING_FINISH_INTERRUPT 5
|
20717
|
328
|
21031
|
329 /* Macros used for the member `mode' of the struct coding_system. */
|
20717
|
330
|
|
331 /* If set, recover the original CR or LF of the already decoded text
|
|
332 when the decoding routine encounters an inconsistent eol format. */
|
|
333 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
|
|
334
|
|
335 /* If set, the decoding/encoding routines treat the current data as
|
|
336 the last block of the whole text to be converted, and do
|
36088
|
337 appropriate finishing job. */
|
20717
|
338 #define CODING_MODE_LAST_BLOCK 0x02
|
|
339
|
|
340 /* If set, it means that the current source text is in a buffer which
|
|
341 enables selective display. */
|
|
342 #define CODING_MODE_SELECTIVE_DISPLAY 0x04
|
|
343
|
51139
|
344 /* If set, replace unencodable characters by `?' on encoding. */
|
|
345 #define CODING_MODE_INHIBIT_UNENCODABLE_CHAR 0x08
|
|
346
|
20717
|
347 /* This flag is used by the decoding/encoding routines on the fly. If
|
|
348 set, it means that right-to-left text is being processed. */
|
51139
|
349 #define CODING_MODE_DIRECTION 0x10
|
20717
|
350
|
17052
|
351 struct coding_system
|
|
352 {
|
|
353 /* Type of the coding system. */
|
|
354 enum coding_type type;
|
|
355
|
20717
|
356 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
|
|
357 int eol_type;
|
|
358
|
20226
|
359 /* Flag bits of the coding system. The meaning of each bit is common
|
20717
|
360 to all types of coding systems. */
|
20226
|
361 unsigned int common_flags;
|
17052
|
362
|
|
363 /* Flag bits of the coding system. The meaning of each bit depends
|
|
364 on the type of the coding system. */
|
|
365 unsigned int flags;
|
|
366
|
20717
|
367 /* Mode bits of the coding system. See the comments of the macros
|
|
368 CODING_MODE_XXX. */
|
|
369 unsigned int mode;
|
17052
|
370
|
26846
|
371 /* The current status of composition handling. */
|
17052
|
372 int composing;
|
|
373
|
26846
|
374 /* 1 iff the next character is a composition rule. */
|
|
375 int composition_rule_follows;
|
|
376
|
|
377 /* Information about compositions is stored here on decoding and set
|
|
378 in advance on encoding. */
|
|
379 struct composition_data *cmp_data;
|
|
380
|
|
381 /* Index to cmp_data->data for the first element for the current
|
|
382 composition. */
|
|
383 int cmp_data_start;
|
|
384
|
|
385 /* Index to cmp_data->data for the current element for the current
|
|
386 composition. */
|
|
387 int cmp_data_index;
|
23324
|
388
|
17052
|
389 /* Detailed information specific to each type of coding system. */
|
|
390 union spec
|
|
391 {
|
|
392 struct iso2022_spec iso2022;
|
|
393 struct ccl_spec ccl; /* Defined in ccl.h. */
|
|
394 } spec;
|
|
395
|
20717
|
396 /* Index number of coding category of the coding system. */
|
|
397 int category_idx;
|
|
398
|
29106
|
399 /* The following two members specify how characters 128..159 are
|
|
400 represented in source and destination text respectively. 1 means
|
|
401 they are represented by 2-byte sequence, 0 means they are
|
|
402 represented by 1-byte as is (see the comment in charset.h). */
|
29006
|
403 unsigned src_multibyte : 1;
|
|
404 unsigned dst_multibyte : 1;
|
|
405
|
21320
|
406 /* How many heading bytes we can skip for decoding. This is set to
|
|
407 -1 in setup_coding_system, and updated by detect_coding. So,
|
|
408 when this is equal to the byte length of the text being
|
|
409 converted, we can skip the actual conversion process. */
|
20717
|
410 int heading_ascii;
|
|
411
|
|
412 /* The following members are set by encoding/decoding routine. */
|
|
413 int produced, produced_char, consumed, consumed_char;
|
|
414
|
29006
|
415 /* Number of error source data found in a decoding routine. */
|
|
416 int errors;
|
|
417
|
|
418 /* Finish status of code conversion. It should be one of macros
|
|
419 CODING_FINISH_XXXX. */
|
|
420 int result;
|
20930
|
421
|
35530
|
422 /* If nonzero, suppress error notification. */
|
|
423 int suppress_error;
|
|
424
|
20717
|
425 /* The following members are all Lisp symbols. We don't have to
|
|
426 protect them from GC because the current garbage collection
|
|
427 doesn't relocate Lisp symbols. But, when it is changed, we must
|
|
428 find a way to protect them. */
|
|
429
|
17052
|
430 /* Backward pointer to the Lisp symbol of the coding system. */
|
|
431 Lisp_Object symbol;
|
|
432
|
|
433 /* Lisp function (symbol) to be called after decoding to do
|
20717
|
434 additional conversion, or nil. */
|
17052
|
435 Lisp_Object post_read_conversion;
|
|
436
|
|
437 /* Lisp function (symbol) to be called before encoding to do
|
20717
|
438 additional conversion, or nil. */
|
17052
|
439 Lisp_Object pre_write_conversion;
|
|
440
|
22118
|
441 /* Character translation tables to look up, or nil. */
|
22186
|
442 Lisp_Object translation_table_for_decode;
|
|
443 Lisp_Object translation_table_for_encode;
|
17052
|
444 };
|
|
445
|
70703
|
446 /* Mask bits for (struct coding_system *)->common_flags. */
|
|
447 #define CODING_REQUIRE_FLUSHING_MASK 0x01
|
|
448 #define CODING_REQUIRE_DECODING_MASK 0x02
|
|
449 #define CODING_REQUIRE_ENCODING_MASK 0x04
|
|
450 #define CODING_REQUIRE_DETECTION_MASK 0x08
|
|
451 #define CODING_ASCII_INCOMPATIBLE_MASK 0x10
|
20226
|
452
|
|
453 /* Return 1 if the coding system CODING requires specific code to be
|
|
454 attached at the tail of converted text. */
|
|
455 #define CODING_REQUIRE_FLUSHING(coding) \
|
|
456 ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)
|
17052
|
457
|
20226
|
458 /* Return 1 if the coding system CODING requires code conversion on
|
|
459 decoding. */
|
|
460 #define CODING_REQUIRE_DECODING(coding) \
|
29006
|
461 ((coding)->dst_multibyte \
|
|
462 || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK)
|
20226
|
463
|
|
464 /* Return 1 if the coding system CODING requires code conversion on
|
70783
|
465 encoding.
|
|
466 The non-multibyte part of the condition is to support encoding of
|
|
467 unibyte strings/buffers generated by string-as-unibyte or
|
|
468 (set-buffer-multibyte nil) from multibyte strings/buffers. */
|
20226
|
469 #define CODING_REQUIRE_ENCODING(coding) \
|
29006
|
470 ((coding)->src_multibyte \
|
|
471 || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK)
|
20226
|
472
|
|
473 /* Return 1 if the coding system CODING requires some kind of code
|
|
474 detection. */
|
|
475 #define CODING_REQUIRE_DETECTION(coding) \
|
|
476 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
|
|
477
|
29006
|
478 /* Return 1 if the coding system CODING requires code conversion on
|
|
479 decoding or some kind of code detection. */
|
20717
|
480 #define CODING_MAY_REQUIRE_DECODING(coding) \
|
29006
|
481 (CODING_REQUIRE_DECODING (coding) \
|
|
482 || CODING_REQUIRE_DETECTION (coding))
|
17052
|
483
|
|
484 /* Index for each coding category in `coding_category_table' */
|
17835
|
485 #define CODING_CATEGORY_IDX_EMACS_MULE 0
|
17052
|
486 #define CODING_CATEGORY_IDX_SJIS 1
|
|
487 #define CODING_CATEGORY_IDX_ISO_7 2
|
20717
|
488 #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
|
|
489 #define CODING_CATEGORY_IDX_ISO_8_1 4
|
|
490 #define CODING_CATEGORY_IDX_ISO_8_2 5
|
|
491 #define CODING_CATEGORY_IDX_ISO_7_ELSE 6
|
|
492 #define CODING_CATEGORY_IDX_ISO_8_ELSE 7
|
22873
|
493 #define CODING_CATEGORY_IDX_CCL 8
|
|
494 #define CODING_CATEGORY_IDX_BIG5 9
|
28021
|
495 #define CODING_CATEGORY_IDX_UTF_8 10
|
|
496 #define CODING_CATEGORY_IDX_UTF_16_BE 11
|
|
497 #define CODING_CATEGORY_IDX_UTF_16_LE 12
|
|
498 #define CODING_CATEGORY_IDX_RAW_TEXT 13
|
|
499 #define CODING_CATEGORY_IDX_BINARY 14
|
|
500 #define CODING_CATEGORY_IDX_MAX 15
|
17052
|
501
|
|
502 /* Definitions of flag bits returned by the function
|
|
503 detect_coding_mask (). */
|
17835
|
504 #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
|
17052
|
505 #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
|
|
506 #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
|
20717
|
507 #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
|
17052
|
508 #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
|
|
509 #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
|
18786
|
510 #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
|
|
511 #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
|
22873
|
512 #define CODING_CATEGORY_MASK_CCL (1 << CODING_CATEGORY_IDX_CCL)
|
19689
|
513 #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
|
28021
|
514 #define CODING_CATEGORY_MASK_UTF_8 (1 << CODING_CATEGORY_IDX_UTF_8)
|
|
515 #define CODING_CATEGORY_MASK_UTF_16_BE (1 << CODING_CATEGORY_IDX_UTF_16_BE)
|
|
516 #define CODING_CATEGORY_MASK_UTF_16_LE (1 << CODING_CATEGORY_IDX_UTF_16_LE)
|
19611
|
517 #define CODING_CATEGORY_MASK_RAW_TEXT (1 << CODING_CATEGORY_IDX_RAW_TEXT)
|
19096
|
518 #define CODING_CATEGORY_MASK_BINARY (1 << CODING_CATEGORY_IDX_BINARY)
|
17052
|
519
|
|
520 /* This value is returned if detect_coding_mask () finds nothing other
|
|
521 than ASCII characters. */
|
|
522 #define CODING_CATEGORY_MASK_ANY \
|
17835
|
523 ( CODING_CATEGORY_MASK_EMACS_MULE \
|
17052
|
524 | CODING_CATEGORY_MASK_SJIS \
|
|
525 | CODING_CATEGORY_MASK_ISO_7 \
|
20717
|
526 | CODING_CATEGORY_MASK_ISO_7_TIGHT \
|
17052
|
527 | CODING_CATEGORY_MASK_ISO_8_1 \
|
|
528 | CODING_CATEGORY_MASK_ISO_8_2 \
|
18786
|
529 | CODING_CATEGORY_MASK_ISO_7_ELSE \
|
|
530 | CODING_CATEGORY_MASK_ISO_8_ELSE \
|
22873
|
531 | CODING_CATEGORY_MASK_CCL \
|
28021
|
532 | CODING_CATEGORY_MASK_BIG5 \
|
|
533 | CODING_CATEGORY_MASK_UTF_8 \
|
|
534 | CODING_CATEGORY_MASK_UTF_16_BE \
|
|
535 | CODING_CATEGORY_MASK_UTF_16_LE)
|
17052
|
536
|
20717
|
537 #define CODING_CATEGORY_MASK_ISO_7BIT \
|
|
538 (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
|
|
539
|
|
540 #define CODING_CATEGORY_MASK_ISO_8BIT \
|
|
541 (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
|
|
542
|
|
543 #define CODING_CATEGORY_MASK_ISO_SHIFT \
|
|
544 (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
|
|
545
|
|
546 #define CODING_CATEGORY_MASK_ISO \
|
|
547 ( CODING_CATEGORY_MASK_ISO_7BIT \
|
|
548 | CODING_CATEGORY_MASK_ISO_SHIFT \
|
|
549 | CODING_CATEGORY_MASK_ISO_8BIT)
|
|
550
|
28021
|
551 #define CODING_CATEGORY_MASK_UTF_16_BE_LE \
|
|
552 (CODING_CATEGORY_MASK_UTF_16_BE | CODING_CATEGORY_MASK_UTF_16_LE)
|
|
553
|
17052
|
554 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
|
|
555 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
|
|
556 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
|
|
557 internal format. */
|
|
558
|
|
559 #define DECODE_SJIS(s1, s2, c1, c2) \
|
|
560 do { \
|
|
561 if (s2 >= 0x9F) \
|
|
562 c1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \
|
|
563 c2 = s2 - 0x7E; \
|
|
564 else \
|
|
565 c1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \
|
|
566 c2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F); \
|
|
567 } while (0)
|
|
568
|
|
569 #define ENCODE_SJIS(c1, c2, s1, s2) \
|
|
570 do { \
|
|
571 if (c1 & 1) \
|
|
572 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x71 : 0xB1), \
|
|
573 s2 = c2 + ((c2 >= 0x60) ? 0x20 : 0x1F); \
|
|
574 else \
|
|
575 s1 = c1 / 2 + ((c1 < 0x5F) ? 0x70 : 0xB0), \
|
|
576 s2 = c2 + 0x7E; \
|
|
577 } while (0)
|
|
578
|
21051
|
579 /* Encode the file name NAME using the specified coding system
|
|
580 for file names, if any. */
|
|
581 #define ENCODE_FILE(name) \
|
|
582 (! NILP (Vfile_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
583 && !EQ (Vfile_name_coding_system, make_number (0)) \
|
22342
|
584 ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
|
21051
|
585 : (! NILP (Vdefault_file_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
586 && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
|
22342
|
587 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
|
21051
|
588 : name))
|
|
589
|
|
590 /* Decode the file name NAME using the specified coding system
|
|
591 for file names, if any. */
|
|
592 #define DECODE_FILE(name) \
|
|
593 (! NILP (Vfile_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
594 && !EQ (Vfile_name_coding_system, make_number (0)) \
|
22342
|
595 ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
|
21051
|
596 : (! NILP (Vdefault_file_name_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
597 && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
|
22342
|
598 ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
|
21051
|
599 : name))
|
|
600
|
29310
|
601 /* Encode the string STR using the specified coding system
|
60657
|
602 for system functions, if any. */
|
29310
|
603 #define ENCODE_SYSTEM(str) \
|
41323
|
604 (! NILP (Vlocale_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
605 && !EQ (Vlocale_coding_system, make_number (0)) \
|
41323
|
606 ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \
|
29310
|
607 : str)
|
|
608
|
|
609 /* Decode the string STR using the specified coding system
|
60657
|
610 for system functions, if any. */
|
66463
|
611 #define DECODE_SYSTEM(str) \
|
41323
|
612 (! NILP (Vlocale_coding_system) \
|
58453
04580c50dd1c
(ENCODE_FILE, DECODE_FILE, ENCODE_SYSTEM, DECODE_SYSTEM):
Stefan Monnier <monnier@iro.umontreal.ca>
diff
changeset
|
613 && !EQ (Vlocale_coding_system, make_number (0)) \
|
41323
|
614 ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \
|
29310
|
615 : str)
|
39574
|
616
|
51407
|
617 #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1)
|
|
618
|
17052
|
619 /* Extern declarations. */
|
46549
|
620 extern int decode_coding P_ ((struct coding_system *, const unsigned char *,
|
20717
|
621 unsigned char *, int, int));
|
46549
|
622 extern int encode_coding P_ ((struct coding_system *, const unsigned char *,
|
20717
|
623 unsigned char *, int, int));
|
26846
|
624 extern void coding_save_composition P_ ((struct coding_system *, int, int,
|
|
625 Lisp_Object));
|
|
626 extern void coding_free_composition_data P_ ((struct coding_system *));
|
|
627 extern void coding_adjust_composition_offset P_ ((struct coding_system *,
|
|
628 int));
|
29274
|
629 extern void coding_allocate_composition_data P_ ((struct coding_system *,
|
|
630 int));
|
|
631 extern void coding_restore_composition P_ ((struct coding_system *,
|
|
632 Lisp_Object));
|
21515
|
633 extern int code_convert_region P_ ((int, int, int, int, struct coding_system *,
|
|
634 int, int));
|
43486
|
635 extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object,
|
|
636 struct coding_system *,
|
|
637 int));
|
64251
|
638 extern void run_pre_write_conversin_on_c_str P_ ((unsigned char **, int *,
|
58636
|
639 int, int,
|
|
640 struct coding_system *));
|
|
641
|
20308
|
642 extern int decoding_buffer_size P_ ((struct coding_system *, int));
|
|
643 extern int encoding_buffer_size P_ ((struct coding_system *, int));
|
46549
|
644 extern void detect_coding P_ ((struct coding_system *, const unsigned char *,
|
|
645 int));
|
|
646 extern void detect_eol P_ ((struct coding_system *, const unsigned char *,
|
|
647 int));
|
20308
|
648 extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
|
28512
|
649 extern Lisp_Object code_convert_string P_ ((Lisp_Object,
|
|
650 struct coding_system *, int, int));
|
34152
|
651 extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object,
|
|
652 Lisp_Object, int));
|
29438
|
653 extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object,
|
|
654 int));
|
22615
|
655 extern void setup_raw_text_coding_system P_ ((struct coding_system *));
|
29717
|
656 extern Lisp_Object encode_coding_string P_ ((Lisp_Object,
|
|
657 struct coding_system *, int));
|
30680
|
658 extern Lisp_Object decode_coding_string P_ ((Lisp_Object,
|
|
659 struct coding_system *, int));
|
17052
|
660 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
|
22620
|
661 extern Lisp_Object Qraw_text, Qemacs_mule;
|
17052
|
662 extern Lisp_Object Qbuffer_file_coding_system;
|
|
663 extern Lisp_Object Vcoding_category_list;
|
51407
|
664 extern Lisp_Object Qutf_8;
|
17052
|
665
|
22186
|
666 extern Lisp_Object Qtranslation_table;
|
|
667 extern Lisp_Object Qtranslation_table_id;
|
22118
|
668
|
24201
|
669 /* Mnemonic strings to indicate each type of end-of-line. */
|
|
670 extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
|
|
671 /* Mnemonic string to indicate type of end-of-line is not yet decided. */
|
|
672 extern Lisp_Object eol_mnemonic_undecided;
|
17052
|
673
|
70519
|
674 /* Format of end-of-line decided by system. */
|
|
675 extern int system_eol_type;
|
|
676
|
17052
|
677 #ifdef emacs
|
|
678 extern Lisp_Object Qfile_coding_system;
|
64251
|
679 extern Lisp_Object Qcall_process, Qcall_process_region;
|
17052
|
680 extern Lisp_Object Qstart_process, Qopen_network_stream;
|
34107
|
681 extern Lisp_Object Qwrite_region;
|
17052
|
682
|
26088
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
683 extern char *emacs_strerror P_ ((int));
|
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
684
|
17052
|
685 /* Coding-system for reading files and receiving data from process. */
|
|
686 extern Lisp_Object Vcoding_system_for_read;
|
|
687 /* Coding-system for writing files and sending data to process. */
|
|
688 extern Lisp_Object Vcoding_system_for_write;
|
|
689 /* Coding-system actually used in the latest I/O. */
|
|
690 extern Lisp_Object Vlast_coding_system_used;
|
26088
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
691 /* Coding-system to use with system messages (e.g. strerror). */
|
b7aa6ac26872
Add support for large files, 64-bit Solaris, system locale codings.
Paul Eggert <eggert@twinsun.com>
diff
changeset
|
692 extern Lisp_Object Vlocale_coding_system;
|
17052
|
693
|
21573
|
694 /* If non-zero, process buffer inherits the coding system used to decode
|
|
695 the subprocess output. */
|
|
696 extern int inherit_process_coding_system;
|
|
697
|
17052
|
698 /* Coding-system to be used for encoding terminal output. This
|
|
699 structure contains information of a coding-system specified by the
|
|
700 function `set-terminal-coding-system'. */
|
|
701 extern struct coding_system terminal_coding;
|
|
702
|
19279
|
703 /* Coding system to be used to encode text for terminal display when
|
|
704 terminal coding system is nil. */
|
|
705 extern struct coding_system safe_terminal_coding;
|
|
706
|
17052
|
707 /* Coding-system of what is sent from terminal keyboard. This
|
|
708 structure contains information of a coding-system specified by the
|
|
709 function `set-keyboard-coding-system'. */
|
|
710 extern struct coding_system keyboard_coding;
|
|
711
|
22978
|
712 /* Default coding system to be used to write a file. */
|
|
713 extern struct coding_system default_buffer_file_coding;
|
|
714
|
18181
|
715 /* Default coding systems used for process I/O. */
|
|
716 extern Lisp_Object Vdefault_process_coding_system;
|
17052
|
717
|
36088
|
718 /* Function to call to force a user to select a proper coding
|
20717
|
719 system. */
|
|
720 extern Lisp_Object Vselect_safe_coding_system_function;
|
|
721
|
48875
|
722 /* If nonzero, on writing a file, Vselect_safe_coding_system_function
|
|
723 is called even if Vcoding_system_for_write is non-nil. */
|
|
724 extern int coding_system_require_warning;
|
|
725
|
21901
|
726 /* Coding system for file names, or nil if none. */
|
|
727 extern Lisp_Object Vfile_name_coding_system;
|
|
728
|
|
729 /* Coding system for file names used only when
|
|
730 Vfile_name_coding_system is nil. */
|
|
731 extern Lisp_Object Vdefault_file_name_coding_system;
|
29310
|
732
|
17052
|
733 #endif
|
|
734
|
34107
|
735 /* Error signaled when there's a problem with detecting coding system */
|
|
736 extern Lisp_Object Qcoding_system_error;
|
|
737
|
29571
|
738 #endif /* EMACS_CODING_H */
|
52401
|
739
|
|
740 /* arch-tag: 2bc3b4fa-6870-4f64-8135-b962b2d290e4
|
|
741 (do not change this comment) */
|