comparison src/coding.h @ 20717:19463997fbc6

(CODING_FLAG_ISO_DESIGNATION): New macro. (struct iso2022_spec): New member last_invalid_designation_register. (struct coding_system): New member inhibit_inconsistent_eol_type. (CODING_CATEGORY_IDX_ISO_7_TIGHT, CODING_CATEGORY_MASK_ISO_7_TIGHT): New macros. (CODING_CATEGORY_MASK_ANY): Include CODING_CATEGORY_MASK_ISO_7_TIGHT. (coding_category_table, coding_category_name): Extern deleted. (Vselect_safe_coding_system_function): Extern it. (CODING_FINISH_XXX): New macros. (CODING_MODE_XXX): New macros. (struct coding_system): New members mode, category_idx, heading_ascii. Deleted members last_block, direction, selective, carryover, carryover_size. (CODING_MAY_REQUIRE_DECODING): New macro. (CODING_CATEGORY_MASK_ISO_7BIT, CODING_CATEGORY_MASK_ISO_8BIT, CODING_CATEGORY_MASK_ISO_SHIFT, CODING_CATEGORY_MASK_ISO): New macros. (Qraw_text): Extern it.
author Kenichi Handa <handa@m17n.org>
date Thu, 22 Jan 1998 01:26:45 +0000
parents 8d520e3dcb86
children 1331679fe704
comparison
equal deleted inserted replaced
20716:e915d0141ec7 20717:19463997fbc6
140 140
141 /* If set, extra latin codes (128..159) are accepted as a valid code 141 /* If set, extra latin codes (128..159) are accepted as a valid code
142 on input. */ 142 on input. */
143 #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000 143 #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000
144 144
145 /* If set, use designation escape sequence. */
146 #define CODING_FLAG_ISO_DESIGNATION 0x10000
147
145 /* A character to be produced on output if encoding of the original 148 /* A character to be produced on output if encoding of the original
146 character is prohibited by CODING_FLAG_ISO_SAFE. */ 149 character is prohibited by CODING_FLAG_ISO_SAFE. */
147 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION 077 /* 077 == `?' */ 150 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION 077 /* 077 == `?' */
148 151
149 /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ 152 /* Structure of the field `spec.iso2022' in the structure `coding_system'. */
156 int current_designation[4]; 159 int current_designation[4];
157 160
158 /* A charset initially designated to each graphic register. */ 161 /* A charset initially designated to each graphic register. */
159 int initial_designation[4]; 162 int initial_designation[4];
160 163
164 /* If not -1, it is a graphic register specified in an invalid
165 designation sequence. */
166 int last_invalid_designation_register;
167
161 /* A graphic register to which each charset should be designated. */ 168 /* A graphic register to which each charset should be designated. */
162 unsigned char requested_designation[MAX_CHARSET + 1]; 169 unsigned char requested_designation[MAX_CHARSET + 1];
163 170
164 /* A revision number to be specified for each charset on encoding. 171 /* A revision number to be specified for each charset on encoding.
165 The value 255 means no revision number for the corresponding 172 The value 255 means no revision number for the corresponding
174 int bol; 181 int bol;
175 }; 182 };
176 183
177 /* Macros to access each field in the structure `spec.iso2022'. */ 184 /* Macros to access each field in the structure `spec.iso2022'. */
178 #define CODING_SPEC_ISO_INVOCATION(coding, plane) \ 185 #define CODING_SPEC_ISO_INVOCATION(coding, plane) \
179 coding->spec.iso2022.current_invocation[plane] 186 (coding)->spec.iso2022.current_invocation[plane]
180 #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \ 187 #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \
181 coding->spec.iso2022.current_designation[reg] 188 (coding)->spec.iso2022.current_designation[reg]
182 #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \ 189 #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \
183 coding->spec.iso2022.initial_designation[reg] 190 (coding)->spec.iso2022.initial_designation[reg]
184 #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \ 191 #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \
185 coding->spec.iso2022.requested_designation[charset] 192 (coding)->spec.iso2022.requested_designation[charset]
186 #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \ 193 #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \
187 coding->spec.iso2022.charset_revision_number[charset] 194 (coding)->spec.iso2022.charset_revision_number[charset]
188 #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \ 195 #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \
189 coding->spec.iso2022.single_shifting 196 (coding)->spec.iso2022.single_shifting
190 #define CODING_SPEC_ISO_BOL(coding) \ 197 #define CODING_SPEC_ISO_BOL(coding) \
191 coding->spec.iso2022.bol 198 (coding)->spec.iso2022.bol
192 199
193 /* A value which may appear in 200 /* A value which may appear in
194 coding->spec.iso2022.requested_designation indicating that the 201 coding->spec.iso2022.requested_designation indicating that the
195 corresponding charset does not request any graphic register to be 202 corresponding charset does not request any graphic register to be
196 designated. */ 203 designated. */
267 #define COMPOSING_HEAD_P(composing) \ 274 #define COMPOSING_HEAD_P(composing) \
268 ((composing) && (composing) <= COMPOSING_NO_RULE_HEAD) 275 ((composing) && (composing) <= COMPOSING_NO_RULE_HEAD)
269 /* 1 iff composing with embedded composition rule. */ 276 /* 1 iff composing with embedded composition rule. */
270 #define COMPOSING_WITH_RULE_P(composing) ((composing) & 1) 277 #define COMPOSING_WITH_RULE_P(composing) ((composing) & 1)
271 278
279 /* Macros used for the member finish_status of the struct
280 coding_system. */
281 #define CODING_FINISH_NORMAL 0
282 #define CODING_FINISH_INSUFFICIENT_SRC 1
283 #define CODING_FINISH_INSUFFICIENT_DST 2
284 #define CODING_FINISH_INCONSISTENT_EOL 3
285
286 /* Macros used for the member mode of the struct coding_system. */
287
288 /* If set, recover the original CR or LF of the already decoded text
289 when the decoding routine encounters an inconsistent eol format. */
290 #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01
291
292 /* If set, the decoding/encoding routines treat the current data as
293 the last block of the whole text to be converted, and do
294 appropriate finishing job. */
295 #define CODING_MODE_LAST_BLOCK 0x02
296
297 /* If set, it means that the current source text is in a buffer which
298 enables selective display. */
299 #define CODING_MODE_SELECTIVE_DISPLAY 0x04
300
301 /* This flag is used by the decoding/encoding routines on the fly. If
302 set, it means that right-to-left text is being processed. */
303 #define CODING_MODE_DIRECTION 0x08
304
272 struct coding_system 305 struct coding_system
273 { 306 {
274 /* Type of the coding system. */ 307 /* Type of the coding system. */
275 enum coding_type type; 308 enum coding_type type;
276 309
310 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */
311 int eol_type;
312
277 /* Flag bits of the coding system. The meaning of each bit is common 313 /* Flag bits of the coding system. The meaning of each bit is common
278 to any type of coding systems. */ 314 to all types of coding systems. */
279 unsigned int common_flags; 315 unsigned int common_flags;
280 316
281 /* Flag bits of the coding system. The meaning of each bit depends 317 /* Flag bits of the coding system. The meaning of each bit depends
282 on the type of the coding system. */ 318 on the type of the coding system. */
283 unsigned int flags; 319 unsigned int flags;
284 320
285 /* Type of end-of-line format (LF, CRLF, or CR) of the coding system. */ 321 /* Mode bits of the coding system. See the comments of the macros
286 int eol_type; 322 CODING_MODE_XXX. */
323 unsigned int mode;
287 324
288 /* Table of safe character sets for this coding system. If the Nth 325 /* Table of safe character sets for this coding system. If the Nth
289 element is 0, the charset of ID N is not an safe character set. 326 element is 0, the charset of ID N is not a safe character set.
290 Such a character set is not encoded when CODING_ISO_FLAG_SAFE is 327 Such a character set is not encoded when CODING_ISO_FLAG_SAFE is
291 set. */ 328 set. */
292 unsigned char safe_charsets[MAX_CHARSET + 1]; 329 unsigned char safe_charsets[MAX_CHARSET + 1];
293
294 /* Non-zero means that the current source text is the last block of the
295 whole text to be converted. */
296 int last_block;
297 330
298 /* Non-zero means that characters are being composed currently while 331 /* Non-zero means that characters are being composed currently while
299 decoding or encoding. See macros COMPOSING_XXXX above for the 332 decoding or encoding. See macros COMPOSING_XXXX above for the
300 meaning of each non-zero value. */ 333 meaning of each non-zero value. */
301 int composing; 334 int composing;
302
303 /* 0 (left-to-right) or 1 (right-to-left): the direction of the text
304 being processed currently. */
305 int direction;
306
307 /* Non-zero means that the current source text is in a buffer which
308 enables selective display. */
309 int selective;
310 335
311 /* Detailed information specific to each type of coding system. */ 336 /* Detailed information specific to each type of coding system. */
312 union spec 337 union spec
313 { 338 {
314 struct iso2022_spec iso2022; 339 struct iso2022_spec iso2022;
315 struct ccl_spec ccl; /* Defined in ccl.h. */ 340 struct ccl_spec ccl; /* Defined in ccl.h. */
316 } spec; 341 } spec;
317 342
343 /* Index number of coding category of the coding system. */
344 int category_idx;
345
346 /* How many heading bytes we can skip for decoding. This member is
347 set by the function detect_coding. The initial value is -1 which
348 means detect_coding has not yet been called. */
349 int heading_ascii;
350
351 /* The following members are set by encoding/decoding routine. */
352 int produced, produced_char, consumed, consumed_char;
353
354 /* The following members are all Lisp symbols. We don't have to
355 protect them from GC because the current garbage collection
356 doesn't relocate Lisp symbols. But, when it is changed, we must
357 find a way to protect them. */
358
318 /* Backward pointer to the Lisp symbol of the coding system. */ 359 /* Backward pointer to the Lisp symbol of the coding system. */
319 Lisp_Object symbol; 360 Lisp_Object symbol;
320 361
321 /* Lisp function (symbol) to be called after decoding to do 362 /* Lisp function (symbol) to be called after decoding to do
322 additional conversion. */ 363 additional conversion, or nil. */
323 Lisp_Object post_read_conversion; 364 Lisp_Object post_read_conversion;
324 365
325 /* Lisp function (symbol) to be called before encoding to do 366 /* Lisp function (symbol) to be called before encoding to do
326 additional conversion. */ 367 additional conversion, or nil. */
327 Lisp_Object pre_write_conversion; 368 Lisp_Object pre_write_conversion;
328 369
329 /* Character unification tables to look up, or nil. */ 370 /* Character unification tables to look up, or nil. */
330 Lisp_Object character_unification_table_for_decode; 371 Lisp_Object character_unification_table_for_decode;
331 Lisp_Object character_unification_table_for_encode; 372 Lisp_Object character_unification_table_for_encode;
332
333 /* Carryover yielded by decoding/encoding incomplete source. No
334 coding-system yields more than 7-byte of carryover. This does
335 not include a text which is not processed because of short of
336 output buffer. */
337 char carryover[8];
338
339 /* Actual data length in the above array. */
340 int carryover_size;
341 }; 373 };
342 374
343 #define CODING_REQUIRE_FLUSHING_MASK 1 375 #define CODING_REQUIRE_FLUSHING_MASK 1
344 #define CODING_REQUIRE_DECODING_MASK 2 376 #define CODING_REQUIRE_DECODING_MASK 2
345 #define CODING_REQUIRE_ENCODING_MASK 4 377 #define CODING_REQUIRE_ENCODING_MASK 4
363 /* Return 1 if the coding system CODING requires some kind of code 395 /* Return 1 if the coding system CODING requires some kind of code
364 detection. */ 396 detection. */
365 #define CODING_REQUIRE_DETECTION(coding) \ 397 #define CODING_REQUIRE_DETECTION(coding) \
366 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) 398 ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)
367 399
400 #define CODING_MAY_REQUIRE_DECODING(coding) \
401 ((coding)->common_flags \
402 & (CODING_REQUIRE_DETECTION_MASK | CODING_REQUIRE_DECODING_MASK))
368 403
369 /* Index for each coding category in `coding_category_table' */ 404 /* Index for each coding category in `coding_category_table' */
370 #define CODING_CATEGORY_IDX_EMACS_MULE 0 405 #define CODING_CATEGORY_IDX_EMACS_MULE 0
371 #define CODING_CATEGORY_IDX_SJIS 1 406 #define CODING_CATEGORY_IDX_SJIS 1
372 #define CODING_CATEGORY_IDX_ISO_7 2 407 #define CODING_CATEGORY_IDX_ISO_7 2
373 #define CODING_CATEGORY_IDX_ISO_8_1 3 408 #define CODING_CATEGORY_IDX_ISO_7_TIGHT 3
374 #define CODING_CATEGORY_IDX_ISO_8_2 4 409 #define CODING_CATEGORY_IDX_ISO_8_1 4
375 #define CODING_CATEGORY_IDX_ISO_7_ELSE 5 410 #define CODING_CATEGORY_IDX_ISO_8_2 5
376 #define CODING_CATEGORY_IDX_ISO_8_ELSE 6 411 #define CODING_CATEGORY_IDX_ISO_7_ELSE 6
377 #define CODING_CATEGORY_IDX_BIG5 7 412 #define CODING_CATEGORY_IDX_ISO_8_ELSE 7
378 #define CODING_CATEGORY_IDX_RAW_TEXT 8 413 #define CODING_CATEGORY_IDX_BIG5 8
379 #define CODING_CATEGORY_IDX_BINARY 9 414 #define CODING_CATEGORY_IDX_RAW_TEXT 9
380 #define CODING_CATEGORY_IDX_MAX 10 415 #define CODING_CATEGORY_IDX_BINARY 10
416 #define CODING_CATEGORY_IDX_MAX 11
381 417
382 /* Definitions of flag bits returned by the function 418 /* Definitions of flag bits returned by the function
383 detect_coding_mask (). */ 419 detect_coding_mask (). */
384 #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE) 420 #define CODING_CATEGORY_MASK_EMACS_MULE (1 << CODING_CATEGORY_IDX_EMACS_MULE)
385 #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS) 421 #define CODING_CATEGORY_MASK_SJIS (1 << CODING_CATEGORY_IDX_SJIS)
386 #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7) 422 #define CODING_CATEGORY_MASK_ISO_7 (1 << CODING_CATEGORY_IDX_ISO_7)
423 #define CODING_CATEGORY_MASK_ISO_7_TIGHT (1 << CODING_CATEGORY_IDX_ISO_7_TIGHT)
387 #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1) 424 #define CODING_CATEGORY_MASK_ISO_8_1 (1 << CODING_CATEGORY_IDX_ISO_8_1)
388 #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2) 425 #define CODING_CATEGORY_MASK_ISO_8_2 (1 << CODING_CATEGORY_IDX_ISO_8_2)
389 #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE) 426 #define CODING_CATEGORY_MASK_ISO_7_ELSE (1 << CODING_CATEGORY_IDX_ISO_7_ELSE)
390 #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE) 427 #define CODING_CATEGORY_MASK_ISO_8_ELSE (1 << CODING_CATEGORY_IDX_ISO_8_ELSE)
391 #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5) 428 #define CODING_CATEGORY_MASK_BIG5 (1 << CODING_CATEGORY_IDX_BIG5)
396 than ASCII characters. */ 433 than ASCII characters. */
397 #define CODING_CATEGORY_MASK_ANY \ 434 #define CODING_CATEGORY_MASK_ANY \
398 ( CODING_CATEGORY_MASK_EMACS_MULE \ 435 ( CODING_CATEGORY_MASK_EMACS_MULE \
399 | CODING_CATEGORY_MASK_SJIS \ 436 | CODING_CATEGORY_MASK_SJIS \
400 | CODING_CATEGORY_MASK_ISO_7 \ 437 | CODING_CATEGORY_MASK_ISO_7 \
438 | CODING_CATEGORY_MASK_ISO_7_TIGHT \
401 | CODING_CATEGORY_MASK_ISO_8_1 \ 439 | CODING_CATEGORY_MASK_ISO_8_1 \
402 | CODING_CATEGORY_MASK_ISO_8_2 \ 440 | CODING_CATEGORY_MASK_ISO_8_2 \
403 | CODING_CATEGORY_MASK_ISO_7_ELSE \ 441 | CODING_CATEGORY_MASK_ISO_7_ELSE \
404 | CODING_CATEGORY_MASK_ISO_8_ELSE \ 442 | CODING_CATEGORY_MASK_ISO_8_ELSE \
405 | CODING_CATEGORY_MASK_BIG5) 443 | CODING_CATEGORY_MASK_BIG5)
444
445 #define CODING_CATEGORY_MASK_ISO_7BIT \
446 (CODING_CATEGORY_MASK_ISO_7 | CODING_CATEGORY_MASK_ISO_7_TIGHT)
447
448 #define CODING_CATEGORY_MASK_ISO_8BIT \
449 (CODING_CATEGORY_MASK_ISO_8_1 | CODING_CATEGORY_MASK_ISO_8_2)
450
451 #define CODING_CATEGORY_MASK_ISO_SHIFT \
452 (CODING_CATEGORY_MASK_ISO_7_ELSE | CODING_CATEGORY_MASK_ISO_8_ELSE)
453
454 #define CODING_CATEGORY_MASK_ISO \
455 ( CODING_CATEGORY_MASK_ISO_7BIT \
456 | CODING_CATEGORY_MASK_ISO_SHIFT \
457 | CODING_CATEGORY_MASK_ISO_8BIT)
406 458
407 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and 459 /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and
408 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding 460 S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding
409 system. C1 and C2 are the 1st and 2nd position codes of Emacs' 461 system. C1 and C2 are the 1st and 2nd position codes of Emacs'
410 internal format. */ 462 internal format. */
429 s2 = c2 + 0x7E; \ 481 s2 = c2 + 0x7E; \
430 } while (0) 482 } while (0)
431 483
432 /* Extern declarations. */ 484 /* Extern declarations. */
433 extern int decode_coding P_ ((struct coding_system *, unsigned char *, 485 extern int decode_coding P_ ((struct coding_system *, unsigned char *,
434 unsigned char *, int, int, int *)); 486 unsigned char *, int, int));
435 extern int encode_coding P_ ((struct coding_system *, unsigned char *, 487 extern int encode_coding P_ ((struct coding_system *, unsigned char *,
436 unsigned char *, int, int, int *)); 488 unsigned char *, int, int));
437 extern int decoding_buffer_size P_ ((struct coding_system *, int)); 489 extern int decoding_buffer_size P_ ((struct coding_system *, int));
438 extern int encoding_buffer_size P_ ((struct coding_system *, int)); 490 extern int encoding_buffer_size P_ ((struct coding_system *, int));
439 extern void detect_coding P_ ((struct coding_system *, unsigned char *, int)); 491 extern void detect_coding P_ ((struct coding_system *, unsigned char *, int));
440 extern void detect_eol P_ ((struct coding_system *, unsigned char *, int)); 492 extern void detect_eol P_ ((struct coding_system *, unsigned char *, int));
441 extern int conversion_buffer_size; 493 extern int conversion_buffer_size;
442 extern char *conversion_buffer; 494 extern char *conversion_buffer;
443 extern char *get_conversion_buffer P_ ((int)); 495 extern char *get_conversion_buffer P_ ((int));
444 extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *)); 496 extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *));
445 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; 497 extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index;
446 extern Lisp_Object Qemacs_mule; 498 extern Lisp_Object Qraw_text;
447 extern Lisp_Object Qbuffer_file_coding_system; 499 extern Lisp_Object Qbuffer_file_coding_system;
448 extern Lisp_Object Vcoding_category_list; 500 extern Lisp_Object Vcoding_category_list;
449 501
450 /* Mnemonic character to indicate each type of end-of-line. */ 502 /* Mnemonic character to indicate each type of end-of-line. */
451 extern int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; 503 extern int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
452 /* Mnemonic character to indicate type of end-of-line is not yet decided. */ 504 /* Mnemonic character to indicate type of end-of-line is not yet decided. */
453 extern int eol_mnemonic_undecided; 505 extern int eol_mnemonic_undecided;
454
455 /* Table of coding-systems currently assigned to each coding-category. */
456 extern Lisp_Object coding_category_table[CODING_CATEGORY_IDX_MAX];
457 /* Table of names of symbol for each coding-category. */
458 extern char *coding_category_name[CODING_CATEGORY_IDX_MAX];
459 506
460 #ifdef emacs 507 #ifdef emacs
461 extern Lisp_Object Qfile_coding_system; 508 extern Lisp_Object Qfile_coding_system;
462 extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; 509 extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
463 extern Lisp_Object Qstart_process, Qopen_network_stream; 510 extern Lisp_Object Qstart_process, Qopen_network_stream;
484 extern struct coding_system keyboard_coding; 531 extern struct coding_system keyboard_coding;
485 532
486 /* Default coding systems used for process I/O. */ 533 /* Default coding systems used for process I/O. */
487 extern Lisp_Object Vdefault_process_coding_system; 534 extern Lisp_Object Vdefault_process_coding_system;
488 535
536 /* Function to call to force a user to select a proper coding
537 system. */
538 extern Lisp_Object Vselect_safe_coding_system_function;
539
489 #endif 540 #endif
490 541
491 #endif /* _CODING_H */ 542 #endif /* _CODING_H */