Mercurial > emacs
view src/coding.h @ 88499:75801a503812
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 14 May 2002 08:01:50 +0000 |
parents | 1dd66ce3fc9c |
children | d429f57602b9 |
line wrap: on
line source
/* Header for coding system handler. Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. Licensed to the Free Software Foundation. Copyright (C) 2001, 2002 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H13PRO009 This file is part of GNU Emacs. GNU Emacs is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Emacs; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #ifndef EMACS_CODING_H #define EMACS_CODING_H /* Index to arguments of Fdefine_coding_system_internal. */ enum define_coding_system_arg_index { coding_arg_name, coding_arg_mnemonic, coding_arg_coding_type, coding_arg_charset_list, coding_arg_ascii_compatible_p, coding_arg_decode_translation_table, coding_arg_encode_translation_table, coding_arg_post_read_conversion, coding_arg_pre_write_conversion, coding_arg_default_char, coding_arg_plist, coding_arg_eol_type, coding_arg_max }; enum define_coding_iso2022_arg_index { coding_arg_iso2022_initial = coding_arg_max, coding_arg_iso2022_reg_usage, coding_arg_iso2022_request, coding_arg_iso2022_flags, coding_arg_iso2022_max }; enum define_coding_utf16_arg_index { coding_arg_utf16_bom = coding_arg_max, coding_arg_utf16_endian, coding_arg_utf16_max }; enum define_coding_ccl_arg_index { coding_arg_ccl_decoder, coding_arg_ccl_encoder, coding_arg_ccl_valids, coding_arg_ccl_max }; extern Lisp_Object Vcoding_system_hash_table; /* Enumeration of coding system type. */ enum coding_system_type { coding_type_charset, coding_type_utf_8, coding_type_utf_16, coding_type_iso_2022, coding_type_emacs_mule, coding_type_sjis, coding_type_ccl, coding_type_raw_text, coding_type_undecided, coding_type_max }; /* Enumeration of end-of-line format type. */ enum end_of_line_type { eol_lf, /* Line-feed only, same as Emacs' internal format. */ eol_crlf, /* Sequence of carriage-return and line-feed. */ eol_cr, /* Carriage-return only. */ eol_any, /* Accept any of above. Produce line-feed only. */ eol_undecided, /* This value is used to denote that the eol-type is not yet undecided. */ eol_type_max }; /* Enumeration of index to an attribute vector of a coding system. */ enum coding_attr_index { coding_attr_base_name, coding_attr_docstring, coding_attr_mnemonic, coding_attr_type, coding_attr_charset_list, coding_attr_ascii_compat, coding_attr_decode_tbl, coding_attr_encode_tbl, coding_attr_post_read, coding_attr_pre_write, coding_attr_default_char, coding_attr_plist, coding_attr_category, coding_attr_safe_charsets, /* The followings are extra attributes for each type. */ coding_attr_charset_valids, coding_attr_ccl_decoder, coding_attr_ccl_encoder, coding_attr_ccl_valids, coding_attr_iso_initial, coding_attr_iso_usage, coding_attr_iso_request, coding_attr_iso_flags, coding_attr_utf_16_bom, coding_attr_utf_16_endian, coding_attr_emacs_mule_full, coding_attr_last_index }; #define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name) #define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type) #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list) #define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic) #define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring) #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat) #define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl) #define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl) #define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read) #define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write) #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char) #define CODING_ATTR_DIRECTION(attrs) AREF (attrs, coding_attr_direction) #define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing) #define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist) #define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category) #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets) #define CODING_ID_ATTRS(id) \ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0)) #define CODING_ID_ALIASES(id) \ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1)) #define CODING_ID_EOL_TYPE(id) \ (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2)) #define CODING_ID_NAME(id) \ (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id)) #define CODING_SYSTEM_SPEC(coding_system_symbol) \ (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil)) #define CODING_SYSTEM_ID(coding_system_symbol) \ hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \ coding_system_symbol, NULL) #define CODING_SYSTEM_P(coding_system_symbol) \ (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol))) #define CHECK_CODING_SYSTEM(x) \ do { \ if (!CODING_SYSTEM_P (x)) \ x = wrong_type_argument (Qcoding_system_p, (x)); \ } while (0) #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \ do { \ spec = CODING_SYSTEM_SPEC (x); \ if (NILP (spec)) \ x = wrong_type_argument (Qcoding_system_p, (x)); \ } while (0) #define CHECK_CODING_SYSTEM_GET_ID(x, id) \ do \ { \ id = CODING_SYSTEM_ID (x); \ if (id < 0) \ x = wrong_type_argument (Qcoding_system_p, (x)); \ } while (0) /*** GENERAL section ***/ /* Enumeration of result code of code conversion. */ enum coding_result_code { CODING_RESULT_SUCCESS, CODING_RESULT_INSUFFICIENT_SRC, CODING_RESULT_INSUFFICIENT_DST, CODING_RESULT_INCONSISTENT_EOL, CODING_RESULT_INSUFFICIENT_CMP, CODING_RESULT_INTERRUPT, CODING_RESULT_INSUFFICIENT_MEM }; /* Macros used for the member `mode' of the struct coding_system. */ /* If set, recover the original CR or LF of the already decoded text when the decoding routine encounters an inconsistent eol format. */ #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01 /* If set, the decoding/encoding routines treat the current data as the last block of the whole text to be converted, and do appropriate fisishing job. */ #define CODING_MODE_LAST_BLOCK 0x02 /* If set, it means that the current source text is in a buffer which enables selective display. */ #define CODING_MODE_SELECTIVE_DISPLAY 0x04 /* This flag is used by the decoding/encoding routines on the fly. If set, it means that right-to-left text is being processed. */ #define CODING_MODE_DIRECTION 0x08 #define CODING_MODE_FIXED_DESTINATION 0x10 #define CODING_MODE_SAFE_ENCODING 0x20 /* Structure of the field `spec.iso_2022' in the structure `coding_system'. */ struct iso_2022_spec { /* */ unsigned flags; /* The current graphic register invoked to each graphic plane. */ int current_invocation[2]; /* The current charset designated to each graphic register. The value -1 means that not charset is designated, -2 means that there was an invalid designation previously. */ int current_designation[4]; /* Set to 1 temporarily only when graphic register 2 or 3 is invoked by single-shift while encoding. */ int single_shifting; /* Set to 1 temporarily only when processing at beginning of line. */ int bol; }; struct ccl_spec; enum utf_16_bom_type { utf_16_detect_bom, utf_16_without_bom, utf_16_with_bom }; enum utf_16_endian_type { utf_16_big_endian, utf_16_little_endian }; struct utf_16_spec { enum utf_16_bom_type bom; enum utf_16_endian_type endian; int surrogate; }; struct coding_system { /* ID number of the coding system. This is an index to Vcoding_system_hash_table. This value is set by setup_coding_system. At the early stage of building time, this value is -1 in the array coding_categories to indicate that no coding-system of that category is yet defined. */ int id; /* Flag bits of the coding system. The meaning of each bit is common to all types of coding systems. */ int common_flags; /* Mode bits of the coding system. See the comments of the macros CODING_MODE_XXX. */ unsigned int mode; /* Detailed information specific to each type of coding system. */ union { struct iso_2022_spec iso_2022; struct ccl_spec *ccl; /* Defined in ccl.h. */ struct utf_16_spec utf_16; int emacs_mule_full_support; } spec; int max_charset_id; char *safe_charsets; /* The following two members specify how binary 8-bit code 128..255 are represented in source and destination text respectively. 1 means they are represented by 2-byte sequence, 0 means they are represented by 1-byte as is (see the comment in character.h). */ unsigned src_multibyte : 1; unsigned dst_multibyte : 1; /* How may heading bytes we can skip for decoding. This is set to -1 in setup_coding_system, and updated by detect_coding. So, when this is equal to the byte length of the text being converted, we can skip the actual conversion process. */ int head_ascii; /* The following members are set by encoding/decoding routine. */ EMACS_INT produced, produced_char, consumed, consumed_char; /* Number of error source data found in a decoding routine. */ int errors; /* Store the positions of error source data. */ EMACS_INT *error_positions; /* Finish status of code conversion. */ enum coding_result_code result; /* The following members are all Lisp symbols. We don't have to protect them from GC because the current garbage collection doesn't relocate Lisp symbols. But, when it is changed, we must find a way to protect them. */ EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes; Lisp_Object src_object; unsigned char *source; EMACS_INT dst_pos, dst_pos_byte, dst_bytes; Lisp_Object dst_object; unsigned char *destination; int chars_at_source; /* If an element is non-negative, it is a character code. If it is in the range -128..-1, it is a 8-bit character code minus 256. If it is less than -128, it specifies the start of an annotation chunk. The length of the chunk is -128 minus the value of the element. The following elements are OFFSET, ANNOTATION-TYPE, and a sequence of actual data for the annotation. OFFSET is a character position offset from dst_pos or src_pos, ANNOTATION-TYPE specfies the meaning of the annotation and how to handle the following data.. */ int *charbuf; int charbuf_size, charbuf_used; /* Set to 1 if charbuf contains an annotation. */ int annotated; unsigned char carryover[64]; int carryover_bytes; int default_char; int (*detector) P_ ((struct coding_system *, int *)); void (*decoder) P_ ((struct coding_system *)); int (*encoder) P_ ((struct coding_system *)); }; /* Meanings of bits in the member `common_flags' of the structure coding_system. The lowest 8 bits are reserved for various kind of annotations (currently two of them are used). */ #define CODING_ANNOTATION_MASK 0x00FF #define CODING_ANNOTATE_COMPOSITION_MASK 0x0001 #define CODING_ANNOTATE_DIRECTION_MASK 0x0002 #define CODING_FOR_UNIBYTE_MASK 0x0100 #define CODING_REQUIRE_FLUSHING_MASK 0x0200 #define CODING_REQUIRE_DECODING_MASK 0x0400 #define CODING_REQUIRE_ENCODING_MASK 0x0800 #define CODING_REQUIRE_DETECTION_MASK 0x1000 #define CODING_RESET_AT_BOL_MASK 0x2000 /* Return 1 if the coding context CODING requires annotaion handling. */ #define CODING_REQUIRE_ANNOTATION(coding) \ ((coding)->common_flags & CODING_ANNOTATION_MASK) /* Return 1 if the coding context CODING prefers decoding into unibyte. */ #define CODING_FOR_UNIBYTE(coding) \ ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) /* Return 1 if the coding context CODING requires specific code to be attached at the tail of converted text. */ #define CODING_REQUIRE_FLUSHING(coding) \ ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) /* Return 1 if the coding context CODING requires code conversion on decoding. */ #define CODING_REQUIRE_DECODING(coding) \ ((coding)->dst_multibyte \ || (coding)->common_flags & CODING_REQUIRE_DECODING_MASK) /* Return 1 if the coding context CODING requires code conversion on encoding. */ #define CODING_REQUIRE_ENCODING(coding) \ ((coding)->src_multibyte \ || (coding)->common_flags & CODING_REQUIRE_ENCODING_MASK \ || (coding)->mode & CODING_MODE_SELECTIVE_DISPLAY) /* Return 1 if the coding context CODING requires some kind of code detection. */ #define CODING_REQUIRE_DETECTION(coding) \ ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK) /* Return 1 if the coding context CODING requires code conversion on decoding or some kind of code detection. */ #define CODING_MAY_REQUIRE_DECODING(coding) \ (CODING_REQUIRE_DECODING (coding) \ || CODING_REQUIRE_DETECTION (coding)) /* Macros to decode or encode a character of JISX0208 in SJIS. S1 and S2 are the 1st and 2nd position-codes of JISX0208 in SJIS coding system. C1 and C2 are the 1st and 2nd position codes of Emacs' internal format. */ #define SJIS_TO_JIS(code) \ do { \ int s1, s2, j1, j2; \ \ s1 = (code) >> 8, s2 = (code) & 0xFF; \ \ if (s2 >= 0x9F) \ (j1 = s1 * 2 - (s1 >= 0xE0 ? 0x160 : 0xE0), \ j2 = s2 - 0x7E); \ else \ (j1 = s1 * 2 - ((s1 >= 0xE0) ? 0x161 : 0xE1), \ j2 = s2 - ((s2 >= 0x7F) ? 0x20 : 0x1F)); \ (code) = (j1 << 8) | j2; \ } while (0) #define JIS_TO_SJIS(code) \ do { \ int s1, s2, j1, j2; \ \ j1 = (code) >> 8, j2 = (code) & 0xFF; \ if (j1 & 1) \ (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x71 : 0xB1), \ s2 = j2 + ((j2 >= 0x60) ? 0x20 : 0x1F)); \ else \ (s1 = j1 / 2 + ((j1 < 0x5F) ? 0x70 : 0xB0), \ s2 = j2 + 0x7E); \ (code) = (s1 << 8) | s2; \ } while (0) /* Encode the file name NAME using the specified coding system for file names, if any. */ #define ENCODE_FILE(name) \ (! NILP (Vfile_name_coding_system) \ && XFASTINT (Vfile_name_coding_system) != 0 \ ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \ : (! NILP (Vdefault_file_name_coding_system) \ && XFASTINT (Vdefault_file_name_coding_system) != 0 \ ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \ : name)) /* Decode the file name NAME using the specified coding system for file names, if any. */ #define DECODE_FILE(name) \ (! NILP (Vfile_name_coding_system) \ && XFASTINT (Vfile_name_coding_system) != 0 \ ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \ : (! NILP (Vdefault_file_name_coding_system) \ && XFASTINT (Vdefault_file_name_coding_system) != 0 \ ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \ : name)) #ifdef WINDOWSNT /* Encode the string STR using the specified coding system for w32 system functions, if any. */ #define ENCODE_SYSTEM(str) \ (! NILP (Vlocale_coding_system) \ && XFASTINT (Vlocale_coding_system) != 0 \ ? code_convert_string_norecord (str, Vlocale_coding_system, 1) \ : str) /* Decode the string STR using the specified coding system for w32 system functions, if any. */ #define DECODE_SYSTEM(name) \ (! NILP (Vlocale_coding_system) \ && XFASTINT (Vlocale_coding_system) != 0 \ ? code_convert_string_norecord (str, Vlocale_coding_system, 0) \ : str) #else /* WINDOWSNT */ #define ENCODE_SYSTEM(str) string_make_unibyte(str) #define DECODE_SYSTEM(name) name #endif /* !WINDOWSNT */ /* Extern declarations. */ extern Lisp_Object make_conversion_work_buffer P_ ((int)); extern Lisp_Object code_conversion_restore P_ ((Lisp_Object)); extern int decoding_buffer_size P_ ((struct coding_system *, int)); extern int encoding_buffer_size P_ ((struct coding_system *, int)); extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *)); extern void detect_coding P_ ((struct coding_system *)); extern Lisp_Object code_convert_region P_ ((EMACS_INT, EMACS_INT, Lisp_Object, Lisp_Object, int, int)); extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object, Lisp_Object, int, int, int)); extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, int)); extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object)); extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object)); extern int decode_coding_gap P_ ((struct coding_system *, EMACS_INT, EMACS_INT)); extern int encode_coding_gap P_ ((struct coding_system *, EMACS_INT, EMACS_INT)); extern void decode_coding_object P_ ((struct coding_system *, Lisp_Object, EMACS_INT, EMACS_INT, EMACS_INT, EMACS_INT, Lisp_Object)); extern void encode_coding_object P_ ((struct coding_system *, Lisp_Object, EMACS_INT, EMACS_INT, EMACS_INT, EMACS_INT, Lisp_Object)); #define decode_coding_region(coding, from, to) \ decode_coding_object (coding, Fcurrent_buffer (), \ from, CHAR_TO_BYTE (from), \ to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) #define encode_coding_region(coding, from, to) \ encode_coding_object (coding, Fcurrent_buffer (), \ from, CHAR_TO_BYTE (from), \ to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) #define decode_coding_string(coding, string, nocopy) \ decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ STRING_BYTES (XSTRING (string)), Qt) #define encode_coding_string(coding, string, nocopy) \ (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ STRING_BYTES (XSTRING (string)), Qt), \ (coding)->dst_object) #define decode_coding_c_string(coding, src, bytes, dst_object) \ do { \ (coding)->source = (src); \ (coding)->src_chars = (coding)->src_bytes = (bytes); \ decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \ (dst_object)); \ } while (0) extern Lisp_Object preferred_coding_system P_ (()); extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; extern Lisp_Object Qcoding_system_p; extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided; extern Lisp_Object Qiso_2022; extern Lisp_Object Qbuffer_file_coding_system; extern Lisp_Object Qunix, Qdos, Qmac; extern Lisp_Object Qtranslation_table; extern Lisp_Object Qtranslation_table_id; /* Mnemonic strings to indicate each type of end-of-line. */ extern Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; /* Mnemonic string to indicate type of end-of-line is not yet decided. */ extern Lisp_Object eol_mnemonic_undecided; #ifdef emacs extern Lisp_Object Qfile_coding_system; extern Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; extern Lisp_Object Qstart_process, Qopen_network_stream; extern Lisp_Object Qwrite_region; extern char *emacs_strerror P_ ((int)); /* Coding-system for reading files and receiving data from process. */ extern Lisp_Object Vcoding_system_for_read; /* Coding-system for writing files and sending data to process. */ extern Lisp_Object Vcoding_system_for_write; /* Coding-system actually used in the latest I/O. */ extern Lisp_Object Vlast_coding_system_used; /* Coding-system to use with system messages (e.g. strerror). */ extern Lisp_Object Vlocale_coding_system; /* If non-zero, process buffer inherits the coding system used to decode the subprocess output. */ extern int inherit_process_coding_system; /* Coding-system to be used for encoding terminal output. This structure contains information of a coding-system specified by the function `set-terminal-coding-system'. */ extern struct coding_system terminal_coding; /* Coding system to be used to encode text for terminal display when terminal coding system is nil. */ extern struct coding_system safe_terminal_coding; /* Coding-system of what is sent from terminal keyboard. This structure contains information of a coding-system specified by the function `set-keyboard-coding-system'. */ extern struct coding_system keyboard_coding; /* Default coding systems used for process I/O. */ extern Lisp_Object Vdefault_process_coding_system; /* Function to call to force a user to force select a propert coding system. */ extern Lisp_Object Vselect_safe_coding_system_function; /* Coding system for file names, or nil if none. */ extern Lisp_Object Vfile_name_coding_system; /* Coding system for file names used only when Vfile_name_coding_system is nil. */ extern Lisp_Object Vdefault_file_name_coding_system; #endif /* Error signaled when there's a problem with detecting coding system */ extern Lisp_Object Qcoding_system_error; extern char emacs_mule_bytes[256]; extern int emacs_mule_string_char P_ ((unsigned char *)); #endif /* EMACS_CODING_H */