Mercurial > emacs
changeset 88413:418777d5ccd4
(char-valid-p): Make it an alias of characterp.
(define-charset): Fully re-designed.
(charset-quoted-standard-p): Deleted.
(charsetp): Moved to charset.c.
(charset-info, charset-id, charset-bytes, charset-width,
charset-directioin, charset-iso-graphic-plane,
charset-reverse-charset): Deleted.
(charset-dimension, charset-chars, charset-iso-final-char,
charset-description, charset-short-name, charset-long-name): Call
charset-plist instead of charset-info.
(charset-plist, set-charset-plist): Moved to charset.c.
(get-charset-property, put-charset-property): Moved from
mule-cmds.el. Call charset-plist and set-charset-plist.
(make-char): Deleted.
(generic-char-p): Make it always return nil.
(decode-char, encode-char): Moved to charset.c.
(coding-spec-XXX-idx): Variables deleted.
(coding-system-iso-2022-flags): New variable.
(define-coding-system): New function.
(transform-make-coding-system-args, make-coding-system): Deleted.
(set-coding-priority): Make it obsolete.
(after-insert-file-set-buffer-file-coding-system): Adjusted for
the new coding system structure.
(find-new-buffer-file-coding-system): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 01 Mar 2002 02:22:38 +0000 (2002-03-01) |
parents | 524f9b5b2ac5 |
children | fad0f879877f |
files | lisp/international/mule.el |
diffstat | 1 files changed, 492 insertions(+), 903 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/international/mule.el Fri Mar 01 02:21:53 2002 +0000 +++ b/lisp/international/mule.el Fri Mar 01 02:22:38 2002 +0000 @@ -3,6 +3,9 @@ ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. ;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2002 +;; National Institute of Advanced Industrial Science and Technology (AIST) +;; Registration Number H13PRO009 ;; Keywords: mule, multilingual, character set, coding system @@ -27,12 +30,165 @@ ;;; Code: -(defconst mule-version "5.0 (SAKAKI)" "\ +(defconst mule-version "7.0 (SAKAKI)" "\ Version number and name of this version of MULE (multilingual environment).") -(defconst mule-version-date "1999.12.7" "\ +(defconst mule-version-date "2002.2.28" "\ Distribution date of this version of MULE (multilingual environment).") + + +;;; CHARACTER +(defalias 'char-valid-p 'characterp) +(make-obsolete 'char-valid-p 'characterp "22.1") + + +;;; CHARSET + +(defun define-charset (name docstring &rest props) + "Define NAME (symbol) as a charset with DOCSTRING. +The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE +may be any symbol. The followings have special meanings, and one of +`:code-offset', `:map', `:parents' must be specified. + +`:short-name' + +VALUE must be a short string to identify the charset. If omitted, +NAME is used. + +`:long-name' + +VALUE must be a string longer than `:short-name' to identify the +charset. If omitted, the value of `:short-name' attribute is used. + +`:dimension' + +VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of +code-points of the charsets. If omitted, it is calculated from a +value of `:code-space' attribute. + +`:code-space' + +VALUE must be a vector of length at most 8 specifying the byte code +range of each dimension in this format: + [ MIN-1 MAX-1 MIN-2 MAX-2 ... ] +where, MIN-N is the minimum byte value of Nth dimension of code-point, +MAX-N is the maximum byte value of that. + +`:iso-final-char' + +VALUE must be a character in the range 32 to 127 (inclusive) +specifying the final char of the charset for ISO-2022 encoding. If +omitted, the charset can't be encoded by ISO-2022 based +coding-systems. + +`:iso-revision-number' + +VALUE must be an integer in the range 0..63, specifying the revision +number of the charset for ISO-2022 encoding. + +`:emacs-mule-id' + +VALUE must be an integer of 0, 128..255. If omitted, the charset +can't be encoded by coding-systems of type `emacs-mule'. + +`:ascii-compatible-p' + +VALUE must be nil or t. If the VALUE is nil, the charset is a not +compatible with ASCII. The default value is nil. + +`:supplementary-p' + +VALUE must be nil or t. If the VALUE is t, the charset is +supplementary, which means the charset is used only as a parent of +some other charset. + +`:invalid-code' + +VALUE must be a nonnegative integer that can be used as an invalid +code point of the charset. If the minimum code is 0 and the maximum +code is greater than Emacs' maximum integer value, `:invalid-code' +should not be omitted. + +`:code-offset' + +VALUE must be an integer added to an index number of character to get +the corresponding character code. + +`:map' + +VALUE must be vector or string. + +If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], +where CODE-n is a code-point of the charset, and CHAR-n is the +corresponding charcter code. + +If it is a string, it is a name of file that contains the above +information. + +`:parents' + +VALUE must be a list of parent charsets. The charset inherits +characters from them. Each element of the list may be a cons (PARENT +. OFFSET), where PARENT is a parent charset, and OFFSET is an offset +value to add to a code point of this charset to get the corresponding +code point of PARENT. + +`:unify-map' + +VALUE must be vector or string. + +If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], +where CODE-n is a code-point of the charset, and CHAR-n is the +corresponding unified charcter code. + +If it is a string, it is a name of file that contains the above +information." + (let ((attrs (mapcar 'list '(:dimension + :code-space + :iso-final-char + :iso-revision-number + :emacs-mule-id + :ascii-compatible-p + :supplementary-p + :invalid-code + :code-offset + :map + :parents + :unify-map + :plist)))) + + ;; If :dimension is omitted, get the dimension from :code-space. + (let ((dimension (plist-get props :dimension))) + (or dimension + (progn + (setq dimension (/ (length (plist-get props :code-space)) 2)) + (setq props (plist-put props :dimension dimension))))) + + (dolist (slot attrs) + (setcdr slot (plist-get props (car slot)))) + + ;; Make sure that the value of :code-space is a vector of 8 + ;; elements. + (let* ((slot (assq :code-space attrs)) + (val (cdr slot)) + (len (length val))) + (if (< len 8) + (setcdr slot + (vconcat val (make-vector (- 8 len) 0))))) + + ;; Add :name and :docstring properties to PROPS. + (setq props + (cons :name (cons name (cons :docstring (cons docstring props))))) + (or (plist-get props :short-name) + (plist-put props :short-name (symbol-name name))) + (or (plist-get props :long-name) + (plist-put props :long-name (plist-get props :short-name))) + (setcdr (assq :plist attrs) props) + + (apply 'define-charset-internal name (mapcar 'cdr attrs)))) + + (defun load-with-code-conversion (fullname file &optional noerror nomessage) "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. The file contents are decoded before evaluation if necessary. @@ -103,190 +259,46 @@ ;; API (Application Program Interface) for charsets. -(defsubst charset-quoted-standard-p (obj) - "Return t if OBJ is a quoted symbol, and is the name of a standard charset." - (and (listp obj) (eq (car obj) 'quote) - (symbolp (car-safe (cdr obj))) - (let ((vector (get (car-safe (cdr obj)) 'charset))) - (and (vectorp vector) - (< (aref vector 0) 160))))) - -(defsubst charsetp (object) - "T if OBJECT is a charset." - (and (symbolp object) (vectorp (get object 'charset)))) +;;; Charset property -(defsubst charset-info (charset) - "Return a vector of information of CHARSET. -The elements of the vector are: - CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, - LEADING-CODE-BASE, LEADING-CODE-EXT, - ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE, - REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION, - PLIST, -where -CHARSET-ID (integer) is the identification number of the charset. -BYTES (integer) is the length of multi-byte form of a character in - the charset: one of 1, 2, 3, and 4. -DIMENSION (integer) is the number of bytes to represent a character of -the charset: 1 or 2. -CHARS (integer) is the number of characters in a dimension: 94 or 96. -WIDTH (integer) is the number of columns a character in the charset - occupies on the screen: one of 0, 1, and 2. -DIRECTION (integer) is the rendering direction of characters in the - charset when rendering. If 0, render from left to right, else - render from right to left. -LEADING-CODE-BASE (integer) is the base leading-code for the - charset. -LEADING-CODE-EXT (integer) is the extended leading-code for the - charset. All charsets of less than 0xA0 has the value 0. -ISO-FINAL-CHAR (character) is the final character of the - corresponding ISO 2022 charset. If the charset is not assigned - any final character, the value is -1. -ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked - while encoding to variants of ISO 2022 coding system, one of the - following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). - If the charset is not assigned any final character, the value is -1. -REVERSE-CHARSET (integer) is the charset which differs only in - LEFT-TO-RIGHT value from the charset. If there's no such a - charset, the value is -1. -SHORT-NAME (string) is the short name to refer to the charset. -LONG-NAME (string) is the long name to refer to the charset -DESCRIPTION (string) is the description string of the charset. -PLIST (property list) may contain any type of information a user - want to put and get by functions `put-charset-property' and - `get-charset-property' respectively." - (get charset 'charset)) +(defun get-charset-property (charset propname) + "Return the value of CHARSET's PROPNAME property. +This is the last value stored with + (put-charset-property CHARSET PROPNAME VALUE)." + (plist-get (charset-plist charset) propname)) -;; It is better not to use backquote in this file, -;; because that makes a bootstrapping problem -;; if you need to recompile all the Lisp files using interpreted code. +(defun put-charset-property (charset propname value) + "Store CHARSETS's PROPNAME property with value VALUE. +It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." + (set-charset-plist charset + (plist-put (charset-plist charset) propname value))) + -(defmacro charset-id (charset) - "Return charset identification number of CHARSET." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 0) - (list 'aref (list 'charset-info charset) 0))) - -(defmacro charset-bytes (charset) - "Return bytes of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 1) - (list 'aref (list 'charset-info charset) 1))) +(defun charset-description (charset) + "Return description string of CHARSET." + (plist-get (charset-plist charset) :docstring)) -(defmacro charset-dimension (charset) - "Return dimension of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 2) - (list 'aref (list 'charset-info charset) 2))) - -(defmacro charset-chars (charset) - "Return character numbers contained in a dimension of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 3) - (list 'aref (list 'charset-info charset) 3))) +(defun charset-dimension (charset) + "Return dimension string of CHARSET." + (plist-get (charset-plist charset) :dimension)) -(defmacro charset-width (charset) - "Return width (how many column occupied on a screen) of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 4) - (list 'aref (list 'charset-info charset) 4))) - -(defmacro charset-direction (charset) - "Return direction of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 5) - (list 'aref (list 'charset-info charset) 5))) +(defun charset-chars (charset) + "Return character numbers contained in a dimension of CHARSET." + (let ((code-space (plist-get (cahrset-plist charset) :code-space))) + (1+ (- (aref code-space 1) (aref code-space 0))))) -(defmacro charset-iso-final-char (charset) - "Return final char of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 8) - (list 'aref (list 'charset-info charset) 8))) - -(defmacro charset-iso-graphic-plane (charset) - "Return graphic plane of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 9) - (list 'aref (list 'charset-info charset) 9))) - -(defmacro charset-reverse-charset (charset) - "Return reverse charset of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 10) - (list 'aref (list 'charset-info charset) 10))) +(defun charset-iso-final-char (charset) + "Return final char of CHARSET." + (or (plist-get (charset-plist charset) :iso-final-char) + -1)) (defmacro charset-short-name (charset) - "Return short name of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 11) - (list 'aref (list 'charset-info charset) 11))) + "Return short name of CHARSET." + (plist-get (charset-plist charset) :short-name)) (defmacro charset-long-name (charset) - "Return long name of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 12) - (list 'aref (list 'charset-info charset) 12))) - -(defmacro charset-description (charset) - "Return description of CHARSET. -See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 13) - (list 'aref (list 'charset-info charset) 13))) - -(defmacro charset-plist (charset) - "Return list charset property of CHARSET. -See the function `charset-info' for more detail." - (list 'aref - (if (charset-quoted-standard-p charset) - (charset-info (nth 1 charset)) - (list 'charset-info charset)) - 14)) - -(defun set-charset-plist (charset plist) - "Set CHARSET's property list to PLIST, and return PLIST." - (aset (charset-info charset) 14 plist)) - -(defun make-char (charset &optional code1 code2) - "Return a character of CHARSET whose position codes are CODE1 and CODE2. -CODE1 and CODE2 are optional, but if you don't supply -sufficient position codes, return a generic character which stands for -all characters or group of characters in the character set. -A generic character can be used to index a char table (e.g. syntax-table). - -Such character sets as ascii, eight-bit-control, and eight-bit-graphic -don't have corresponding generic characters. If CHARSET is one of -them and you don't supply CODE1, return the character of the smallest -code in CHARSET. - -If CODE1 or CODE2 are invalid (out of range), this function signals an -error. However, the eighth bit of both CODE1 and CODE2 is zeroed -before they are used to index CHARSET. Thus you may use, say, the -actual ISO 8859 character code rather than subtracting 128, as you -would need to index the corresponding Emacs charset." - (make-char-internal (charset-id charset) code1 code2)) - -(put 'make-char 'byte-compile - (function - (lambda (form) - (let ((charset (nth 1 form))) - (if (charset-quoted-standard-p charset) - (byte-compile-normal-call - (cons 'make-char-internal - (cons (charset-id (nth 1 charset)) (nthcdr 2 form)))) - (byte-compile-normal-call - (cons 'make-char-internal - (cons (list 'charset-id charset) (nthcdr 2 form))))))))) + "Return long name of CHARSET." + (plist-get (charset-plist charset) :long-name)) (defun charset-list () "Return list of charsets ever defined. @@ -295,152 +307,314 @@ Now we have the variable `charset-list'." charset-list) -(defsubst generic-char-p (char) - "Return t if and only if CHAR is a generic character. -See also the documentation of `make-char'." - (and (>= char 0400) - (let ((l (split-char char))) - (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0)) - (not (eq (car l) 'composition)))))) - -(defun decode-char (ccs code-point &optional restriction) - "Return character specified by coded character set CCS and CODE-POINT in it. -Return nil if such a character is not supported. -Currently the only supported coded character set is `ucs' (ISO/IEC -10646: Universal Multi-Octet Coded Character Set). - -Optional argument RESTRICTION specifies a way to map the pair of CCS -and CODE-POINT to a character. Currently not supported and just ignored." - (cond ((eq ccs 'ucs) - (cond ((< code-point 160) - code-point) - ((< code-point 256) - (make-char 'latin-iso8859-1 code-point)) - ((< code-point #x2500) - (setq code-point (- code-point #x0100)) - (make-char 'mule-unicode-0100-24ff - (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) - ((< code-point #x3400) - (setq code-point (- code-point #x2500)) - (make-char 'mule-unicode-2500-33ff - (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) - ((and (>= code-point #xe000) (< code-point #x10000)) - (setq code-point (- code-point #xe000)) - (make-char 'mule-unicode-e000-ffff - (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) - )))) - -(defun encode-char (char ccs &optional restriction) - "Return code-point in coded character set CCS that corresponds to CHAR. -Return nil if CHAR is not included in CCS. -Currently the only supported coded character set is `ucs' (ISO/IEC -10646: Universal Multi-Octet Coded Character Set). - -CHAR should be in one of these charsets: - ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff, - mule-unicode-e000-ffff, eight-bit-control -Otherwise, return nil. - -Optional argument RESTRICTION specifies a way to map CHAR to a -code-point in CCS. Currently not supported and just ignored." - (let* ((split (split-char char)) - (charset (car split))) - (cond ((eq ccs 'ucs) - (cond ((eq charset 'ascii) - char) - ((eq charset 'latin-iso8859-1) - (+ (nth 1 split) 128)) - ((eq charset 'mule-unicode-0100-24ff) - (+ #x0100 (+ (* (- (nth 1 split) 32) 96) - (- (nth 2 split) 32)))) - ((eq charset 'mule-unicode-2500-33ff) - (+ #x2500 (+ (* (- (nth 1 split) 32) 96) - (- (nth 2 split) 32)))) - ((eq charset 'mule-unicode-e000-ffff) - (+ #xe000 (+ (* (- (nth 1 split) 32) 96) - (- (nth 2 split) 32)))) - ((eq charset 'eight-bit-control) - char)))))) - +(defun generic-char-p (char) + "Always return nil. This exists only for backward compatibility." + nil) ;; Coding system stuff -;; Coding system is a symbol that has the property `coding-system'. -;; -;; The value of the property `coding-system' is a vector of the -;; following format: -;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS] -;; We call this vector as coding-spec. See comments in src/coding.c -;; for more detail. +;; Coding system is a symbol that has been defined by the function +;; `define-coding-system'. + +(defconst coding-system-iso-2022-flags + '(long-form + ascii-at-eol + ascii-at-cntl + 7-bit + locking-shift + single-shift + designation + revision + direction + init-at-bol + designate-at-bol + safe + latin-extra + composition + euc-tw-shift) + "List of symbols that control ISO-2022 encoder/decoder. + +The value of `:flags' attribute in the argument of the function +`define-coding-system' must be one of them. + +If `long-form' is specified, use a long designation sequence on +encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312', +and `japanese-jisx0208'. The long designation sequence doesn't +conform to ISO 2022, but used by such a coding system as +`compound-text'. + +If `ascii-at-eol' is specified, designate ASCII to g0 at end of line +on encoding. + +If `ascii-at-cntl' is specified, designate ASCII to g0 before control +codes and SPC on encoding. + +If `7-bit' is specified, use 7-bit code only on encoding. + +If `locking-shift' is specified, decode locking-shift code correctly +on decoding, and use locking-shift to invoke a graphic element on +encoding. + +If `single-shift' is specified, decode single-shift code correctly on +decoding, and use single-shift to invoke a graphic element on encoding. + +If `designation' is specified, decode designation code correctly on +decoding, and use designation to designate a charset to a graphic +element on encoding. + +If `revision' is specified, produce an escape sequence to specify +revision number of a charset on encoding. Such an escape sequence is +always correctly decoded on decoding. + +If `direction' is specified, decode ISO6429's code for specifying +direction correctly, and produced the code on encoding. + +If `init-at-bol' is specified, on encoding, it is assumed that +invocation and designation statuses are reset at each beginning of +line even if `ascii-at-eol' is not specified thus no code for +resetting them are produced. + +If `safe' is specified, on encoding, characters not supported by a +coding are replaced with `?'. + +If `latin-extra' is specified, code-detection routine assumes that a +code specified in `latin-extra-code-table' (which see) is valid. + +If `composition' is specified, an escape sequence to specify +composition sequence is correctly decode on decoding, and is produced +on encoding. -(defconst coding-spec-type-idx 0) -(defconst coding-spec-mnemonic-idx 1) -(defconst coding-spec-doc-string-idx 2) -(defconst coding-spec-plist-idx 3) -(defconst coding-spec-flags-idx 4) +If `euc-tw-shift' is specified, the EUC-TW specific shifting code is +correctly decoded on decoding, and is produced on encoding.") + +(defun define-coding-system (name docstring &rest props) + "Define NAME (symbol) as a coding system with DOCSTRING and attributes. +The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE +may be any symbol. + +The following attributes have special meanings. If labeled as +\"(required)\", it should not be omitted. + +`:mnemonic' (required) + +VALUE is a character to display on mode line for the coding system. + +`:coding-type' (required) + +VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', +`emacs-mule', `sjis', `big5', `ccl', `raw-text', `undecided'. + +`:eol-type' (optional) + +VALUE is an EOL (end-of-line) format of the coding system. It must be +one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL +\(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), +and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on +decoding by the coding system, Emacs automatically detects an EOL +format of the source text. + +`:charset-list' (required) + +VALUE must be a list of charsets supported by the coding system. On +encoding by the coding system, if a character belongs to multiple +charsets in the list, a charset that comes earlier in the list is +selected. + +`:ascii-compatible-p' (optional) + +If VALUE is non-nil, the coding system decodes all 7-bit bytes into +the correponding ASCII characters, and encodes all ASCII characters +back to the correponding 7-bit bytes. If omitted, the VALUE defaults +to nil. + +`:decode-translation-table' (optional) + +VALUE must be a translation table to use on decoding. + +`:encode-translation-table' (optional) + +VALUE must be a translation table to use on encoding. + +`:post-read-conversion' (optional) + +VALUE must be a function to call after some text is inserted and +decoded by the coding system itself and before any functions in +`after-insert-functions' are called. The arguments to this function +is the same as those of a function in `after-insert-functions', +i.e. LENGTH of a text while putting point at the head of the text to +be decoded + +`:pre-write-conversion' + +VALUE must be a function to call after all functions in +`write-region-annotate-functions' and `buffer-file-format' are called, +and before the text is encoded by the coding system itself. The +arguments to this function is the same as those of a function in +`write-region-annotate-functions', i.e. FROM and TO specifying region +of a text. + +`:default-char' -;; PLIST is a property list of a coding system. To share PLIST among -;; alias coding systems, a coding system has PLIST in coding-spec -;; instead of having it in normal property list of Lisp symbol. -;; Here's a list of coding system properties currently being used. -;; -;; o coding-category -;; -;; The value is a coding category the coding system belongs to. The -;; function `make-coding-system' sets this value automatically -;; unless its argument PROPERTIES specifies this property. -;; -;; o alias-coding-systems -;; -;; The value is a list of coding systems of the same alias group. The -;; first element is the coding system made at first, which we call as -;; `base coding system'. The function `make-coding-system' sets this -;; value automatically and `define-coding-system-alias' updates it. -;; -;; See the documentation of make-coding-system for the meanings of the -;; following properties. -;; -;; o post-read-conversion -;; o pre-write-conversion -;; o translation-table-for-decode -;; o translation-table-for-encode -;; o safe-chars -;; o safe-charsets -;; o mime-charset -;; o valid-codes (meaningful only for a coding system based on CCL) +VALUE must be a character. On encoding, a character not supported by +the coding system is replaced with VALUE. + +`:eol-type' + +VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like +EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like +EOL (CR). If omitted, on decoding, the coding system detect EOL +format automatically, and on encoding, used Unix-like EOL. + +`:mime-charset' + +VALUE must be a symbol who has MIME-charset name. + +`:flags' + +VALUE must be a list of symbols that control ISO-2022 converter. Each +symbol must be a member of the variable `coding-system-iso-2022-flags' +\(which see). This attribute has a meaning only when `:coding-type' +is `iso-2022'. + +`:designation' + +VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE]. +GN-USAGE specifies the usage of graphic register GN as follows. + +If it is nil, no charset can be designated to GN. + +If it is a charset, the charset is initially designaged to GN, and +never used by the other charsets. + +If it is a list, the elements must be charsets, nil, 94, or 96. GN +can be used by all listed charsets. If the list contains 94, any +charsets whose iso-chars is 94 can be designaged to GN. If the list +contains 96, any charsets whose iso-chars is 96 can be designaged to +GN. If the first element is a charset, the charset is initially +designaged to GN. + +This attribute has a meaning only when `:coding-type' is `iso-2022'. + +`:bom' + +VALUE must nil, t, or cons of coding systems whose `:coding-type' is +`utf-16'. + +This attribute has a meaning only when `:coding-type' is `utf-16'. + +`:endian' + +VALUE must be t or nil. See the above description for the detail. + +This attribute has a meaning only when `:coding-type' is `utf-16'. + +`:ccl-decoder' + +This attribute has a meaning only when `:coding-type' is `ccl'. + +`:ccl-encoder' +This attribute has a meaning only when `:coding-type' is `ccl'." + (let* ((common-attrs (mapcar 'list + '(:mnemonic + :coding-type + :charset-list + :ascii-compatible-p + :docode-translation-table + :encode-translation-table + :post-read-conversion + :pre-write-conversion + :default-char + :plist + :eol-type))) + (coding-type (plist-get props :coding-type)) + (spec-attrs (mapcar 'list + (cond ((eq coding-type 'iso-2022) + '(:initial + :reg-usage + :request + :flags)) + ((eq coding-type 'utf-16) + '(:bom + :endian)) + ((eq coding-type 'ccl) + '(:ccl-decoder + :ccl-encoder + :valids)))))) -(defsubst coding-system-spec (coding-system) - "Return coding-spec of CODING-SYSTEM." - (get (check-coding-system coding-system) 'coding-system)) + (dolist (slot common-attrs) + (setcdr slot (plist-get props (car slot)))) + + (dolist (slot spec-attrs) + (setcdr slot (plist-get props (car slot)))) -(defun coding-system-type (coding-system) - "Return the coding type of CODING-SYSTEM. -A coding type is an integer value indicating the encoding method -of CODING-SYSTEM. See the function `make-coding-system' for more detail." - (aref (coding-system-spec coding-system) coding-spec-type-idx)) + (if (eq coding-type 'iso-2022) + (let ((designation (plist-get props :designation)) + (flags (plist-get props :flags)) + (initial (make-vector 4 nil)) + (reg-usage (cons 4 4)) + request elt) + (dotimes (i 4) + (setq elt (aref designation i)) + (cond ((charsetp elt) + (aset initial i elt) + (setq request (cons (cons elt i) request))) + ((consp elt) + (aset initial i (car elt)) + (if (charsetp (car elt)) + (setq request (cons (cons (car elt) i) request))) + (dolist (e (cdr elt)) + (cond ((charsetp e) + (setq request (cons (cons e i) request))) + ((eq e 94) + (setcar reg-usage i)) + ((eq e 96) + (setcdr reg-usage i)) + ((eq e t) + (setcar reg-usage i) + (setcdr reg-usage i))))))) + (setcdr (assq :initial spec-attrs) initial) + (setcdr (assq :reg-usage spec-attrs) reg-usage) + (setcdr (assq :request spec-attrs) request) + + ;; Change :flags value from a list to a bit-mask. + (let ((bits 0) + (i 0)) + (dolist (elt coding-system-iso-2022-flags) + (if (memq elt flags) + (setq bits (logior bits (lsh 1 i)))) + (setq i (1+ i))) + (setcdr (assq :flags spec-attrs) bits)))) + + ;; Add :name and :docstring properties to PROPS. + (setq props + (cons :name (cons name (cons :docstring (cons docstring props))))) + (setcdr (assq :plist common-attrs) props) + + (apply 'define-coding-system-internal + name (mapcar 'cdr (append common-attrs spec-attrs))))) + +(defun coding-system-doc-string (coding-system) + "Return the documentation string for CODING-SYSTEM." + (plist-get (coding-system-plist coding-system) :docstring)) (defun coding-system-mnemonic (coding-system) "Return the mnemonic character of CODING-SYSTEM. The mnemonic character of a coding system is used in mode line to indicate the coding system. If the arg is nil, return ?-." - (let ((spec (coding-system-spec coding-system))) - (if spec (aref spec coding-spec-mnemonic-idx) ?-))) - -(defun coding-system-doc-string (coding-system) - "Return the documentation string for CODING-SYSTEM." - (aref (coding-system-spec coding-system) coding-spec-doc-string-idx)) + (plist-get (coding-system-plist coding-system) :mnemonic)) -(defun coding-system-plist (coding-system) - "Return the property list of CODING-SYSTEM." - (aref (coding-system-spec coding-system) coding-spec-plist-idx)) +(defun coding-system-type (coding-system) + "Return the coding type of CODING-SYSTEM. +A coding type is a symbol indicating the encoding method of CODING-SYSTEM. +See the function `define-coding-system' for more detail." + (plist-get (coding-system-plist coding-system) :coding-type)) -(defun coding-system-flags (coding-system) - "Return `flags' of CODING-SYSTEM. -A `flags' of a coding system is a vector of length 32 indicating detailed -information of a coding system. See the function `make-coding-system' -for more detail." - (aref (coding-system-spec coding-system) coding-spec-flags-idx)) +(defun coding-system-charset-list (coding-system) + "Return list of charsets supported by COIDNG-SYSTEM. +If CODING-SYSTEM supports all ISO-2022 charsets, return `iso-2022'. +If CODING-SYSTEM supports all emacs-mule charsets, return `emacs-mule'." + (plist-get (coding-system-plist coding-system) :charset-list)) (defun coding-system-get (coding-system prop) "Extract a value from CODING-SYSTEM's property list for property PROP." @@ -448,22 +622,7 @@ (defun coding-system-put (coding-system prop val) "Change value in CODING-SYSTEM's property list PROP to VAL." - (let ((plist (coding-system-plist coding-system))) - (if plist - (plist-put plist prop val) - (aset (coding-system-spec coding-system) coding-spec-plist-idx - (list prop val))))) - -(defun coding-system-category (coding-system) - "Return the coding category of CODING-SYSTEM. -See also `coding-category-list'." - (coding-system-get coding-system 'coding-category)) - -(defun coding-system-base (coding-system) - "Return the base coding system of CODING-SYSTEM. -A base coding system is what made by `make-coding-system'. -Any alias nor subsidiary coding systems are not base coding system." - (car (coding-system-get coding-system 'alias-coding-systems))) + (plist-put (coding-system-plist coding-system) prop val)) (defalias 'coding-system-parent 'coding-system-base) (make-obsolete 'coding-system-parent 'coding-system-base "20.3") @@ -478,18 +637,6 @@ ;; automatically. Nth element of the vector is the subsidiary coding ;; system whose `eol-type' property is N. -(defun coding-system-eol-type (coding-system) - "Return eol-type of CODING-SYSTEM. -An eol-type is integer 0, 1, 2, or a vector of coding systems. - -Integer values 0, 1, and 2 indicate a format of end-of-line; LF, -CRLF, and CR respectively. - -A vector value indicates that a format of end-of-line should be -detected automatically. Nth element of the vector is the subsidiary -coding system whose eol-type is N." - (get coding-system 'eol-type)) - (defun coding-system-lessp (x y) (cond ((eq x 'no-conversion) t) ((eq y 'no-conversion) nil) @@ -540,566 +687,6 @@ (setq tail (cdr tail))))) codings)) -(defun map-charset-chars (func charset) - "Use FUNC to map over all characters in CHARSET for side effects. -FUNC is a function of two args, the start and end (inclusive) of a -character code range. Thus FUNC should iterate over [START, END]." - (let* ((dim (charset-dimension charset)) - (chars (charset-chars charset)) - (start (if (= chars 94) - 33 - 32))) - (if (= dim 1) - (funcall func - (make-char charset start) - (make-char charset (+ start chars -1))) - (dotimes (i chars) - (funcall func - (make-char charset (+ i start) start) - (make-char charset (+ i start) (+ start chars -1))))))) - -(defun register-char-codings (coding-system safe-chars) - "Add entries for CODING-SYSTEM to `char-coding-system-table'. -If SAFE-CHARS is a char-table, its non-nil entries specify characters -which CODING-SYSTEM encodes safely. If SAFE-CHARS is t, register -CODING-SYSTEM as a general one which can encode all characters." - (let ((general (char-table-extra-slot char-coding-system-table 0)) - ;; Charsets which have some members in the table, but not all - ;; of them (i.e. not just a generic character): - (partials (char-table-extra-slot char-coding-system-table 1))) - (if (eq safe-chars t) - (or (memq coding-system general) - (set-char-table-extra-slot char-coding-system-table 0 - (cons coding-system general))) - (map-char-table - (lambda (key val) - (if (and (>= key 128) val) - (let ((codings (aref char-coding-system-table key)) - (charset (char-charset key))) - (unless (memq coding-system codings) - (if (and (generic-char-p key) - (memq charset partials)) - ;; The generic char would clobber individual - ;; entries already in the table. First save the - ;; separate existing entries for all chars of the - ;; charset (with the generic entry added, if - ;; necessary). - (let (entry existing) - (map-charset-chars - (lambda (start end) - (while (<= start end) - (setq entry (aref char-coding-system-table start)) - (when entry - (push (cons - start - (if (memq coding-system entry) - entry - (cons coding-system entry))) - existing)) - (setq start (1+ start)))) - charset) - ;; Update the generic entry. - (aset char-coding-system-table key - (cons coding-system codings)) - ;; Override with the saved entries. - (dolist (elt existing) - (aset char-coding-system-table (car elt) (cdr elt)))) - (aset char-coding-system-table key - (cons coding-system codings)) - (unless (or (memq charset partials) - (generic-char-p key)) - (push charset partials))))))) - safe-chars) - (set-char-table-extra-slot char-coding-system-table 1 partials)))) - - -(defun make-subsidiary-coding-system (coding-system) - "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM." - (let ((coding-spec (coding-system-spec coding-system)) - (subsidiaries (vector (intern (format "%s-unix" coding-system)) - (intern (format "%s-dos" coding-system)) - (intern (format "%s-mac" coding-system)))) - (i 0) - temp) - (while (< i 3) - (put (aref subsidiaries i) 'coding-system coding-spec) - (put (aref subsidiaries i) 'eol-type i) - (add-to-coding-system-list (aref subsidiaries i)) - (setq coding-system-alist - (cons (list (symbol-name (aref subsidiaries i))) - coding-system-alist)) - (setq i (1+ i))) - subsidiaries)) - -(defun transform-make-coding-system-args (name type &optional doc-string props) - "For internal use only. -Transform XEmacs style args for `make-coding-system' to Emacs style. -Value is a list of transformed arguments." - (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?"))) - (eol-type (plist-get props 'eol-type)) - properties tmp) - (cond - ((eq eol-type 'lf) (setq eol-type 'unix)) - ((eq eol-type 'crlf) (setq eol-type 'dos)) - ((eq eol-type 'cr) (setq eol-type 'mac))) - (if (setq tmp (plist-get props 'post-read-conversion)) - (setq properties (plist-put properties 'post-read-conversion tmp))) - (if (setq tmp (plist-get props 'pre-write-conversion)) - (setq properties (plist-put properties 'pre-write-conversion tmp))) - (cond - ((eq type 'shift-jis) - `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type)) - ((eq type 'iso2022) ; This is not perfect. - (if (plist-get props 'escape-quoted) - (error "escape-quoted is not supported: %S" - `(,name ,type ,doc-string ,props))) - (let ((g0 (plist-get props 'charset-g0)) - (g1 (plist-get props 'charset-g1)) - (g2 (plist-get props 'charset-g2)) - (g3 (plist-get props 'charset-g3)) - (use-roman - (and - (eq (cadr (assoc 'latin-jisx0201 - (plist-get props 'input-charset-conversion))) - 'ascii) - (eq (cadr (assoc 'ascii - (plist-get props 'output-charset-conversion))) - 'latin-jisx0201))) - (use-oldjis - (and - (eq (cadr (assoc 'japanese-jisx0208-1978 - (plist-get props 'input-charset-conversion))) - 'japanese-jisx0208) - (eq (cadr (assoc 'japanese-jisx0208 - (plist-get props 'output-charset-conversion))) - 'japanese-jisx0208-1978)))) - (if (charsetp g0) - (if (plist-get props 'force-g0-on-output) - (setq g0 `(nil ,g0)) - (setq g0 `(,g0 t)))) - (if (charsetp g1) - (if (plist-get props 'force-g1-on-output) - (setq g1 `(nil ,g1)) - (setq g1 `(,g1 t)))) - (if (charsetp g2) - (if (plist-get props 'force-g2-on-output) - (setq g2 `(nil ,g2)) - (setq g2 `(,g2 t)))) - (if (charsetp g3) - (if (plist-get props 'force-g3-on-output) - (setq g3 `(nil ,g3)) - (setq g3 `(,g3 t)))) - `(,name 2 ,mnemonic ,doc-string - (,g0 ,g1 ,g2 ,g3 - ,(plist-get props 'short) - ,(not (plist-get props 'no-ascii-eol)) - ,(not (plist-get props 'no-ascii-cntl)) - ,(plist-get props 'seven) - t - ,(not (plist-get props 'lock-shift)) - ,use-roman - ,use-oldjis - ,(plist-get props 'no-iso6429) - nil nil nil nil) - ,properties ,eol-type))) - ((eq type 'big5) - `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type)) - ((eq type 'ccl) - `(,name 4 ,mnemonic ,doc-string - (,(plist-get props 'decode) . ,(plist-get props 'encode)) - ,properties ,eol-type)) - (t - (error "unsupported XEmacs style make-coding-style arguments: %S" - `(,name ,type ,doc-string ,props)))))) - -(defun make-coding-system (coding-system type mnemonic doc-string - &optional - flags - properties - eol-type) - "Define a new coding system CODING-SYSTEM (symbol). -Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional), -and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM -in the following format: - [TYPE MNEMONIC DOC-STRING PLIST FLAGS] - -TYPE is an integer value indicating the type of the coding system as follows: - 0: Emacs internal format, - 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC, - 2: ISO-2022 including many variants, - 3: Big5 used mainly on Chinese PC, - 4: private, CCL programs provide encoding/decoding algorithm, - 5: Raw-text, which means that text contains random 8-bit codes. - -MNEMONIC is a character to be displayed on mode line for the coding system. - -DOC-STRING is a documentation string for the coding system. - -FLAGS specifies more detailed information of the coding system as follows: - - If TYPE is 2 (ISO-2022), FLAGS is a list of these elements: - CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM, - ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT, - USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL, - SAFE, ACCEPT-LATIN-EXTRA-CODE. - CHARSETn are character sets initially designated to Gn graphic registers. - If CHARSETn is nil, Gn is never used. - If CHARSETn is t, Gn can be used but nothing designated initially. - If CHARSETn is a list of character sets, those character sets are - designated to Gn on output, but nothing designated to Gn initially. - But, character set `ascii' can be designated only to G0. - SHORT-FORM non-nil means use short designation sequence on output. - ASCII-EOL non-nil means designate ASCII to g0 at end of line on output. - ASCII-CNTL non-nil means designate ASCII to g0 before control codes and - SPACE on output. - SEVEN non-nil means use 7-bit code only on output. - LOCKING-SHIFT non-nil means use locking-shift. - SINGLE-SHIFT non-nil means use single-shift. - USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII. - USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983. - NO-ISO6429 non-nil means not use ISO6429's direction specification. - INIT-BOL non-nil means any designation state is assumed to be reset - to initial at each beginning of line on output. - DESIGNATION-BOL non-nil means designation sequences should be placed - at beginning of line on output. - SAFE non-nil means convert unsafe characters to `?' on output. - Characters not specified in the property `safe-charsets' nor - `safe-chars' are unsafe. - ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts - a code specified in `latin-extra-code-table' (which see) as a valid - code of the coding system. - - If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for - decoding and encoding. CCL programs should be specified by their - symbols. - -PROPERTIES is an alist of properties vs the corresponding values. The -following properties are recognized: - - o post-read-conversion - - The value is a function to call after some text is inserted and - decoded by the coding system itself and before any functions in - `after-insert-functions' are called. The argument of this - function is the same as for a function in - `after-insert-file-functions', i.e. LENGTH of the text inserted, - with point at the head of the text to be decoded. - - o pre-write-conversion - - The value is a function to call after all functions in - `write-region-annotate-functions' and `buffer-file-format' are - called, and before the text is encoded by the coding system itself. - The arguments to this function are the same as those of a function - in `write-region-annotate-functions', i.e. FROM and TO, specifying - a region of text. - - o translation-table-for-decode - - The value is a translation table to be applied on decoding. See - the function `make-translation-table' for the format of translation - table. This is not applicable to type 4 (CCL-based) coding systems. - - o translation-table-for-encode - - The value is a translation table to be applied on encoding. This is - not applicable to type 4 (CCL-based) coding systems. - - o safe-chars - - The value is a char table. If a character has non-nil value in it, - the character is safely supported by the coding system. This - overrides the specification of safe-charsets. - - o safe-charsets - - The value is a list of charsets safely supported by the coding - system. The value t means that all charsets Emacs handles are - supported. Even if some charset is not in this list, it doesn't - mean that the charset can't be encoded in the coding system; - it just means that some other receiver of text encoded - in the coding system won't be able to handle that charset. - - o mime-charset - - The value is a symbol of which name is `MIME-charset' parameter of - the coding system. - - o valid-codes (meaningful only for a coding system based on CCL) - - The value is a list to indicate valid byte ranges of the encoded - file. Each element of the list is an integer or a cons of integer. - In the former case, the integer value is a valid byte code. In the - latter case, the integers specify the range of valid byte codes. - -These properties are set in PLIST, a property list. This function -also sets properties `coding-category' and `alias-coding-systems' -automatically. - -EOL-TYPE specifies the EOL type of the coding-system in one of the -following formats: - - o symbol (unix, dos, or mac) - - The symbol `unix' means Unix-like EOL (LF), `dos' means - DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR). - - o number (0, 1, or 2) - - The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL - respectively. - - o vector of coding-systems of length 3 - - The EOL type is detected automatically for the coding system. - And, according to the detected EOL type, one of the coding - systems in the vector is selected. Elements of the vector - corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL - in this order. - -Kludgy features for backward compatibility: - -1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is -treated as a compiled CCL code. - -2. If PROPERTIES is just a list of character sets, the list is set as -a value of `safe-charsets' in PLIST." - - ;; For compatiblity with XEmacs, we check the type of TYPE. If it - ;; is a symbol, perhaps, this function is called with XEmacs-style - ;; arguments. Here, try to transform that kind of arguments to - ;; Emacs style. - (if (symbolp type) - (let ((args (transform-make-coding-system-args coding-system type - mnemonic doc-string))) - (setq coding-system (car args) - type (nth 1 args) - mnemonic (nth 2 args) - doc-string (nth 3 args) - flags (nth 4 args) - properties (nth 5 args) - eol-type (nth 6 args)))) - - ;; Set a value of `coding-system' property. - (let ((coding-spec (make-vector 5 nil)) - (no-initial-designation t) - (no-alternative-designation t) - (accept-latin-extra-code nil) - coding-category) - (if (or (not (integerp type)) (< type 0) (> type 5)) - (error "TYPE argument must be 0..5")) - (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127)) - (error "MNEMONIC argument must be an ASCII printable character")) - (aset coding-spec coding-spec-type-idx type) - (aset coding-spec coding-spec-mnemonic-idx mnemonic) - (aset coding-spec coding-spec-doc-string-idx - (purecopy (if (stringp doc-string) doc-string ""))) - (cond ((= type 0) - (setq coding-category 'coding-category-emacs-mule)) - ((= type 1) - (setq coding-category 'coding-category-sjis)) - ((= type 2) ; ISO2022 - (let ((i 0) - (vec (make-vector 32 nil)) - (g1-designation nil) - (fl flags)) - (while (< i 4) - (let ((charset (car fl))) - (if (and no-initial-designation - (> i 0) - (or (charsetp charset) - (and (consp charset) - (charsetp (car charset))))) - (setq no-initial-designation nil)) - (if (charsetp charset) - (if (= i 1) (setq g1-designation charset)) - (if (consp charset) - (let ((tail charset) - elt) - (while tail - (setq elt (car tail)) - (if (eq elt t) - (setq no-alternative-designation nil) - (if (and elt (not (charsetp elt))) - (error "Invalid charset: %s" elt))) - (setq tail (cdr tail))) - (setq g1-designation (car charset))) - (if charset - (if (eq charset t) - (setq no-alternative-designation nil) - (error "Invalid charset: %s" charset))))) - (aset vec i charset)) - (setq fl (cdr fl) i (1+ i))) - (while (and (< i 32) fl) - (aset vec i (car fl)) - (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE - (car fl)) - (setq accept-latin-extra-code t)) - (setq fl (cdr fl) i (1+ i))) - (aset coding-spec 4 vec) - (setq coding-category - (if (aref vec 8) ; Use locking-shift. - (or (and (aref vec 7) 'coding-category-iso-7-else) - 'coding-category-iso-8-else) - (if (aref vec 7) ; 7-bit only. - (if (aref vec 9) ; Use single-shift. - 'coding-category-iso-7-else - (if no-alternative-designation - 'coding-category-iso-7-tight - 'coding-category-iso-7)) - (if (or no-initial-designation - (not no-alternative-designation)) - 'coding-category-iso-8-else - (if (and (charsetp g1-designation) - (= (charset-dimension g1-designation) 2)) - 'coding-category-iso-8-2 - 'coding-category-iso-8-1))))))) - ((= type 3) - (setq coding-category 'coding-category-big5)) - ((= type 4) ; private - (setq coding-category 'coding-category-ccl) - (if (not (consp flags)) - (error "Invalid FLAGS argument for TYPE 4 (CCL)") - (let ((decoder (check-ccl-program - (car flags) - (intern (format "%s-decoder" coding-system)))) - (encoder (check-ccl-program - (cdr flags) - (intern (format "%s-encoder" coding-system))))) - (if (and decoder encoder) - (aset coding-spec 4 (cons decoder encoder)) - (error "Invalid FLAGS argument for TYPE 4 (CCL)"))))) - (t ; i.e. (= type 5) - (setq coding-category 'coding-category-raw-text))) - - (let ((plist (list 'coding-category coding-category - 'alias-coding-systems (list coding-system)))) - (if no-initial-designation - (plist-put plist 'no-initial-designation t)) - (if (and properties - (or (eq properties t) - (not (consp (car properties))))) - ;; In the old version, the arg PROPERTIES is a list to be - ;; set in PLIST as a value of property `safe-charsets'. - (setq properties (list (cons 'safe-charsets properties)))) - ;; In the current version PROPERTIES is a property list. - ;; Reflect it into PLIST one by one while handling safe-chars - ;; specially. - (let ((safe-charsets (cdr (assq 'safe-charsets properties))) - (safe-chars (cdr (assq 'safe-chars properties))) - (l properties) - prop val) - ;; If only safe-charsets is specified, make a char-table from - ;; it, and store that char-table as the value of `safe-chars'. - (if (and (not safe-chars) safe-charsets) - (let (charset) - (if (eq safe-charsets t) - (setq safe-chars t) - (setq safe-chars (make-char-table 'safe-chars)) - (while safe-charsets - (setq charset (car safe-charsets) - safe-charsets (cdr safe-charsets)) - (cond ((eq charset 'ascii)) ; just ignore - ((eq charset 'eight-bit-control) - (let ((i 128)) - (while (< i 160) - (aset safe-chars i t) - (setq i (1+ i))))) - ((eq charset 'eight-bit-graphic) - (let ((i 160)) - (while (< i 256) - (aset safe-chars i t) - (setq i (1+ i))))) - (t - (aset safe-chars (make-char charset) t)))) - (if accept-latin-extra-code - (let ((i 128)) - (while (< i 160) - (if (aref latin-extra-code-table i) - (aset safe-chars i t)) - (setq i (1+ i)))))) - (setq l (cons (cons 'safe-chars safe-chars) l)))) - (while l - (setq prop (car (car l)) val (cdr (car l)) l (cdr l)) - (if (eq prop 'safe-chars) - (progn - (if (and (symbolp val) - (get val 'translation-table)) - (setq safe-chars (get val 'translation-table))) - (register-char-codings coding-system safe-chars) - (setq val safe-chars))) - (plist-put plist prop val))) - ;; The property `coding-category' may have been set differently - ;; through PROPERTIES. - (setq coding-category (plist-get plist 'coding-category)) - (aset coding-spec coding-spec-plist-idx plist)) - (put coding-system 'coding-system coding-spec) - (put coding-category 'coding-systems - (cons coding-system (get coding-category 'coding-systems)))) - - ;; Next, set a value of `eol-type' property. - (if (not eol-type) - ;; If EOL-TYPE is nil, set a vector of subsidiary coding - ;; systems, each corresponds to a coding system for the detected - ;; EOL format. - (setq eol-type (make-subsidiary-coding-system coding-system))) - (setq eol-type - (cond ((or (eq eol-type 'unix) (null eol-type)) - 0) - ((eq eol-type 'dos) - 1) - ((eq eol-type 'mac) - 2) - ((or (and (vectorp eol-type) - (= (length eol-type) 3)) - (and (numberp eol-type) - (and (>= eol-type 0) - (<= eol-type 2)))) - eol-type) - (t - (error "Invalid EOL-TYPE spec:%S" eol-type)))) - (put coding-system 'eol-type eol-type) - - ;; At last, register CODING-SYSTEM in `coding-system-list' and - ;; `coding-system-alist'. - (add-to-coding-system-list coding-system) - (setq coding-system-alist (cons (list (symbol-name coding-system)) - coding-system-alist)) - - ;; For a coding system of cateogory iso-8-1 and iso-8-2, create - ;; XXX-with-esc variants. - (let ((coding-category (coding-system-category coding-system))) - (if (or (eq coding-category 'coding-category-iso-8-1) - (eq coding-category 'coding-category-iso-8-2)) - (let ((esc (intern (concat (symbol-name coding-system) "-with-esc"))) - (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system)) - (safe-charsets (assq 'safe-charsets properties)) - (mime-charset (assq 'mime-charset properties))) - (if safe-charsets - (setcdr safe-charsets t) - (setq properties (cons (cons 'safe-charsets t) properties))) - (if mime-charset - (setcdr mime-charset nil)) - (make-coding-system esc type mnemonic doc - (if (listp (car flags)) - (cons (append (car flags) '(t)) (cdr flags)) - (cons (list (car flags) t) (cdr flags))) - properties)))) - - coding-system) - -(defun define-coding-system-alias (alias coding-system) - "Define ALIAS as an alias for coding system CODING-SYSTEM." - (put alias 'coding-system (coding-system-spec coding-system)) - (nconc (coding-system-get alias 'alias-coding-systems) (list alias)) - (add-to-coding-system-list alias) - (setq coding-system-alist (cons (list (symbol-name alias)) - coding-system-alist)) - (let ((eol-type (coding-system-eol-type coding-system))) - (if (vectorp eol-type) - (put alias 'eol-type (make-subsidiary-coding-system alias)) - (put alias 'eol-type eol-type)))) - (defun set-buffer-file-coding-system (coding-system &optional force) "Set the file coding-system of the current buffer to CODING-SYSTEM. This means that when you save the buffer, it will be converted @@ -1268,7 +855,10 @@ (defun set-coding-priority (arg) "Set priority of coding categories according to ARG. -ARG is a list of coding categories ordered by priority." +ARG is a list of coding categories ordered by priority. + +This function is provided for backward compatibility. +Now we have more convenient function `set-coding-system-priority'." (let ((l arg) (current-list (copy-sequence coding-category-list))) ;; Check the validity of ARG while deleting coding categories in @@ -1457,6 +1047,8 @@ ;; Must return nil, as build_annotations_2 expects that. nil) +(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0") + ;;; FILE I/O (defcustom auto-coding-alist @@ -1626,8 +1218,7 @@ (when coding-system (set-buffer-file-coding-system coding-system t) (if (and enable-multibyte-characters - (or (eq coding-system 'no-conversion) - (eq (coding-system-type coding-system) 5)) + (or (eq (coding-system-type coding-system) 'raw-text)) ;; If buffer was unmodified and the size is the ;; same as INSERTED, we must be visiting it. (not modified-p) @@ -1667,8 +1258,8 @@ ;; But eol-type is not yet set. (setq local-eol nil)) (if (and buffer-file-coding-system - (not (eq (coding-system-type buffer-file-coding-system) t))) - ;; This is not `undecided'. + (not (eq (coding-system-type buffer-file-coding-system) + 'undecided))) (setq local-coding (coding-system-base buffer-file-coding-system))) (if (and (local-variable-p 'buffer-file-coding-system) @@ -1682,9 +1273,7 @@ ;; But eol-type is not found. ;; If EOL conversions are inhibited, force unix eol-type. (setq found-eol (if inhibit-eol-conversion 0))) - (if (eq (coding-system-type coding) t) - (setq found-coding 'undecided) - (setq found-coding (coding-system-base coding))) + (setq found-coding (coding-system-base coding)) (if (and (not found-eol) (eq found-coding 'undecided)) ;; No valid coding information found.