comparison lisp/international/mule.el @ 45450:3b83834d8f74

(make-coding-system): Doc fixes. (auto-coding-functions): New variable. (auto-coding-from-file-contents): Use it. (set-auto-coding): Update docs. (sgml-xml-auto-coding-function): New function.
author Colin Walters <walters@gnu.org>
date Tue, 21 May 2002 21:14:03 +0000
parents 8a2b953f3d6c
children 8da743b93fbf
comparison
equal deleted inserted replaced
45449:2e404ac3657f 45450:3b83834d8f74
723 in the following format: 723 in the following format:
724 [TYPE MNEMONIC DOC-STRING PLIST FLAGS] 724 [TYPE MNEMONIC DOC-STRING PLIST FLAGS]
725 725
726 TYPE is an integer value indicating the type of the coding system as follows: 726 TYPE is an integer value indicating the type of the coding system as follows:
727 0: Emacs internal format, 727 0: Emacs internal format,
728 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC, 728 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PCs,
729 2: ISO-2022 including many variants, 729 2: ISO-2022 including many variants,
730 3: Big5 used mainly on Chinese PC, 730 3: Big5 used mainly on Chinese PCs,
731 4: private, CCL programs provide encoding/decoding algorithm, 731 4: private, CCL programs provide encoding/decoding algorithm,
732 5: Raw-text, which means that text contains random 8-bit codes. 732 5: Raw-text, which means that text contains random 8-bit codes.
733 733
734 MNEMONIC is a character to be displayed on mode line for the coding system. 734 MNEMONIC is a character to be displayed on mode line for the coding system.
735 735
820 it just means that some other receiver of text encoded 820 it just means that some other receiver of text encoded
821 in the coding system won't be able to handle that charset. 821 in the coding system won't be able to handle that charset.
822 822
823 o mime-charset 823 o mime-charset
824 824
825 The value is a symbol of which name is `MIME-charset' parameter of 825 The value is a symbol whose name is the `MIME-charset' parameter of
826 the coding system. 826 the coding system.
827 827
828 o valid-codes (meaningful only for a coding system based on CCL) 828 o valid-codes (meaningful only for a coding system based on CCL)
829 829
830 The value is a list to indicate valid byte ranges of the encoded 830 The value is a list to indicate valid byte ranges of the encoded
1487 :group 'files 1487 :group 'files
1488 :group 'mule 1488 :group 'mule
1489 :type '(repeat (cons (regexp :tag "Regexp") 1489 :type '(repeat (cons (regexp :tag "Regexp")
1490 (symbol :tag "Coding system")))) 1490 (symbol :tag "Coding system"))))
1491 1491
1492 ;; See the bottom of this file for built-in auto coding functions.
1493 (defcustom auto-coding-functions '(sgml-xml-auto-coding-function)
1494 "A list of functions which attempt to determine a coding system.
1495
1496 Each function in this list should be written to operate on the current
1497 buffer, but should not modify it in any way. It should take one
1498 argument SIZE, past which it should not search. If a function
1499 succeeds in determining a coding system, it should return that coding
1500 system. Otherwise, it should return nil.
1501
1502 The functions in this list take priority over `coding:' tags in the
1503 file, just as for `auto-coding-regexp-alist'."
1504 :group 'files
1505 :group 'mule
1506 :type '(repeat function))
1507
1492 (defvar set-auto-coding-for-load nil 1508 (defvar set-auto-coding-for-load nil
1493 "Non-nil means look for `load-coding' property instead of `coding'. 1509 "Non-nil means look for `load-coding' property instead of `coding'.
1494 This is used for loading and byte-compiling Emacs Lisp files.") 1510 This is used for loading and byte-compiling Emacs Lisp files.")
1495 1511
1496 (defun auto-coding-alist-lookup (filename) 1512 (defun auto-coding-alist-lookup (filename)
1502 (if (string-match (car (car alist)) filename) 1518 (if (string-match (car (car alist)) filename)
1503 (setq coding-system (cdr (car alist))) 1519 (setq coding-system (cdr (car alist)))
1504 (setq alist (cdr alist)))) 1520 (setq alist (cdr alist))))
1505 coding-system)) 1521 coding-system))
1506 1522
1507
1508 (defun auto-coding-from-file-contents (size) 1523 (defun auto-coding-from-file-contents (size)
1509 "Determine a coding system from the contents of the current buffer. 1524 "Determine a coding system from the contents of the current buffer.
1510 The current buffer contains SIZE bytes starting at point. 1525 The current buffer contains SIZE bytes starting at point.
1511 Value is either a coding system or nil." 1526 Value is either a coding system or nil."
1512 (save-excursion 1527 (save-excursion
1513 (let ((alist auto-coding-regexp-alist) 1528 (let ((alist auto-coding-regexp-alist)
1529 (funcs auto-coding-functions)
1514 coding-system) 1530 coding-system)
1515 (while (and alist (not coding-system)) 1531 (while (and alist (not coding-system))
1516 (let ((regexp (car (car alist)))) 1532 (let ((regexp (car (car alist))))
1517 (when (re-search-forward regexp (+ (point) size) t) 1533 (when (re-search-forward regexp (+ (point) size) t)
1518 (setq coding-system (cdr (car alist))))) 1534 (setq coding-system (cdr (car alist)))))
1519 (setq alist (cdr alist))) 1535 (setq alist (cdr alist)))
1536 (while (and funcs (not coding-system))
1537 (setq coding-system (condition-case e
1538 (save-excursion
1539 (funcall (pop funcs) size))
1540 (error nil))))
1520 coding-system))) 1541 coding-system)))
1521
1522 1542
1523 (defun set-auto-coding (filename size) 1543 (defun set-auto-coding (filename size)
1524 "Return coding system for a file FILENAME of which SIZE bytes follow point. 1544 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1525 These bytes should include at least the first 1k of the file 1545 These bytes should include at least the first 1k of the file
1526 and the last 3k of the file, but the middle may be omitted. 1546 and the last 3k of the file, but the middle may be omitted.
1527 1547
1528 It checks FILENAME against the variable `auto-coding-alist'. If 1548 It checks FILENAME against the variable `auto-coding-alist'. If
1529 FILENAME doesn't match any entries in the variable, it checks the 1549 FILENAME doesn't match any entries in the variable, it checks the
1530 contents of the current buffer following point against 1550 contents of the current buffer following point against
1531 `auto-coding-regexp-alist'. If no match is found, it checks for a 1551 `auto-coding-regexp-alist', and tries calling each function in
1552 `auto-coding-functions'. If no match is found, it checks for a
1532 `coding:' tag in the first one or two lines following point. If no 1553 `coding:' tag in the first one or two lines following point. If no
1533 `coding:' tag is found, it checks for local variables list in the last 1554 `coding:' tag is found, it checks for local variables list in the last
1534 3K bytes out of the SIZE bytes. 1555 3K bytes out of the SIZE bytes.
1535 1556
1536 The return value is the specified coding system, 1557 The return value is the specified coding system,
1896 (setq use-default-ascent (make-char-table 'use-default-ascent)) 1917 (setq use-default-ascent (make-char-table 'use-default-ascent))
1897 (put 'ignore-relative-composition 'char-table-extra-slots 0) 1918 (put 'ignore-relative-composition 'char-table-extra-slots 0)
1898 (setq ignore-relative-composition 1919 (setq ignore-relative-composition
1899 (make-char-table 'ignore-relative-composition)) 1920 (make-char-table 'ignore-relative-composition))
1900 1921
1922
1923 ;;; Built-in auto-coding-functions:
1924
1925 (defun sgml-xml-auto-coding-function (size)
1926 "Determine whether the buffer is XML, and if so, its encoding.
1927 This function is intended to be added to `auto-coding-functions'."
1928 (when (re-search-forward "\\`[[:space:]\n]*<\\?xml")
1929 (let ((end (save-excursion
1930 ;; This is a hack.
1931 (search-forward "\"\\s-*?>" size t))))
1932 (when end
1933 (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t)
1934 (let ((match (downcase (match-string 1))))
1935 ;; FIXME: what other encodings are valid, and how can we
1936 ;; translate them to the names of coding systems?
1937 (cond ((string= match "utf-8")
1938 'utf-8)
1939 ((string-match "iso-8859-[[:digit:]]+" match)
1940 (intern match))
1941 (t nil)))
1942 'utf-8)))))
1943
1901 ;;; 1944 ;;;
1902 (provide 'mule) 1945 (provide 'mule)
1903 1946
1904 ;;; mule.el ends here 1947 ;;; mule.el ends here