Mercurial > emacs
comparison lisp/international/mule.el @ 45450:3b83834d8f74
(make-coding-system): Doc fixes.
(auto-coding-functions): New variable.
(auto-coding-from-file-contents): Use it.
(set-auto-coding): Update docs.
(sgml-xml-auto-coding-function): New function.
author | Colin Walters <walters@gnu.org> |
---|---|
date | Tue, 21 May 2002 21:14:03 +0000 |
parents | 8a2b953f3d6c |
children | 8da743b93fbf |
comparison
equal
deleted
inserted
replaced
45449:2e404ac3657f | 45450:3b83834d8f74 |
---|---|
723 in the following format: | 723 in the following format: |
724 [TYPE MNEMONIC DOC-STRING PLIST FLAGS] | 724 [TYPE MNEMONIC DOC-STRING PLIST FLAGS] |
725 | 725 |
726 TYPE is an integer value indicating the type of the coding system as follows: | 726 TYPE is an integer value indicating the type of the coding system as follows: |
727 0: Emacs internal format, | 727 0: Emacs internal format, |
728 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PC, | 728 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PCs, |
729 2: ISO-2022 including many variants, | 729 2: ISO-2022 including many variants, |
730 3: Big5 used mainly on Chinese PC, | 730 3: Big5 used mainly on Chinese PCs, |
731 4: private, CCL programs provide encoding/decoding algorithm, | 731 4: private, CCL programs provide encoding/decoding algorithm, |
732 5: Raw-text, which means that text contains random 8-bit codes. | 732 5: Raw-text, which means that text contains random 8-bit codes. |
733 | 733 |
734 MNEMONIC is a character to be displayed on mode line for the coding system. | 734 MNEMONIC is a character to be displayed on mode line for the coding system. |
735 | 735 |
820 it just means that some other receiver of text encoded | 820 it just means that some other receiver of text encoded |
821 in the coding system won't be able to handle that charset. | 821 in the coding system won't be able to handle that charset. |
822 | 822 |
823 o mime-charset | 823 o mime-charset |
824 | 824 |
825 The value is a symbol of which name is `MIME-charset' parameter of | 825 The value is a symbol whose name is the `MIME-charset' parameter of |
826 the coding system. | 826 the coding system. |
827 | 827 |
828 o valid-codes (meaningful only for a coding system based on CCL) | 828 o valid-codes (meaningful only for a coding system based on CCL) |
829 | 829 |
830 The value is a list to indicate valid byte ranges of the encoded | 830 The value is a list to indicate valid byte ranges of the encoded |
1487 :group 'files | 1487 :group 'files |
1488 :group 'mule | 1488 :group 'mule |
1489 :type '(repeat (cons (regexp :tag "Regexp") | 1489 :type '(repeat (cons (regexp :tag "Regexp") |
1490 (symbol :tag "Coding system")))) | 1490 (symbol :tag "Coding system")))) |
1491 | 1491 |
1492 ;; See the bottom of this file for built-in auto coding functions. | |
1493 (defcustom auto-coding-functions '(sgml-xml-auto-coding-function) | |
1494 "A list of functions which attempt to determine a coding system. | |
1495 | |
1496 Each function in this list should be written to operate on the current | |
1497 buffer, but should not modify it in any way. It should take one | |
1498 argument SIZE, past which it should not search. If a function | |
1499 succeeds in determining a coding system, it should return that coding | |
1500 system. Otherwise, it should return nil. | |
1501 | |
1502 The functions in this list take priority over `coding:' tags in the | |
1503 file, just as for `auto-coding-regexp-alist'." | |
1504 :group 'files | |
1505 :group 'mule | |
1506 :type '(repeat function)) | |
1507 | |
1492 (defvar set-auto-coding-for-load nil | 1508 (defvar set-auto-coding-for-load nil |
1493 "Non-nil means look for `load-coding' property instead of `coding'. | 1509 "Non-nil means look for `load-coding' property instead of `coding'. |
1494 This is used for loading and byte-compiling Emacs Lisp files.") | 1510 This is used for loading and byte-compiling Emacs Lisp files.") |
1495 | 1511 |
1496 (defun auto-coding-alist-lookup (filename) | 1512 (defun auto-coding-alist-lookup (filename) |
1502 (if (string-match (car (car alist)) filename) | 1518 (if (string-match (car (car alist)) filename) |
1503 (setq coding-system (cdr (car alist))) | 1519 (setq coding-system (cdr (car alist))) |
1504 (setq alist (cdr alist)))) | 1520 (setq alist (cdr alist)))) |
1505 coding-system)) | 1521 coding-system)) |
1506 | 1522 |
1507 | |
1508 (defun auto-coding-from-file-contents (size) | 1523 (defun auto-coding-from-file-contents (size) |
1509 "Determine a coding system from the contents of the current buffer. | 1524 "Determine a coding system from the contents of the current buffer. |
1510 The current buffer contains SIZE bytes starting at point. | 1525 The current buffer contains SIZE bytes starting at point. |
1511 Value is either a coding system or nil." | 1526 Value is either a coding system or nil." |
1512 (save-excursion | 1527 (save-excursion |
1513 (let ((alist auto-coding-regexp-alist) | 1528 (let ((alist auto-coding-regexp-alist) |
1529 (funcs auto-coding-functions) | |
1514 coding-system) | 1530 coding-system) |
1515 (while (and alist (not coding-system)) | 1531 (while (and alist (not coding-system)) |
1516 (let ((regexp (car (car alist)))) | 1532 (let ((regexp (car (car alist)))) |
1517 (when (re-search-forward regexp (+ (point) size) t) | 1533 (when (re-search-forward regexp (+ (point) size) t) |
1518 (setq coding-system (cdr (car alist))))) | 1534 (setq coding-system (cdr (car alist))))) |
1519 (setq alist (cdr alist))) | 1535 (setq alist (cdr alist))) |
1536 (while (and funcs (not coding-system)) | |
1537 (setq coding-system (condition-case e | |
1538 (save-excursion | |
1539 (funcall (pop funcs) size)) | |
1540 (error nil)))) | |
1520 coding-system))) | 1541 coding-system))) |
1521 | |
1522 | 1542 |
1523 (defun set-auto-coding (filename size) | 1543 (defun set-auto-coding (filename size) |
1524 "Return coding system for a file FILENAME of which SIZE bytes follow point. | 1544 "Return coding system for a file FILENAME of which SIZE bytes follow point. |
1525 These bytes should include at least the first 1k of the file | 1545 These bytes should include at least the first 1k of the file |
1526 and the last 3k of the file, but the middle may be omitted. | 1546 and the last 3k of the file, but the middle may be omitted. |
1527 | 1547 |
1528 It checks FILENAME against the variable `auto-coding-alist'. If | 1548 It checks FILENAME against the variable `auto-coding-alist'. If |
1529 FILENAME doesn't match any entries in the variable, it checks the | 1549 FILENAME doesn't match any entries in the variable, it checks the |
1530 contents of the current buffer following point against | 1550 contents of the current buffer following point against |
1531 `auto-coding-regexp-alist'. If no match is found, it checks for a | 1551 `auto-coding-regexp-alist', and tries calling each function in |
1552 `auto-coding-functions'. If no match is found, it checks for a | |
1532 `coding:' tag in the first one or two lines following point. If no | 1553 `coding:' tag in the first one or two lines following point. If no |
1533 `coding:' tag is found, it checks for local variables list in the last | 1554 `coding:' tag is found, it checks for local variables list in the last |
1534 3K bytes out of the SIZE bytes. | 1555 3K bytes out of the SIZE bytes. |
1535 | 1556 |
1536 The return value is the specified coding system, | 1557 The return value is the specified coding system, |
1896 (setq use-default-ascent (make-char-table 'use-default-ascent)) | 1917 (setq use-default-ascent (make-char-table 'use-default-ascent)) |
1897 (put 'ignore-relative-composition 'char-table-extra-slots 0) | 1918 (put 'ignore-relative-composition 'char-table-extra-slots 0) |
1898 (setq ignore-relative-composition | 1919 (setq ignore-relative-composition |
1899 (make-char-table 'ignore-relative-composition)) | 1920 (make-char-table 'ignore-relative-composition)) |
1900 | 1921 |
1922 | |
1923 ;;; Built-in auto-coding-functions: | |
1924 | |
1925 (defun sgml-xml-auto-coding-function (size) | |
1926 "Determine whether the buffer is XML, and if so, its encoding. | |
1927 This function is intended to be added to `auto-coding-functions'." | |
1928 (when (re-search-forward "\\`[[:space:]\n]*<\\?xml") | |
1929 (let ((end (save-excursion | |
1930 ;; This is a hack. | |
1931 (search-forward "\"\\s-*?>" size t)))) | |
1932 (when end | |
1933 (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t) | |
1934 (let ((match (downcase (match-string 1)))) | |
1935 ;; FIXME: what other encodings are valid, and how can we | |
1936 ;; translate them to the names of coding systems? | |
1937 (cond ((string= match "utf-8") | |
1938 'utf-8) | |
1939 ((string-match "iso-8859-[[:digit:]]+" match) | |
1940 (intern match)) | |
1941 (t nil))) | |
1942 'utf-8))))) | |
1943 | |
1901 ;;; | 1944 ;;; |
1902 (provide 'mule) | 1945 (provide 'mule) |
1903 | 1946 |
1904 ;;; mule.el ends here | 1947 ;;; mule.el ends here |