Mercurial > emacs
comparison lisp/international/mule.el @ 89483:2f877ed80fa6
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 08 Sep 2003 12:53:41 +0000 |
parents | 375f2633d815 d30982a5faa9 |
children | 681ad4b7421f |
comparison
equal
deleted
inserted
replaced
88123:375f2633d815 | 89483:2f877ed80fa6 |
---|---|
1 ;;; mule.el --- basic commands for mulitilingual environment | 1 (binary file application/octet-stream, hash: 75b1023a3b55c8df240df5cf00723cc5fdf01602) |
2 | |
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
4 ;; Licensed to the Free Software Foundation. | |
5 ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. | |
6 | |
7 ;; Keywords: mule, multilingual, character set, coding system | |
8 | |
9 ;; This file is part of GNU Emacs. | |
10 | |
11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 ;; it under the terms of the GNU General Public License as published by | |
13 ;; the Free Software Foundation; either version 2, or (at your option) | |
14 ;; any later version. | |
15 | |
16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 ;; GNU General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 ;; Boston, MA 02111-1307, USA. | |
25 | |
26 ;;; Commentary: | |
27 | |
28 ;;; Code: | |
29 | |
30 (defconst mule-version "5.0 (SAKAKI)" "\ | |
31 Version number and name of this version of MULE (multilingual environment).") | |
32 | |
33 (defconst mule-version-date "1999.12.7" "\ | |
34 Distribution date of this version of MULE (multilingual environment).") | |
35 | |
36 (defun load-with-code-conversion (fullname file &optional noerror nomessage) | |
37 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. | |
38 The file contents are decoded before evaluation if necessary. | |
39 If optional second arg NOERROR is non-nil, | |
40 report no error if FILE doesn't exist. | |
41 Print messages at start and end of loading unless | |
42 optional third arg NOMESSAGE is non-nil. | |
43 Return t if file exists." | |
44 (if (null (file-readable-p fullname)) | |
45 (and (null noerror) | |
46 (signal 'file-error (list "Cannot open load file" file))) | |
47 ;; Read file with code conversion, and then eval. | |
48 (let* ((buffer | |
49 ;; To avoid any autoloading, set default-major-mode to | |
50 ;; fundamental-mode. | |
51 ;; So that we don't get completely screwed if the | |
52 ;; file is encoded in some complicated character set, | |
53 ;; read it with real decoding, as a multibyte buffer, | |
54 ;; even if this is a --unibyte Emacs session. | |
55 (let ((default-major-mode 'fundamental-mode) | |
56 (default-enable-multibyte-characters t)) | |
57 ;; We can't use `generate-new-buffer' because files.el | |
58 ;; is not yet loaded. | |
59 (get-buffer-create (generate-new-buffer-name " *load*")))) | |
60 (load-in-progress t) | |
61 (source (save-match-data (string-match "\\.el\\'" fullname)))) | |
62 (unless nomessage | |
63 (if source | |
64 (message "Loading %s (source)..." file) | |
65 (message "Loading %s..." file))) | |
66 (when purify-flag | |
67 (push file preloaded-file-list)) | |
68 (unwind-protect | |
69 (let ((load-file-name fullname) | |
70 (set-auto-coding-for-load t) | |
71 (inhibit-file-name-operation nil)) | |
72 (save-excursion | |
73 (set-buffer buffer) | |
74 (insert-file-contents fullname) | |
75 ;; If the loaded file was inserted with no-conversion or | |
76 ;; raw-text coding system, make the buffer unibyte. | |
77 ;; Otherwise, eval-buffer might try to interpret random | |
78 ;; binary junk as multibyte characters. | |
79 (if (and enable-multibyte-characters | |
80 (or (eq (coding-system-type last-coding-system-used) 5) | |
81 (eq last-coding-system-used 'no-conversion))) | |
82 (set-buffer-multibyte nil)) | |
83 ;; Make `kill-buffer' quiet. | |
84 (set-buffer-modified-p nil)) | |
85 ;; Have the original buffer current while we eval. | |
86 (eval-buffer buffer nil file | |
87 ;; If this Emacs is running with --unibyte, | |
88 ;; convert multibyte strings to unibyte | |
89 ;; after reading them. | |
90 ;; (not default-enable-multibyte-characters) | |
91 nil t | |
92 )) | |
93 (let (kill-buffer-hook kill-buffer-query-functions) | |
94 (kill-buffer buffer))) | |
95 (let ((hook (assoc file after-load-alist))) | |
96 (when hook | |
97 (mapcar (function eval) (cdr hook)))) | |
98 (unless (or nomessage noninteractive) | |
99 (if source | |
100 (message "Loading %s (source)...done" file) | |
101 (message "Loading %s...done" file))) | |
102 t))) | |
103 | |
104 ;; API (Application Program Interface) for charsets. | |
105 | |
106 (defsubst charset-quoted-standard-p (obj) | |
107 "Return t if OBJ is a quoted symbol, and is the name of a standard charset." | |
108 (and (listp obj) (eq (car obj) 'quote) | |
109 (symbolp (car-safe (cdr obj))) | |
110 (let ((vector (get (car-safe (cdr obj)) 'charset))) | |
111 (and (vectorp vector) | |
112 (< (aref vector 0) 160))))) | |
113 | |
114 (defsubst charsetp (object) | |
115 "T if OBJECT is a charset." | |
116 (and (symbolp object) (vectorp (get object 'charset)))) | |
117 | |
118 (defsubst charset-info (charset) | |
119 "Return a vector of information of CHARSET. | |
120 The elements of the vector are: | |
121 CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, | |
122 LEADING-CODE-BASE, LEADING-CODE-EXT, | |
123 ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE, | |
124 REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION, | |
125 PLIST, | |
126 where | |
127 CHARSET-ID (integer) is the identification number of the charset. | |
128 BYTES (integer) is the length of multi-byte form of a character in | |
129 the charset: one of 1, 2, 3, and 4. | |
130 DIMENSION (integer) is the number of bytes to represent a character of | |
131 the charset: 1 or 2. | |
132 CHARS (integer) is the number of characters in a dimension: 94 or 96. | |
133 WIDTH (integer) is the number of columns a character in the charset | |
134 occupies on the screen: one of 0, 1, and 2. | |
135 DIRECTION (integer) is the rendering direction of characters in the | |
136 charset when rendering. If 0, render from left to right, else | |
137 render from right to left. | |
138 LEADING-CODE-BASE (integer) is the base leading-code for the | |
139 charset. | |
140 LEADING-CODE-EXT (integer) is the extended leading-code for the | |
141 charset. All charsets of less than 0xA0 has the value 0. | |
142 ISO-FINAL-CHAR (character) is the final character of the | |
143 corresponding ISO 2022 charset. If the charset is not assigned | |
144 any final character, the value is -1. | |
145 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked | |
146 while encoding to variants of ISO 2022 coding system, one of the | |
147 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). | |
148 If the charset is not assigned any final character, the value is -1. | |
149 REVERSE-CHARSET (integer) is the charset which differs only in | |
150 LEFT-TO-RIGHT value from the charset. If there's no such a | |
151 charset, the value is -1. | |
152 SHORT-NAME (string) is the short name to refer to the charset. | |
153 LONG-NAME (string) is the long name to refer to the charset | |
154 DESCRIPTION (string) is the description string of the charset. | |
155 PLIST (property list) may contain any type of information a user | |
156 want to put and get by functions `put-charset-property' and | |
157 `get-charset-property' respectively." | |
158 (get charset 'charset)) | |
159 | |
160 ;; It is better not to use backquote in this file, | |
161 ;; because that makes a bootstrapping problem | |
162 ;; if you need to recompile all the Lisp files using interpreted code. | |
163 | |
164 (defmacro charset-id (charset) | |
165 "Return charset identification number of CHARSET." | |
166 (if (charset-quoted-standard-p charset) | |
167 (aref (charset-info (nth 1 charset)) 0) | |
168 (list 'aref (list 'charset-info charset) 0))) | |
169 | |
170 (defmacro charset-bytes (charset) | |
171 "Return bytes of CHARSET. | |
172 See the function `charset-info' for more detail." | |
173 (if (charset-quoted-standard-p charset) | |
174 (aref (charset-info (nth 1 charset)) 1) | |
175 (list 'aref (list 'charset-info charset) 1))) | |
176 | |
177 (defmacro charset-dimension (charset) | |
178 "Return dimension of CHARSET. | |
179 See the function `charset-info' for more detail." | |
180 (if (charset-quoted-standard-p charset) | |
181 (aref (charset-info (nth 1 charset)) 2) | |
182 (list 'aref (list 'charset-info charset) 2))) | |
183 | |
184 (defmacro charset-chars (charset) | |
185 "Return character numbers contained in a dimension of CHARSET. | |
186 See the function `charset-info' for more detail." | |
187 (if (charset-quoted-standard-p charset) | |
188 (aref (charset-info (nth 1 charset)) 3) | |
189 (list 'aref (list 'charset-info charset) 3))) | |
190 | |
191 (defmacro charset-width (charset) | |
192 "Return width (how many column occupied on a screen) of CHARSET. | |
193 See the function `charset-info' for more detail." | |
194 (if (charset-quoted-standard-p charset) | |
195 (aref (charset-info (nth 1 charset)) 4) | |
196 (list 'aref (list 'charset-info charset) 4))) | |
197 | |
198 (defmacro charset-direction (charset) | |
199 "Return direction of CHARSET. | |
200 See the function `charset-info' for more detail." | |
201 (if (charset-quoted-standard-p charset) | |
202 (aref (charset-info (nth 1 charset)) 5) | |
203 (list 'aref (list 'charset-info charset) 5))) | |
204 | |
205 (defmacro charset-iso-final-char (charset) | |
206 "Return final char of CHARSET. | |
207 See the function `charset-info' for more detail." | |
208 (if (charset-quoted-standard-p charset) | |
209 (aref (charset-info (nth 1 charset)) 8) | |
210 (list 'aref (list 'charset-info charset) 8))) | |
211 | |
212 (defmacro charset-iso-graphic-plane (charset) | |
213 "Return graphic plane of CHARSET. | |
214 See the function `charset-info' for more detail." | |
215 (if (charset-quoted-standard-p charset) | |
216 (aref (charset-info (nth 1 charset)) 9) | |
217 (list 'aref (list 'charset-info charset) 9))) | |
218 | |
219 (defmacro charset-reverse-charset (charset) | |
220 "Return reverse charset of CHARSET. | |
221 See the function `charset-info' for more detail." | |
222 (if (charset-quoted-standard-p charset) | |
223 (aref (charset-info (nth 1 charset)) 10) | |
224 (list 'aref (list 'charset-info charset) 10))) | |
225 | |
226 (defmacro charset-short-name (charset) | |
227 "Return short name of CHARSET. | |
228 See the function `charset-info' for more detail." | |
229 (if (charset-quoted-standard-p charset) | |
230 (aref (charset-info (nth 1 charset)) 11) | |
231 (list 'aref (list 'charset-info charset) 11))) | |
232 | |
233 (defmacro charset-long-name (charset) | |
234 "Return long name of CHARSET. | |
235 See the function `charset-info' for more detail." | |
236 (if (charset-quoted-standard-p charset) | |
237 (aref (charset-info (nth 1 charset)) 12) | |
238 (list 'aref (list 'charset-info charset) 12))) | |
239 | |
240 (defmacro charset-description (charset) | |
241 "Return description of CHARSET. | |
242 See the function `charset-info' for more detail." | |
243 (if (charset-quoted-standard-p charset) | |
244 (aref (charset-info (nth 1 charset)) 13) | |
245 (list 'aref (list 'charset-info charset) 13))) | |
246 | |
247 (defmacro charset-plist (charset) | |
248 "Return list charset property of CHARSET. | |
249 See the function `charset-info' for more detail." | |
250 (list 'aref | |
251 (if (charset-quoted-standard-p charset) | |
252 (charset-info (nth 1 charset)) | |
253 (list 'charset-info charset)) | |
254 14)) | |
255 | |
256 (defun set-charset-plist (charset plist) | |
257 "Set CHARSET's property list to PLIST, and return PLIST." | |
258 (aset (charset-info charset) 14 plist)) | |
259 | |
260 (defun make-char (charset &optional code1 code2) | |
261 "Return a character of CHARSET whose position codes are CODE1 and CODE2. | |
262 CODE1 and CODE2 are optional, but if you don't supply | |
263 sufficient position codes, return a generic character which stands for | |
264 all characters or group of characters in the character set. | |
265 A generic character can be used to index a char table (e.g. syntax-table). | |
266 | |
267 Such character sets as ascii, eight-bit-control, and eight-bit-graphic | |
268 don't have corresponding generic characters. If CHARSET is one of | |
269 them and you don't supply CODE1, return the character of the smallest | |
270 code in CHARSET. | |
271 | |
272 If CODE1 or CODE2 are invalid (out of range), this function signals an | |
273 error. However, the eighth bit of both CODE1 and CODE2 is zeroed | |
274 before they are used to index CHARSET. Thus you may use, say, the | |
275 actual ISO 8859 character code rather than subtracting 128, as you | |
276 would need to index the corresponding Emacs charset." | |
277 (make-char-internal (charset-id charset) code1 code2)) | |
278 | |
279 (put 'make-char 'byte-compile | |
280 (function | |
281 (lambda (form) | |
282 (let ((charset (nth 1 form))) | |
283 (if (charset-quoted-standard-p charset) | |
284 (byte-compile-normal-call | |
285 (cons 'make-char-internal | |
286 (cons (charset-id (nth 1 charset)) (nthcdr 2 form)))) | |
287 (byte-compile-normal-call | |
288 (cons 'make-char-internal | |
289 (cons (list 'charset-id charset) (nthcdr 2 form))))))))) | |
290 | |
291 (defun charset-list () | |
292 "Return list of charsets ever defined. | |
293 | |
294 This function is provided for backward compatibility. | |
295 Now we have the variable `charset-list'." | |
296 charset-list) | |
297 | |
298 (defsubst generic-char-p (char) | |
299 "Return t if and only if CHAR is a generic character. | |
300 See also the documentation of `make-char'." | |
301 (and (>= char 0400) | |
302 (let ((l (split-char char))) | |
303 (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0)) | |
304 (not (eq (car l) 'composition)))))) | |
305 | |
306 (defun decode-char (ccs code-point &optional restriction) | |
307 "Return character specified by coded character set CCS and CODE-POINT in it. | |
308 Return nil if such a character is not supported. | |
309 Currently the only supported coded character set is `ucs' (ISO/IEC | |
310 10646: Universal Multi-Octet Coded Character Set), and the result is | |
311 translated through the translation-table named | |
312 `utf-translation-table-for-decode' or the translation-hash-table named | |
313 `utf-subst-table-for-decode'. | |
314 | |
315 Optional argument RESTRICTION specifies a way to map the pair of CCS | |
316 and CODE-POINT to a character. Currently not supported and just ignored." | |
317 (cond | |
318 ((eq ccs 'ucs) | |
319 (or (gethash code-point | |
320 (get 'utf-subst-table-for-decode 'translation-hash-table)) | |
321 (let ((c (cond | |
322 ((< code-point 160) | |
323 code-point) | |
324 ((< code-point 256) | |
325 (make-char 'latin-iso8859-1 code-point)) | |
326 ((< code-point #x2500) | |
327 (setq code-point (- code-point #x0100)) | |
328 (make-char 'mule-unicode-0100-24ff | |
329 (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) | |
330 ((< code-point #x3400) | |
331 (setq code-point (- code-point #x2500)) | |
332 (make-char 'mule-unicode-2500-33ff | |
333 (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) | |
334 ((and (>= code-point #xe000) (< code-point #x10000)) | |
335 (setq code-point (- code-point #xe000)) | |
336 (make-char 'mule-unicode-e000-ffff | |
337 (+ (/ code-point 96) 32) | |
338 (+ (% code-point 96) 32)))))) | |
339 (when c | |
340 (or (aref (get 'utf-translation-table-for-decode | |
341 'translation-table) c) | |
342 c))))))) | |
343 | |
344 (defun encode-char (char ccs &optional restriction) | |
345 "Return code-point in coded character set CCS that corresponds to CHAR. | |
346 Return nil if CHAR is not included in CCS. | |
347 Currently the only supported coded character set is `ucs' (ISO/IEC | |
348 10646: Universal Multi-Octet Coded Character Set), and CHAR is first | |
349 translated through the translation-table named | |
350 `utf-translation-table-for-encode' or the translation-hash-table named | |
351 `utf-subst-table-for-encode'. | |
352 | |
353 CHAR should be in one of these charsets: | |
354 ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff, | |
355 mule-unicode-e000-ffff, eight-bit-control | |
356 Otherwise, return nil. | |
357 | |
358 Optional argument RESTRICTION specifies a way to map CHAR to a | |
359 code-point in CCS. Currently not supported and just ignored." | |
360 (let* ((split (split-char char)) | |
361 (charset (car split)) | |
362 trans) | |
363 (cond ((eq ccs 'ucs) | |
364 (or (gethash char (get 'utf-subst-table-for-encode | |
365 'translation-hash-table)) | |
366 (let ((table (get 'utf-translation-table-for-encode | |
367 'translation-table))) | |
368 (setq trans (aref table char)) | |
369 (if trans | |
370 (setq split (split-char trans) | |
371 charset (car split))) | |
372 (cond ((eq charset 'ascii) | |
373 char) | |
374 ((eq charset 'latin-iso8859-1) | |
375 (+ (nth 1 split) 128)) | |
376 ((eq charset 'mule-unicode-0100-24ff) | |
377 (+ #x0100 (+ (* (- (nth 1 split) 32) 96) | |
378 (- (nth 2 split) 32)))) | |
379 ((eq charset 'mule-unicode-2500-33ff) | |
380 (+ #x2500 (+ (* (- (nth 1 split) 32) 96) | |
381 (- (nth 2 split) 32)))) | |
382 ((eq charset 'mule-unicode-e000-ffff) | |
383 (+ #xe000 (+ (* (- (nth 1 split) 32) 96) | |
384 (- (nth 2 split) 32)))) | |
385 ((eq charset 'eight-bit-control) | |
386 char)))))))) | |
387 | |
388 | |
389 ;; Coding system stuff | |
390 | |
391 ;; Coding system is a symbol that has the property `coding-system'. | |
392 ;; | |
393 ;; The value of the property `coding-system' is a vector of the | |
394 ;; following format: | |
395 ;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS] | |
396 ;; We call this vector as coding-spec. See comments in src/coding.c | |
397 ;; for more detail. | |
398 | |
399 (defconst coding-spec-type-idx 0) | |
400 (defconst coding-spec-mnemonic-idx 1) | |
401 (defconst coding-spec-doc-string-idx 2) | |
402 (defconst coding-spec-plist-idx 3) | |
403 (defconst coding-spec-flags-idx 4) | |
404 | |
405 ;; PLIST is a property list of a coding system. To share PLIST among | |
406 ;; alias coding systems, a coding system has PLIST in coding-spec | |
407 ;; instead of having it in normal property list of Lisp symbol. | |
408 ;; Here's a list of coding system properties currently being used. | |
409 ;; | |
410 ;; o coding-category | |
411 ;; | |
412 ;; The value is a coding category the coding system belongs to. The | |
413 ;; function `make-coding-system' sets this value automatically | |
414 ;; unless its argument PROPERTIES specifies this property. | |
415 ;; | |
416 ;; o alias-coding-systems | |
417 ;; | |
418 ;; The value is a list of coding systems of the same alias group. The | |
419 ;; first element is the coding system made at first, which we call as | |
420 ;; `base coding system'. The function `make-coding-system' sets this | |
421 ;; value automatically and `define-coding-system-alias' updates it. | |
422 ;; | |
423 ;; See the documentation of make-coding-system for the meanings of the | |
424 ;; following properties. | |
425 ;; | |
426 ;; o post-read-conversion | |
427 ;; o pre-write-conversion | |
428 ;; o translation-table-for-decode | |
429 ;; o translation-table-for-encode | |
430 ;; o safe-chars | |
431 ;; o safe-charsets | |
432 ;; o mime-charset | |
433 ;; o valid-codes (meaningful only for a coding system based on CCL) | |
434 | |
435 | |
436 (defsubst coding-system-spec (coding-system) | |
437 "Return coding-spec of CODING-SYSTEM." | |
438 (get (check-coding-system coding-system) 'coding-system)) | |
439 | |
440 (defun coding-system-type (coding-system) | |
441 "Return the coding type of CODING-SYSTEM. | |
442 A coding type is an integer value indicating the encoding method | |
443 of CODING-SYSTEM. See the function `make-coding-system' for more detail." | |
444 (aref (coding-system-spec coding-system) coding-spec-type-idx)) | |
445 | |
446 (defun coding-system-mnemonic (coding-system) | |
447 "Return the mnemonic character of CODING-SYSTEM. | |
448 The mnemonic character of a coding system is used in mode line | |
449 to indicate the coding system. If the arg is nil, return ?-." | |
450 (let ((spec (coding-system-spec coding-system))) | |
451 (if spec (aref spec coding-spec-mnemonic-idx) ?-))) | |
452 | |
453 (defun coding-system-doc-string (coding-system) | |
454 "Return the documentation string for CODING-SYSTEM." | |
455 (aref (coding-system-spec coding-system) coding-spec-doc-string-idx)) | |
456 | |
457 (defun coding-system-plist (coding-system) | |
458 "Return the property list of CODING-SYSTEM." | |
459 (aref (coding-system-spec coding-system) coding-spec-plist-idx)) | |
460 | |
461 (defun coding-system-flags (coding-system) | |
462 "Return `flags' of CODING-SYSTEM. | |
463 A `flags' of a coding system is a vector of length 32 indicating detailed | |
464 information of a coding system. See the function `make-coding-system' | |
465 for more detail." | |
466 (aref (coding-system-spec coding-system) coding-spec-flags-idx)) | |
467 | |
468 (defun coding-system-get (coding-system prop) | |
469 "Extract a value from CODING-SYSTEM's property list for property PROP." | |
470 (plist-get (coding-system-plist coding-system) prop)) | |
471 | |
472 (defun coding-system-put (coding-system prop val) | |
473 "Change value in CODING-SYSTEM's property list PROP to VAL." | |
474 (let ((plist (coding-system-plist coding-system))) | |
475 (if plist | |
476 (plist-put plist prop val) | |
477 (aset (coding-system-spec coding-system) coding-spec-plist-idx | |
478 (list prop val))))) | |
479 | |
480 (defun coding-system-category (coding-system) | |
481 "Return the coding category of CODING-SYSTEM. | |
482 See also `coding-category-list'." | |
483 (coding-system-get coding-system 'coding-category)) | |
484 | |
485 (defun coding-system-base (coding-system) | |
486 "Return the base coding system of CODING-SYSTEM. | |
487 A base coding system is what made by `make-coding-system'. | |
488 Any alias nor subsidiary coding systems are not base coding system." | |
489 (car (coding-system-get coding-system 'alias-coding-systems))) | |
490 | |
491 (defalias 'coding-system-parent 'coding-system-base) | |
492 (make-obsolete 'coding-system-parent 'coding-system-base "20.3") | |
493 | |
494 ;; Coding system also has a property `eol-type'. | |
495 ;; | |
496 ;; This property indicates how the coding system handles end-of-line | |
497 ;; format. The value is integer 0, 1, 2, or a vector of three coding | |
498 ;; systems. Each integer value 0, 1, and 2 indicates the format of | |
499 ;; end-of-line LF, CRLF, and CR respectively. A vector value | |
500 ;; indicates that the format of end-of-line should be detected | |
501 ;; automatically. Nth element of the vector is the subsidiary coding | |
502 ;; system whose `eol-type' property is N. | |
503 | |
504 (defun coding-system-eol-type (coding-system) | |
505 "Return eol-type of CODING-SYSTEM. | |
506 An eol-type is integer 0, 1, 2, or a vector of coding systems. | |
507 | |
508 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, | |
509 CRLF, and CR respectively. | |
510 | |
511 A vector value indicates that a format of end-of-line should be | |
512 detected automatically. Nth element of the vector is the subsidiary | |
513 coding system whose eol-type is N." | |
514 (get coding-system 'eol-type)) | |
515 | |
516 (defun coding-system-eol-type-mnemonic (coding-system) | |
517 "Return the string indicating end-of-line format of CODING-SYSTEM." | |
518 (let* ((eol-type (coding-system-eol-type coding-system)) | |
519 (val (cond ((eq eol-type 0) eol-mnemonic-unix) | |
520 ((eq eol-type 1) eol-mnemonic-dos) | |
521 ((eq eol-type 2) eol-mnemonic-mac) | |
522 (t eol-mnemonic-undecided)))) | |
523 (if (stringp val) | |
524 val | |
525 (char-to-string val)))) | |
526 | |
527 (defun coding-system-lessp (x y) | |
528 (cond ((eq x 'no-conversion) t) | |
529 ((eq y 'no-conversion) nil) | |
530 ((eq x 'emacs-mule) t) | |
531 ((eq y 'emacs-mule) nil) | |
532 ((eq x 'undecided) t) | |
533 ((eq y 'undecided) nil) | |
534 (t (let ((c1 (coding-system-mnemonic x)) | |
535 (c2 (coding-system-mnemonic y))) | |
536 (or (< (downcase c1) (downcase c2)) | |
537 (and (not (> (downcase c1) (downcase c2))) | |
538 (< c1 c2))))))) | |
539 | |
540 (defun add-to-coding-system-list (coding-system) | |
541 "Add CODING-SYSTEM to `coding-system-list' while keeping it sorted." | |
542 (if (or (null coding-system-list) | |
543 (coding-system-lessp coding-system (car coding-system-list))) | |
544 (setq coding-system-list (cons coding-system coding-system-list)) | |
545 (let ((len (length coding-system-list)) | |
546 mid (tem coding-system-list)) | |
547 (while (> len 1) | |
548 (setq mid (nthcdr (/ len 2) tem)) | |
549 (if (coding-system-lessp (car mid) coding-system) | |
550 (setq tem mid | |
551 len (- len (/ len 2))) | |
552 (setq len (/ len 2)))) | |
553 (setcdr tem (cons coding-system (cdr tem)))))) | |
554 | |
555 (defun coding-system-list (&optional base-only) | |
556 "Return a list of all existing non-subsidiary coding systems. | |
557 If optional arg BASE-ONLY is non-nil, only base coding systems are listed. | |
558 The value doesn't include subsidiary coding systems which are what | |
559 made from bases and aliases automatically for various end-of-line | |
560 formats (e.g. iso-latin-1-unix, koi8-r-dos)." | |
561 (let* ((codings (copy-sequence coding-system-list)) | |
562 (tail (cons nil codings))) | |
563 ;; Remove subsidiary coding systems (eol variants) and alias | |
564 ;; coding systems (if necessary). | |
565 (while (cdr tail) | |
566 (let* ((coding (car (cdr tail))) | |
567 (aliases (coding-system-get coding 'alias-coding-systems))) | |
568 (if (or | |
569 ;; CODING is an eol variant if not in ALIASES. | |
570 (not (memq coding aliases)) | |
571 ;; CODING is an alias if it is not car of ALIASES. | |
572 (and base-only (not (eq coding (car aliases))))) | |
573 (setcdr tail (cdr (cdr tail))) | |
574 (setq tail (cdr tail))))) | |
575 codings)) | |
576 | |
577 (defun map-charset-chars (func charset) | |
578 "Use FUNC to map over all characters in CHARSET for side effects. | |
579 FUNC is a function of two args, the start and end (inclusive) of a | |
580 character code range. Thus FUNC should iterate over [START, END]." | |
581 (let* ((dim (charset-dimension charset)) | |
582 (chars (charset-chars charset)) | |
583 (start (if (= chars 94) | |
584 33 | |
585 32))) | |
586 (if (= dim 1) | |
587 (funcall func | |
588 (make-char charset start) | |
589 (make-char charset (+ start chars -1))) | |
590 (dotimes (i chars) | |
591 (funcall func | |
592 (make-char charset (+ i start) start) | |
593 (make-char charset (+ i start) (+ start chars -1))))))) | |
594 | |
595 (defun register-char-codings (coding-system safe-chars) | |
596 "This is an obsolete function. | |
597 It exists just for backward compatibility, and it does nothing.") | |
598 (make-obsolete 'register-char-codings | |
599 "Unnecessary function. Calling it has no effect." | |
600 "21.3") | |
601 | |
602 (defconst char-coding-system-table nil | |
603 "This is an obsolete variable. | |
604 It exists just for backward compatibility, and the value is always nil.") | |
605 | |
606 (defun make-subsidiary-coding-system (coding-system) | |
607 "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM." | |
608 (let ((coding-spec (coding-system-spec coding-system)) | |
609 (subsidiaries (vector (intern (format "%s-unix" coding-system)) | |
610 (intern (format "%s-dos" coding-system)) | |
611 (intern (format "%s-mac" coding-system)))) | |
612 (i 0) | |
613 temp) | |
614 (while (< i 3) | |
615 (put (aref subsidiaries i) 'coding-system coding-spec) | |
616 (put (aref subsidiaries i) 'eol-type i) | |
617 (add-to-coding-system-list (aref subsidiaries i)) | |
618 (setq coding-system-alist | |
619 (cons (list (symbol-name (aref subsidiaries i))) | |
620 coding-system-alist)) | |
621 (setq i (1+ i))) | |
622 subsidiaries)) | |
623 | |
624 (defun transform-make-coding-system-args (name type &optional doc-string props) | |
625 "For internal use only. | |
626 Transform XEmacs style args for `make-coding-system' to Emacs style. | |
627 Value is a list of transformed arguments." | |
628 (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?"))) | |
629 (eol-type (plist-get props 'eol-type)) | |
630 properties tmp) | |
631 (cond | |
632 ((eq eol-type 'lf) (setq eol-type 'unix)) | |
633 ((eq eol-type 'crlf) (setq eol-type 'dos)) | |
634 ((eq eol-type 'cr) (setq eol-type 'mac))) | |
635 (if (setq tmp (plist-get props 'post-read-conversion)) | |
636 (setq properties (plist-put properties 'post-read-conversion tmp))) | |
637 (if (setq tmp (plist-get props 'pre-write-conversion)) | |
638 (setq properties (plist-put properties 'pre-write-conversion tmp))) | |
639 (cond | |
640 ((eq type 'shift-jis) | |
641 `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type)) | |
642 ((eq type 'iso2022) ; This is not perfect. | |
643 (if (plist-get props 'escape-quoted) | |
644 (error "escape-quoted is not supported: %S" | |
645 `(,name ,type ,doc-string ,props))) | |
646 (let ((g0 (plist-get props 'charset-g0)) | |
647 (g1 (plist-get props 'charset-g1)) | |
648 (g2 (plist-get props 'charset-g2)) | |
649 (g3 (plist-get props 'charset-g3)) | |
650 (use-roman | |
651 (and | |
652 (eq (cadr (assoc 'latin-jisx0201 | |
653 (plist-get props 'input-charset-conversion))) | |
654 'ascii) | |
655 (eq (cadr (assoc 'ascii | |
656 (plist-get props 'output-charset-conversion))) | |
657 'latin-jisx0201))) | |
658 (use-oldjis | |
659 (and | |
660 (eq (cadr (assoc 'japanese-jisx0208-1978 | |
661 (plist-get props 'input-charset-conversion))) | |
662 'japanese-jisx0208) | |
663 (eq (cadr (assoc 'japanese-jisx0208 | |
664 (plist-get props 'output-charset-conversion))) | |
665 'japanese-jisx0208-1978)))) | |
666 (if (charsetp g0) | |
667 (if (plist-get props 'force-g0-on-output) | |
668 (setq g0 `(nil ,g0)) | |
669 (setq g0 `(,g0 t)))) | |
670 (if (charsetp g1) | |
671 (if (plist-get props 'force-g1-on-output) | |
672 (setq g1 `(nil ,g1)) | |
673 (setq g1 `(,g1 t)))) | |
674 (if (charsetp g2) | |
675 (if (plist-get props 'force-g2-on-output) | |
676 (setq g2 `(nil ,g2)) | |
677 (setq g2 `(,g2 t)))) | |
678 (if (charsetp g3) | |
679 (if (plist-get props 'force-g3-on-output) | |
680 (setq g3 `(nil ,g3)) | |
681 (setq g3 `(,g3 t)))) | |
682 `(,name 2 ,mnemonic ,doc-string | |
683 (,g0 ,g1 ,g2 ,g3 | |
684 ,(plist-get props 'short) | |
685 ,(not (plist-get props 'no-ascii-eol)) | |
686 ,(not (plist-get props 'no-ascii-cntl)) | |
687 ,(plist-get props 'seven) | |
688 t | |
689 ,(not (plist-get props 'lock-shift)) | |
690 ,use-roman | |
691 ,use-oldjis | |
692 ,(plist-get props 'no-iso6429) | |
693 nil nil nil nil) | |
694 ,properties ,eol-type))) | |
695 ((eq type 'big5) | |
696 `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type)) | |
697 ((eq type 'ccl) | |
698 `(,name 4 ,mnemonic ,doc-string | |
699 (,(plist-get props 'decode) . ,(plist-get props 'encode)) | |
700 ,properties ,eol-type)) | |
701 (t | |
702 (error "unsupported XEmacs style make-coding-style arguments: %S" | |
703 `(,name ,type ,doc-string ,props)))))) | |
704 | |
705 (defun make-coding-system (coding-system type mnemonic doc-string | |
706 &optional | |
707 flags | |
708 properties | |
709 eol-type) | |
710 "Define a new coding system CODING-SYSTEM (symbol). | |
711 Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional), | |
712 and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM | |
713 in the following format: | |
714 [TYPE MNEMONIC DOC-STRING PLIST FLAGS] | |
715 | |
716 TYPE is an integer value indicating the type of the coding system as follows: | |
717 0: Emacs internal format, | |
718 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PCs, | |
719 2: ISO-2022 including many variants, | |
720 3: Big5 used mainly on Chinese PCs, | |
721 4: private, CCL programs provide encoding/decoding algorithm, | |
722 5: Raw-text, which means that text contains random 8-bit codes. | |
723 | |
724 MNEMONIC is a character to be displayed on mode line for the coding system. | |
725 | |
726 DOC-STRING is a documentation string for the coding system. | |
727 | |
728 FLAGS specifies more detailed information of the coding system as follows: | |
729 | |
730 If TYPE is 2 (ISO-2022), FLAGS is a list of these elements: | |
731 CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM, | |
732 ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT, | |
733 USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL, | |
734 SAFE, ACCEPT-LATIN-EXTRA-CODE. | |
735 CHARSETn are character sets initially designated to Gn graphic registers. | |
736 If CHARSETn is nil, Gn is never used. | |
737 If CHARSETn is t, Gn can be used but nothing designated initially. | |
738 If CHARSETn is a list of character sets, those character sets are | |
739 designated to Gn on output, but nothing designated to Gn initially. | |
740 But, character set `ascii' can be designated only to G0. | |
741 SHORT-FORM non-nil means use short designation sequence on output. | |
742 ASCII-EOL non-nil means designate ASCII to g0 at end of line on output. | |
743 ASCII-CNTL non-nil means designate ASCII to g0 before control codes and | |
744 SPACE on output. | |
745 SEVEN non-nil means use 7-bit code only on output. | |
746 LOCKING-SHIFT non-nil means use locking-shift. | |
747 SINGLE-SHIFT non-nil means use single-shift. | |
748 USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII. | |
749 USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983. | |
750 NO-ISO6429 non-nil means not use ISO6429's direction specification. | |
751 INIT-BOL non-nil means any designation state is assumed to be reset | |
752 to initial at each beginning of line on output. | |
753 DESIGNATION-BOL non-nil means designation sequences should be placed | |
754 at beginning of line on output. | |
755 SAFE non-nil means convert unsafe characters to `?' on output. | |
756 Characters not specified in the property `safe-charsets' nor | |
757 `safe-chars' are unsafe. | |
758 ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts | |
759 a code specified in `latin-extra-code-table' (which see) as a valid | |
760 code of the coding system. | |
761 | |
762 If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for | |
763 decoding and encoding. CCL programs should be specified by their | |
764 symbols. | |
765 | |
766 PROPERTIES is an alist of properties vs the corresponding values. The | |
767 following properties are recognized: | |
768 | |
769 o post-read-conversion | |
770 | |
771 The value is a function to call after some text is inserted and | |
772 decoded by the coding system itself and before any functions in | |
773 `after-insert-functions' are called. The argument of this | |
774 function is the same as for a function in | |
775 `after-insert-file-functions', i.e. LENGTH of the text inserted, | |
776 with point at the head of the text to be decoded. | |
777 | |
778 o pre-write-conversion | |
779 | |
780 The value is a function to call after all functions in | |
781 `write-region-annotate-functions' and `buffer-file-format' are | |
782 called, and before the text is encoded by the coding system itself. | |
783 The arguments to this function are the same as those of a function | |
784 in `write-region-annotate-functions', i.e. FROM and TO, specifying | |
785 a region of text. | |
786 | |
787 o translation-table-for-decode | |
788 | |
789 The value is a translation table to be applied on decoding. See | |
790 the function `make-translation-table' for the format of translation | |
791 table. This is not applicable to type 4 (CCL-based) coding systems. | |
792 | |
793 o translation-table-for-encode | |
794 | |
795 The value is a translation table to be applied on encoding. This is | |
796 not applicable to type 4 (CCL-based) coding systems. | |
797 | |
798 o safe-chars | |
799 | |
800 The value is a char table. If a character has non-nil value in it, | |
801 the character is safely supported by the coding system. This | |
802 overrides the specification of safe-charsets. | |
803 | |
804 o safe-charsets | |
805 | |
806 The value is a list of charsets safely supported by the coding | |
807 system. The value t means that all charsets Emacs handles are | |
808 supported. Even if some charset is not in this list, it doesn't | |
809 mean that the charset can't be encoded in the coding system; | |
810 it just means that some other receiver of text encoded | |
811 in the coding system won't be able to handle that charset. | |
812 | |
813 o mime-charset | |
814 | |
815 The value is a symbol whose name is the `MIME-charset' parameter of | |
816 the coding system. | |
817 | |
818 o valid-codes (meaningful only for a coding system based on CCL) | |
819 | |
820 The value is a list to indicate valid byte ranges of the encoded | |
821 file. Each element of the list is an integer or a cons of integer. | |
822 In the former case, the integer value is a valid byte code. In the | |
823 latter case, the integers specify the range of valid byte codes. | |
824 | |
825 o composition (meaningful only when TYPE is 0 or 2) | |
826 | |
827 If the value is non-nil, the coding system preserves composition | |
828 information. | |
829 | |
830 These properties are set in PLIST, a property list. This function | |
831 also sets properties `coding-category' and `alias-coding-systems' | |
832 automatically. | |
833 | |
834 EOL-TYPE specifies the EOL type of the coding-system in one of the | |
835 following formats: | |
836 | |
837 o symbol (unix, dos, or mac) | |
838 | |
839 The symbol `unix' means Unix-like EOL (LF), `dos' means | |
840 DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR). | |
841 | |
842 o number (0, 1, or 2) | |
843 | |
844 The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL | |
845 respectively. | |
846 | |
847 o vector of coding-systems of length 3 | |
848 | |
849 The EOL type is detected automatically for the coding system. | |
850 And, according to the detected EOL type, one of the coding | |
851 systems in the vector is selected. Elements of the vector | |
852 corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL | |
853 in this order. | |
854 | |
855 Kludgy features for backward compatibility: | |
856 | |
857 1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is | |
858 treated as a compiled CCL code. | |
859 | |
860 2. If PROPERTIES is just a list of character sets, the list is set as | |
861 a value of `safe-charsets' in PLIST." | |
862 | |
863 ;; For compatiblity with XEmacs, we check the type of TYPE. If it | |
864 ;; is a symbol, perhaps, this function is called with XEmacs-style | |
865 ;; arguments. Here, try to transform that kind of arguments to | |
866 ;; Emacs style. | |
867 (if (symbolp type) | |
868 (let ((args (transform-make-coding-system-args coding-system type | |
869 mnemonic doc-string))) | |
870 (setq coding-system (car args) | |
871 type (nth 1 args) | |
872 mnemonic (nth 2 args) | |
873 doc-string (nth 3 args) | |
874 flags (nth 4 args) | |
875 properties (nth 5 args) | |
876 eol-type (nth 6 args)))) | |
877 | |
878 ;; Set a value of `coding-system' property. | |
879 (let ((coding-spec (make-vector 5 nil)) | |
880 (no-initial-designation t) | |
881 (no-alternative-designation t) | |
882 (accept-latin-extra-code nil) | |
883 coding-category) | |
884 (if (or (not (integerp type)) (< type 0) (> type 5)) | |
885 (error "TYPE argument must be 0..5")) | |
886 (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127)) | |
887 (error "MNEMONIC argument must be an ASCII printable character")) | |
888 (aset coding-spec coding-spec-type-idx type) | |
889 (aset coding-spec coding-spec-mnemonic-idx mnemonic) | |
890 (aset coding-spec coding-spec-doc-string-idx | |
891 (purecopy (if (stringp doc-string) doc-string ""))) | |
892 (cond ((= type 0) | |
893 (setq coding-category 'coding-category-emacs-mule)) | |
894 ((= type 1) | |
895 (setq coding-category 'coding-category-sjis)) | |
896 ((= type 2) ; ISO2022 | |
897 (let ((i 0) | |
898 (vec (make-vector 32 nil)) | |
899 (g1-designation nil) | |
900 (fl flags)) | |
901 (while (< i 4) | |
902 (let ((charset (car fl))) | |
903 (if (and no-initial-designation | |
904 (> i 0) | |
905 (or (charsetp charset) | |
906 (and (consp charset) | |
907 (charsetp (car charset))))) | |
908 (setq no-initial-designation nil)) | |
909 (if (charsetp charset) | |
910 (if (= i 1) (setq g1-designation charset)) | |
911 (if (consp charset) | |
912 (let ((tail charset) | |
913 elt) | |
914 (while tail | |
915 (setq elt (car tail)) | |
916 (if (eq elt t) | |
917 (setq no-alternative-designation nil) | |
918 (if (and elt (not (charsetp elt))) | |
919 (error "Invalid charset: %s" elt))) | |
920 (setq tail (cdr tail))) | |
921 (setq g1-designation (car charset))) | |
922 (if charset | |
923 (if (eq charset t) | |
924 (setq no-alternative-designation nil) | |
925 (error "Invalid charset: %s" charset))))) | |
926 (aset vec i charset)) | |
927 (setq fl (cdr fl) i (1+ i))) | |
928 (while (and (< i 32) fl) | |
929 (aset vec i (car fl)) | |
930 (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE | |
931 (car fl)) | |
932 (setq accept-latin-extra-code t)) | |
933 (setq fl (cdr fl) i (1+ i))) | |
934 (aset coding-spec 4 vec) | |
935 (setq coding-category | |
936 (if (aref vec 8) ; Use locking-shift. | |
937 (or (and (aref vec 7) 'coding-category-iso-7-else) | |
938 'coding-category-iso-8-else) | |
939 (if (aref vec 7) ; 7-bit only. | |
940 (if (aref vec 9) ; Use single-shift. | |
941 'coding-category-iso-7-else | |
942 (if no-alternative-designation | |
943 'coding-category-iso-7-tight | |
944 'coding-category-iso-7)) | |
945 (if (or no-initial-designation | |
946 (not no-alternative-designation)) | |
947 'coding-category-iso-8-else | |
948 (if (and (charsetp g1-designation) | |
949 (= (charset-dimension g1-designation) 2)) | |
950 'coding-category-iso-8-2 | |
951 'coding-category-iso-8-1))))))) | |
952 ((= type 3) | |
953 (setq coding-category 'coding-category-big5)) | |
954 ((= type 4) ; private | |
955 (setq coding-category 'coding-category-ccl) | |
956 (if (not (consp flags)) | |
957 (error "Invalid FLAGS argument for TYPE 4 (CCL)") | |
958 (let ((decoder (check-ccl-program | |
959 (car flags) | |
960 (intern (format "%s-decoder" coding-system)))) | |
961 (encoder (check-ccl-program | |
962 (cdr flags) | |
963 (intern (format "%s-encoder" coding-system))))) | |
964 (if (and decoder encoder) | |
965 (aset coding-spec 4 (cons decoder encoder)) | |
966 (error "Invalid FLAGS argument for TYPE 4 (CCL)"))))) | |
967 (t ; i.e. (= type 5) | |
968 (setq coding-category 'coding-category-raw-text))) | |
969 | |
970 (let ((plist (list 'coding-category coding-category | |
971 'alias-coding-systems (list coding-system)))) | |
972 (if no-initial-designation | |
973 (plist-put plist 'no-initial-designation t)) | |
974 (if (and properties | |
975 (or (eq properties t) | |
976 (not (consp (car properties))))) | |
977 ;; In the old version, the arg PROPERTIES is a list to be | |
978 ;; set in PLIST as a value of property `safe-charsets'. | |
979 (setq properties (list (cons 'safe-charsets properties)))) | |
980 ;; In the current version PROPERTIES is a property list. | |
981 ;; Reflect it into PLIST one by one while handling safe-chars | |
982 ;; specially. | |
983 (let ((safe-charsets (cdr (assq 'safe-charsets properties))) | |
984 (safe-chars (cdr (assq 'safe-chars properties))) | |
985 (l properties) | |
986 prop val) | |
987 ;; If only safe-charsets is specified, make a char-table from | |
988 ;; it, and store that char-table as the value of `safe-chars'. | |
989 (if (and (not safe-chars) safe-charsets) | |
990 (let (charset) | |
991 (if (eq safe-charsets t) | |
992 (setq safe-chars t) | |
993 (setq safe-chars (make-char-table 'safe-chars)) | |
994 (while safe-charsets | |
995 (setq charset (car safe-charsets) | |
996 safe-charsets (cdr safe-charsets)) | |
997 (cond ((eq charset 'ascii)) ; just ignore | |
998 ((eq charset 'eight-bit-control) | |
999 (let ((i 128)) | |
1000 (while (< i 160) | |
1001 (aset safe-chars i t) | |
1002 (setq i (1+ i))))) | |
1003 ((eq charset 'eight-bit-graphic) | |
1004 (let ((i 160)) | |
1005 (while (< i 256) | |
1006 (aset safe-chars i t) | |
1007 (setq i (1+ i))))) | |
1008 (t | |
1009 (aset safe-chars (make-char charset) t)))) | |
1010 (if accept-latin-extra-code | |
1011 (let ((i 128)) | |
1012 (while (< i 160) | |
1013 (if (aref latin-extra-code-table i) | |
1014 (aset safe-chars i t)) | |
1015 (setq i (1+ i)))))) | |
1016 (setq l (cons (cons 'safe-chars safe-chars) l)))) | |
1017 (while l | |
1018 (setq prop (car (car l)) val (cdr (car l)) l (cdr l)) | |
1019 (if (eq prop 'safe-chars) | |
1020 (progn | |
1021 (if (and (symbolp val) | |
1022 (get val 'translation-table)) | |
1023 (setq safe-chars (get val 'translation-table))) | |
1024 (setq val safe-chars))) | |
1025 (plist-put plist prop val))) | |
1026 ;; The property `coding-category' may have been set differently | |
1027 ;; through PROPERTIES. | |
1028 (setq coding-category (plist-get plist 'coding-category)) | |
1029 (aset coding-spec coding-spec-plist-idx plist)) | |
1030 (put coding-system 'coding-system coding-spec) | |
1031 (put coding-category 'coding-systems | |
1032 (cons coding-system (get coding-category 'coding-systems)))) | |
1033 | |
1034 ;; Next, set a value of `eol-type' property. | |
1035 (if (not eol-type) | |
1036 ;; If EOL-TYPE is nil, set a vector of subsidiary coding | |
1037 ;; systems, each corresponds to a coding system for the detected | |
1038 ;; EOL format. | |
1039 (setq eol-type (make-subsidiary-coding-system coding-system))) | |
1040 (setq eol-type | |
1041 (cond ((or (eq eol-type 'unix) (null eol-type)) | |
1042 0) | |
1043 ((eq eol-type 'dos) | |
1044 1) | |
1045 ((eq eol-type 'mac) | |
1046 2) | |
1047 ((or (and (vectorp eol-type) | |
1048 (= (length eol-type) 3)) | |
1049 (and (numberp eol-type) | |
1050 (and (>= eol-type 0) | |
1051 (<= eol-type 2)))) | |
1052 eol-type) | |
1053 (t | |
1054 (error "Invalid EOL-TYPE spec:%S" eol-type)))) | |
1055 (put coding-system 'eol-type eol-type) | |
1056 | |
1057 (define-coding-system-internal coding-system) | |
1058 | |
1059 ;; At last, register CODING-SYSTEM in `coding-system-list' and | |
1060 ;; `coding-system-alist'. | |
1061 (add-to-coding-system-list coding-system) | |
1062 (setq coding-system-alist (cons (list (symbol-name coding-system)) | |
1063 coding-system-alist)) | |
1064 | |
1065 ;; For a coding system of cateogory iso-8-1 and iso-8-2, create | |
1066 ;; XXX-with-esc variants. | |
1067 (let ((coding-category (coding-system-category coding-system))) | |
1068 (if (or (eq coding-category 'coding-category-iso-8-1) | |
1069 (eq coding-category 'coding-category-iso-8-2)) | |
1070 (let ((esc (intern (concat (symbol-name coding-system) "-with-esc"))) | |
1071 (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system)) | |
1072 (safe-charsets (assq 'safe-charsets properties)) | |
1073 (mime-charset (assq 'mime-charset properties))) | |
1074 (if safe-charsets | |
1075 (setcdr safe-charsets t) | |
1076 (setq properties (cons (cons 'safe-charsets t) properties))) | |
1077 (if mime-charset | |
1078 (setcdr mime-charset nil)) | |
1079 (make-coding-system esc type mnemonic doc | |
1080 (if (listp (car flags)) | |
1081 (cons (append (car flags) '(t)) (cdr flags)) | |
1082 (cons (list (car flags) t) (cdr flags))) | |
1083 properties)))) | |
1084 | |
1085 coding-system) | |
1086 | |
1087 (put 'safe-chars 'char-table-extra-slots 0) | |
1088 | |
1089 (defun define-coding-system-alias (alias coding-system) | |
1090 "Define ALIAS as an alias for coding system CODING-SYSTEM." | |
1091 (put alias 'coding-system (coding-system-spec coding-system)) | |
1092 (add-to-coding-system-list alias) | |
1093 (setq coding-system-alist (cons (list (symbol-name alias)) | |
1094 coding-system-alist)) | |
1095 (let ((eol-type (coding-system-eol-type coding-system))) | |
1096 (if (vectorp eol-type) | |
1097 (progn | |
1098 (nconc (coding-system-get alias 'alias-coding-systems) (list alias)) | |
1099 (put alias 'eol-type (make-subsidiary-coding-system alias))) | |
1100 (put alias 'eol-type eol-type)))) | |
1101 | |
1102 (defun merge-coding-systems (first second) | |
1103 "Fill in any unspecified aspects of coding system FIRST from SECOND. | |
1104 Return the resulting coding system." | |
1105 (let ((base (coding-system-base second)) | |
1106 (eol (coding-system-eol-type second))) | |
1107 ;; If FIRST doesn't specify text conversion, merge with that of SECOND. | |
1108 (if (eq (coding-system-base first) 'undecided) | |
1109 (setq first (coding-system-change-text-conversion first base))) | |
1110 ;; If FIRST doesn't specify eol conversion, merge with that of SECOND. | |
1111 (if (and (vectorp (coding-system-eol-type first)) | |
1112 (numberp eol) (>= eol 0) (<= eol 2)) | |
1113 (setq first (coding-system-change-eol-conversion | |
1114 first eol))) | |
1115 first)) | |
1116 | |
1117 (defun set-buffer-file-coding-system (coding-system &optional force) | |
1118 "Set the file coding-system of the current buffer to CODING-SYSTEM. | |
1119 This means that when you save the buffer, it will be converted | |
1120 according to CODING-SYSTEM. For a list of possible values of CODING-SYSTEM, | |
1121 use \\[list-coding-systems]. | |
1122 | |
1123 If CODING-SYSTEM leaves the text conversion unspecified, or if it | |
1124 leaves the end-of-line conversion unspecified, FORCE controls what to | |
1125 do. If FORCE is nil, get the unspecified aspect (or aspects) from the | |
1126 buffer's previous `buffer-file-coding-system' value (if it is | |
1127 specified there). Otherwise, levae it unspecified. | |
1128 | |
1129 This marks the buffer modified so that the succeeding \\[save-buffer] | |
1130 surely saves the buffer with CODING-SYSTEM. From a program, if you | |
1131 don't want to mark the buffer modified, just set the variable | |
1132 `buffer-file-coding-system' directly." | |
1133 (interactive "zCoding system for saving file (default, nil): \nP") | |
1134 (check-coding-system coding-system) | |
1135 (if (and coding-system buffer-file-coding-system (null force)) | |
1136 (setq coding-system | |
1137 (merge-coding-systems coding-system buffer-file-coding-system))) | |
1138 (setq buffer-file-coding-system coding-system) | |
1139 ;; This is in case of an explicit call. Normally, `normal-mode' and | |
1140 ;; `set-buffer-major-mode-hook' take care of setting the table. | |
1141 (if (fboundp 'ucs-set-table-for-input) ; don't lose when building | |
1142 (ucs-set-table-for-input)) | |
1143 (set-buffer-modified-p t) | |
1144 (force-mode-line-update)) | |
1145 | |
1146 (defun revert-buffer-with-coding-system (coding-system &optional force) | |
1147 "Visit the current buffer's file again using coding system CODING-SYSTEM. | |
1148 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]. | |
1149 | |
1150 If CODING-SYSTEM leaves the text conversion unspecified, or if it | |
1151 leaves the end-of-line conversion unspecified, FORCE controls what to | |
1152 do. If FORCE is nil, get the unspecified aspect (or aspects) from the | |
1153 buffer's previous `buffer-file-coding-system' value (if it is | |
1154 specified there). Otherwise, determine it from the file contents as | |
1155 usual for visiting a file." | |
1156 (interactive "zCoding system for visited file (default, nil): \nP") | |
1157 (check-coding-system coding-system) | |
1158 (if (and coding-system buffer-file-coding-system (null force)) | |
1159 (setq coding-system | |
1160 (merge-coding-systems coding-system buffer-file-coding-system))) | |
1161 (let ((coding-system-for-read coding-system)) | |
1162 (revert-buffer))) | |
1163 | |
1164 (defun set-file-name-coding-system (coding-system) | |
1165 "Set coding system for decoding and encoding file names to CODING-SYSTEM. | |
1166 It actually just set the variable `file-name-coding-system' (which | |
1167 see) to CODING-SYSTEM." | |
1168 (interactive "zCoding system for file names (default, nil): ") | |
1169 (check-coding-system coding-system) | |
1170 (setq file-name-coding-system coding-system)) | |
1171 | |
1172 (defvar default-terminal-coding-system nil | |
1173 "Default value for the terminal coding system. | |
1174 This is normally set according to the selected language environment. | |
1175 See also the command `set-terminal-coding-system'.") | |
1176 | |
1177 (defun set-terminal-coding-system (coding-system) | |
1178 "Set coding system of your terminal to CODING-SYSTEM. | |
1179 All text output to the terminal will be encoded | |
1180 with the specified coding system. | |
1181 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]. | |
1182 The default is determined by the selected language environment | |
1183 or by the previous use of this command." | |
1184 (interactive | |
1185 (list (let ((default (if (and (not (terminal-coding-system)) | |
1186 default-terminal-coding-system) | |
1187 default-terminal-coding-system))) | |
1188 (read-coding-system | |
1189 (format "Coding system for terminal display (default, %s): " | |
1190 default) | |
1191 default)))) | |
1192 (if (and (not coding-system) | |
1193 (not (terminal-coding-system))) | |
1194 (setq coding-system default-terminal-coding-system)) | |
1195 (if coding-system | |
1196 (setq default-terminal-coding-system coding-system)) | |
1197 (set-terminal-coding-system-internal coding-system) | |
1198 (redraw-frame (selected-frame))) | |
1199 | |
1200 (defvar default-keyboard-coding-system nil | |
1201 "Default value of the keyboard coding system. | |
1202 This is normally set according to the selected language environment. | |
1203 See also the command `set-keyboard-coding-system'.") | |
1204 | |
1205 (defun set-keyboard-coding-system (coding-system) | |
1206 "Set coding system for keyboard input to CODING-SYSTEM. | |
1207 In addition, this command enables Encoded-kbd minor mode. | |
1208 \(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see | |
1209 `encoded-kbd-mode'.) | |
1210 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]. | |
1211 The default is determined by the selected language environment | |
1212 or by the previous use of this command." | |
1213 (interactive | |
1214 (list (let ((default (if (and (not (keyboard-coding-system)) | |
1215 default-keyboard-coding-system) | |
1216 default-keyboard-coding-system))) | |
1217 (read-coding-system | |
1218 (format "Coding system for keyboard input (default, %s): " | |
1219 default) | |
1220 default)))) | |
1221 (if (and (not coding-system) | |
1222 (not (keyboard-coding-system))) | |
1223 (setq coding-system default-keyboard-coding-system)) | |
1224 (if coding-system | |
1225 (setq default-keyboard-coding-system coding-system)) | |
1226 (set-keyboard-coding-system-internal coding-system) | |
1227 (setq keyboard-coding-system coding-system) | |
1228 (encoded-kbd-mode (if coding-system 1 0))) | |
1229 | |
1230 (defcustom keyboard-coding-system nil | |
1231 "Specify coding system for keyboard input. | |
1232 If you set this on a terminal which can't distinguish Meta keys from | |
1233 8-bit characters, you will have to use ESC to type Meta characters. | |
1234 See Info node `Specify Coding' and Info node `Single-Byte Character Support'. | |
1235 | |
1236 On non-windowing terminals, this is set from the locale by default. | |
1237 | |
1238 Setting this variable directly does not take effect; | |
1239 use either M-x customize or \\[set-keyboard-coding-system]." | |
1240 :type '(coding-system :tag "Coding system") | |
1241 :link '(info-link "(emacs)Specify Coding") | |
1242 :link '(info-link "(emacs)Single-Byte Character Support") | |
1243 :set (lambda (symbol value) | |
1244 ;; Don't load encoded-kbd-mode unnecessarily. | |
1245 (if (or value (boundp 'encoded-kbd-mode)) | |
1246 (set-keyboard-coding-system value) | |
1247 (set-default 'keyboard-coding-system nil))) ; must initialize | |
1248 :version "21.4" | |
1249 :group 'keyboard | |
1250 :group 'mule) | |
1251 | |
1252 (defun set-buffer-process-coding-system (decoding encoding) | |
1253 "Set coding systems for the process associated with the current buffer. | |
1254 DECODING is the coding system to be used to decode input from the process, | |
1255 ENCODING is the coding system to be used to encode output to the process. | |
1256 | |
1257 For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems]." | |
1258 (interactive | |
1259 "zCoding-system for output from the process: \nzCoding-system for input to the process: ") | |
1260 (let ((proc (get-buffer-process (current-buffer)))) | |
1261 (if (null proc) | |
1262 (error "No process") | |
1263 (check-coding-system decoding) | |
1264 (check-coding-system encoding) | |
1265 (set-process-coding-system proc decoding encoding))) | |
1266 (force-mode-line-update)) | |
1267 | |
1268 (defalias 'set-clipboard-coding-system 'set-selection-coding-system) | |
1269 | |
1270 (defun set-selection-coding-system (coding-system) | |
1271 "Make CODING-SYSTEM used for communicating with other X clients. | |
1272 When sending or receiving text via cut_buffer, selection, and clipboard, | |
1273 the text is encoded or decoded by CODING-SYSTEM." | |
1274 (interactive "zCoding system for X selection: ") | |
1275 (check-coding-system coding-system) | |
1276 (setq selection-coding-system coding-system)) | |
1277 | |
1278 ;; Coding system lastly specified by the command | |
1279 ;; set-next-selection-coding-system. | |
1280 (defvar last-next-selection-coding-system nil) | |
1281 | |
1282 (defun set-next-selection-coding-system (coding-system) | |
1283 "Make CODING-SYSTEM used for the next communication with other X clients. | |
1284 This setting is effective for the next communication only." | |
1285 (interactive | |
1286 (list (read-coding-system | |
1287 (if last-next-selection-coding-system | |
1288 (format "Coding system for the next X selection (default, %S): " | |
1289 last-next-selection-coding-system) | |
1290 "Coding system for the next X selection: ") | |
1291 last-next-selection-coding-system))) | |
1292 (if coding-system | |
1293 (setq last-next-selection-coding-system coding-system) | |
1294 (setq coding-system last-next-selection-coding-system)) | |
1295 (check-coding-system coding-system) | |
1296 | |
1297 (setq next-selection-coding-system coding-system)) | |
1298 | |
1299 (defun set-coding-priority (arg) | |
1300 "Set priority of coding categories according to ARG. | |
1301 ARG is a list of coding categories ordered by priority." | |
1302 (let ((l arg) | |
1303 (current-list (copy-sequence coding-category-list))) | |
1304 ;; Check the validity of ARG while deleting coding categories in | |
1305 ;; ARG from CURRENT-LIST. We assume that CODING-CATEGORY-LIST | |
1306 ;; contains all coding categories. | |
1307 (while l | |
1308 (if (or (null (get (car l) 'coding-category-index)) | |
1309 (null (memq (car l) current-list))) | |
1310 (error "Invalid or duplicated element in argument: %s" arg)) | |
1311 (setq current-list (delq (car l) current-list)) | |
1312 (setq l (cdr l))) | |
1313 ;; Update `coding-category-list' and return it. | |
1314 (setq coding-category-list (append arg current-list)) | |
1315 (set-coding-priority-internal))) | |
1316 | |
1317 ;;; X selections | |
1318 | |
1319 (defvar ctext-non-standard-encodings-alist | |
1320 '(("ISO8859-15" . latin-iso8859-15) | |
1321 ("ISO8859-14" . latin-iso8859-14) | |
1322 ("KOI8-R" . koi8-r) | |
1323 ("BIG5-0" . big5)) | |
1324 "Alist of non-standard encoding names vs Emacs coding systems. | |
1325 This alist is used to decode an extened segment of a compound text.") | |
1326 | |
1327 (defvar ctext-non-standard-encodings-regexp | |
1328 (string-to-multibyte | |
1329 (concat | |
1330 ;; For non-standard encodings. | |
1331 "\\(\e%/[0-4][\200-\377][\200-\377]\\([^\002]+\\)\002\\)" | |
1332 "\\|" | |
1333 ;; For UTF-8 encoding. | |
1334 "\\(\e%G[^\e]*\e%@\\)"))) | |
1335 | |
1336 ;; Functions to support "Non-Standard Character Set Encodings" defined | |
1337 ;; by the COMPOUND-TEXT spec. | |
1338 ;; We support that by decoding the whole data by `ctext' which just | |
1339 ;; pertains byte sequences belonging to ``extended segment'', then | |
1340 ;; decoding those byte sequences one by one in Lisp. | |
1341 ;; This function also supports "The UTF-8 encoding" described in the | |
1342 ;; section 7 of the documentation fo COMPOUND-TEXT distributed with | |
1343 ;; XFree86. | |
1344 | |
1345 (defun ctext-post-read-conversion (len) | |
1346 "Decode LEN characters encoded as Compound Text with Extended Segments." | |
1347 (save-match-data | |
1348 (save-restriction | |
1349 (let ((case-fold-search nil) | |
1350 (in-workbuf (string= (buffer-name) " *code-converting-work*")) | |
1351 last-coding-system-used | |
1352 pos bytes) | |
1353 (or in-workbuf | |
1354 (narrow-to-region (point) (+ (point) len))) | |
1355 (decode-coding-region (point-min) (point-max) 'ctext) | |
1356 (if in-workbuf | |
1357 (set-buffer-multibyte t)) | |
1358 (while (re-search-forward ctext-non-standard-encodings-regexp | |
1359 nil 'move) | |
1360 (setq pos (match-beginning 0)) | |
1361 (if (match-beginning 1) | |
1362 ;; ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES-- | |
1363 (let* ((M (char-after (+ pos 4))) | |
1364 (L (char-after (+ pos 5))) | |
1365 (encoding (match-string 2)) | |
1366 (coding (or (cdr (assoc-ignore-case | |
1367 encoding | |
1368 ctext-non-standard-encodings-alist)) | |
1369 (coding-system-p | |
1370 (intern (downcase encoding)))))) | |
1371 (setq bytes (- (+ (* (- M 128) 128) (- L 128)) | |
1372 (- (point) (+ pos 6)))) | |
1373 (when coding | |
1374 (delete-region pos (point)) | |
1375 (forward-char bytes) | |
1376 (decode-coding-region (- (point) bytes) (point) coding))) | |
1377 ;; ESC % G --UTF-8-BYTES-- ESC % @ | |
1378 (setq bytes (- (point) pos)) | |
1379 (decode-coding-region (- (point) bytes) (point) 'utf-8)))) | |
1380 (goto-char (point-min)) | |
1381 (- (point-max) (point))))) | |
1382 | |
1383 ;; If you add charsets here, be sure to modify the regexp used by | |
1384 ;; ctext-pre-write-conversion to look up non-standard charsets. | |
1385 (defvar ctext-non-standard-designations-alist | |
1386 '(("$(0" . (big5 "big5-0" 2)) | |
1387 ("$(1" . (big5 "big5-0" 2)) | |
1388 ;; The following are actually standard; generating extended | |
1389 ;; segments for them is wrong and screws e.g. Latin-9 users. | |
1390 ;; 8859-{10,13,16} aren't Emacs charsets anyhow. -- fx | |
1391 ;; ("-V" . (t "iso8859-10" 1)) | |
1392 ;; ("-Y" . (t "iso8859-13" 1)) | |
1393 ;; ("-_" . (t "iso8859-14" 1)) | |
1394 ;; ("-b" . (t "iso8859-15" 1)) | |
1395 ;; ("-f" . (t "iso8859-16" 1)) | |
1396 ) | |
1397 "Alist of ctext control sequences that introduce character sets which | |
1398 are not in the list of approved encodings, and the corresponding | |
1399 coding system, identifier string, and number of octets per encoded | |
1400 character. | |
1401 | |
1402 Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ | |
1403 is the control sequence (sans the leading ESC) that introduces the character | |
1404 set in the text encoded by compound-text. ENCODING is a coding system | |
1405 symbol; if it is t, it means that the ctext coding system already encodes | |
1406 the text correctly, and only the leading control sequence needs to be altered. | |
1407 If ENCODING is a coding system, we need to re-encode the text with that | |
1408 coding system. CHARSET is the name of the charset we need to put into | |
1409 the leading control sequence. NOCTETS is the number of octets (bytes) that | |
1410 encode each character in this charset. NOCTETS can be 0 (meaning the number | |
1411 of octets per character is variable), 1, 2, 3, or 4.") | |
1412 | |
1413 (defun ctext-pre-write-conversion (from to) | |
1414 "Encode characters between FROM and TO as Compound Text w/Extended Segments. | |
1415 | |
1416 If FROM is a string, or if the current buffer is not the one set up for us | |
1417 by encode-coding-string, generate a new temp buffer, insert the | |
1418 text, and convert it in the temporary buffer. Otherwise, convert in-place." | |
1419 (save-match-data | |
1420 ;; Setup a working buffer if necessary. | |
1421 (cond ((stringp from) | |
1422 (let ((buf (current-buffer))) | |
1423 (set-buffer (generate-new-buffer " *temp")) | |
1424 (set-buffer-multibyte (multibyte-string-p from)) | |
1425 (insert from))) | |
1426 ((not (string= (buffer-name) " *code-converting-work*")) | |
1427 (let ((buf (current-buffer)) | |
1428 (multibyte enable-multibyte-characters)) | |
1429 (set-buffer (generate-new-buffer " *temp")) | |
1430 (set-buffer-multibyte multibyte) | |
1431 (insert-buffer-substring buf from to)))) | |
1432 | |
1433 ;; Now we can encode the whole buffer. | |
1434 (let ((case-fold-search nil) | |
1435 last-coding-system-used | |
1436 pos posend desig encode-info encoding chset noctets textlen) | |
1437 (goto-char (point-min)) | |
1438 ;; At first encode the whole buffer. | |
1439 (encode-coding-region (point-min) (point-max) 'ctext-no-compositions) | |
1440 ;; Then replace ISO-2022 charset designations with extended | |
1441 ;; segments, for those charsets that are not part of the | |
1442 ;; official X registry. The regexp below finds the leading | |
1443 ;; sequences for big5. | |
1444 (while (re-search-forward "\e\\(\$([01]\\)" nil 'move) | |
1445 (setq pos (match-beginning 0) | |
1446 posend (point) | |
1447 desig (match-string 1) | |
1448 encode-info (cdr (assoc desig | |
1449 ctext-non-standard-designations-alist)) | |
1450 encoding (car encode-info) | |
1451 chset (cadr encode-info) | |
1452 noctets (car (cddr encode-info))) | |
1453 (skip-chars-forward "^\e") | |
1454 (cond | |
1455 ((eq encoding t) ; only the leading sequence needs to be changed | |
1456 (setq textlen (+ (- (point) posend) (length chset) 1)) | |
1457 ;; Generate the control sequence for an extended segment. | |
1458 (replace-match (format "\e%%/%d%c%c%s" | |
1459 noctets | |
1460 (+ (/ textlen 128) 128) | |
1461 (+ (% textlen 128) 128) | |
1462 chset) | |
1463 t t)) | |
1464 ((coding-system-p encoding) ; need to recode the entire segment... | |
1465 (decode-coding-region pos (point) 'ctext-no-compositions) | |
1466 (encode-coding-region pos (point) encoding) | |
1467 (setq textlen (+ (- (point) pos) (length chset) 1)) | |
1468 (save-excursion | |
1469 (goto-char pos) | |
1470 (insert (format "\e%%/%d%c%c%s" | |
1471 noctets | |
1472 (+ (/ textlen 128) 128) | |
1473 (+ (% textlen 128) 128) | |
1474 chset)))))) | |
1475 (goto-char (point-min)))) | |
1476 ;; Must return nil, as build_annotations_2 expects that. | |
1477 nil) | |
1478 | |
1479 ;;; FILE I/O | |
1480 | |
1481 (defcustom auto-coding-alist | |
1482 '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\|tgz\\)\\'" . no-conversion) | |
1483 ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion) | |
1484 ("/#[^/]+#\\'" . emacs-mule)) | |
1485 "Alist of filename patterns vs corresponding coding systems. | |
1486 Each element looks like (REGEXP . CODING-SYSTEM). | |
1487 A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading. | |
1488 | |
1489 The settings in this alist take priority over `coding:' tags | |
1490 in the file (see the function `set-auto-coding') | |
1491 and the contents of `file-coding-system-alist'." | |
1492 :group 'files | |
1493 :group 'mule | |
1494 :type '(repeat (cons (regexp :tag "File name regexp") | |
1495 (symbol :tag "Coding system")))) | |
1496 | |
1497 (defcustom auto-coding-regexp-alist | |
1498 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)) | |
1499 "Alist of patterns vs corresponding coding systems. | |
1500 Each element looks like (REGEXP . CODING-SYSTEM). | |
1501 A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading. | |
1502 | |
1503 The settings in this alist take priority over `coding:' tags | |
1504 in the file (see the function `set-auto-coding') | |
1505 and the contents of `file-coding-system-alist'." | |
1506 :group 'files | |
1507 :group 'mule | |
1508 :type '(repeat (cons (regexp :tag "Regexp") | |
1509 (symbol :tag "Coding system")))) | |
1510 | |
1511 ;; See the bottom of this file for built-in auto coding functions. | |
1512 (defcustom auto-coding-functions '(sgml-xml-auto-coding-function | |
1513 sgml-html-meta-auto-coding-function) | |
1514 "A list of functions which attempt to determine a coding system. | |
1515 | |
1516 Each function in this list should be written to operate on the | |
1517 current buffer, but should not modify it in any way. The buffer | |
1518 will contain undecoded text of parts of the file. Each function | |
1519 should take one argument, SIZE, which says how many | |
1520 characters (starting from point) it should look at. | |
1521 | |
1522 If one of these functions succeeds in determining a coding | |
1523 system, it should return that coding system. Otherwise, it | |
1524 should return nil. | |
1525 | |
1526 If a file has a `coding:' tag, that takes precedence over these | |
1527 functions, so they won't be called at all." | |
1528 :group 'files | |
1529 :group 'mule | |
1530 :type '(repeat function)) | |
1531 | |
1532 (defvar set-auto-coding-for-load nil | |
1533 "Non-nil means look for `load-coding' property instead of `coding'. | |
1534 This is used for loading and byte-compiling Emacs Lisp files.") | |
1535 | |
1536 (defun auto-coding-alist-lookup (filename) | |
1537 "Return the coding system specified by `auto-coding-alist' for FILENAME." | |
1538 (let ((alist auto-coding-alist) | |
1539 (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos cygwin))) | |
1540 coding-system) | |
1541 (while (and alist (not coding-system)) | |
1542 (if (string-match (car (car alist)) filename) | |
1543 (setq coding-system (cdr (car alist))) | |
1544 (setq alist (cdr alist)))) | |
1545 coding-system)) | |
1546 | |
1547 (defun set-auto-coding (filename size) | |
1548 "Return coding system for a file FILENAME of which SIZE bytes follow point. | |
1549 These bytes should include at least the first 1k of the file | |
1550 and the last 3k of the file, but the middle may be omitted. | |
1551 | |
1552 The function checks FILENAME against the variable `auto-coding-alist'. | |
1553 If FILENAME doesn't match any entries in the variable, it checks the | |
1554 contents of the current buffer following point against | |
1555 `auto-coding-regexp-alist'. If no match is found, it checks for a | |
1556 `coding:' tag in the first one or two lines following point. If no | |
1557 `coding:' tag is found, it checks any local variables list in the last | |
1558 3K bytes out of the SIZE bytes. Finally, if none of these methods | |
1559 succeed, it checks to see if any function in `auto-coding-functions' | |
1560 gives a match. | |
1561 | |
1562 The return value is the specified coding system, or nil if nothing is | |
1563 specified. | |
1564 | |
1565 The variable `set-auto-coding-function' (which see) is set to this | |
1566 function by default." | |
1567 (or (auto-coding-alist-lookup filename) | |
1568 ;; Try using `auto-coding-regexp-alist'. | |
1569 (save-excursion | |
1570 (let ((alist auto-coding-regexp-alist) | |
1571 coding-system) | |
1572 (while (and alist (not coding-system)) | |
1573 (let ((regexp (car (car alist)))) | |
1574 (when (re-search-forward regexp (+ (point) size) t) | |
1575 (setq coding-system (cdr (car alist))))) | |
1576 (setq alist (cdr alist))) | |
1577 coding-system)) | |
1578 (let* ((case-fold-search t) | |
1579 (head-start (point)) | |
1580 (head-end (+ head-start (min size 1024))) | |
1581 (tail-start (+ head-start (max (- size 3072) 0))) | |
1582 (tail-end (+ head-start size)) | |
1583 coding-system head-found tail-found pos) | |
1584 ;; Try a short cut by searching for the string "coding:" | |
1585 ;; and for "unibyte:" at the head and tail of SIZE bytes. | |
1586 (setq head-found (or (search-forward "coding:" head-end t) | |
1587 (search-forward "unibyte:" head-end t))) | |
1588 (if (and head-found (> head-found tail-start)) | |
1589 ;; Head and tail are overlapped. | |
1590 (setq tail-found head-found) | |
1591 (goto-char tail-start) | |
1592 (setq tail-found (or (search-forward "coding:" tail-end t) | |
1593 (search-forward "unibyte:" tail-end t)))) | |
1594 | |
1595 ;; At first check the head. | |
1596 (when head-found | |
1597 (goto-char head-start) | |
1598 (setq head-end (set-auto-mode-1)) | |
1599 (setq head-start (point)) | |
1600 (when (and head-end (< head-found head-end)) | |
1601 (goto-char head-start) | |
1602 (when (and set-auto-coding-for-load | |
1603 (re-search-forward | |
1604 "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" | |
1605 head-end t)) | |
1606 (setq coding-system 'raw-text)) | |
1607 (when (and (not coding-system) | |
1608 (re-search-forward | |
1609 "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" | |
1610 head-end t)) | |
1611 (setq coding-system (intern (match-string 2))) | |
1612 (or (coding-system-p coding-system) | |
1613 (setq coding-system nil))))) | |
1614 | |
1615 ;; If no coding: tag in the head, check the tail. | |
1616 (when (and tail-found (not coding-system)) | |
1617 (goto-char tail-start) | |
1618 (search-forward "\n\^L" nil t) | |
1619 (if (re-search-forward | |
1620 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) | |
1621 ;; The prefix is what comes before "local variables:" in its | |
1622 ;; line. The suffix is what comes after "local variables:" | |
1623 ;; in its line. | |
1624 (let* ((prefix (regexp-quote (match-string 1))) | |
1625 (suffix (regexp-quote (match-string 2))) | |
1626 (re-coding | |
1627 (concat | |
1628 "^" prefix | |
1629 ;; N.B. without the \n below, the regexp can | |
1630 ;; eat newlines. | |
1631 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*" | |
1632 suffix "$")) | |
1633 (re-unibyte | |
1634 (concat | |
1635 "^" prefix | |
1636 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*" | |
1637 suffix "$")) | |
1638 (re-end | |
1639 (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$")) | |
1640 (pos (point))) | |
1641 (re-search-forward re-end tail-end 'move) | |
1642 (setq tail-end (point)) | |
1643 (goto-char pos) | |
1644 (when (and set-auto-coding-for-load | |
1645 (re-search-forward re-unibyte tail-end t)) | |
1646 (setq coding-system 'raw-text)) | |
1647 (when (and (not coding-system) | |
1648 (re-search-forward re-coding tail-end t)) | |
1649 (setq coding-system (intern (match-string 1))) | |
1650 (or (coding-system-p coding-system) | |
1651 (setq coding-system nil)))))) | |
1652 coding-system) | |
1653 ;; Finally, try all the `auto-coding-functions'. | |
1654 (let ((funcs auto-coding-functions) | |
1655 (coding-system nil)) | |
1656 (while (and funcs (not coding-system)) | |
1657 (setq coding-system (condition-case e | |
1658 (save-excursion | |
1659 (goto-char (point-min)) | |
1660 (funcall (pop funcs) size)) | |
1661 (error nil)))) | |
1662 coding-system))) | |
1663 | |
1664 (setq set-auto-coding-function 'set-auto-coding) | |
1665 | |
1666 (defun after-insert-file-set-coding (inserted) | |
1667 "Set `buffer-file-coding-system' of current buffer after text is inserted. | |
1668 INSERTED is the number of characters that were inserted, as figured | |
1669 in the situation before this function. Return the number of characters | |
1670 inserted, as figured in the situation after. The two numbers can be | |
1671 different if the buffer has become unibyte." | |
1672 (if last-coding-system-used | |
1673 (let ((coding-system | |
1674 (find-new-buffer-file-coding-system last-coding-system-used)) | |
1675 (modified-p (buffer-modified-p))) | |
1676 (when coding-system | |
1677 (set-buffer-file-coding-system coding-system t) | |
1678 (if (and enable-multibyte-characters | |
1679 (or (eq coding-system 'no-conversion) | |
1680 (eq (coding-system-type coding-system) 5)) | |
1681 ;; If buffer was unmodified and the size is the | |
1682 ;; same as INSERTED, we must be visiting it. | |
1683 (not modified-p) | |
1684 (= (buffer-size) inserted)) | |
1685 ;; For coding systems no-conversion and raw-text..., | |
1686 ;; edit the buffer as unibyte. | |
1687 (let ((pos-marker (copy-marker (+ (point) inserted)))) | |
1688 (set-buffer-multibyte nil) | |
1689 (setq inserted (- pos-marker (point))))) | |
1690 (set-buffer-modified-p modified-p)))) | |
1691 inserted) | |
1692 | |
1693 ;; The coding-spec and eol-type of coding-system returned is decided | |
1694 ;; independently in the following order. | |
1695 ;; 1. That of buffer-file-coding-system locally bound. | |
1696 ;; 2. That of CODING. | |
1697 | |
1698 (defun find-new-buffer-file-coding-system (coding) | |
1699 "Return a coding system for a buffer when a file of CODING is inserted. | |
1700 The local variable `buffer-file-coding-system' of the current buffer | |
1701 is set to the returned value. | |
1702 Return nil if there's no need to set `buffer-file-coding-system'." | |
1703 (let (local-coding local-eol | |
1704 found-coding found-eol | |
1705 new-coding new-eol) | |
1706 (if (null coding) | |
1707 ;; Nothing found about coding. | |
1708 nil | |
1709 | |
1710 ;; Get information of `buffer-file-coding-system' in LOCAL-EOL | |
1711 ;; and LOCAL-CODING. | |
1712 (setq local-eol (coding-system-eol-type buffer-file-coding-system)) | |
1713 (if (null (numberp local-eol)) | |
1714 ;; But eol-type is not yet set. | |
1715 (setq local-eol nil)) | |
1716 (if (and buffer-file-coding-system | |
1717 (not (eq (coding-system-type buffer-file-coding-system) t))) | |
1718 ;; This is not `undecided'. | |
1719 (setq local-coding (coding-system-base buffer-file-coding-system))) | |
1720 | |
1721 (if (and (local-variable-p 'buffer-file-coding-system) | |
1722 local-eol local-coding) | |
1723 ;; The current buffer has already set full coding-system, we | |
1724 ;; had better not change it. | |
1725 nil | |
1726 | |
1727 (setq found-eol (coding-system-eol-type coding)) | |
1728 (if (null (numberp found-eol)) | |
1729 ;; But eol-type is not found. | |
1730 ;; If EOL conversions are inhibited, force unix eol-type. | |
1731 (setq found-eol (if inhibit-eol-conversion 0))) | |
1732 (if (eq (coding-system-type coding) t) | |
1733 (setq found-coding 'undecided) | |
1734 (setq found-coding (coding-system-base coding))) | |
1735 | |
1736 (if (and (not found-eol) (eq found-coding 'undecided)) | |
1737 ;; No valid coding information found. | |
1738 nil | |
1739 | |
1740 ;; Some coding information (eol or text) found. | |
1741 | |
1742 ;; The local setting takes precedence over the found one. | |
1743 (setq new-coding (if (local-variable-p 'buffer-file-coding-system) | |
1744 (or local-coding found-coding) | |
1745 (or found-coding local-coding))) | |
1746 (setq new-eol (if (local-variable-p 'buffer-file-coding-system) | |
1747 (or local-eol found-eol) | |
1748 (or found-eol local-eol))) | |
1749 | |
1750 (let ((eol-type (coding-system-eol-type new-coding))) | |
1751 (if (and (numberp new-eol) (vectorp eol-type)) | |
1752 (aref eol-type new-eol) | |
1753 new-coding))))))) | |
1754 | |
1755 (defun modify-coding-system-alist (target-type regexp coding-system) | |
1756 "Modify one of look up tables for finding a coding system on I/O operation. | |
1757 There are three of such tables, `file-coding-system-alist', | |
1758 `process-coding-system-alist', and `network-coding-system-alist'. | |
1759 | |
1760 TARGET-TYPE specifies which of them to modify. | |
1761 If it is `file', it affects `file-coding-system-alist' (which see). | |
1762 If it is `process', it affects `process-coding-system-alist' (which see). | |
1763 If it is `network', it affects `network-coding-system-alist' (which see). | |
1764 | |
1765 REGEXP is a regular expression matching a target of I/O operation. | |
1766 The target is a file name if TARGET-TYPE is `file', a program name if | |
1767 TARGET-TYPE is `process', or a network service name or a port number | |
1768 to connect to if TARGET-TYPE is `network'. | |
1769 | |
1770 CODING-SYSTEM is a coding system to perform code conversion on the I/O | |
1771 operation, or a cons cell (DECODING . ENCODING) specifying the coding systems | |
1772 for decoding and encoding respectively, | |
1773 or a function symbol which, when called, returns such a cons cell." | |
1774 (or (memq target-type '(file process network)) | |
1775 (error "Invalid target type: %s" target-type)) | |
1776 (or (stringp regexp) | |
1777 (and (eq target-type 'network) (integerp regexp)) | |
1778 (error "Invalid regular expression: %s" regexp)) | |
1779 (if (symbolp coding-system) | |
1780 (if (not (fboundp coding-system)) | |
1781 (progn | |
1782 (check-coding-system coding-system) | |
1783 (setq coding-system (cons coding-system coding-system)))) | |
1784 (check-coding-system (car coding-system)) | |
1785 (check-coding-system (cdr coding-system))) | |
1786 (cond ((eq target-type 'file) | |
1787 (let ((slot (assoc regexp file-coding-system-alist))) | |
1788 (if slot | |
1789 (setcdr slot coding-system) | |
1790 (setq file-coding-system-alist | |
1791 (cons (cons regexp coding-system) | |
1792 file-coding-system-alist))))) | |
1793 ((eq target-type 'process) | |
1794 (let ((slot (assoc regexp process-coding-system-alist))) | |
1795 (if slot | |
1796 (setcdr slot coding-system) | |
1797 (setq process-coding-system-alist | |
1798 (cons (cons regexp coding-system) | |
1799 process-coding-system-alist))))) | |
1800 (t | |
1801 (let ((slot (assoc regexp network-coding-system-alist))) | |
1802 (if slot | |
1803 (setcdr slot coding-system) | |
1804 (setq network-coding-system-alist | |
1805 (cons (cons regexp coding-system) | |
1806 network-coding-system-alist))))))) | |
1807 | |
1808 (defun decode-coding-inserted-region (from to filename | |
1809 &optional visit beg end replace) | |
1810 "Decode the region between FROM and TO as if it is read from file FILENAME. | |
1811 Optional arguments VISIT, BEG, END, and REPLACE are the same as those | |
1812 of the function `insert-file-contents'." | |
1813 (save-excursion | |
1814 (save-restriction | |
1815 (narrow-to-region from to) | |
1816 (goto-char (point-min)) | |
1817 (let ((coding coding-system-for-read)) | |
1818 (or coding | |
1819 (setq coding (funcall set-auto-coding-function | |
1820 filename (- (point-max) (point-min))))) | |
1821 (or coding | |
1822 (setq coding (find-operation-coding-system | |
1823 'insert-file-contents | |
1824 filename visit beg end replace))) | |
1825 (if (coding-system-p coding) | |
1826 (or enable-multibyte-characters | |
1827 (setq coding | |
1828 (coding-system-change-text-conversion coding 'raw-text))) | |
1829 (setq coding nil)) | |
1830 (if coding | |
1831 (decode-coding-region (point-min) (point-max) coding)) | |
1832 (setq last-coding-system-used coding))))) | |
1833 | |
1834 (defun make-translation-table (&rest args) | |
1835 "Make a translation table from arguments. | |
1836 A translation table is a char table intended for character | |
1837 translation in CCL programs. | |
1838 | |
1839 Each argument is a list of elements of the form (FROM . TO), where FROM | |
1840 is a character to be translated to TO. | |
1841 | |
1842 FROM can be a generic character (see `make-char'). In this case, TO is | |
1843 a generic character containing the same number of characters, or an | |
1844 ordinary character. If FROM and TO are both generic characters, all | |
1845 characters belonging to FROM are translated to characters belonging to TO | |
1846 without changing their position code(s). | |
1847 | |
1848 The arguments and forms in each argument are processed in the given | |
1849 order, and if a previous form already translates TO to some other | |
1850 character, say TO-ALT, FROM is also translated to TO-ALT." | |
1851 (let ((table (make-char-table 'translation-table)) | |
1852 revlist) | |
1853 (while args | |
1854 (let ((elts (car args))) | |
1855 (while elts | |
1856 (let* ((from (car (car elts))) | |
1857 (from-i 0) ; degree of freedom of FROM | |
1858 (from-rev (nreverse (split-char from))) | |
1859 (to (cdr (car elts))) | |
1860 (to-i 0) ; degree of freedom of TO | |
1861 (to-rev (nreverse (split-char to)))) | |
1862 ;; Check numbers of heading 0s in FROM-REV and TO-REV. | |
1863 (while (eq (car from-rev) 0) | |
1864 (setq from-i (1+ from-i) from-rev (cdr from-rev))) | |
1865 (while (eq (car to-rev) 0) | |
1866 (setq to-i (1+ to-i) to-rev (cdr to-rev))) | |
1867 (if (and (/= from-i to-i) (/= to-i 0)) | |
1868 (error "Invalid character pair (%d . %d)" from to)) | |
1869 ;; If we have already translated TO to TO-ALT, FROM should | |
1870 ;; also be translated to TO-ALT. But, this is only if TO | |
1871 ;; is a generic character or TO-ALT is not a generic | |
1872 ;; character. | |
1873 (let ((to-alt (aref table to))) | |
1874 (if (and to-alt | |
1875 (or (> to-i 0) (not (generic-char-p to-alt)))) | |
1876 (setq to to-alt))) | |
1877 (if (> from-i 0) | |
1878 (set-char-table-default table from to) | |
1879 (aset table from to)) | |
1880 ;; If we have already translated some chars to FROM, they | |
1881 ;; should also be translated to TO. | |
1882 (let ((l (assq from revlist))) | |
1883 (if l | |
1884 (let ((ch (car l))) | |
1885 (setcar l to) | |
1886 (setq l (cdr l)) | |
1887 (while l | |
1888 (aset table ch to) | |
1889 (setq l (cdr l)) )))) | |
1890 ;; Now update REVLIST. | |
1891 (let ((l (assq to revlist))) | |
1892 (if l | |
1893 (setcdr l (cons from (cdr l))) | |
1894 (setq revlist (cons (list to from) revlist))))) | |
1895 (setq elts (cdr elts)))) | |
1896 (setq args (cdr args))) | |
1897 ;; Return TABLE just created. | |
1898 table)) | |
1899 | |
1900 (defun make-translation-table-from-vector (vec) | |
1901 "Make translation table from decoding vector VEC. | |
1902 VEC is an array of 256 elements to map unibyte codes to multibyte | |
1903 characters. Elements may be nil for undefined code points. | |
1904 See also the variable `nonascii-translation-table'." | |
1905 (let ((table (make-char-table 'translation-table)) | |
1906 (rev-table (make-char-table 'translation-table)) | |
1907 ch) | |
1908 (dotimes (i 256) | |
1909 (setq ch (aref vec i)) | |
1910 (when ch | |
1911 (aset table i ch) | |
1912 (if (>= ch 256) | |
1913 (aset rev-table ch i)))) | |
1914 (set-char-table-extra-slot table 0 rev-table) | |
1915 table)) | |
1916 | |
1917 (defun define-translation-table (symbol &rest args) | |
1918 "Define SYMBOL as the name of translation table made by ARGS. | |
1919 This sets up information so that the table can be used for | |
1920 translations in a CCL program. | |
1921 | |
1922 If the first element of ARGS is a char-table whose purpose is | |
1923 `translation-table', just define SYMBOL to name it. (Note that this | |
1924 function does not bind SYMBOL.) | |
1925 | |
1926 Any other ARGS should be suitable as arguments of the function | |
1927 `make-translation-table' (which see). | |
1928 | |
1929 This function sets properties `translation-table' and | |
1930 `translation-table-id' of SYMBOL to the created table itself and the | |
1931 identification number of the table respectively. It also registers | |
1932 the table in `translation-table-vector'." | |
1933 (let ((table (if (and (char-table-p (car args)) | |
1934 (eq (char-table-subtype (car args)) | |
1935 'translation-table)) | |
1936 (car args) | |
1937 (apply 'make-translation-table args))) | |
1938 (len (length translation-table-vector)) | |
1939 (id 0) | |
1940 (done nil)) | |
1941 (put symbol 'translation-table table) | |
1942 (while (not done) | |
1943 (if (>= id len) | |
1944 (setq translation-table-vector | |
1945 (vconcat translation-table-vector (make-vector len nil)))) | |
1946 (let ((slot (aref translation-table-vector id))) | |
1947 (if (or (not slot) | |
1948 (eq (car slot) symbol)) | |
1949 (progn | |
1950 (aset translation-table-vector id (cons symbol table)) | |
1951 (setq done t)) | |
1952 (setq id (1+ id))))) | |
1953 (put symbol 'translation-table-id id) | |
1954 id)) | |
1955 | |
1956 (put 'with-category-table 'lisp-indent-function 1) | |
1957 | |
1958 (defmacro with-category-table (table &rest body) | |
1959 "Evaluate BODY with category table of current buffer set to TABLE. | |
1960 The category table of the current buffer is saved, BODY is evaluated, | |
1961 then the saved table is restored, even in case of an abnormal exit. | |
1962 Value is what BODY returns." | |
1963 (let ((old-table (make-symbol "old-table")) | |
1964 (old-buffer (make-symbol "old-buffer"))) | |
1965 `(let ((,old-table (category-table)) | |
1966 (,old-buffer (current-buffer))) | |
1967 (unwind-protect | |
1968 (progn | |
1969 (set-category-table ,table) | |
1970 ,@body) | |
1971 (save-current-buffer | |
1972 (set-buffer ,old-buffer) | |
1973 (set-category-table ,old-table)))))) | |
1974 | |
1975 (defun define-translation-hash-table (symbol table) | |
1976 "Define SYMBOL as the name of the hash translation TABLE for use in CCL. | |
1977 | |
1978 Analogous to `define-translation-table', but updates | |
1979 `translation-hash-table-vector' and the table is for use in the CCL | |
1980 `lookup-integer' and `lookup-character' functions." | |
1981 (unless (and (symbolp symbol) | |
1982 (hash-table-p table)) | |
1983 (error "Bad args to define-translation-hash-table")) | |
1984 (let ((len (length translation-hash-table-vector)) | |
1985 (id 0) | |
1986 done) | |
1987 (put symbol 'translation-hash-table table) | |
1988 (while (not done) | |
1989 (if (>= id len) | |
1990 (setq translation-hash-table-vector | |
1991 (vconcat translation-hash-table-vector [nil]))) | |
1992 (let ((slot (aref translation-hash-table-vector id))) | |
1993 (if (or (not slot) | |
1994 (eq (car slot) symbol)) | |
1995 (progn | |
1996 (aset translation-hash-table-vector id (cons symbol table)) | |
1997 (setq done t)) | |
1998 (setq id (1+ id))))) | |
1999 (put symbol 'translation-hash-table-id id) | |
2000 id)) | |
2001 | |
2002 ;;; Initialize some variables. | |
2003 | |
2004 (put 'use-default-ascent 'char-table-extra-slots 0) | |
2005 (setq use-default-ascent (make-char-table 'use-default-ascent)) | |
2006 (put 'ignore-relative-composition 'char-table-extra-slots 0) | |
2007 (setq ignore-relative-composition | |
2008 (make-char-table 'ignore-relative-composition)) | |
2009 | |
2010 | |
2011 ;;; Built-in auto-coding-functions: | |
2012 | |
2013 (defun sgml-xml-auto-coding-function (size) | |
2014 "Determine whether the buffer is XML, and if so, its encoding. | |
2015 This function is intended to be added to `auto-coding-functions'." | |
2016 (setq size (+ (point) size)) | |
2017 (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t) | |
2018 (let ((end (save-excursion | |
2019 ;; This is a hack. | |
2020 (re-search-forward "\"\\s-*\\?>" size t)))) | |
2021 (when end | |
2022 (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t) | |
2023 (let* ((match (match-string 1)) | |
2024 (sym (intern (downcase match)))) | |
2025 (if (coding-system-p sym) | |
2026 sym | |
2027 (message "Warning: unknown coding system \"%s\"" match) | |
2028 nil)) | |
2029 'utf-8))))) | |
2030 | |
2031 (defun sgml-html-meta-auto-coding-function (size) | |
2032 "If the buffer has an HTML meta tag, use it to determine encoding. | |
2033 This function is intended to be added to `auto-coding-functions'." | |
2034 (setq size (min (+ (point) size) | |
2035 ;; Only search forward 10 lines | |
2036 (save-excursion | |
2037 (forward-line 10) | |
2038 (point)))) | |
2039 (when (and (search-forward "<html>" size t) | |
2040 (re-search-forward "<meta\\s-+http-equiv=\"content-type\"\\s-+content=\"text/\\sw+;\\s-*charset=\\(.+?\\)\"" size t)) | |
2041 (let* ((match (match-string 1)) | |
2042 (sym (intern (downcase match)))) | |
2043 (if (coding-system-p sym) | |
2044 sym | |
2045 (message "Warning: unknown coding system \"%s\"" match) | |
2046 nil)))) | |
2047 | |
2048 ;;; | |
2049 (provide 'mule) | |
2050 | |
2051 ;;; mule.el ends here |