Mercurial > emacs
comparison lisp/international/mule.el @ 88775:31ba8935bb97
(auto-coding-regexp-alist): Recognize
Emacs 20/1 byte-compiled files.
author | Dave Love <fx@gnu.org> |
---|---|
date | Mon, 24 Jun 2002 18:24:28 +0000 |
parents | 97a127f9efeb |
children | c24b60ed2b31 |
comparison
equal
deleted
inserted
replaced
88774:ef046df4c6ee | 88775:31ba8935bb97 |
---|---|
1 ;;; mule.el --- basic commands for multilingual environment | 1 (binary file application/octet-stream, hash: 7dcd9c08755f855e2cdf9bcb05673a2040e4e925) |
2 | |
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
4 ;; Licensed to the Free Software Foundation. | |
5 ;; Copyright (C) 2001 Free Software Foundation, Inc. | |
6 ;; Copyright (C) 2001, 2002 | |
7 ;; National Institute of Advanced Industrial Science and Technology (AIST) | |
8 ;; Registration Number H13PRO009 | |
9 | |
10 ;; Keywords: mule, multilingual, character set, coding system | |
11 | |
12 ;; This file is part of GNU Emacs. | |
13 | |
14 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
15 ;; it under the terms of the GNU General Public License as published by | |
16 ;; the Free Software Foundation; either version 2, or (at your option) | |
17 ;; any later version. | |
18 | |
19 ;; GNU Emacs is distributed in the hope that it will be useful, | |
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
22 ;; GNU General Public License for more details. | |
23 | |
24 ;; You should have received a copy of the GNU General Public License | |
25 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
26 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
27 ;; Boston, MA 02111-1307, USA. | |
28 | |
29 ;;; Commentary: | |
30 | |
31 ;;; Code: | |
32 | |
33 (defconst mule-version "7.0 (SAKAKI)" | |
34 "Version number and name of this version of MULE (multilingual environment).") | |
35 | |
36 (defconst mule-version-date "2002.2.28" | |
37 "Distribution date of this version of MULE (multilingual environment).") | |
38 | |
39 | |
40 | |
41 ;;; CHARACTER | |
42 (defalias 'char-valid-p 'characterp) ; old name kept as an alias of `characterp' | |
43 (make-obsolete 'char-valid-p 'characterp "22.1") ; flag the alias as obsolete since 22.1 | |
44 | |
45 | |
46 ;;; CHARSET | |
47 | |
48 (defun define-charset (name docstring &rest props) | |
49 "Define NAME (symbol) as a charset with DOCSTRING. | |
50 The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE | |
51 may be any symbol. The following have special meanings, and one of | |
52 `:code-offset', `:map', `:subset', `:superset' must be specified. | |
53 | |
54 `:short-name' | |
55 | |
56 VALUE must be a short string to identify the charset. If omitted, | |
57 NAME is used. | |
58 | |
59 `:long-name' | |
60 | |
61 VALUE must be a string longer than `:short-name' to identify the | |
62 charset. If omitted, the value of the `:short-name' attribute is used. | |
63 | |
64 `:dimension' | |
65 | |
66 VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of | |
67 code-points of the charsets. If omitted, it is calculated from the | |
68 value of the `:code-space' attribute. | |
69 | |
70 `:code-space' | |
71 | |
72 VALUE must be a vector of length at most 8 specifying the byte code | |
73 range of each dimension in this format: | |
74 [ MIN-1 MAX-1 MIN-2 MAX-2 ... ] | |
75 where MIN-N is the minimum byte value of Nth dimension of code-point, | |
76 MAX-N is the maximum byte value of that. | |
77 | |
78 `:min-code' | |
79 | |
80 VALUE must be an integer specifying the minimum code point of the | |
81 charset. If omitted, it is calculated from `:code-space'. VALUE may | |
82 be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of | |
83 the code point and LOW is the least significant 16 bits. | |
84 | |
85 `:max-code' | |
86 | |
87 VALUE must be an integer specifying the maximum code point of the | |
88 charset. If omitted, it is calculated from `:code-space'. VALUE may | |
89 be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of | |
90 the code point and LOW is the least significant 16 bits. | |
91 | |
92 `:iso-final-char' | |
93 | |
94 VALUE must be a character in the range 32 to 127 (inclusive) | |
95 specifying the final char of the charset for ISO-2022 encoding. If | |
96 omitted, the charset can't be encoded by ISO-2022 based | |
97 coding-systems. | |
98 | |
99 `:iso-revision-number' | |
100 | |
101 VALUE must be an integer in the range 0..63, specifying the revision | |
102 number of the charset for ISO-2022 encoding. | |
103 | |
104 `:emacs-mule-id' | |
105 | |
106 VALUE must be an integer of 0, 128..255. If omitted, the charset | |
107 can't be encoded by coding-systems of type `emacs-mule'. | |
108 | |
109 `:ascii-compatible-p' | |
110 | |
111 VALUE must be nil or t (default nil). If VALUE is t, the charset is | |
112 compatible with ASCII, i.e. the first 128 code points map to ASCII. | |
113 | |
114 `:supplementary-p' | |
115 | |
116 VALUE must be nil or t. If the VALUE is t, the charset is | |
117 supplementary, which means it is used only as a parent of some other | |
118 charset. | |
119 | |
120 `:invalid-code' | |
121 | |
122 VALUE must be a nonnegative integer that can be used as an invalid | |
123 code point of the charset. If the minimum code is 0 and the maximum | |
124 code is greater than Emacs' maximum integer value, `:invalid-code' | |
125 should not be omitted. | |
126 | |
127 `:code-offset' | |
128 | |
129 VALUE must be an integer added to the index number of a character to | |
130 get the corresponding character code. | |
131 | |
132 `:map' | |
133 | |
134 VALUE must be a vector or a string. | |
135 | |
136 If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], | |
137 where CODE-n is a code-point of the charset, and CHAR-n is the | |
138 corresponding character code. | |
139 | |
140 If it is a string, it is a name of file that contains the above | |
141 information. Each line of the file must be this format: | |
142 0xXXX 0xYYY | |
143 where XXX is a hexadecimal representation of CODE-n and YYY is a | |
144 hexadecimal representation of CHAR-n. A line starting with `#' is a | |
145 comment line. | |
146 | |
147 `:subset' | |
148 | |
149 VALUE must be a list: | |
150 ( PARENT MIN-CODE MAX-CODE OFFSET ) | |
151 PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range | |
152 of characters inherited from the parent. OFFSET is an integer value | |
153 to add to a code point of the parent charset to get the corresponding | |
154 code point of this charset. | |
155 | |
156 `:superset' | |
157 | |
158 VALUE must be a list of parent charsets. The charset inherits | |
159 characters from them. Each element of the list may be a cons (PARENT | |
160 . OFFSET), where PARENT is a parent charset, and OFFSET is an offset | |
161 value to add to a code point of PARENT to get the corresponding code | |
162 point of this charset. | |
163 | |
164 `:unify-map' | |
165 | |
166 VALUE must be a vector or a string. | |
167 | |
168 If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], | |
169 where CODE-n is a code-point of the charset, and CHAR-n is the | |
170 corresponding Unicode character code. | |
171 | |
172 If it is a string, it is a name of file that contains the above | |
173 information. The file format is the same as that described for the | |
174 `:map' attribute." | |
175 (let ((attrs (mapcar 'list '(:dimension | |
176 :code-space | |
177 :min-code | |
178 :max-code | |
179 :iso-final-char | |
180 :iso-revision-number | |
181 :emacs-mule-id | |
182 :ascii-compatible-p | |
183 :supplementary-p | |
184 :invalid-code | |
185 :code-offset | |
186 :map | |
187 :subset | |
188 :superset | |
189 :unify-map | |
190 :plist)))) | |
191 | |
192 ;; If :dimension is omitted, get the dimension from :code-space. | |
193 (let ((dimension (plist-get props :dimension))) | |
194 (or dimension | |
195 (progn | |
196 (setq dimension (/ (length (plist-get props :code-space)) 2)) | |
197 (setq props (plist-put props :dimension dimension))))) | |
198 | |
199 (dolist (slot attrs) | |
200 (setcdr slot (plist-get props (car slot)))) | |
201 | |
202 ;; Make sure that the value of :code-space is a vector of 8 | |
203 ;; elements (shorter vectors are padded with zeros). | |
204 (let* ((slot (assq :code-space attrs)) | |
205 (val (cdr slot)) | |
206 (len (length val))) | |
207 (if (< len 8) | |
208 (setcdr slot | |
209 (vconcat val (make-vector (- 8 len) 0))))) | |
210 | |
211 ;; Add :name and :docstring properties to PROPS. | |
212 (setq props | |
213 (cons :name (cons name (cons :docstring (cons docstring props))))) | |
214 (or (plist-get props :short-name) | |
215 (plist-put props :short-name (symbol-name name))) | |
216 (or (plist-get props :long-name) | |
217 (plist-put props :long-name (plist-get props :short-name))) | |
218 ;; We can probably get a worthwhile amount in purespace. | |
219 (setq props | |
220 (mapcar (lambda (elt) | |
221 (if (stringp elt) | |
222 (purecopy elt) | |
223 elt)) | |
224 props)) | |
225 (setcdr (assq :plist attrs) props) | |
226 | |
227 (apply 'define-charset-internal name (mapcar 'cdr attrs)))) | |
228 | |
229 | |
230 (defun load-with-code-conversion (fullname file &optional noerror nomessage) | |
231 "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. | |
232 The file contents are decoded before evaluation if necessary. | |
233 If optional third arg NOERROR is non-nil, | |
234 report no error if FILE doesn't exist. | |
235 Print messages at start and end of loading unless | |
236 optional fourth arg NOMESSAGE is non-nil. | |
237 Return t if the file was loaded, nil if it is unreadable and NOERROR is non-nil." | |
238 (if (null (file-readable-p fullname)) | |
239 (and (null noerror) | |
240 (signal 'file-error (list "Cannot open load file" file))) | |
241 ;; Read file with code conversion, and then eval. | |
242 (let* ((buffer | |
243 ;; To avoid any autoloading, set default-major-mode to | |
244 ;; fundamental-mode. | |
245 ;; So that we don't get completely screwed if the | |
246 ;; file is encoded in some complicated character set, | |
247 ;; read it with real decoding, as a multibyte buffer, | |
248 ;; even if this is a --unibyte Emacs session. | |
249 (let ((default-major-mode 'fundamental-mode) | |
250 (default-enable-multibyte-characters t)) | |
251 ;; We can't use `generate-new-buffer' because files.el | |
252 ;; is not yet loaded. | |
253 (get-buffer-create (generate-new-buffer-name " *load*")))) | |
254 (load-in-progress t) | |
255 (source (save-match-data (string-match "\\.el\\'" fullname)))) | |
256 (unless nomessage | |
257 (if source | |
258 (message "Loading %s (source)..." file) | |
259 (message "Loading %s..." file))) | |
260 (when purify-flag | |
261 (setq preloaded-file-list (cons file preloaded-file-list))) | |
262 (unwind-protect | |
263 (let ((load-file-name fullname) | |
264 (set-auto-coding-for-load t) | |
265 (inhibit-file-name-operation nil)) | |
266 (save-excursion | |
267 (set-buffer buffer) | |
268 (insert-file-contents fullname) | |
269 ;; If the loaded file was inserted with no-conversion or | |
270 ;; raw-text coding system, make the buffer unibyte. | |
271 ;; Otherwise, eval-buffer might try to interpret random | |
272 ;; binary junk as multibyte characters. Coding types are symbols here, so test for `raw-text', not a number. | |
273 (if (and enable-multibyte-characters | |
274 (or (eq (coding-system-type last-coding-system-used) 'raw-text) | |
275 (eq last-coding-system-used 'no-conversion))) | |
276 (set-buffer-multibyte nil)) | |
277 ;; Make `kill-buffer' quiet. | |
278 (set-buffer-modified-p nil)) | |
279 ;; Have the original buffer current while we eval. | |
280 (eval-buffer buffer nil file | |
281 ;; If this Emacs is running with --unibyte, | |
282 ;; convert multibyte strings to unibyte | |
283 ;; after reading them. | |
284 ;; (not default-enable-multibyte-characters) | |
285 nil t | |
286 )) | |
287 (let (kill-buffer-hook kill-buffer-query-functions) | |
288 (kill-buffer buffer))) | |
289 (let ((hook (assoc file after-load-alist))) | |
290 (when hook | |
291 (mapc (function eval) (cdr hook)))) ; run the after-load forms for effect only | |
292 (unless (or nomessage noninteractive) | |
293 (if source | |
294 (message "Loading %s (source)...done" file) | |
295 (message "Loading %s...done" file))) | |
296 t))) | |
297 | |
298 ;; API (Application Program Interface) for charsets. | |
299 | |
300 ;;; Charset property | |
301 | |
302 (defun get-charset-property (charset propname) | |
303 "Return the value stored under PROPNAME in CHARSET's property list. | |
304 Properties are stored with `(put-charset-property CHARSET PROPNAME VALUE)'." | |
305 (let ((plist (charset-plist charset))) | |
306 (plist-get plist propname))) | |
307 | |
308 (defun put-charset-property (charset propname value) | |
309 "Set CHARSET's PROPNAME property to VALUE. | |
310 Read it back with `(get-charset-property CHARSET PROPNAME)'." | |
311 (let ((plist (plist-put (charset-plist charset) propname value))) | |
312 (set-charset-plist charset plist))) | |
313 | |
314 | |
315 (defun charset-description (charset) | |
316 "Return the description of CHARSET, i.e. its `:docstring' property." | |
317 (let ((plist (charset-plist charset))) (plist-get plist :docstring))) | |
318 | |
319 (defun charset-dimension (charset) | |
320 "Return the dimension of CHARSET (the integer in its `:dimension' property)." | |
321 (plist-get (charset-plist charset) :dimension)) | |
322 | |
323 (defun charset-chars (charset &optional dimension) | |
324 "Return the number of byte values CHARSET uses in DIMENSION. | |
325 DIMENSION counts from 1 and defaults to the first dimension." | |
326 (let* ((dim (or dimension 1)) | |
327 (space (plist-get (charset-plist charset) :code-space)) | |
328 (hi (aref space (1- (* 2 dim)))) (lo (aref space (- (* 2 dim) 2)))) | |
329 (1+ (- hi lo)))) | |
330 | |
331 (defun charset-iso-final-char (charset) | |
332 "Return the `:iso-final-char' property of CHARSET, or -1 if it is nil." | |
333 (let ((final (plist-get (charset-plist charset) :iso-final-char))) | |
334 (if final final -1))) | |
335 | |
336 (defun charset-short-name (charset) ; a function: the lookup must run on the evaluated CHARSET | |
337 "Return the short name of CHARSET (its `:short-name' property)." | |
338 (plist-get (charset-plist charset) :short-name)) | |
339 | |
340 (defun charset-long-name (charset) ; a function: the lookup must run on the evaluated CHARSET | |
341 "Return the long name of CHARSET (its `:long-name' property)." | |
342 (plist-get (charset-plist charset) :long-name)) | |
343 | |
344 (defun charset-list () | |
345 "Return the value of the variable `charset-list'. | |
346 | |
347 This function is provided for backward compatibility only; | |
348 new code should read the variable `charset-list' directly." | |
349 charset-list) | |
350 (make-obsolete 'charset-list "Use variable `charset-list'" "22.1") | |
351 | |
352 (defun generic-char-p (char) | |
353 "Always return nil; CHAR is ignored. This exists only for backward compatibility." | |
354 nil) | |
355 (make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1") | |
356 | |
357 ;; Coding system stuff | |
358 | |
359 ;; Coding system is a symbol that has been defined by the function | |
360 ;; `define-coding-system'. | |
361 | |
362 (defconst coding-system-iso-2022-flags ; `define-coding-system' maps the Nth symbol here to bit N of its flags mask | |
363 '(long-form | |
364 ascii-at-eol | |
365 ascii-at-cntl | |
366 7-bit | |
367 locking-shift | |
368 single-shift | |
369 designation | |
370 revision | |
371 direction | |
372 init-at-bol | |
373 designate-at-bol | |
374 safe | |
375 latin-extra | |
376 composition | |
377 euc-tw-shift | |
378 use-roman | |
379 use-oldjis) | |
380 "List of symbols that control ISO-2022 encoder/decoder. | |
381 | |
382 The value of the `:flags' attribute in the argument of the function | |
383 `define-coding-system' must be a list of these symbols. | |
384 | |
385 If `long-form' is specified, use a long designation sequence on | |
386 encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312', | |
387 and `japanese-jisx0208'. The long designation sequence doesn't | |
388 conform to ISO 2022, but is used by such a coding system as | |
389 `compound-text'. | |
390 | |
391 If `ascii-at-eol' is specified, designate ASCII to g0 at end of line | |
392 on encoding. | |
393 | |
394 If `ascii-at-cntl' is specified, designate ASCII to g0 before control | |
395 codes and SPC on encoding. | |
396 | |
397 If `7-bit' is specified, use 7-bit code only on encoding. | |
398 | |
399 If `locking-shift' is specified, decode locking-shift code correctly | |
400 on decoding, and use locking-shift to invoke a graphic element on | |
401 encoding. | |
402 | |
403 If `single-shift' is specified, decode single-shift code correctly on | |
404 decoding, and use single-shift to invoke a graphic element on encoding. | |
405 | |
406 If `designation' is specified, decode designation code correctly on | |
407 decoding, and use designation to designate a charset to a graphic | |
408 element on encoding. | |
409 | |
410 If `revision' is specified, produce an escape sequence to specify | |
411 revision number of a charset on encoding. Such an escape sequence is | |
412 always correctly decoded on decoding. | |
413 | |
414 If `direction' is specified, decode ISO6429's code for specifying | |
415 direction correctly, and produce the code on encoding. | |
416 | |
417 If `init-at-bol' is specified, on encoding, it is assumed that | |
418 invocation and designation statuses are reset at each beginning of | |
419 line even if `ascii-at-eol' is not specified, so no codes for | |
420 resetting them are produced. | |
421 | |
422 If `safe' is specified, on encoding, characters not supported by a | |
423 coding are replaced with `?'. | |
424 | |
425 If `latin-extra' is specified, code-detection routine assumes that a | |
426 code specified in `latin-extra-code-table' (which see) is valid. | |
427 | |
428 If `composition' is specified, an escape sequence to specify | |
429 composition sequence is correctly decoded on decoding, and is produced | |
430 on encoding. | |
431 | |
432 If `euc-tw-shift' is specified, the EUC-TW specific shifting code is | |
433 correctly decoded on decoding, and is produced on encoding.") | |
434 | |
435 (defun define-coding-system (name docstring &rest props) | |
436 "Define NAME (symbol) as a coding system with DOCSTRING and attributes. | |
437 The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE | |
438 may be any symbol. | |
439 | |
440 The following attributes have special meanings. If labeled as | |
441 \"(required)\", it should not be omitted. | |
442 | |
443 `:mnemonic' (required) | |
444 | |
445 VALUE is a character to display on mode line for the coding system. | |
446 | |
447 `:coding-type' (required) | |
448 | |
449 VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', | |
450 `emacs-mule', `shift-jis', `big5', `ccl', `raw-text', `undecided'. | |
451 | |
452 `:eol-type' (optional) | |
453 | |
454 VALUE is an EOL (end-of-line) format of the coding system. It must be | |
455 one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL | |
456 \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), | |
457 and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on | |
458 decoding by the coding system, Emacs automatically detects an EOL | |
459 format of the source text. | |
460 | |
461 `:charset-list' (required) | |
462 | |
463 VALUE must be a list of charsets supported by the coding system. On | |
464 encoding by the coding system, if a character belongs to multiple | |
465 charsets in the list, a charset that comes earlier in the list is | |
466 selected. | |
467 | |
468 `:ascii-compatible-p' (optional) | |
469 | |
470 If VALUE is non-nil, the coding system decodes all 7-bit bytes into | |
471 the corresponding ASCII characters, and encodes all ASCII characters | |
472 back to the corresponding 7-bit bytes. If omitted, the VALUE defaults | |
473 to nil. | |
474 | |
475 `:decode-translation-table' (optional) | |
476 | |
477 VALUE must be a translation table to use on decoding. | |
478 | |
479 `:encode-translation-table' (optional) | |
480 | |
481 VALUE must be a translation table to use on encoding. | |
482 | |
483 `:post-read-conversion' (optional) | |
484 | |
485 VALUE must be a function to call after some text is inserted and | |
486 decoded by the coding system itself and before any functions in | |
487 `after-insert-functions' are called. The arguments to this function | |
488 are the same as those of a function in `after-insert-functions', | |
489 i.e. LENGTH of a text while putting point at the head of the text to | |
490 be decoded. | |
491 | |
492 `:pre-write-conversion' | |
493 | |
494 VALUE must be a function to call after all functions in | |
495 `write-region-annotate-functions' and `buffer-file-format' are called, | |
496 and before the text is encoded by the coding system itself. The | |
497 arguments to this function are the same as those of a function in | |
498 `write-region-annotate-functions', i.e. FROM and TO specifying region | |
499 of a text. | |
500 | |
501 `:default-char' | |
502 | |
503 VALUE must be a character. On encoding, a character not supported by | |
504 the coding system is replaced with VALUE. | |
505 | |
506 `:eol-type' | |
507 | |
508 VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like | |
509 EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like | |
510 EOL (CR). If omitted, on decoding, the coding system detects EOL | |
511 format automatically, and on encoding, uses Unix-like EOL. | |
512 | |
513 `:mime-charset' | |
514 | |
515 VALUE must be a symbol whose name is a MIME charset name. | |
516 | |
517 `:flags' | |
518 | |
519 VALUE must be a list of symbols that control ISO-2022 converter. Each | |
520 symbol must be a member of the variable `coding-system-iso-2022-flags' | |
521 \(which see). This attribute has a meaning only when `:coding-type' | |
522 is `iso-2022'. | |
523 | |
524 `:designation' | |
525 | |
526 VALUE must be a vector [ G0-USAGE G1-USAGE G2-USAGE G3-USAGE ]. | |
527 GN-USAGE specifies the usage of graphic register GN as follows. | |
528 | |
529 If it is nil, no charset can be designated to GN. | |
530 | |
531 If it is a charset, the charset is initially designated to GN, and | |
532 never used by the other charsets. | |
533 | |
534 If it is a list, the elements must be charsets, nil, 94, or 96. GN | |
535 can be used by all listed charsets. If the list contains 94, any | |
536 charsets whose iso-chars is 94 can be designated to GN. If the list | |
537 contains 96, any charsets whose iso-chars is 96 can be designated to | |
538 GN. If the first element is a charset, the charset is initially | |
539 designated to GN. | |
540 | |
541 This attribute has a meaning only when `:coding-type' is `iso-2022'. | |
542 | |
543 `:bom' | |
544 | |
545 VALUE must be nil, t, or cons of coding systems whose `:coding-type' is | |
546 `utf-16'. | |
547 | |
548 This attribute has a meaning only when `:coding-type' is `utf-16'. | |
549 | |
550 `:endian' | |
551 | |
552 VALUE must be t or nil. See the above description for the detail. | |
553 | |
554 This attribute has a meaning only when `:coding-type' is `utf-16'. | |
555 | |
556 `:ccl-decoder' | |
557 | |
558 This attribute has a meaning only when `:coding-type' is `ccl'. | |
559 | |
560 `:ccl-encoder' | |
561 | |
562 This attribute has a meaning only when `:coding-type' is `ccl'." | |
563 (let* ((common-attrs (mapcar 'list ; order here is the positional argument order of the internal definer | |
564 '(:mnemonic | |
565 :coding-type | |
566 :charset-list | |
567 :ascii-compatible-p | |
568 :decode-translation-table | |
569 :encode-translation-table | |
570 :post-read-conversion | |
571 :pre-write-conversion | |
572 :default-char | |
573 :plist | |
574 :eol-type))) | |
575 (coding-type (plist-get props :coding-type)) | |
576 (spec-attrs (mapcar 'list | |
577 (cond ((eq coding-type 'iso-2022) | |
578 '(:initial | |
579 :reg-usage | |
580 :request | |
581 :flags)) | |
582 ((eq coding-type 'utf-16) | |
583 '(:bom | |
584 :endian)) | |
585 ;; Fixme: CCL definition is broken. | |
586 ((eq coding-type 'ccl) | |
587 '(:ccl-decoder | |
588 :ccl-encoder | |
589 :valids)))))) | |
590 | |
591 (dolist (slot common-attrs) | |
592 (setcdr slot (plist-get props (car slot)))) | |
593 | |
594 (dolist (slot spec-attrs) | |
595 (setcdr slot (plist-get props (car slot)))) | |
596 | |
597 (if (eq coding-type 'iso-2022) | |
598 (let ((designation (plist-get props :designation)) | |
599 (flags (plist-get props :flags)) | |
600 (initial (make-vector 4 nil)) | |
601 (reg-usage (cons 4 4)) | |
602 request elt) | |
603 (dotimes (i 4) | |
604 (setq elt (aref designation i)) | |
605 (cond ((charsetp elt) | |
606 (aset initial i elt) | |
607 (setq request (cons (cons elt i) request))) | |
608 ((consp elt) | |
609 (aset initial i (car elt)) | |
610 (if (charsetp (car elt)) | |
611 (setq request (cons (cons (car elt) i) request))) | |
612 (dolist (e (cdr elt)) | |
613 (cond ((charsetp e) | |
614 (setq request (cons (cons e i) request))) | |
615 ((eq e 94) | |
616 (setcar reg-usage i)) | |
617 ((eq e 96) | |
618 (setcdr reg-usage i)) | |
619 ((eq e t) | |
620 (setcar reg-usage i) | |
621 (setcdr reg-usage i))))))) | |
622 (setcdr (assq :initial spec-attrs) initial) | |
623 (setcdr (assq :reg-usage spec-attrs) reg-usage) | |
624 (setcdr (assq :request spec-attrs) request) | |
625 | |
626 ;; Change :flags value from a list to a bit-mask. | |
627 (let ((bits 0) | |
628 (i 0)) | |
629 (dolist (elt coding-system-iso-2022-flags) | |
630 (if (memq elt flags) | |
631 (setq bits (logior bits (lsh 1 i)))) | |
632 (setq i (1+ i))) | |
633 (setcdr (assq :flags spec-attrs) bits)))) | |
634 | |
635 ;; Add :name and :docstring properties to PROPS. | |
636 (setq props | |
637 (cons :name (cons name (cons :docstring (cons (purecopy docstring) | |
638 props))))) | |
639 (setcdr (assq :plist common-attrs) props) | |
640 | |
641 (apply 'define-coding-system-internal | |
642 name (mapcar 'cdr (append common-attrs spec-attrs))))) | |
643 | |
644 (defun coding-system-doc-string (coding-system) | |
645 "Return the `:docstring' property of CODING-SYSTEM, i.e. its documentation." | |
646 (let ((plist (coding-system-plist coding-system))) (plist-get plist :docstring))) | |
647 | |
648 (defun coding-system-mnemonic (coding-system) | |
649 "Return the mnemonic character (`:mnemonic' property) of CODING-SYSTEM. | |
650 That character is used in the mode line to indicate the coding system." | |
651 (let ((plist (coding-system-plist coding-system))) | |
652 (plist-get plist :mnemonic))) | |
653 | |
654 (defun coding-system-type (coding-system) | |
655 "Return the `:coding-type' property of CODING-SYSTEM. | |
656 It is a symbol naming the encoding method; see `define-coding-system'." | |
657 (let ((plist (coding-system-plist coding-system))) | |
658 (plist-get plist :coding-type))) | |
659 | |
660 (defun coding-system-charset-list (coding-system) | |
661 "Return the `:charset-list' property of CODING-SYSTEM. | |
662 Normally a list of supported charsets; the symbol `iso-2022' or | |
663 `emacs-mule' stands for all ISO-2022 or all emacs-mule charsets." | |
664 (let ((plist (coding-system-plist coding-system))) (plist-get plist :charset-list))) | |
665 | |
666 (defun coding-system-get (coding-system prop) | |
667 "Return the value of property PROP of CODING-SYSTEM. | |
668 If PROP itself has no non-nil value and is an Emacs 20/21 style plain | |
669 symbol such as `mime-charset', retry with its keyword form `:mime-charset'." | |
670 (cond ((plist-get (coding-system-plist coding-system) prop)) | |
671 ((keywordp prop) nil) | |
672 (t (let ((keyword (intern (concat ":" (symbol-name prop))))) | |
673 (plist-get (coding-system-plist coding-system) keyword))))) | |
674 | |
675 (defun coding-system-put (coding-system prop val) | |
676 "Change the value of property PROP in CODING-SYSTEM's property list to VAL." | |
677 (plist-put (coding-system-plist coding-system) prop val)) ; result is not stored back; presumably relies on `plist-put' mutating in place -- TODO confirm for an empty plist | |
678 | |
679 (defalias 'coding-system-parent 'coding-system-base) ; old name kept as an alias of `coding-system-base' | |
680 (make-obsolete 'coding-system-parent 'coding-system-base "20.3") ; flag the alias as obsolete since 20.3 | |
681 | |
682 ;; Coding system also has a property `eol-type'. | |
683 ;; | |
684 ;; This property indicates how the coding system handles end-of-line | |
685 ;; format. The value is integer 0, 1, 2, or a vector of three coding | |
686 ;; systems. Each integer value 0, 1, and 2 indicates the format of | |
687 ;; end-of-line LF, CRLF, and CR respectively. A vector value | |
688 ;; indicates that the format of end-of-line should be detected | |
689 ;; automatically. Nth element of the vector is the subsidiary coding | |
690 ;; system whose `eol-type' property is N. | |
691 | |
692 (defun coding-system-lessp (x y) | |
693 "Return non-nil if coding system X should sort before coding system Y. | |
694 `no-conversion', `emacs-mule' and `undecided' come first, in that order; | |
695 the rest are ordered by mnemonic, case-insensitively, then by char code." | |
696 (cond ((eq x 'no-conversion) t) ((eq y 'no-conversion) nil) | |
697 ((eq x 'emacs-mule) t) ((eq y 'emacs-mule) nil) | |
698 ((eq x 'undecided) t) ((eq y 'undecided) nil) | |
699 (t (let* ((mx (coding-system-mnemonic x)) | |
700 (my (coding-system-mnemonic y)) | |
701 (lx (downcase mx)) (ly (downcase my))) | |
702 ;; Folded mnemonics decide; equal ones fall back to the raw characters. | |
703 (or (< lx ly) (and (= lx ly) (< mx my))))))) | |
704 | |
705 (defun add-to-coding-system-list (coding-system) | |
706 "Add CODING-SYSTEM to `coding-system-list', keeping it sorted by `coding-system-lessp'." | |
707 (if (or (null coding-system-list) | |
708 (coding-system-lessp coding-system (car coding-system-list))) | |
709 (setq coding-system-list (cons coding-system coding-system-list)) ; becomes the new head | |
710 (let ((len (length coding-system-list)) | |
711 mid (tem coding-system-list)) | |
712 (while (> len 1) ; repeatedly halve the LEN-element window that starts at TEM | |
713 (setq mid (nthcdr (/ len 2) tem)) | |
714 (if (coding-system-lessp (car mid) coding-system) | |
715 (setq tem mid | |
716 len (- len (/ len 2))) | |
717 (setq len (/ len 2)))) | |
718 (setcdr tem (cons coding-system (cdr tem)))))) ; splice the new element in right after TEM | |
719 | |
720 (defun coding-system-list (&optional base-only) | |
721 "Return a list of all existing non-subsidiary coding systems. | |
722 If optional arg BASE-ONLY is non-nil, only base coding systems are listed. | |
723 The value doesn't include subsidiary coding systems, which are | |
724 made from bases and aliases automatically for various end-of-line | |
725 formats (e.g. iso-latin-1-unix, koi8-r-dos)." | |
726 (let* ((head (cons nil (copy-sequence coding-system-list))) | |
727 (tail head)) | |
728 ;; Remove subsidiary coding systems (eol variants) and alias | |
729 ;; coding systems (if necessary). HEAD is a dummy cell, so even the first real element can be unlinked. | |
730 (while (cdr tail) | |
731 (let* ((coding (car (cdr tail))) | |
732 (aliases (coding-system-aliases coding))) | |
733 (if (or | |
734 ;; CODING is an eol variant if not in ALIASES. | |
735 (not (memq coding aliases)) | |
736 ;; CODING is an alias if it is not car of ALIASES. | |
737 (and base-only (not (eq coding (car aliases))))) | |
738 (setcdr tail (cdr (cdr tail))) | |
739 (setq tail (cdr tail))))) | |
740 (cdr head))) ; not the original first cell, which may have been unlinked | |
741 | |
742 (defun set-buffer-file-coding-system (coding-system &optional force) | |
743 "Make CODING-SYSTEM the file coding-system of the current buffer. | |
744 Saving the buffer will then encode it according to CODING-SYSTEM. | |
745 Use \\[list-coding-systems] to see the possible values of | |
746 CODING-SYSTEM. | |
747 | |
748 When CODING-SYSTEM leaves end-of-line conversion unspecified but the | |
749 buffer's previous file coding-system specified one, that end-of-line | |
750 conversion is carried over into CODING-SYSTEM. | |
751 | |
752 Likewise, when CODING-SYSTEM leaves text conversion unspecified but the | |
753 buffer's previous file coding-system specified one, that text | |
754 conversion is carried over into CODING-SYSTEM. | |
755 | |
756 With a non-nil FORCE (the prefix argument, interactively), no merging | |
757 is done and CODING-SYSTEM is used exactly as given. | |
758 | |
759 The buffer is marked modified so that the next \\[save-buffer] | |
760 really writes it out with CODING-SYSTEM. Lisp programs that do not | |
761 want the buffer marked modified can set the variable | |
762 `buffer-file-coding-system' directly." | |
763 (interactive "zCoding system for visited file (default, nil): \nP") | |
764 (check-coding-system coding-system) | |
765 (when (and coding-system buffer-file-coding-system (not force)) | |
766 (let ((old-base (coding-system-base buffer-file-coding-system)) | |
767 (old-eol (coding-system-eol-type buffer-file-coding-system))) | |
768 ;; An `undecided' base means no text conversion was requested: | |
769 ;; inherit the one the buffer already had. | |
770 (when (eq (coding-system-base coding-system) 'undecided) | |
771 (setq coding-system (coding-system-change-text-conversion | |
772 coding-system old-base))) | |
773 ;; A vector eol-type means no eol conversion was requested: | |
774 ;; inherit the buffer's old one if that was a definite 0, 1 or 2. | |
775 (when (and (vectorp (coding-system-eol-type coding-system)) | |
776 (numberp old-eol) (<= 0 old-eol) (<= old-eol 2)) | |
777 (setq coding-system (coding-system-change-eol-conversion | |
778 coding-system old-eol))))) | |
779 (setq buffer-file-coding-system coding-system) | |
780 (set-buffer-modified-p t) | |
781 (force-mode-line-update)) | |
782 | |
;; `set-terminal-coding-system' stores every non-nil coding system it
;; is given here, and falls back to this value when it is called with
;; nil while no terminal coding system is in effect.
(defvar default-terminal-coding-system nil
  "Default value for the terminal coding system.
This is normally set according to the selected language environment.
See also the command `set-terminal-coding-system'.")
787 | |
(defun set-terminal-coding-system (coding-system)
  "Set coding system of your terminal to CODING-SYSTEM.
All text output to the terminal will be encoded
with the specified coding system.
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
When CODING-SYSTEM is nil and no terminal coding system is in effect,
`default-terminal-coding-system' is used; that default is determined
by the selected language environment or by the previous use of this
command."
  (interactive
   ;; Offer a default only while no terminal coding system is in effect.
   (let ((default (and (not (terminal-coding-system))
                       default-terminal-coding-system)))
     (list (read-coding-system
            (format "Coding system for terminal display (default, %s): "
                    default)
            default))))
  (unless (or coding-system (terminal-coding-system))
    (setq coding-system default-terminal-coding-system))
  ;; Remember the choice as the default for later calls.
  (when coding-system
    (setq default-terminal-coding-system coding-system))
  (set-terminal-coding-system-internal coding-system)
  (redraw-frame (selected-frame)))
810 | |
;; `set-keyboard-coding-system' stores every non-nil coding system it
;; is given here, and falls back to this value when it is called with
;; nil while no keyboard coding system is in effect.
(defvar default-keyboard-coding-system nil
  "Default value of the keyboard coding system.
This is normally set according to the selected language environment.
See also the command `set-keyboard-coding-system'.")
815 | |
(defun set-keyboard-coding-system (coding-system)
  "Set coding system for keyboard input to CODING-SYSTEM.
In addition, this command enables Encoded-kbd minor mode.
\(If CODING-SYSTEM is nil, Encoded-kbd mode is turned off -- see
`encoded-kbd-mode'.)
For a list of possible values of CODING-SYSTEM, use \\[list-coding-systems].
When CODING-SYSTEM is nil and no keyboard coding system is in effect,
`default-keyboard-coding-system' is used; that default is determined
by the selected language environment or by the previous use of this
command."
  (interactive
   ;; Offer a default only while no keyboard coding system is in effect.
   (let ((default (and (not (keyboard-coding-system))
                       default-keyboard-coding-system)))
     (list (read-coding-system
            (format "Coding system for keyboard input (default, %s): "
                    default)
            default))))
  (unless (or coding-system (keyboard-coding-system))
    (setq coding-system default-keyboard-coding-system))
  ;; Remember the choice as the default for later calls.
  (when coding-system
    (setq default-keyboard-coding-system coding-system))
  (set-keyboard-coding-system-internal coding-system)
  ;; Keep the user option in step with the value actually installed.
  (setq keyboard-coding-system coding-system)
  (encoded-kbd-mode (if coding-system 1 0)))
840 | |
;; User option mirroring the keyboard coding system.  Its :set function
;; routes changes through `set-keyboard-coding-system', which in turn
;; assigns this variable.
(defcustom keyboard-coding-system nil
  "Specify coding system for keyboard input.
If you set this on a terminal which can't distinguish Meta keys from
8-bit characters, you will have to use ESC to type Meta characters.
See Info node `Specify Coding' and Info node `Single-Byte Character Support'.

Setting this variable directly does not take effect;
use either M-x customize or \\[set-keyboard-coding-system]."
  :type '(coding-system :tag "Coding system")
  :link '(info-link "(emacs)Specify Coding")
  :link '(info-link "(emacs)Single-Byte Character Support")
  :set (lambda (symbol value)
         ;; Don't load encoded-kbd-mode unnecessarily.
         ;; A nil VALUE while `encoded-kbd-mode' is still unbound only
         ;; initializes the default value; every other case goes
         ;; through the command.
         (if (or value (boundp 'encoded-kbd-mode))
             (set-keyboard-coding-system value)
           (set-default 'keyboard-coding-system nil))) ; must initialize
  :version "21.1"
  :group 'keyboard
  :group 'mule)
860 | |
(defun set-buffer-process-coding-system (decoding encoding)
  "Set coding systems for the process associated with the current buffer.
DECODING is the coding system to be used to decode input from the process,
ENCODING is the coding system to be used to encode output to the process.

Signal an error if the current buffer has no process.

For a list of possible values of DECODING and ENCODING,
use \\[list-coding-systems]."
  (interactive
   "zCoding-system for output from the process: \nzCoding-system for input to the process: ")
  (let ((proc (get-buffer-process (current-buffer))))
    (or proc (error "No process"))
    ;; Validate both coding systems before touching the process.
    (check-coding-system decoding)
    (check-coding-system encoding)
    (set-process-coding-system proc decoding encoding))
  (force-mode-line-update))
876 | |
;; `set-clipboard-coding-system' is another name for
;; `set-selection-coding-system'.
(defalias 'set-clipboard-coding-system 'set-selection-coding-system)
878 | |
(defun set-selection-coding-system (coding-system)
  "Make CODING-SYSTEM used for communicating with other X clients.
When sending or receiving text via cut_buffer, selection, and clipboard,
the text is encoded or decoded by CODING-SYSTEM.
CODING-SYSTEM is validated with `check-coding-system' and then stored
in the variable `selection-coding-system'."
  (interactive "zCoding system for X selection: ")
  (check-coding-system coding-system)
  (setq selection-coding-system coding-system))
886 | |
;; Coding system most recently specified with the command
;; `set-next-selection-coding-system'.  That command offers it as the
;; default and reuses it when called with a nil coding system.
(defvar last-next-selection-coding-system nil)
890 | |
(defun set-next-selection-coding-system (coding-system)
  "Make CODING-SYSTEM used for the next communication with other X clients.
This setting is effective for the next communication only.
If CODING-SYSTEM is nil, the coding system given to the previous use
of this command is used instead."
  (interactive
   (let ((prompt
          (if last-next-selection-coding-system
              (format "Coding system for the next X selection (default, %S): "
                      last-next-selection-coding-system)
            "Coding system for the next X selection: ")))
     (list (read-coding-system prompt last-next-selection-coding-system))))
  (cond (coding-system
         ;; Remember an explicit choice for later calls.
         (setq last-next-selection-coding-system coding-system))
        (t
         ;; Nothing given: fall back to the remembered choice.
         (setq coding-system last-next-selection-coding-system)))
  (check-coding-system coding-system)
  (setq next-selection-coding-system coding-system))
907 | |
;; Fixme:
(defun set-coding-priority (arg)
  "Set priority of coding categories according to ARG.
ARG is a list of coding categories ordered by priority.
An error is signaled if ARG contains an element that is not a coding
category or that appears more than once.

This function is provided for backward compatibility.
Now we have more convenient function `set-coding-system-priority'."
  (let ((remaining (copy-sequence coding-category-list)))
    ;; Validate ARG while removing each of its categories from
    ;; REMAINING.  We assume that `coding-category-list' contains all
    ;; coding categories, so a category missing from REMAINING must
    ;; already have been seen in ARG.
    (dolist (category arg)
      (unless (and (get category 'coding-category-index)
                   (memq category remaining))
        (error "Invalid or duplicated element in argument: %s" arg))
      (setq remaining (delq category remaining)))
    ;; The categories in ARG come first, the untouched ones after them.
    (setq coding-category-list (append arg remaining))
    ;; Fixme: not defined.
    (set-coding-priority-internal)))
(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.1")
931 | |
932 ;;; X selections | |
933 | |
;; `ctext-post-read-conversion' looks up the encoding name of each
;; extended segment in this alist, ignoring case.  A value that is a
;; charset is handled by rewriting the segment's leading sequence; a
;; value that is a coding system is used to decode the segment.
(defvar non-standard-icccm-encodings-alist
  '(("ISO8859-15" . latin-iso8859-15)
    ("ISO8859-14" . latin-iso8859-14)
    ("KOI8-R" . koi8-r)
    ("BIG5-0" . big5))
  "Alist of font charset names defined by XLFD, and the corresponding Emacs
charsets or coding systems.")
941 | |
;; Functions to support "Non-Standard Character Set Encodings" defined
;; by the ICCCM spec.  We support that by converting the leading
;; sequence of the ``extended segment'' to the corresponding ISO-2022
;; sequences (if the leading sequence names an Emacs charset), or decode
;; the segment (if it names a coding system).  Encoding does the reverse.
(defun ctext-post-read-conversion (len)
  "Decode LEN characters encoded as Compound Text with Extended Segments.
The text to decode starts at point.  The buffer is left narrowed to
the decoded text and made multibyte; its modified flag is restored.
Return the length of the decoded text."
  (buffer-disable-undo) ; minimize consing due to insertions and deletions
  (narrow-to-region (point) (+ (point) len))
  (save-match-data
    ;; PT marks the start of the extended segment being processed,
    ;; NEWPT the end of its header (later the end of its text), and
    ;; OLDPT the start of the text that has not been decoded yet.
    (let ((pt (point-marker))
          (oldpt (point-marker))
          (newpt (make-marker))
          (modified-p (buffer-modified-p))
          (case-fold-search nil)
          ;; Bound here so the decoding calls below do not change the
          ;; caller's value.
          last-coding-system-used
          encoding textlen chset)
      ;; An extended segment header is ESC % / F M L NAME STX, where F
      ;; is a digit 0..4, M and L are two bytes with the high bit set,
      ;; and NAME is the encoding name.
      (while (re-search-forward
              "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002"
              nil 'move)
        (set-marker newpt (point))
        (set-marker pt (match-beginning 0))
        (setq encoding (match-string 3))
        ;; M and L hold a length in base 128 (after clearing the high
        ;; bits).  Subtracting the size of NAME plus the STX byte
        ;; leaves the length of the segment's text.
        (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128)
                            (- (aref (match-string 2) 1) 128))
                         (1+ (length encoding))))
        (setq
         chset (cdr (assoc-ignore-case encoding
                                       non-standard-icccm-encodings-alist)))
        (cond ((null chset)
               ;; This charset is not supported--leave this extended
               ;; segment unaltered and skip over it.
               (goto-char (+ (point) textlen)))
              ((charsetp chset)
               ;; If it's a charset, replace the leading escape sequence
               ;; with a standard ISO-2022 sequence.  We will decode all
               ;; such segments later, in one go, when we exit the loop
               ;; or find an extended segment that names a coding
               ;; system, not a charset.
               (replace-match
                (concat "\\1"
                        (if (= 0 (charset-iso-graphic-plane chset))
                            ;; GL charsets
                            (if (= 1 (charset-dimension chset)) "(" "$(")
                          ;; GR charsets
                          (if (= 96 (charset-chars chset))
                              "-"
                            (if (= 1 (charset-dimension chset)) ")" "$)")))
                        (string (charset-iso-final-char chset)))
                t)
               (goto-char (+ (point) textlen)))
              ((coding-system-p chset)
               ;; If it's a coding system, we need to decode the segment
               ;; right away.  But first, decode what we've skipped
               ;; across until now.
               (when (> pt oldpt)
                 (decode-coding-region oldpt pt 'ctext-no-compositions))
               ;; Drop the segment header, then decode the TEXTLEN
               ;; bytes that followed it.
               (delete-region pt newpt)
               (set-marker newpt (+ newpt textlen))
               (decode-coding-region pt newpt chset)
               (goto-char newpt)
               ;; Everything up to here is decoded now.
               (set-marker oldpt newpt))))
      ;; Decode what's left.
      (when (> (point) oldpt)
        (decode-coding-region oldpt (point) 'ctext-no-compositions))
      ;; This buffer started as unibyte, because the string we get from
      ;; the X selection is a unibyte string.  We must now make it
      ;; multibyte, so that the decoded text is inserted as multibyte
      ;; into its buffer.
      (set-buffer-multibyte t)
      (set-buffer-modified-p modified-p)
      (- (point-max) (point-min)))))
1014 | |
;; The keys of this alist are also hard-coded in the regexp that
;; `ctext-pre-write-conversion' uses to find these designations; keep
;; the two in sync when adding an entry.
(defvar non-standard-designations-alist
  '(("$(0" . (big5 "big5-0" 2))
    ("$(1" . (big5 "big5-0" 2))
    ("-V" . (t "iso8859-10" 1))
    ("-Y" . (t "iso8859-13" 1))
    ("-_" . (t "iso8859-14" 1))
    ("-b" . (t "iso8859-15" 1))
    ("-f" . (t "iso8859-16" 1)))
  "Alist of ctext control sequences that introduce character sets which
are not in the list of approved ICCCM encodings, and the corresponding
coding system, identifier string, and number of octets per encoded
character.

Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)).  CTLSEQ
is the control sequence (sans the leading ESC) that introduces the character
set in the text encoded by compound-text.  ENCODING is a coding system
symbol; if it is t, it means that the ctext coding system already encodes
the text correctly, and only the leading control sequence needs to be altered.
If ENCODING is a coding system, we need to re-encode the text with that
coding system.  CHARSET is the ICCCM name of the charset we need to put into
the leading control sequence.  NOCTETS is the number of octets (bytes) that
encode each character in this charset.  NOCTETS can be 0 (meaning the number
of octets per character is variable), 1, 2, 3, or 4.")
1038 | |
(defun ctext-pre-write-conversion (from to)
  "Encode characters between FROM and TO as Compound Text w/Extended Segments.

If FROM is a string, or if the current buffer is not the one set up for us
by run_pre_post_conversion_on_str, generate a new temp buffer, insert the
text, and convert it in the temporary buffer.  Otherwise, convert in-place.

In either case the text to convert ends up being the whole accessible
portion of the current buffer.  Always return nil."
  (cond ((and (string= (buffer-name) " *code-converting-work*")
              (not (stringp from)))
         ;; Minimize consing due to subsequent insertions and deletions.
         (buffer-disable-undo)
         (narrow-to-region from to))
        (t
         (let ((buf (current-buffer)))
           (set-buffer (generate-new-buffer " *temp"))
           (buffer-disable-undo)
           (if (stringp from)
               (insert from)
             (insert-buffer-substring buf from to)))))
  ;; Encode the whole accessible region rather than FROM..TO: in the
  ;; temp-buffer case FROM and TO are positions in the original buffer
  ;; (or FROM is a string), so they do not describe the copied text.
  ;; In the in-place case the narrowing makes the two ranges identical.
  (encode-coding-region (point-min) (point-max) 'ctext-no-compositions)
  ;; Replace ISO-2022 charset designations with extended segments, for
  ;; those charsets that are not part of the official X registry.
  (save-match-data
    (goto-char (point-min))
    (let ((newpt (make-marker))
          (case-fold-search nil)
          pt desig encode-info encoding chset noctets textlen)
      (set-buffer-multibyte nil)
      ;; The regexp below finds the leading sequences for big5 and
      ;; iso8859-1[03-6] charsets.
      (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move)
        (setq desig (match-string 1)
              pt (point-marker)
              encode-info (cdr (assoc desig non-standard-designations-alist))
              encoding (car encode-info)
              chset (cadr encode-info)
              noctets (car (cddr encode-info)))
        ;; The segment extends up to the next escape sequence.
        (skip-chars-forward "^\e")
        (set-marker newpt (point))
        (cond
         ((eq encoding t)  ; only the leading sequence needs to be changed
          ;; Length field = text + charset name + the STX separator.
          (setq textlen (+ (- newpt pt) (length chset) 1))
          ;; Generate the ICCCM control sequence for an extended segment.
          (replace-match (format "\e%%/%d%c%c%s"
                                 noctets
                                 (+ (/ textlen 128) 128)
                                 (+ (% textlen 128) 128)
                                 chset)
                         t t))
         ((coding-system-p encoding) ; need to recode the entire segment...
          (set-marker pt (match-beginning 0))
          (decode-coding-region pt newpt 'ctext-no-compositions)
          (set-buffer-multibyte t)
          (encode-coding-region pt newpt encoding)
          (set-buffer-multibyte nil)
          (setq textlen (+ (- newpt pt) (length chset) 1))
          (goto-char pt)
          (insert (format "\e%%/%d%c%c%s"
                          noctets
                          (+ (/ textlen 128) 128)
                          (+ (% textlen 128) 128)
                          chset))))
        (goto-char newpt))))
  (set-buffer-multibyte t)
  ;; Must return nil, as build_annotations_2 expects that.
  nil)
1104 | |
;; NOTE(review): `set-coding-priority' is already declared obsolete,
;; with version "22.1", right after its definition earlier in this file.
;; This second declaration names "22.0" instead; the two should be
;; reconciled -- TODO confirm which version is intended.
(make-obsolete 'set-coding-priority 'set-coding-system-priority "22.0")
1106 | |
1107 ;;; FILE I/O | |
1108 | |
;; Consulted by `auto-coding-alist-lookup', which tries the elements in
;; order and uses the first one whose REGEXP matches the file name.
(defcustom auto-coding-alist
  '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|tar\\|tgz\\)\\'" . no-conversion)
    ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion))
  "Alist of filename patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "File name regexp")
                       (symbol :tag "Coding system"))))
1123 | |
;; Consulted by `auto-coding-from-file-contents', which searches
;; forward for each REGEXP within the bytes it is given.  The search is
;; not anchored by that function, so a REGEXP that must match at the
;; start of a line has to say so itself, as the entry below does.
(defcustom auto-coding-regexp-alist
  '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
  "Alist of patterns vs corresponding coding systems.
Each element looks like (REGEXP . CODING-SYSTEM).
A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.

The settings in this alist take priority over `coding:' tags
in the file (see the function `set-auto-coding')
and the contents of `file-coding-system-alist'."
  :group 'files
  :group 'mule
  :type '(repeat (cons (regexp :tag "Regexp")
                       (symbol :tag "Coding system"))))
1137 | |
;; NOTE(review): the use of this variable in `set-auto-coding' makes
;; that function look for a `unibyte:' tag and return `raw-text' when
;; one is found; no `load-coding' property is examined there.  The
;; docstring below may be out of date -- TODO confirm.
(defvar set-auto-coding-for-load nil
  "Non-nil means look for `load-coding' property instead of `coding'.
This is used for loading and byte-compiling Emacs Lisp files.")
1141 | |
(defun auto-coding-alist-lookup (filename)
  "Return the coding system specified by `auto-coding-alist' for FILENAME.
The elements of `auto-coding-alist' are tried in order and the first
non-nil coding system whose regexp matches FILENAME is returned.
Return nil if there is none.  File names are matched without regard to
case on systems listed in the `case-fold-search' binding below."
  (let ((alist auto-coding-alist)
        (case-fold-search (memq system-type '(vax-vms windows-nt ms-dos)))
        coding-system)
    (while (and alist (not coding-system))
      (if (string-match (car (car alist)) filename)
          (setq coding-system (cdr (car alist))))
      ;; Always advance.  Otherwise a matching element whose coding
      ;; system is nil would leave both ALIST and CODING-SYSTEM
      ;; unchanged and the loop would never terminate.
      (setq alist (cdr alist)))
    coding-system))
1152 | |
1153 | |
(defun auto-coding-from-file-contents (size)
  "Determine a coding system from the contents of the current buffer.
The current buffer contains SIZE bytes starting at point.
The regexps of `auto-coding-regexp-alist' are searched for in order;
the value is the coding system of the first one found, or nil if
there is none.  Point is preserved."
  (save-excursion
    (let ((entries auto-coding-regexp-alist)
          entry found)
      (while (and entries (null found))
        (setq entry (car entries)
              entries (cdr entries))
        (if (re-search-forward (car entry) (+ (point) size) t)
            (setq found (cdr entry))))
      found)))
1167 | |
1168 | |
(defun set-auto-coding (filename size)
  "Return coding system for a file FILENAME of which SIZE bytes follow point.
These bytes should include at least the first 1k of the file
and the last 3k of the file, but the middle may be omitted.

It checks FILENAME against the variable `auto-coding-alist'.  If
FILENAME doesn't match any entries in the variable, it checks the
contents of the current buffer following point against
`auto-coding-regexp-alist'.  If no match is found, it checks for a
`coding:' tag in the first one or two lines following point.  If no
`coding:' tag is found, it checks for local variables list in the last
3K bytes out of the SIZE bytes.

When `set-auto-coding-for-load' is non-nil, a `unibyte:' tag found in
either place makes the value `raw-text'.

The return value is the specified coding system,
or nil if nothing specified.

The variable `set-auto-coding-function' (which see) is set to this
function by default."
  (or (auto-coding-alist-lookup filename)
      (auto-coding-from-file-contents size)
      ;; HEAD-START..HEAD-END is at most the first 1024 bytes, and
      ;; TAIL-START..TAIL-END at most the last 3072 bytes, of the SIZE
      ;; bytes following point.
      ;; NOTE(review): the POS bound in this `let*' is never used; the
      ;; inner `let*' below binds its own POS.
      (let* ((case-fold-search t)
             (head-start (point))
             (head-end (+ head-start (min size 1024)))
             (tail-start (+ head-start (max (- size 3072) 0)))
             (tail-end (+ head-start size))
             coding-system head-found tail-found pos)
        ;; Try a short cut by searching for the string "coding:"
        ;; and for "unibyte:" at the head and tail of SIZE bytes.
        (setq head-found (or (search-forward "coding:" head-end t)
                             (search-forward "unibyte:" head-end t)))
        (if (and head-found (> head-found tail-start))
            ;; Head and tail are overlapped.
            (setq tail-found head-found)
          (goto-char tail-start)
          (setq tail-found (or (search-forward "coding:" tail-end t)
                               (search-forward "unibyte:" tail-end t))))

        ;; At first check the head.
        (when head-found
          (goto-char head-start)
          ;; Presumably `set-auto-mode-1' returns the end of the
          ;; -*- ... -*- specification and leaves point at its start
          ;; -- TODO confirm against its definition.
          (setq head-end (set-auto-mode-1))
          (setq head-start (point))
          ;; Only trust the tag if it lies before the position that
          ;; `set-auto-mode-1' returned.
          (when (and head-end (< head-found head-end))
            (goto-char head-start)
            (when (and set-auto-coding-for-load
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system 'raw-text))
            (when (and (not coding-system)
                       (re-search-forward
                        "\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
                        head-end t))
              (setq coding-system (intern (match-string 2)))
              ;; Ignore a tag that does not name a coding system.
              (or (coding-system-p coding-system)
                  (setq coding-system nil)))))

        ;; If no coding: tag in the head, check the tail.
        (when (and tail-found (not coding-system))
          (goto-char tail-start)
          ;; Start after a formfeed that begins a line, if there is one.
          (search-forward "\n\^L" nil t)
          (if (re-search-forward
               "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
              ;; The prefix is what comes before "local variables:" in its
              ;; line.  The suffix is what comes after "local variables:"
              ;; in its line.
              (let* ((prefix (regexp-quote (match-string 1)))
                     (suffix (regexp-quote (match-string 2)))
                     (re-coding
                      (concat
                       "^" prefix
                       ;; N.B. without the \n below, the regexp can
                       ;; eat newlines.
                       "[ \t]*coding[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
                       suffix "$"))
                     (re-unibyte
                      (concat
                       "^" prefix
                       "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t\n]+\\)[ \t]*"
                       suffix "$"))
                     (re-end
                      (concat "^" prefix "[ \t]*End *:[ \t]*" suffix "$"))
                     (pos (point)))
                ;; Limit the searches below to the local variables
                ;; list: up to its "End:" line, or up to TAIL-END if
                ;; there is no such line.
                (re-search-forward re-end tail-end 'move)
                (setq tail-end (point))
                (goto-char pos)
                (when (and set-auto-coding-for-load
                           (re-search-forward re-unibyte tail-end t))
                  (setq coding-system 'raw-text))
                (when (and (not coding-system)
                           (re-search-forward re-coding tail-end t))
                  (setq coding-system (intern (match-string 1)))
                  ;; Ignore a value that does not name a coding system.
                  (or (coding-system-p coding-system)
                      (setq coding-system nil))))))
        coding-system)))
1264 | |
;; Install `set-auto-coding' as the value of `set-auto-coding-function'.
(setq set-auto-coding-function 'set-auto-coding)
1266 | |
(defun after-insert-file-set-buffer-file-coding-system (inserted)
  "Set `buffer-file-coding-system' of current buffer after text is inserted.
INSERTED is the number of characters inserted.  The coding system is
derived from `last-coding-system-used'.  Return INSERTED, recomputed
as a byte count when the buffer has been switched to unibyte."
  (when last-coding-system-used
    (let ((new-coding
           (find-new-buffer-file-coding-system last-coding-system-used))
          (was-modified (buffer-modified-p)))
      (when new-coding
        (set-buffer-file-coding-system new-coding t)
        (when (and enable-multibyte-characters
                   (eq (coding-system-type new-coding) 'raw-text)
                   ;; If buffer was unmodified and the size is the
                   ;; same as INSERTED, we must be visiting it.
                   (not was-modified)
                   (= (buffer-size) inserted))
          ;; For coding systems of type raw-text, edit the buffer as
          ;; unibyte.  The end of the insertion is converted to a byte
          ;; position first, while the buffer is still multibyte.
          (let ((end-byte (position-bytes (+ (point) inserted))))
            (set-buffer-multibyte nil)
            (setq inserted (- end-byte (position-bytes (point))))))
        ;; `set-buffer-file-coding-system' marked the buffer modified;
        ;; undo that.
        (set-buffer-modified-p was-modified))))
  inserted)
1288 | |
;; Run the function above from `after-insert-file-functions'.
(add-hook 'after-insert-file-functions
          'after-insert-file-set-buffer-file-coding-system)
1291 | |
;; The coding-spec and eol-type of coding-system returned is decided
;; independently in the following order.
;;	1. That of buffer-file-coding-system locally bound.
;;	2. That of CODING.
;;
;; This function only computes the value; it does not itself assign
;; `buffer-file-coding-system'.

(defun find-new-buffer-file-coding-system (coding)
  "Return a coding system for a buffer when a file of CODING is inserted.
The local variable `buffer-file-coding-system' of the current buffer
is set to the returned value.
Return nil if there's no need to set `buffer-file-coding-system'."
  ;; LOCAL-* describe the current `buffer-file-coding-system', FOUND-*
  ;; describe CODING, and NEW-* are the values chosen between them.
  ;; An *-EOL value is a number when the eol type is decided, else nil.
  (let (local-coding local-eol
        found-coding found-eol
        new-coding new-eol)
    (if (null coding)
        ;; Nothing found about coding.
        nil

      ;; Get information of `buffer-file-coding-system' in LOCAL-EOL
      ;; and LOCAL-CODING.
      (setq local-eol (coding-system-eol-type buffer-file-coding-system))
      (if (null (numberp local-eol))
          ;; But eol-type is not yet set.
          (setq local-eol nil))
      (if (and buffer-file-coding-system
               (not (eq (coding-system-type buffer-file-coding-system)
                        'undecided)))
          (setq local-coding (coding-system-base buffer-file-coding-system)))

      (if (and (local-variable-p 'buffer-file-coding-system)
               local-eol local-coding)
          ;; The current buffer has already set full coding-system, we
          ;; had better not change it.
          nil

        (setq found-eol (coding-system-eol-type coding))
        (if (null (numberp found-eol))
            ;; But eol-type is not found.
            ;; If EOL conversions are inhibited, force unix eol-type.
            (setq found-eol (if inhibit-eol-conversion 0)))
        (setq found-coding (coding-system-base coding))

        (if (and (not found-eol) (eq found-coding 'undecided))
            ;; No valid coding information found.
            nil

          ;; Some coding information (eol or text) found.

          ;; The local setting takes precedence over the found one.
          (setq new-coding (if (local-variable-p 'buffer-file-coding-system)
                               (or local-coding found-coding)
                             (or found-coding local-coding)))
          (setq new-eol (if (local-variable-p 'buffer-file-coding-system)
                            (or local-eol found-eol)
                          (or found-eol local-eol)))

          ;; If an eol type was chosen and NEW-CODING still offers a
          ;; vector of eol variants, return the variant for NEW-EOL.
          (let ((eol-type (coding-system-eol-type new-coding)))
            (if (and (numberp new-eol) (vectorp eol-type))
                (aref eol-type new-eol)
              new-coding)))))))
1351 | |
(defun modify-coding-system-alist (target-type regexp coding-system)
  "Modify one of look up tables for finding a coding system on I/O operation.
There are three of such tables, `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'.

TARGET-TYPE specifies which of them to modify.
If it is `file', it affects `file-coding-system-alist' (which see).
If it is `process', it affects `process-coding-system-alist' (which see).
If it is `network', it affects `network-coding-system-alist' (which see).

REGEXP is a regular expression matching a target of I/O operation.
The target is a file name if TARGET-TYPE is `file', a program name if
TARGET-TYPE is `process', or a network service name or a port number
to connect to if TARGET-TYPE is `network'.

CODING-SYSTEM is a coding system to perform code conversion on the I/O
operation, or a cons cell (DECODING . ENCODING) specifying the coding systems
for decoding and encoding respectively,
or a function symbol which, when called, returns such a cons cell.

If the table already has an element for REGEXP, that element is
updated in place; otherwise a new element is added at the front."
  (unless (memq target-type '(file process network))
    (error "Invalid target type: %s" target-type))
  (unless (or (stringp regexp)
              (and (eq target-type 'network) (integerp regexp)))
    (error "Invalid regular expression: %s" regexp))
  (cond ((not (symbolp coding-system))
         ;; A cons (DECODING . ENCODING): validate both halves.
         (check-coding-system (car coding-system))
         (check-coding-system (cdr coding-system)))
        ((not (fboundp coding-system))
         ;; A plain coding system: use it for both directions.
         (check-coding-system coding-system)
         (setq coding-system (cons coding-system coding-system))))
  ;; A symbol that is fboundp is taken to be a function and stored as is.
  (let* ((alist-var (cond ((eq target-type 'file)
                           'file-coding-system-alist)
                          ((eq target-type 'process)
                           'process-coding-system-alist)
                          (t
                           'network-coding-system-alist)))
         (slot (assoc regexp (symbol-value alist-var))))
    (if slot
        (setcdr slot coding-system)
      (set alist-var
           (cons (cons regexp coding-system)
                 (symbol-value alist-var))))))
1404 | |
(defun make-translation-table (&rest args)
  "Make a translation table from arguments.
A translation table is a char table intended for character
translation in CCL programs.

Each argument is a list of elements of the form (FROM . TO), where FROM
is a character to be translated to TO.

The arguments and forms in each argument are processed in the given
order, and if a previous form already translates TO to some other
character, say TO-ALT, FROM is also translated to TO-ALT.
Conversely, characters that previous forms translated to FROM are
retargeted to TO."
  (let ((table (make-char-table 'translation-table))
        ;; Alist of (TO FROM1 FROM2 ...): for each target character,
        ;; the characters translated to it so far.
        revlist)
    (while args
      (let ((elts (car args)))
        (while elts
          (let* ((from (car (car elts)))
                 (from-i 0)             ; degree of freedom of FROM
                 (from-rev (nreverse (split-char from)))
                 (to (cdr (car elts)))
                 (to-i 0)               ; degree of freedom of TO
                 (to-rev (nreverse (split-char to))))
            ;; Check numbers of heading 0s in FROM-REV and TO-REV.
            (while (eq (car from-rev) 0)
              (setq from-i (1+ from-i) from-rev (cdr from-rev)))
            (while (eq (car to-rev) 0)
              (setq to-i (1+ to-i) to-rev (cdr to-rev)))
            (if (and (/= from-i to-i) (/= to-i 0))
                (error "Invalid character pair (%d . %d)" from to))
            ;; If we have already translated TO to TO-ALT, FROM should
            ;; also be translated to TO-ALT.
            (let ((to-alt (aref table to)))
              (if (and to-alt (> to-i 0))
                  (setq to to-alt)))
            (if (> from-i 0)
                (set-char-table-default table from to)
              (aset table from to))
            ;; If we have already translated some chars to FROM, they
            ;; should also be translated to TO.  Each of those chars
            ;; must be updated individually; the previous code wrote to
            ;; FROM itself on every iteration and so left them pointing
            ;; at FROM.
            (let ((l (assq from revlist)))
              (when l
                (setcar l to)
                (dolist (ch (cdr l))
                  (aset table ch to))))
            ;; Now update REVLIST.
            (let ((l (assq to revlist)))
              (if l
                  (setcdr l (cons from (cdr l)))
                (setq revlist (cons (list to from) revlist)))))
          (setq elts (cdr elts))))
      (setq args (cdr args)))
    ;; Return TABLE just created.
    table))
1461 | |
(defun make-translation-table-from-vector (vec)
  "Make translation table from decoding vector VEC.
VEC is an array of 256 elements to map unibyte codes to multibyte
characters.  Elements may be nil for undefined code points.
The value is the decoding table.  Its extra slot 0 holds the reverse
table, which maps each character that is 256 or greater back to its
unibyte code.
See also the variable `nonascii-translation-table'."
  (let ((table (make-char-table 'translation-table))
        (rev-table (make-char-table 'translation-table))
        (code 0)
        char)
    (while (< code 256)
      (setq char (aref vec code))
      (when char
        (aset table code char)
        ;; Only characters outside the unibyte range get a reverse
        ;; mapping.
        (if (>= char 256)
            (aset rev-table char code)))
      (setq code (1+ code)))
    (set-char-table-extra-slot table 0 rev-table)
    table))
1478 | |
(defun define-translation-table (symbol &rest args)
  "Register SYMBOL as the name of the translation table described by ARGS.
The registration makes the table usable for translations in a CCL
program.

When the first element of ARGS is already a char-table whose purpose
is `translation-table', that table is used as is.  Note that SYMBOL
only names the table; this function does not bind SYMBOL.

Otherwise ARGS are handed unchanged to `make-translation-table'
\(which see) to build the table.

The table is stored as the `translation-table' property of SYMBOL and
recorded as (SYMBOL . TABLE) in `translation-table-vector'.  Its index
in that vector is stored as the `translation-table-id' property of
SYMBOL and is also the return value."
  (let* ((first-arg (car args))
         (table (cond ((and (char-table-p first-arg)
                            (eq (char-table-subtype first-arg)
                                'translation-table))
                       first-arg)
                      (t
                       (apply 'make-translation-table args))))
         (len (length translation-table-vector))
         (id 0))
    (put symbol 'translation-table table)
    ;; Skip every slot that is occupied by some other symbol; stop at
    ;; the first free slot or at the slot already owned by SYMBOL.
    (while (and (< id len)
                (let ((slot (aref translation-table-vector id)))
                  (and slot
                       (not (eq (car slot) symbol)))))
      (setq id (1+ id)))
    ;; All existing slots are taken: double the vector so that index
    ;; ID becomes a fresh, empty slot.
    (if (>= id len)
        (setq translation-table-vector
              (vconcat translation-table-vector (make-vector len nil))))
    (aset translation-table-vector id (cons symbol table))
    (put symbol 'translation-table-id id)
    id))
1517 | |
(put 'with-category-table 'lisp-indent-function 1)

(defmacro with-category-table (category-table &rest body)
  "Execute BODY like `progn' with CATEGORY-TABLE the current category table.
The category table of the current buffer is saved, BODY is evaluated,
and the saved table is then put back in that same buffer, even if BODY
exits non-locally or leaves a different buffer current.  If that
buffer has been killed in the meantime, nothing is restored.
Value is what BODY returns."
  (let ((old-table (make-symbol "old-table"))
        (old-buffer (make-symbol "old-buffer")))
    `(let ((,old-table (category-table))
           (,old-buffer (current-buffer)))
       (unwind-protect
           (progn
             (set-category-table ,category-table)
             ,@body)
         ;; BODY may have switched buffers.  The category table is
         ;; per-buffer state, so restore it in the buffer it was
         ;; saved from rather than in whichever buffer is current.
         (if (buffer-live-p ,old-buffer)
             (with-current-buffer ,old-buffer
               (set-category-table ,old-table)))))))
1528 | |
1529 ;;; Initialize some variables. | |
1530 | |
;; Both variables hold a char-table whose subtype is the variable's own
;; name and which needs no extra slots.  The slot count must be
;; declared on the subtype symbol before `make-char-table' is called.
(dolist (subtype '(use-default-ascent ignore-relative-composition))
  (put subtype 'char-table-extra-slots 0)
  (set subtype (make-char-table subtype)))

;;;
(provide 'mule)
1539 | |
1540 ;;; mule.el ends here |