Mercurial > emacs
comparison lisp/international/codepage.el @ 23920:efcf2fcda617
(cp-coding-system-for-codepage-1):
Create separate encoders and decoders, for DOS and Unix. Make the
usual family of 3 coding systems, so that automatic detection of
EOL type works.
(cp-make-coding-systems-for-codepage): Don't intern DOS- and
Unix-specific symbols here, and don't call
cp-coding-system-for-codepage-1 twice. (Suggested by Ken'ichi
Handa <handa@etl.go.jp>.)
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Sun, 20 Dec 1998 15:17:49 +0000 |
parents | 7ce49fb0dfbc |
children | 4ef8ec98dd43 |
comparison
equal
deleted
inserted
replaced
23919:3b3a9cd1785a | 23920:efcf2fcda617 |
---|---|
50 DECODER is a translation table for converting characters in the DOS codepage | 50 DECODER is a translation table for converting characters in the DOS codepage |
51 encoding to Emacs multibyte characters. | 51 encoding to Emacs multibyte characters. |
52 ENCODER is a translation table for encoding Emacs multibyte characters into | 52 ENCODER is a translation table for encoding Emacs multibyte characters into |
53 external DOS codepage codes. | 53 external DOS codepage codes. |
54 | 54 |
55 Note that the coding systems created by this function don't support | 55 Note that the coding systems created by this function support automatic |
56 automatic detection of the EOL format. Use explicit -dos or -unix variants | 56 detection of the EOL format." |
57 as appropriate (Mac EOL style is not supported, as it doesn't make sense for | |
58 these coding systems). | |
59 | |
60 If the coding system's name ends with \"-dos\", this function automatically | |
61 creates a coding system which converts from and to DOS EOL format; otherwise | |
62 the created coding system assumes Unix-style EOL (i.e., it doesn't perform | |
63 any EOL conversions)." | |
64 (save-match-data | 57 (save-match-data |
65 (let* ((coding-name (symbol-name coding)) | 58 (let* ((coding-name (symbol-name coding)) |
66 (eol-type (string-match "-\\(dos\\|unix\\)\\'" coding-name)) | 59 (ccl-decoder-dos |
67 (dos-p | 60 (ccl-compile |
68 (and eol-type | 61 `(4 (loop (read r1) |
69 (string= "-dos" (substring coding-name eol-type)))) | 62 (if (r1 != ?\r) |
70 (coding-sans-eol | 63 (if (r1 >= 128) |
71 (if eol-type (substring coding-name 0 eol-type) coding-name)) | 64 ((r0 = ,(charset-id 'ascii)) |
72 (ccl-decoder | 65 (translate-character ,decoder r0 r1) |
73 (if dos-p | 66 (if (r0 == ,(charset-id 'ascii)) |
74 (ccl-compile | 67 (write r1) |
75 `(4 (loop (read r1) | 68 (write-multibyte-character r0 r1))) |
76 (if (r1 != ?\r) | 69 (write r1))) |
77 (if (r1 >= 128) | 70 (repeat))))) |
78 ((r0 = ,(charset-id 'ascii)) | 71 (ccl-decoder-unix |
79 (translate-character ,decoder r0 r1) | 72 (ccl-compile |
80 (if (r0 == ,(charset-id 'ascii)) | 73 `(4 (loop (read r1) |
81 (write r1) | 74 (if (r1 >= 128) |
82 (write-multibyte-character r0 r1))) | 75 ((r0 = ,(charset-id 'ascii)) |
83 (write r1))) | 76 (translate-character ,decoder r0 r1) |
84 (repeat)))) | 77 (if (r0 == ,(charset-id 'ascii)) |
85 (ccl-compile | 78 (write r1) |
86 `(4 (loop (read r1) | 79 (write-multibyte-character r0 r1))) |
87 (if (r1 >= 128) | 80 (write r1)) |
88 ((r0 = ,(charset-id 'ascii)) | 81 (repeat))))) |
89 (translate-character ,decoder r0 r1) | 82 (ccl-encoder-dos |
90 (if (r0 == ,(charset-id 'ascii)) | 83 (ccl-compile |
91 (write r1) | 84 `(1 (loop (read-multibyte-character r0 r1) |
92 (write-multibyte-character r0 r1))) | 85 (if (r1 == ?\n) |
93 (write r1)) | 86 (write ?\r) |
94 (repeat)))))) | |
95 (ccl-encoder | |
96 (if dos-p | |
97 (ccl-compile | |
98 `(1 (loop (read-multibyte-character r0 r1) | |
99 (if (r1 == ?\n) | |
100 (write ?\r) | |
101 (if (r0 != ,(charset-id 'ascii)) | |
102 ((translate-character ,encoder r0 r1) | |
103 (if (r0 == ,(charset-id 'japanese-jisx0208)) | |
104 ((r1 = ??) | |
105 (write r1)))))) | |
106 (write-repeat r1)))) | |
107 (ccl-compile | |
108 `(1 (loop (read-multibyte-character r0 r1) | |
109 (if (r0 != ,(charset-id 'ascii)) | 87 (if (r0 != ,(charset-id 'ascii)) |
110 ((translate-character ,encoder r0 r1) | 88 ((translate-character ,encoder r0 r1) |
111 (if (r0 == ,(charset-id 'japanese-jisx0208)) | 89 (if (r0 == ,(charset-id 'japanese-jisx0208)) |
112 ((r1 = ??) | 90 ((r1 = ??) |
113 (write r1))))) | 91 (write r1)))))) |
114 (write-repeat r1))))))) | 92 (write-repeat r1))))) |
93 (ccl-encoder-unix | |
94 (ccl-compile | |
95 `(1 (loop (read-multibyte-character r0 r1) | |
96 (if (r0 != ,(charset-id 'ascii)) | |
97 ((translate-character ,encoder r0 r1) | |
98 (if (r0 == ,(charset-id 'japanese-jisx0208)) | |
99 ((r1 = ??) | |
100 (write r1))))) | |
101 (write-repeat r1)))))) | |
115 (if (memq coding coding-system-list) | 102 (if (memq coding coding-system-list) |
116 (setq coding-system-list (delq coding coding-system-list))) | 103 (setq coding-system-list (delq coding coding-system-list))) |
104 | |
105 ;; Make coding system CODING. | |
117 (make-coding-system | 106 (make-coding-system |
118 coding 4 mnemonic | 107 coding 4 mnemonic |
119 (concat "8-bit encoding of " (symbol-name iso-name) | 108 (concat "8-bit encoding of " (symbol-name iso-name) |
120 " characters using IBM codepage " (substring coding-sans-eol 2)) | 109 " characters using IBM codepage " coding-name) |
121 (cons ccl-decoder ccl-encoder) | 110 (cons ccl-decoder-unix ccl-encoder-unix) |
122 `((safe-charsets ascii ,iso-name))) | 111 `((safe-charsets ascii ,iso-name))) |
123 (put coding 'eol-type (if dos-p 1 0))))) | 112 ;;; Make coding systems CODING-unix, CODING-dos, CODING-mac. |
113 (make-subsidiary-coding-system coding) | |
114 (put coding 'eol-type (vector (intern (format "%s-unix" coding)) | |
115 (intern (format "%s-dos" coding)) | |
116 (intern (format "%s-mac" coding)))) | |
117 ;; Change CCL code for CODING-dos. | |
118 (let ((coding-spec (copy-sequence (get coding 'coding-system)))) | |
119 (aset coding-spec 4 | |
120 (cons (check-ccl-program | |
121 ccl-decoder-dos | |
122 (intern (format "%s-dos-decoder" coding))) | |
123 (check-ccl-program | |
124 ccl-encoder-dos | |
125 (intern (format "%s-dos-encoder" coding))))) | |
126 (put (intern (concat coding-name "-dos")) 'coding-system | |
127 coding-spec))))) | |
124 | 128 |
125 (defun cp-decoding-vector-for-codepage (table charset offset) | 129 (defun cp-decoding-vector-for-codepage (table charset offset) |
126 "Create a vector for decoding IBM PC characters using conversion table | 130 "Create a vector for decoding IBM PC characters using conversion table |
127 TABLE into an ISO-8859 character set CHARSET whose first non-ASCII | 131 TABLE into an ISO-8859 character set CHARSET whose first non-ASCII |
128 character' is generated by (make-char CHARSET OFFSET)." | 132 character' is generated by (make-char CHARSET OFFSET)." |
416 (nonascii-table | 420 (nonascii-table |
417 (intern (format "%s-nonascii-translation-table" codepage))) | 421 (intern (format "%s-nonascii-translation-table" codepage))) |
418 (decode-translation | 422 (decode-translation |
419 (intern (format "%s-decode-translation-table" codepage))) | 423 (intern (format "%s-decode-translation-table" codepage))) |
420 (encode-translation | 424 (encode-translation |
421 (intern (format "%s-encode-translation-table" codepage))) | 425 (intern (format "%s-encode-translation-table" codepage)))) |
422 (codepage-dos | |
423 (intern (format "%s-dos" codepage))) | |
424 (codepage-unix | |
425 (intern (format "%s-unix" codepage)))) | |
426 (set nonascii-table | 426 (set nonascii-table |
427 (make-translation-table-from-vector | 427 (make-translation-table-from-vector |
428 (cp-decoding-vector-for-codepage | 428 (cp-decoding-vector-for-codepage |
429 (symbol-value decode-table) iso-name offset))) | 429 (symbol-value decode-table) iso-name offset))) |
430 (define-translation-table encode-translation | 430 (define-translation-table encode-translation |
442 (if (= (charset-width (car charsets)) 1) ?? wide-column-char)) | 442 (if (= (charset-width (car charsets)) 1) ?? wide-column-char)) |
443 (setq charsets (cdr charsets)))) | 443 (setq charsets (cdr charsets)))) |
444 (define-translation-table decode-translation | 444 (define-translation-table decode-translation |
445 (symbol-value nonascii-table)) | 445 (symbol-value nonascii-table)) |
446 (cp-coding-system-for-codepage-1 | 446 (cp-coding-system-for-codepage-1 |
447 codepage-dos ?D iso-name decode-translation encode-translation) | 447 (intern codepage) ?D iso-name decode-translation encode-translation) |
448 (cp-coding-system-for-codepage-1 | 448 )) |
449 codepage-unix ?D iso-name decode-translation encode-translation))) | |
450 | 449 |
451 (defun cp-codepage-decoder (codepage) | 450 (defun cp-codepage-decoder (codepage) |
452 "If CODEPAGE is the name of a supported codepage, return its decode table; | 451 "If CODEPAGE is the name of a supported codepage, return its decode table; |
453 otherwise return nil." | 452 otherwise return nil." |
454 (let ((cp (if (symbolp codepage) (symbol-name codepage) codepage))) | 453 (let ((cp (if (symbolp codepage) (symbol-name codepage) codepage))) |