Mercurial > emacs
comparison lisp/international/mule-diag.el @ 27912:ed26ed5b0afc
(list-character-sets): Completely
rewritten.
(sort-listed-character-sets): New function.
(list-character-sets-1): Completely rewritten.
(list-character-sets-2): New function.
(non-iso-charset-alist): New variable.
(decode-codepage-char): New function.
(charset-history): New variable.
(read-charset) (list-block-of-chars)
(list-iso-charset-chars)
(list-non-iso-charset-chars)
(list-charset-chars): New functions.
(mule-diag): Call list-character-sets-2, not
list-character-sets-2.
(dump-charsets): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 29 Feb 2000 11:32:52 +0000 |
parents | 1830bc1903fb |
children | 8b485e43ac51 |
comparison
equal
deleted
inserted
replaced
27911:01ed7d4ff0b6 | 27912:ed26ed5b0afc |
---|---|
41 (function (lambda (x y) (< (charset-id x) (charset-id y))))))) | 41 (function (lambda (x y) (< (charset-id x) (charset-id y))))))) |
42 | 42 |
43 ;;; CHARSET | 43 ;;; CHARSET |
44 | 44 |
45 ;;;###autoload | 45 ;;;###autoload |
46 (defun list-character-sets (&optional arg) | 46 (defun list-character-sets (arg) |
47 "Display a list of all character sets. | 47 "Display a list of all character sets. |
48 | 48 |
49 The ID column contains a charset identification number for internal Emacs use. | 49 The ID-NUM column contains a charset identification number |
50 The B column contains a number of bytes occupied in a buffer | 50 for internal Emacs use. |
51 by any character in this character set. | 51 |
52 The W column contains a number of columns occupied on the screen | 52 The MULTIBYTE-FORM column contains a format of multibyte sequence |
53 by any character in this character set. | 53 of characters in the charset for buffer and string |
54 by one to four hexadecimal digits. | |
55 `xx' stands for any byte in the range 0..127. | |
56 `XX' stands for any byte in the range 160..255. | |
57 | |
58 The D column contains a dimension of this character set. | |
59 The CH column contains a number of characters in a block of this character set. | |
60 The FINAL-CHAR column contains an ISO-2022's <final-char> to use for | |
61 designating this character set in ISO-2022-based coding systems. | |
54 | 62 |
55 With prefix arg, the output format gets more cryptic, | 63 With prefix arg, the output format gets more cryptic, |
56 but still shows the full information." | 64 but still shows the full information." |
57 (interactive "P") | 65 (interactive "P") |
58 (sort-charset-list) | |
59 (with-output-to-temp-buffer "*Help*" | 66 (with-output-to-temp-buffer "*Help*" |
60 (save-excursion | 67 (with-current-buffer standard-output |
61 (set-buffer standard-output) | 68 (if arg |
62 (list-character-sets-1 arg) | 69 (list-character-sets-2) |
63 (help-mode) | 70 ;; Insert header. |
64 (setq truncate-lines t)))) | 71 (insert |
65 | 72 (substitute-command-keys |
66 (defun list-character-sets-1 (arg) | 73 (concat |
67 (let ((l charset-list) | 74 "Use " |
68 charset) | 75 (if (display-mouse-p) "\\[help-follow-mouse] or ") |
69 (if (null arg) | 76 "\\[help-follow] on a title of column\nto sort by that title."))) |
70 (progn | 77 (indent-to 56) |
71 (insert "ID Name B W Description\n") | 78 (insert "+----DIMENSION\n") |
72 (insert "-- ---- - - -----------\n") | 79 (indent-to 56) |
73 (while l | 80 (insert "| +--CHARS\n") |
74 (setq charset (car l) l (cdr l)) | 81 (let ((columns '(("ID-NUM" . id) "\t" |
75 (insert (format "%03d %s" (charset-id charset) charset)) | 82 ("CHARSET-NAME" . name) "\t\t\t" |
76 (indent-to 28) | 83 ("MULTIBYTE-FORM" . id) "\t" |
77 (insert (format "%d %d %s\n" | 84 ("D CH FINAL-CHAR" . iso-spec))) |
78 (charset-bytes charset) | 85 (help-highlight-face 'region) |
79 (charset-width charset) | 86 pos) |
80 (charset-description charset))))) | 87 (while columns |
81 (insert "\ | 88 (if (stringp (car columns)) |
82 ######################### | 89 (insert (car columns)) |
90 (insert (car (car columns))) | |
91 (search-backward (car (car columns))) | |
92 (help-xref-button 0 'sort-listed-character-sets | |
93 (cdr (car columns))) | |
94 (goto-char (point-max))) | |
95 (setq columns (cdr columns))) | |
96 (insert "\n")) | |
97 (insert "------\t------------\t\t\t--------------\t- -- ----------\n") | |
98 | |
99 ;; Insert body sorted by charset IDs. | |
100 (list-character-sets-1 'id))))) | |
101 | |
102 | |
103 ;; Sort character set list by SORT-KEY. | |
104 | |
105 (defun sort-listed-character-sets (sort-key) | |
106 (if sort-key | |
107 (save-excursion | |
108 (let ((buffer-read-only nil)) | |
109 (goto-char (point-min)) | |
110 (re-search-forward "[0-9][0-9][0-9]") | |
111 (beginning-of-line) | |
112 (delete-region (point) (point-max)) | |
113 (list-character-sets-1 sort-key))))) | |
114 | |
115 | |
116 ;; Insert a list of character sets sorted by SORT-KEY. SORT-KEY | |
117 ;; should be one of `id', `name', and `iso-spec'. If SORT-KEY is nil, | |
118 ;; it defaults to `id'. | |
119 | |
120 (defun list-character-sets-1 (sort-key) | |
121 (or sort-key | |
122 (setq sort-key 'id)) | |
123 (let ((tail (charset-list)) | |
124 charset-info-list elt charset info sort-func) | |
125 (while tail | |
126 (setq charset (car tail) tail (cdr tail) | |
127 info (charset-info charset)) | |
128 | |
129 ;; Generate a list that contains all information to display. | |
130 (setq charset-info-list | |
131 (cons (list (charset-id charset) ; ID-NUM | |
132 charset ; CHARSET-NAME | |
133 (if (eq charset 'ascii) ; MULTIBYTE-FORM | |
134 "xx" | |
135 (let ((str (format "%2X" (aref info 6)))) | |
136 (if (> (aref info 7) 0) | |
137 (setq str (format "%s %2X" str (aref info 7)))) | |
138 (setq str (concat str " XX")) | |
139 (if (> (aref info 2) 1) | |
140 (setq str (concat str " XX"))) | |
141 str)) | |
142 (aref info 2) ; DIMENSION | |
143 (aref info 3) ; CHARS | |
144 (aref info 8) ; FINAL-CHAR | |
145 ) | |
146 charset-info-list))) | |
147 | |
148 ;; Determine a predicate for `sort' by SORT-KEY. | |
149 (setq sort-func | |
150 (cond ((eq sort-key 'id) | |
151 (function (lambda (x y) (< (car x) (car y))))) | |
152 | |
153 ((eq sort-key 'name) | |
154 (function (lambda (x y) (string< (nth 1 x) (nth 1 y))))) | |
155 | |
156 ((eq sort-key 'iso-spec) | |
157 ;; Sort by DIMENSION CHARS FINAL-CHAR | |
158 (function | |
159 (lambda (x y) | |
160 (or (< (nth 3 x) (nth 3 y)) | |
161 (and (= (nth 3 x) (nth 3 y)) | |
162 (or (< (nth 4 x) (nth 4 y)) | |
163 (and (= (nth 4 x) (nth 4 y)) | |
164 (< (nth 5 x) (nth 5 y))))))))) | |
165 (t | |
166 (error "Invalid charset sort key: %s" sort-key)))) | |
167 | |
168 (setq charset-info-list (sort charset-info-list sort-func)) | |
169 | |
170 ;; Insert information of character sets. | |
171 (while charset-info-list | |
172 (setq elt (car charset-info-list) | |
173 charset-info-list (cdr charset-info-list)) | |
174 (insert (format "%03d(%02X)" (car elt) (car elt))) ; ID-NUM | |
175 (indent-to 8) | |
176 (insert (symbol-name (nth 1 elt))) ; CHARSET-NAME | |
177 (search-backward (symbol-name (nth 1 elt))) | |
178 (help-xref-button 0 'list-charset-chars (nth 1 elt)) | |
179 (goto-char (point-max)) | |
180 (insert "\t") | |
181 (indent-to 40) | |
182 (insert (nth 2 elt)) ; MULTIBYTE-FORM | |
183 (indent-to 56) | |
184 (insert (format "%d %2d %c" ; ISO specs | |
185 (nth 3 elt) (nth 4 elt) (nth 5 elt))) | |
186 (insert "\n")))) | |
187 | |
188 | |
189 ;; List all character sets in a form that a program can easily parse. | |
190 | |
191 (defun list-character-sets-2 () | |
192 (insert "######################### | |
83 ## LIST OF CHARSETS | 193 ## LIST OF CHARSETS |
84 ## Each line corresponds to one charset. | 194 ## Each line corresponds to one charset. |
85 ## The following attributes are listed in this order | 195 ## The following attributes are listed in this order |
86 ## separated by a colon `:' in one line. | 196 ## separated by a colon `:' in one line. |
87 ## CHARSET-ID, | 197 ## CHARSET-ID, |
93 ## DIRECTION (0:left-to-right, 1:right-to-left), | 203 ## DIRECTION (0:left-to-right, 1:right-to-left), |
94 ## ISO-FINAL-CHAR (character code of ISO-2022's final character) | 204 ## ISO-FINAL-CHAR (character code of ISO-2022's final character) |
95 ## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR) | 205 ## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR) |
96 ## DESCRIPTION (describing string of the charset) | 206 ## DESCRIPTION (describing string of the charset) |
97 ") | 207 ") |
98 (while l | 208 (let ((l charset-list) |
99 (setq charset (car l) l (cdr l)) | 209 charset) |
100 (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n" | 210 (while l |
101 (charset-id charset) | 211 (setq charset (car l) l (cdr l)) |
102 charset | 212 (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n" |
103 (charset-dimension charset) | 213 (charset-id charset) |
104 (charset-chars charset) | 214 charset |
105 (charset-bytes charset) | 215 (charset-dimension charset) |
106 (charset-width charset) | 216 (charset-chars charset) |
107 (charset-direction charset) | 217 (charset-bytes charset) |
108 (charset-iso-final-char charset) | 218 (charset-width charset) |
109 (charset-iso-graphic-plane charset) | 219 (charset-direction charset) |
110 (charset-description charset))))))) | 220 (charset-iso-final-char charset) |
221 (charset-iso-graphic-plane charset) | |
222 (charset-description charset)))))) | |
223 | |
224 (defvar non-iso-charset-alist | |
225 `((viscii | |
226 (ascii vietnamese-viscii-lower vietnamese-viscii-upper) | |
227 ,viet-viscii-nonascii-translation-table | |
228 ((0 255))) | |
229 (koi8-r | |
230 (ascii cyrillic-iso8859-5) | |
231 ,cyrillic-koi8-r-nonascii-translation-table | |
232 ((32 255))) | |
233 (alternativnyj | |
234 (ascii cyrillic-iso8859-5) | |
235 ,cyrillic-alternativnyj-nonascii-translation-table | |
236 ((32 255))) | |
237 (big5 | |
238 (ascii chinese-big5-1 chinese-big5-2) | |
239 decode-big5-char | |
240 ((32 127) | |
241 ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE)))) | |
242 (sjis | |
243 (ascii katakana-jisx0201 japanese-jisx0208) | |
244 decode-sjis-char | |
245 ((32 127 ?\xA1 ?\xDF) | |
246 ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC))))) | |
247 "Alist of non-ISO charset names vs the corresponding information. | |
248 | |
249 Non-ISO charsets are what Emacs can read (or write) by mapping to (or | |
250 from) some Emacs' charsets that correspond to ISO charsets. | |
251 | |
252 Each element has the following format: | |
253 (NON-ISO-CHARSET CHARSET-LIST TRANSLATION-METHOD [ CODE-RANGE ]) | |
254 | |
255 NON-ISO-CHARSET is a name (symbol) of the non-ISO charset. | |
256 | |
257 CHARSET-LIST is a list of Emacs' charsets into which characters of | |
258 NON-ISO-CHARSET are mapped. | |
259 | |
260 TRANSLATION-METHOD is a char-table to translate a character code of | |
261 NON-ISO-CHARSET to the corresponding Emacs character code. It can | |
262 also be a function to call with one argument, a character code in | |
263 NON-ISO-CHARSET. | |
264 | |
265 CODE-RANGE specifies the valid code ranges of NON-ISO-CHARSET. | |
266 It is a list of RANGEs, where each RANGE is of the form: | |
267 (FROM1 TO1 FROM2 TO2 ...) | |
268 or | |
269 ((FROM1-1 TO1-1 FROM1-2 TO1-2 ...) . (FROM2-1 TO2-1 FROM2-2 TO2-2 ...)) | |
270 In the first form, valid codes are between FROM1 and TO1, or FROM2 and | |
271 TO2, or... | |
272 The second form is used for 2-byte codes. The car part is the ranges | |
273 of the first byte, and the cdr part is the ranges of the second byte.") | |
274 | |
275 | |
276 ;; Decode a character that has code CODE in CODEPAGE. Value is a | |
277 ;; string of decoded character. | |
278 | |
279 (defun decode-codepage-char (codepage code) | |
280 ;; Each CODEPAGE corresponds to a coding system cpCODEPAGE. | |
281 (let ((coding-system (intern (format "cp%d" codepage)))) | |
282 (or (coding-system-p coding-system) | |
283 (codepage-setup codepage)) | |
284 (string-to-char | |
285 (decode-coding-string (char-to-string code) coding-system)))) | |
286 | |
287 | |
288 ;; Add DOS codepages to `non-iso-charset-alist'. | |
289 | |
290 (let ((tail (cp-supported-codepages)) | |
291 elt) | |
292 (while tail | |
293 (setq elt (car tail) tail (cdr tail)) | |
294 ;; Now ELT is (CODEPAGE . CHARSET), where CODEPAGE is a string | |
295 ;; (e.g. "850"), CHARSET is a charset that characters in CODEPAGE | |
296 ;; are mapped to. | |
297 (setq non-iso-charset-alist | |
298 (cons (list (intern (concat "cp" (car elt))) | |
299 (list 'ascii (cdr elt)) | |
300 `(lambda (code) | |
301 (decode-codepage-char ,(string-to-int (car elt)) | |
302 code)) | |
303 (list (list 0 255))) | |
304 non-iso-charset-alist)))) | |
305 | |
306 | |
307 ;; A variable to hold charset input history. | |
308 (defvar charset-history nil) | |
309 | |
310 | |
311 ;;;###autoload | |
312 (defun read-charset (prompt &optional default-value initial-input) | |
313 "Read a character set from the minibuffer, prompting with string PROMPT. | |
314 It reads an Emacs' character set listed in the variable `charset-list' | |
315 or a non-ISO character set listed in the variable | |
316 `non-iso-charset-alist'. | |
317 | |
318 Optional arguments are DEFAULT-VALUE and INITIAL-INPUT. | |
319 DEFAULT-VALUE, if non-nil, is the default value. | |
320 INITIAL-INPUT, if non-nil, is a string inserted in the minibuffer initially. | |
321 See the documentation of the function `completing-read' for the | |
322 detailed meanings of these arguments." | |
323 (let* ((table (append (mapcar (function (lambda (x) (list (symbol-name x)))) | |
324 charset-list) | |
325 (mapcar (function (lambda (x) | |
326 (list (symbol-name (car x))))) | |
327 non-iso-charset-alist))) | |
328 (charset (completing-read prompt table | |
329 nil t initial-input 'charset-history | |
330 default-value))) | |
331 (if (> (length charset) 0) | |
332 (intern charset)))) | |
333 | |
334 | |
335 ;; List characters of the range MIN and MAX of CHARSET. If dimension | |
336 ;; of CHARSET is two (i.e. 2-byte charset), ROW is the first byte | |
337 ;; (block index) of the characters, and MIN and MAX are the second | |
338 ;; bytes of the characters. If the dimension is one, ROW should be 0. | |
339 ;; For a non-ISO charset, CHARSET is a char-table or a function to get | |
340 ;; Emacs' character codes that corresponds to the characters to list. | |
341 | |
342 (defun list-block-of-chars (charset row min max) | |
343 (let (i ch) | |
344 (insert-char ?- (+ 4 (* 3 16))) | |
345 (insert "\n ") | |
346 (setq i 0) | |
347 (while (< i 16) | |
348 (insert (format "%3X" i)) | |
349 (setq i (1+ i))) | |
350 (setq i (* (/ min 16) 16)) | |
351 (while (<= i max) | |
352 (if (= (% i 16) 0) | |
353 (insert (format "\n%3Xx" (/ (+ (* row 256) i) 16)))) | |
354 (setq ch (cond ((< i min) | |
355 32) | |
356 ((charsetp charset) | |
357 (if (= row 0) | |
358 (make-char charset i) | |
359 (make-char charset row i))) | |
360 ((char-table-p charset) | |
361 (aref charset i)) | |
362 (t (funcall charset (+ (* row 256) i))))) | |
363 (if (or (< ch 32) (and (>= ch 127) (<= ch 255))) | |
364 ;; Don't insert a control code. | |
365 (setq ch 32)) | |
366 (insert (format "%3c" ch)) | |
367 (setq i (1+ i)))) | |
368 (insert "\n")) | |
369 | |
370 | |
371 ;; List all characters in ISO charset CHARSET. | |
372 | |
373 (defun list-iso-charset-chars (charset) | |
374 (let ((dim (charset-dimension charset)) | |
375 (chars (charset-chars charset)) | |
376 (plane (charset-iso-graphic-plane charset)) | |
377 min max) | |
378 (insert (format "Characters in the charset %s.\n" charset)) | |
379 | |
380 (if (= chars 94) | |
381 (setq min 33 max 126) | |
382 (setq min 32 max 127)) | |
383 (or (= plane 0) | |
384 (setq min (+ min 128) max (+ max 128))) | |
385 | |
386 (if (= dim 1) | |
387 (list-block-of-chars charset 0 min max) | |
388 (let ((i min)) | |
389 (while (< i max) | |
390 (list-block-of-chars charset i min max) | |
391 (setq i (1+ i))))))) | |
392 | |
393 | |
394 ;; List all characters in non-ISO charset CHARSET. | |
395 | |
396 (defun list-non-iso-charset-chars (charset) | |
397 (let* ((slot (assq charset non-iso-charset-alist)) | |
398 (charsets (nth 1 slot)) | |
399 (translate-method (nth 2 slot)) | |
400 (ranges (nth 3 slot)) | |
401 range) | |
402 (or slot | |
403 (error "Unknown external charset: %s" charset)) | |
404 (insert (format "Characters in non-ISO charset %s.\n" charset)) | |
405 (insert "They are mapped to: " | |
406 (mapconcat (lambda (x) (symbol-name x)) charsets ", ") | |
407 "\n") | |
408 (while ranges | |
409 (setq range (car ranges) ranges (cdr ranges)) | |
410 (if (integerp (car range)) | |
411 ;; The form of RANGES is (FROM1 TO1 FROM2 TO2 ...). | |
412 (while range | |
413 (list-block-of-chars translate-method | |
414 0 (car range) (nth 1 range)) | |
415 (setq range (nthcdr 2 range))) | |
416 ;; The form of RANGES is ((FROM1-1 TO1-1 ...) . (FROM2-1 TO2-1 ...)). | |
417 (let ((row-range (car range)) | |
418 row row-max | |
419 col-range col col-max) | |
420 (while row-range | |
421 (setq row (car row-range) row-max (nth 1 row-range) | |
422 row-range (nthcdr 2 row-range)) | |
423 (while (< row row-max) | |
424 (setq col-range (cdr range)) | |
425 (while col-range | |
426 (setq col (car col-range) col-max (nth 1 col-range) | |
427 col-range (nthcdr 2 col-range)) | |
428 (list-block-of-chars translate-method row col col-max)) | |
429 (setq row (1+ row))))))))) | |
430 | |
431 | |
432 ;;;###autoload | |
433 (defun list-charset-chars (charset) | |
434 "Display a list of characters in the specified character set." | |
435 (interactive (list (read-charset "Character set: "))) | |
436 (with-output-to-temp-buffer "*Help*" | |
437 (with-current-buffer standard-output | |
438 (set-buffer-multibyte t) | |
439 (cond ((charsetp charset) | |
440 (list-iso-charset-chars charset)) | |
441 ((assq charset non-iso-charset-alist) | |
442 (list-non-iso-charset-chars charset)) | |
443 (t | |
444 (error "Invalid charset %s" charset)))))) | |
445 | |
111 | 446 |
112 ;;; CODING-SYSTEM | 447 ;;; CODING-SYSTEM |
113 | 448 |
114 ;; Print information of designation of each graphic register in FLAGS | 449 ;; Print information of designation of each graphic register in FLAGS |
115 ;; in human readable format. See the documentation of | 450 ;; in human readable format. See the documentation of |
799 (princ (format "%s:%s\n" (car l) (symbol-value (car l)))) | 1134 (princ (format "%s:%s\n" (car l) (symbol-value (car l)))) |
800 (setq l (cdr l)))) | 1135 (setq l (cdr l)))) |
801 (insert "\n") | 1136 (insert "\n") |
802 | 1137 |
803 (insert-section 5 "Character sets") | 1138 (insert-section 5 "Character sets") |
804 (list-character-sets-1 t) | 1139 (list-character-sets-2) |
805 (insert "\n") | 1140 (insert "\n") |
806 | 1141 |
807 (when (and window-system (boundp 'global-fontset-alist)) | 1142 (when (and window-system (boundp 'global-fontset-alist)) |
808 ;; This code duplicates most of list-fontsets. | 1143 ;; This code duplicates most of list-fontsets. |
809 (insert-section 6 "Fontsets") | 1144 (insert-section 6 "Fontsets") |
830 (save-window-excursion | 1165 (save-window-excursion |
831 (save-excursion | 1166 (save-excursion |
832 (set-buffer buf) | 1167 (set-buffer buf) |
833 (setq buffer-read-only nil) | 1168 (setq buffer-read-only nil) |
834 (erase-buffer) | 1169 (erase-buffer) |
835 (list-character-sets t) | 1170 (list-character-sets-2) |
836 (insert-buffer-substring "*Help*") | 1171 (insert-buffer-substring "*Help*") |
837 (let (make-backup-files | 1172 (let (make-backup-files |
838 coding-system-for-write) | 1173 coding-system-for-write) |
839 (save-buffer)))) | 1174 (save-buffer)))) |
840 (kill-buffer buf)) | 1175 (kill-buffer buf)) |