comparison lisp/international/mule-diag.el @ 27912:ed26ed5b0afc

(list-character-sets): Completely rewritten. (sort-listed-character-sets): New function. (list-character-sets-1): Completely rewritten. (list-character-sets-2): New function. (non-iso-charset-alist): New variable. (decode-codepage-char): New function. (charset-history): New variable. (read-charset) (list-block-of-chars) (list-iso-charset-chars) (list-non-iso-charset-chars) (list-charset-chars): New functions. (mule-diag): Call list-character-sets-2, not list-character-sets-2. (dump-charsets): Likewise.
author Kenichi Handa <handa@m17n.org>
date Tue, 29 Feb 2000 11:32:52 +0000
parents 1830bc1903fb
children 8b485e43ac51
comparison
equal deleted inserted replaced
27911:01ed7d4ff0b6 27912:ed26ed5b0afc
41 (function (lambda (x y) (< (charset-id x) (charset-id y))))))) 41 (function (lambda (x y) (< (charset-id x) (charset-id y)))))))
42 42
43 ;;; CHARSET 43 ;;; CHARSET
44 44
45 ;;;###autoload 45 ;;;###autoload
46 (defun list-character-sets (&optional arg) 46 (defun list-character-sets (arg)
47 "Display a list of all character sets. 47 "Display a list of all character sets.
48 48
49 The ID column contains a charset identification number for internal Emacs use. 49 The ID-NUM column contains a charset identification number
50 The B column contains a number of bytes occupied in a buffer 50 for internal Emacs use.
51 by any character in this character set. 51
52 The W column contains a number of columns occupied on the screen 52 The MULTIBYTE-FORM column contains a format of multibyte sequence
53 by any character in this character set. 53 of characters in the charset for buffer and string
54 by one to four hexadecimal digits.
55 `xx' stands for any byte in the range 0..127.
56 `XX' stands for any byte in the range 160..255.
57
58 The D column contains a dimension of this character set.
59 The CH column contains a number of characters in a block of this character set.
60 The FINAL-CHAR column contains an ISO-2022's <final-char> to use for
61 designating this character set in ISO-2022-based coding systems.
54 62
55 With prefix arg, the output format gets more cryptic, 63 With prefix arg, the output format gets more cryptic,
56 but still shows the full information." 64 but still shows the full information."
57 (interactive "P") 65 (interactive "P")
58 (sort-charset-list)
59 (with-output-to-temp-buffer "*Help*" 66 (with-output-to-temp-buffer "*Help*"
60 (save-excursion 67 (with-current-buffer standard-output
61 (set-buffer standard-output) 68 (if arg
62 (list-character-sets-1 arg) 69 (list-character-sets-2)
63 (help-mode) 70 ;; Insert header.
64 (setq truncate-lines t)))) 71 (insert
65 72 (substitute-command-keys
66 (defun list-character-sets-1 (arg) 73 (concat
67 (let ((l charset-list) 74 "Use "
68 charset) 75 (if (display-mouse-p) "\\[help-follow-mouse] or ")
69 (if (null arg) 76 "\\[help-follow] on a title of column\nto sort by that title.")))
70 (progn 77 (indent-to 56)
71 (insert "ID Name B W Description\n") 78 (insert "+----DIMENSION\n")
72 (insert "-- ---- - - -----------\n") 79 (indent-to 56)
73 (while l 80 (insert "| +--CHARS\n")
74 (setq charset (car l) l (cdr l)) 81 (let ((columns '(("ID-NUM" . id) "\t"
75 (insert (format "%03d %s" (charset-id charset) charset)) 82 ("CHARSET-NAME" . name) "\t\t\t"
76 (indent-to 28) 83 ("MULTIBYTE-FORM" . id) "\t"
77 (insert (format "%d %d %s\n" 84 ("D CH FINAL-CHAR" . iso-spec)))
78 (charset-bytes charset) 85 (help-highlight-face 'region)
79 (charset-width charset) 86 pos)
80 (charset-description charset))))) 87 (while columns
81 (insert "\ 88 (if (stringp (car columns))
82 ######################### 89 (insert (car columns))
90 (insert (car (car columns)))
91 (search-backward (car (car columns)))
92 (help-xref-button 0 'sort-listed-character-sets
93 (cdr (car columns)))
94 (goto-char (point-max)))
95 (setq columns (cdr columns)))
96 (insert "\n"))
97 (insert "------\t------------\t\t\t--------------\t- -- ----------\n")
98
99 ;; Insert body sorted by charset IDs.
100 (list-character-sets-1 'id)))))
101
102
103 ;; Sort character set list by SORT-KEY.
104
105 (defun sort-listed-character-sets (sort-key)
106 (if sort-key
107 (save-excursion
108 (let ((buffer-read-only nil))
109 (goto-char (point-min))
110 (re-search-forward "[0-9][0-9][0-9]")
111 (beginning-of-line)
112 (delete-region (point) (point-max))
113 (list-character-sets-1 sort-key)))))
114
115
116 ;; Insert a list of character sets sorted by SORT-KEY. SORT-KEY
117 ;; should be one of `id', `name', and `iso-spec'. If SORT-KEY is nil,
118 ;; it defaults to `id'.
119
120 (defun list-character-sets-1 (sort-key)
121 (or sort-key
122 (setq sort-key 'id))
123 (let ((tail (charset-list))
124 charset-info-list elt charset info sort-func)
125 (while tail
126 (setq charset (car tail) tail (cdr tail)
127 info (charset-info charset))
128
129 ;; Generate a list that contains all information to display.
130 (setq charset-info-list
131 (cons (list (charset-id charset) ; ID-NUM
132 charset ; CHARSET-NAME
133 (if (eq charset 'ascii) ; MULTIBYTE-FORM
134 "xx"
135 (let ((str (format "%2X" (aref info 6))))
136 (if (> (aref info 7) 0)
137 (setq str (format "%s %2X" str (aref info 7))))
138 (setq str (concat str " XX"))
139 (if (> (aref info 2) 1)
140 (setq str (concat str " XX")))
141 str))
142 (aref info 2) ; DIMENSION
143 (aref info 3) ; CHARS
144 (aref info 8) ; FINAL-CHAR
145 )
146 charset-info-list)))
147
148 ;; Determine a predicate for `sort' by SORT-KEY.
149 (setq sort-func
150 (cond ((eq sort-key 'id)
151 (function (lambda (x y) (< (car x) (car y)))))
152
153 ((eq sort-key 'name)
154 (function (lambda (x y) (string< (nth 1 x) (nth 1 y)))))
155
156 ((eq sort-key 'iso-spec)
157 ;; Sort by DIMENSION CHARS FINAL-CHAR
158 (function
159 (lambda (x y)
160 (or (< (nth 3 x) (nth 3 y))
161 (and (= (nth 3 x) (nth 3 y))
162 (or (< (nth 4 x) (nth 4 y))
163 (and (= (nth 4 x) (nth 4 y))
164 (< (nth 5 x) (nth 5 y)))))))))
165 (t
166 (error "Invalid charset sort key: %s" sort-key))))
167
168 (setq charset-info-list (sort charset-info-list sort-func))
169
170 ;; Insert information of character sets.
171 (while charset-info-list
172 (setq elt (car charset-info-list)
173 charset-info-list (cdr charset-info-list))
174 (insert (format "%03d(%02X)" (car elt) (car elt))) ; ID-NUM
175 (indent-to 8)
176 (insert (symbol-name (nth 1 elt))) ; CHARSET-NAME
177 (search-backward (symbol-name (nth 1 elt)))
178 (help-xref-button 0 'list-charset-chars (nth 1 elt))
179 (goto-char (point-max))
180 (insert "\t")
181 (indent-to 40)
182 (insert (nth 2 elt)) ; MULTIBYTE-FORM
183 (indent-to 56)
184 (insert (format "%d %2d %c" ; ISO specs
185 (nth 3 elt) (nth 4 elt) (nth 5 elt)))
186 (insert "\n"))))
187
188
189 ;; List all character sets in a form that a program can easily parse.
190
191 (defun list-character-sets-2 ()
192 (insert "#########################
83 ## LIST OF CHARSETS 193 ## LIST OF CHARSETS
84 ## Each line corresponds to one charset. 194 ## Each line corresponds to one charset.
85 ## The following attributes are listed in this order 195 ## The following attributes are listed in this order
86 ## separated by a colon `:' in one line. 196 ## separated by a colon `:' in one line.
87 ## CHARSET-ID, 197 ## CHARSET-ID,
93 ## DIRECTION (0:left-to-right, 1:right-to-left), 203 ## DIRECTION (0:left-to-right, 1:right-to-left),
94 ## ISO-FINAL-CHAR (character code of ISO-2022's final character) 204 ## ISO-FINAL-CHAR (character code of ISO-2022's final character)
95 ## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR) 205 ## ISO-GRAPHIC-PLANE (ISO-2022's graphic plane, 0:GL, 1:GR)
96 ## DESCRIPTION (describing string of the charset) 206 ## DESCRIPTION (describing string of the charset)
97 ") 207 ")
98 (while l 208 (let ((l charset-list)
99 (setq charset (car l) l (cdr l)) 209 charset)
100 (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n" 210 (while l
101 (charset-id charset) 211 (setq charset (car l) l (cdr l))
102 charset 212 (princ (format "%03d:%s:%d:%d:%d:%d:%d:%d:%d:%s\n"
103 (charset-dimension charset) 213 (charset-id charset)
104 (charset-chars charset) 214 charset
105 (charset-bytes charset) 215 (charset-dimension charset)
106 (charset-width charset) 216 (charset-chars charset)
107 (charset-direction charset) 217 (charset-bytes charset)
108 (charset-iso-final-char charset) 218 (charset-width charset)
109 (charset-iso-graphic-plane charset) 219 (charset-direction charset)
110 (charset-description charset))))))) 220 (charset-iso-final-char charset)
221 (charset-iso-graphic-plane charset)
222 (charset-description charset))))))
223
224 (defvar non-iso-charset-alist
225 `((viscii
226 (ascii vietnamese-viscii-lower vietnamese-viscii-upper)
227 ,viet-viscii-nonascii-translation-table
228 ((0 255)))
229 (koi8-r
230 (ascii cyrillic-iso8859-5)
231 ,cyrillic-koi8-r-nonascii-translation-table
232 ((32 255)))
233 (alternativnyj
234 (ascii cyrillic-iso8859-5)
235 ,cyrillic-alternativnyj-nonascii-translation-table
236 ((32 255)))
237 (big5
238 (ascii chinese-big5-1 chinese-big5-2)
239 decode-big5-char
240 ((32 127)
241 ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE))))
242 (sjis
243 (ascii katakana-jisx0201 japanese-jisx0208)
244 decode-sjis-char
245 ((32 127 ?\xA1 ?\xDF)
246 ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC)))))
247 "Alist of non-ISO charset names vs the corresponding information.
248
249 Non-ISO charsets are what Emacs can read (or write) by mapping to (or
250 from) some Emacs' charsets that correspond to ISO charsets.
251
252 Each element has the following format:
253 (NON-ISO-CHARSET CHARSET-LIST TRANSLATION-METHOD [ CODE-RANGE ])
254
255 NON-ISO-CHARSET is a name (symbol) of the non-ISO charset.
256
257 CHARSET-LIST is a list of Emacs' charsets into which characters of
258 NON-ISO-CHARSET are mapped.
259
260 TRANSLATION-METHOD is a char-table to translate a character code of
261 NON-ISO-CHARSET to the corresponding Emacs character code. It can
262 also be a function to call with one argument, a character code in
263 NON-ISO-CHARSET.
264
265 CODE-RANGE specifies the valid code ranges of NON-ISO-CHARSET.
266 It is a list of RANGEs, where each RANGE is of the form:
267 (FROM1 TO1 FROM2 TO2 ...)
268 or
269 ((FROM1-1 TO1-1 FROM1-2 TO1-2 ...) . (FROM2-1 TO2-1 FROM2-2 TO2-2 ...))
270 In the first form, valid codes are between FROM1 and TO1, or FROM2 and
271 TO2, or...
272 The second form is used for 2-byte codes. The car part is the ranges
273 of the first byte, and the cdr part is the ranges of the second byte.")
274
275
276 ;; Decode a character that has code CODE in CODEPAGE. Value is a
277 ;; string of decoded character.
278
279 (defun decode-codepage-char (codepage code)
280 ;; Each CODEPAGE corresponds to a coding system cpCODEPAGE.
281 (let ((coding-system (intern (format "cp%d" codepage))))
282 (or (coding-system-p coding-system)
283 (codepage-setup codepage))
284 (string-to-char
285 (decode-coding-string (char-to-string code) coding-system))))
286
287
288 ;; Add DOS codepages to `non-iso-charset-alist'.
289
290 (let ((tail (cp-supported-codepages))
291 elt)
292 (while tail
293 (setq elt (car tail) tail (cdr tail))
294 ;; Now ELT is (CODEPAGE . CHARSET), where CODEPAGE is a string
295 ;; (e.g. "850"), CHARSET is a charset that characters in CODEPAGE
296 ;; are mapped to.
297 (setq non-iso-charset-alist
298 (cons (list (intern (concat "cp" (car elt)))
299 (list 'ascii (cdr elt))
300 `(lambda (code)
301 (decode-codepage-char ,(string-to-int (car elt))
302 code))
303 (list (list 0 255)))
304 non-iso-charset-alist))))
305
306
307 ;; A variable to hold charset input history.
308 (defvar charset-history nil)
309
310
311 ;;;###autoload
312 (defun read-charset (prompt &optional default-value initial-input)
313 "Read a character set from the minibuffer, prompting with string PROMPT.
314 It reads an Emacs' character set listed in the variable `charset-list'
315 or a non-ISO character set listed in the variable
316 `non-iso-charset-alist'.
317
318 Optional arguments are DEFAULT-VALUE and INITIAL-INPUT.
319 DEFAULT-VALUE, if non-nil, is the default value.
320 INITIAL-INPUT, if non-nil, is a string inserted in the minibuffer initially.
321 See the documentation of the function `completing-read' for the
322 detailed meanings of these arguments."
323 (let* ((table (append (mapcar (function (lambda (x) (list (symbol-name x))))
324 charset-list)
325 (mapcar (function (lambda (x)
326 (list (symbol-name (car x)))))
327 non-iso-charset-alist)))
328 (charset (completing-read prompt table
329 nil t initial-input 'charset-history
330 default-value)))
331 (if (> (length charset) 0)
332 (intern charset))))
333
334
335 ;; List characters of the range MIN and MAX of CHARSET. If dimension
336 ;; of CHARSET is two (i.e. 2-byte charset), ROW is the first byte
337 ;; (block index) of the characters, and MIN and MAX are the second
338 ;; bytes of the characters. If the dimension is one, ROW should be 0.
339 ;; For a non-ISO charset, CHARSET is a char-table or a function to get
340 ;; Emacs' character codes that corresponds to the characters to list.
341
342 (defun list-block-of-chars (charset row min max)
343 (let (i ch)
344 (insert-char ?- (+ 4 (* 3 16)))
345 (insert "\n ")
346 (setq i 0)
347 (while (< i 16)
348 (insert (format "%3X" i))
349 (setq i (1+ i)))
350 (setq i (* (/ min 16) 16))
351 (while (<= i max)
352 (if (= (% i 16) 0)
353 (insert (format "\n%3Xx" (/ (+ (* row 256) i) 16))))
354 (setq ch (cond ((< i min)
355 32)
356 ((charsetp charset)
357 (if (= row 0)
358 (make-char charset i)
359 (make-char charset row i)))
360 ((char-table-p charset)
361 (aref charset i))
362 (t (funcall charset (+ (* row 256) i)))))
363 (if (or (< ch 32) (and (>= ch 127) (<= ch 255)))
364 ;; Don't insert a control code.
365 (setq ch 32))
366 (insert (format "%3c" ch))
367 (setq i (1+ i))))
368 (insert "\n"))
369
370
371 ;; List all characters in ISO charset CHARSET.
372
373 (defun list-iso-charset-chars (charset)
374 (let ((dim (charset-dimension charset))
375 (chars (charset-chars charset))
376 (plane (charset-iso-graphic-plane charset))
377 min max)
378 (insert (format "Characters in the charset %s.\n" charset))
379
380 (if (= chars 94)
381 (setq min 33 max 126)
382 (setq min 32 max 127))
383 (or (= plane 0)
384 (setq min (+ min 128) max (+ max 128)))
385
386 (if (= dim 1)
387 (list-block-of-chars charset 0 min max)
388 (let ((i min))
389 (while (< i max)
390 (list-block-of-chars charset i min max)
391 (setq i (1+ i)))))))
392
393
394 ;; List all characters in non-ISO charset CHARSET.
395
396 (defun list-non-iso-charset-chars (charset)
397 (let* ((slot (assq charset non-iso-charset-alist))
398 (charsets (nth 1 slot))
399 (translate-method (nth 2 slot))
400 (ranges (nth 3 slot))
401 range)
402 (or slot
403 (error "Unknown external charset: %s" charset))
404 (insert (format "Characters in non-ISO charset %s.\n" charset))
405 (insert "They are mapped to: "
406 (mapconcat (lambda (x) (symbol-name x)) charsets ", ")
407 "\n")
408 (while ranges
409 (setq range (car ranges) ranges (cdr ranges))
410 (if (integerp (car range))
411 ;; The form of RANGES is (FROM1 TO1 FROM2 TO2 ...).
412 (while range
413 (list-block-of-chars translate-method
414 0 (car range) (nth 1 range))
415 (setq range (nthcdr 2 range)))
416 ;; The form of RANGES is ((FROM1-1 TO1-1 ...) . (FROM2-1 TO2-1 ...)).
417 (let ((row-range (car range))
418 row row-max
419 col-range col col-max)
420 (while row-range
421 (setq row (car row-range) row-max (nth 1 row-range)
422 row-range (nthcdr 2 row-range))
423 (while (< row row-max)
424 (setq col-range (cdr range))
425 (while col-range
426 (setq col (car col-range) col-max (nth 1 col-range)
427 col-range (nthcdr 2 col-range))
428 (list-block-of-chars translate-method row col col-max))
429 (setq row (1+ row)))))))))
430
431
432 ;;;###autoload
433 (defun list-charset-chars (charset)
434 "Display a list of characters in the specified character set."
435 (interactive (list (read-charset "Character set: ")))
436 (with-output-to-temp-buffer "*Help*"
437 (with-current-buffer standard-output
438 (set-buffer-multibyte t)
439 (cond ((charsetp charset)
440 (list-iso-charset-chars charset))
441 ((assq charset non-iso-charset-alist)
442 (list-non-iso-charset-chars charset))
443 (t
444 (error "Invalid charset %s" charset))))))
445
111 446
112 ;;; CODING-SYSTEM 447 ;;; CODING-SYSTEM
113 448
114 ;; Print information of designation of each graphic register in FLAGS 449 ;; Print information of designation of each graphic register in FLAGS
115 ;; in human readable format. See the documentation of 450 ;; in human readable format. See the documentation of
799 (princ (format "%s:%s\n" (car l) (symbol-value (car l)))) 1134 (princ (format "%s:%s\n" (car l) (symbol-value (car l))))
800 (setq l (cdr l)))) 1135 (setq l (cdr l))))
801 (insert "\n") 1136 (insert "\n")
802 1137
803 (insert-section 5 "Character sets") 1138 (insert-section 5 "Character sets")
804 (list-character-sets-1 t) 1139 (list-character-sets-2)
805 (insert "\n") 1140 (insert "\n")
806 1141
807 (when (and window-system (boundp 'global-fontset-alist)) 1142 (when (and window-system (boundp 'global-fontset-alist))
808 ;; This code duplicates most of list-fontsets. 1143 ;; This code duplicates most of list-fontsets.
809 (insert-section 6 "Fontsets") 1144 (insert-section 6 "Fontsets")
830 (save-window-excursion 1165 (save-window-excursion
831 (save-excursion 1166 (save-excursion
832 (set-buffer buf) 1167 (set-buffer buf)
833 (setq buffer-read-only nil) 1168 (setq buffer-read-only nil)
834 (erase-buffer) 1169 (erase-buffer)
835 (list-character-sets t) 1170 (list-character-sets-2)
836 (insert-buffer-substring "*Help*") 1171 (insert-buffer-substring "*Help*")
837 (let (make-backup-files 1172 (let (make-backup-files
838 coding-system-for-write) 1173 coding-system-for-write)
839 (save-buffer)))) 1174 (save-buffer))))
840 (kill-buffer buf)) 1175 (kill-buffer buf))