Mercurial > emacs
changeset 88818:9ae3a7ed0b93
(describe-char-after): Modify display
list processing.
(unicodedata-file, unicodedata-find): New.
author | Dave Love <fx@gnu.org> |
---|---|
date | Mon, 01 Jul 2002 20:41:34 +0000 |
parents | 5756cf8c8828 |
children | 6d6256fd3cd7 |
files | lisp/international/mule-diag.el |
diffstat | 1 files changed, 204 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/international/mule-diag.el Mon Jul 01 20:38:35 2002 +0000 +++ b/lisp/international/mule-diag.el Mon Jul 01 20:41:34 2002 +0000 @@ -459,8 +459,15 @@ (encoded (encode-coding-char char coding))) (if encoded (encoded-string-description encoded coding) - "not encodable"))))))) - (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) + "not encodable")))) + ,@(let ((unicodedata (if (encode-char char 'ucs) + (unicode-data char)))) + (if unicodedata + (cons (list "Unicode data" " ") unicodedata)))))) + (setq max-width (apply #'max (mapcar #'(lambda (x) + (if (cadr x) + (length (car x)) + 0)) item-list))) (with-output-to-temp-buffer "*Help*" (save-excursion @@ -468,16 +475,17 @@ (set-buffer-multibyte multibyte-p) (let ((formatter (format "%%%ds:" max-width))) (dolist (elt item-list) - (insert (format formatter (car elt))) - (dolist (clm (cdr elt)) - (when (>= (+ (current-column) - (or (string-match "\n" clm) - (string-width clm)) 1) - (frame-width)) - (insert "\n") - (indent-to (1+ max-width))) - (insert " " clm)) - (insert "\n"))) + (when (cadr elt) + (insert (format formatter (car elt))) + (dolist (clm (cdr elt)) + (when (>= (+ (current-column) + (or (string-match "\n" clm) + (string-width clm)) 1) + (frame-width)) + (insert "\n") + (indent-to (1+ max-width))) + (insert " " clm)) + (insert "\n")))) (when composition (insert "\nComposed with the following character(s) " (mapconcat (lambda (x) (format "`%c'" x)) @@ -1218,4 +1226,188 @@ (setq fontsets (cdr fontsets))))) (print-help-return-message)))) +(defcustom unicodedata-file nil + "Location of UnicodeData file. +This is the UnicodeData.txt file from the Unicode consortium, used for +diagnostics. If it is non-nil `describe-char-after' will print data +looked up from it." + :group 'mule + :type '(choice (const :tag "None" nil) + file)) + +;; We could convert the unidata file into a Lispy form once-for-all +;; and distribute it for loading on demand. It might be made more +;; space-efficient by splitting strings word-wise and replacing them +;; with lists of symbols interned in a private obarray, e.g. +;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). +(defun unicode-data (char) + "Return a list of Unicode data for unicode CHAR. +Each element is a list of a property description and the property value. +The list is null if CHAR isn't found in `unicodedata-file'." + (if unicodedata-file + (save-excursion + (set-buffer (find-file-noselect unicodedata-file)) + (goto-char (point-min)) + (let ((hex (format "%04X" char)) + found first last) + (if (re-search-forward (concat "^" hex) nil t) + (setq found t) + ;; It's not listed explicitly. Look for ranges, e.g. CJK + ;; ideographs, and check whether it's in one of them. + (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) + (>= char (setq first + (string-to-number (match-string 1) 16))) + (progn + (forward-line 1) + (looking-at "^\\([^;]+\\);[^;]+Last>;") + (> char + (setq last + (string-to-number (match-string 1) 16)))))) + (if (and (>= char first) + (<= char last)) + (setq found t))) + (if found + (let ((fields (mapcar (lambda (elt) + (if (> (length elt) 0) + elt)) + (cdr (split-string + (buffer-substring + (line-beginning-position) + (line-end-position)) + ";"))))) + ;; The length depends on whether the last field was empty. + (unless (or (= 13 (length fields)) + (= 14 (length fields))) + (error "Invalid contents in %s" unicodedata-file)) + ;; The field names and values lists are slightly + ;; modified from Mule-UCS unidata.el. + (list + (list "Name" (let ((name (nth 0 fields))) + ;; Check for <..., First>, <..., Last> + (if (string-match "\\`\\(<[^,]+\\)," name) + (concat (match-string 1 name) ">") + name))) + (list "Category" + (cdr (assoc + (nth 1 fields) + '(("Lu" . "uppercase letter") + ("Ll" . "lowercase letter") + ("Lt" . "titlecase letter") + ("Mn" . "non-spacing mark") + ("Mc" . "spacing-combining mark") + ("Me" . "enclosing mark") + ("Nd" . "decimal digit") + ("Nl" . "letter number") + ("No" . "other number") + ("Zs" . "space separator") + ("Zl" . "line separator") + ("Zp" . "paragraph separator") + ("Cc" . "other control") + ("Cf" . "other format") + ("Cs" . "surrogate") + ("Co" . "private use") + ("Cn" . "not assigned") + ("Lm" . "modifier letter") + ("Lo" . "other letter") + ("Pc" . "connector punctuation") + ("Pd" . "dash punctuation") + ("Ps" . "open punctuation") + ("Pe" . "close punctuation") + ("Pi" . "initial-quotation punctuation") + ("Pf" . "final-quotation punctuation") + ("Po" . "other punctuation") + ("Sm" . "math symbol") + ("Sc" . "currency symbol") + ("Sk" . "modifier symbol") + ("So" . "other symbol"))))) + (list "Combining class" + (cdr (assoc + (string-to-number (nth 2 fields)) + '((0 . "Spacing") + (1 . "Overlays and interior") + (7 . "Nuktas") + (8 . "Hiragana/Katakana voicing marks") + (9 . "Viramas") + (10 . "Start of fixed position classes") + (199 . "End of fixed position classes") + (200 . "Below left attached") + (202 . "Below attached") + (204 . "Below right attached") + (208 . "Left attached (reordrant around \ +single base character)") + (210 . "Right attached") + (212 . "Above left attached") + (214 . "Above attached") + (216 . "Above right attached") + (218 . "Below left") + (220 . "Below") + (222 . "Below right") + (224 . "Left (reordrant around single base \ +character)") + (226 . "Right") + (228 . "Above left") + (230 . "Above") + (232 . "Above right") + (233 . "Double below") + (234 . "Double above") + (240 . "Below (iota subscript)"))))) + (list "Bidi category" + (cdr (assoc + (nth 3 fields) + '(("L" . "Left-to-Right") + ("LRE" . "Left-to-Right Embedding") + ("LRO" . "Left-to-Right Override") + ("R" . "Right-to-Left") + ("AL" . "Right-to-Left Arabic") + ("RLE" . "Right-to-Left Embedding") + ("RLO" . "Right-to-Left Override") + ("PDF" . "Pop Directional Format") + ("EN" . "European Number") + ("ES" . "European Number Separator") + ("ET" . "European Number Terminator") + ("AN" . "Arabic Number") + ("CS" . "Common Number Separator") + ("NSM" . "Non-Spacing Mark") + ("BN" . "Boundary Neutral") + ("B" . "Paragraph Separator") + ("S" . "Segment Separator") + ("WS" . "Whitespace") + ("ON" . "Other Neutrals"))))) + (list "Decomposition" + (if (nth 4 fields) + (let* ((parts (split-string (nth 4 fields))) + (info (car parts))) + (if (string-match "\\`<\\(.+\\)>\\'" info) + (setq info (match-string 1 info)) + (setq info nil)) + (if info (setq parts (cdr parts))) + (setq parts (mapconcat + (lambda (arg) + (string (string-to-number arg 16))) + parts " ")) + (concat info parts)))) + (list "Decimal digit value" + (if (nth 5 fields) + (string-to-number (nth 5 fields)))) + (list "Digit value" + (if (nth 6 fields) + (string-to-number (nth 6 fields)))) + (list "Numeric value" + (if (nth 7 fields) + (string-to-number (nth 6 fields)))) + (list "Mirrored" + (if (equal "Y" (nth 8 fields)) + "yes")) + (list "Old name" (nth 9 fields)) + (list "ISO 10646 comment" (nth 10 fields)) + (list "Uppercase" (and (nth 11 fields) + (string (string-to-number + (nth 11 fields) 16)))) + (list "Lowercase" (and (nth 12 fields) + (string (string-to-number + (nth 12 fields) 16)))) + (list "Titlecase" (and (nth 13 fields) + (string (string-to-number + (nth 13 fields) 16))))))))))) + ;;; mule-diag.el ends here