Mercurial > emacs
changeset 55584:bdbbd721cc67
(describe-char-unicode-data, describe-char-unicodedata-file):
Re-enable the unicode code now that the licensing issues have been
cleared in the unicode-4 license.
(describe-text-properties-1): Remove unused `overlay' var.
(describe-char): Remove unused var `buffer'.
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Fri, 14 May 2004 05:02:18 +0000 |
parents | 21f88361795c |
children | 70afc4f63286 |
files | lisp/descr-text.el |
diffstat | 1 files changed, 205 insertions(+), 214 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/descr-text.el Fri May 14 03:07:12 2004 +0000 +++ b/lisp/descr-text.el Fri May 14 05:02:18 2004 +0000 @@ -183,7 +183,6 @@ (defun describe-text-properties-1 (pos output-buffer) (let* ((properties (text-properties-at pos)) (overlays (overlays-at pos)) - overlay (wid-field (get-char-property pos 'field)) (wid-button (get-char-property pos 'button)) (wid-doc (get-char-property pos 'widget-doc)) @@ -225,221 +224,214 @@ (widget-insert "There are text properties here:\n") (describe-property-list properties))))) -;;; We cannot use the UnicodeData.txt file as such; it is not free. -;;; We can turn that info a different format and release the result -;;; as free data. When that is done, we could reinstate the code below. -;;; For the mean time, here is a dummy placeholder. -;;; -- rms -(defun describe-char-unicode-data (char) nil) - -;;; (defcustom describe-char-unicodedata-file nil -;;; "Location of Unicode data file. -;;; This is the UnicodeData.txt file from the Unicode consortium, used for -;;; diagnostics. If it is non-nil `describe-char-after' will print data -;;; looked up from it. This facility is mostly of use to people doing -;;; multilingual development. +(defcustom describe-char-unicodedata-file nil + "Location of Unicode data file. +This is the UnicodeData.txt file from the Unicode consortium, used for +diagnostics. If it is non-nil `describe-char-after' will print data +looked up from it. This facility is mostly of use to people doing +multilingual development. -;;; This is a fairly large file, not typically present on GNU systems. At -;;; the time of writing it is at -;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." -;;; :group 'mule -;;; :version "21.5" -;;; :type '(choice (const :tag "None" nil) -;;; file)) +This is a fairly large file, not typically present on GNU systems. At +the time of writing it is at +<URL:http://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." + :group 'mule + :version "21.4" + :type '(choice (const :tag "None" nil) + file)) -;;; ;; We could convert the unidata file into a Lispy form once-for-all -;;; ;; and distribute it for loading on demand. It might be made more -;;; ;; space-efficient by splitting strings word-wise and replacing them -;;; ;; with lists of symbols interned in a private obarray, e.g. -;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). +;; We could convert the unidata file into a Lispy form once-for-all +;; and distribute it for loading on demand. It might be made more +;; space-efficient by splitting strings word-wise and replacing them +;; with lists of symbols interned in a private obarray, e.g. +;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). -;;; ;; Fixme: Check whether this needs updating for Unicode 4. -;;; (defun describe-char-unicode-data (char) -;;; "Return a list of Unicode data for unicode CHAR. -;;; Each element is a list of a property description and the property value. -;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'." -;;; (when describe-char-unicodedata-file -;;; (unless (file-exists-p describe-char-unicodedata-file) -;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file)) -;;; (save-excursion -;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned -;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. -;;; (set-buffer (let ((auto-mode-alist)) -;;; (find-file-noselect describe-char-unicodedata-file))) -;;; (goto-char (point-min)) -;;; (let ((hex (format "%04X" char)) -;;; found first last) -;;; (if (re-search-forward (concat "^" hex) nil t) -;;; (setq found t) -;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK -;;; ;; ideographs, and check whether it's in one of them. -;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) -;;; (>= char (setq first -;;; (string-to-number (match-string 1) 16))) -;;; (progn -;;; (forward-line 1) -;;; (looking-at "^\\([^;]+\\);[^;]+Last>;") -;;; (> char -;;; (setq last -;;; (string-to-number (match-string 1) 16)))))) -;;; (if (and (>= char first) -;;; (<= char last)) -;;; (setq found t))) -;;; (if found -;;; (let ((fields (mapcar (lambda (elt) -;;; (if (> (length elt) 0) -;;; elt)) -;;; (cdr (split-string -;;; (buffer-substring -;;; (line-beginning-position) -;;; (line-end-position)) -;;; ";"))))) -;;; ;; The length depends on whether the last field was empty. -;;; (unless (or (= 13 (length fields)) -;;; (= 14 (length fields))) -;;; (error "Invalid contents in %s" describe-char-unicodedata-file)) -;;; ;; The field names and values lists are slightly -;;; ;; modified from Mule-UCS unidata.el. -;;; (list -;;; (list "Name" (let ((name (nth 0 fields))) -;;; ;; Check for <..., First>, <..., Last> -;;; (if (string-match "\\`\\(<[^,]+\\)," name) -;;; (concat (match-string 1 name) ">") -;;; name))) -;;; (list "Category" -;;; (cdr (assoc -;;; (nth 1 fields) -;;; '(("Lu" . "uppercase letter") -;;; ("Ll" . "lowercase letter") -;;; ("Lt" . "titlecase letter") -;;; ("Mn" . "non-spacing mark") -;;; ("Mc" . "spacing-combining mark") -;;; ("Me" . "enclosing mark") -;;; ("Nd" . "decimal digit") -;;; ("Nl" . "letter number") -;;; ("No" . "other number") -;;; ("Zs" . "space separator") -;;; ("Zl" . "line separator") -;;; ("Zp" . "paragraph separator") -;;; ("Cc" . "other control") -;;; ("Cf" . "other format") -;;; ("Cs" . "surrogate") -;;; ("Co" . "private use") -;;; ("Cn" . "not assigned") -;;; ("Lm" . "modifier letter") -;;; ("Lo" . "other letter") -;;; ("Pc" . "connector punctuation") -;;; ("Pd" . "dash punctuation") -;;; ("Ps" . "open punctuation") -;;; ("Pe" . "close punctuation") -;;; ("Pi" . "initial-quotation punctuation") -;;; ("Pf" . "final-quotation punctuation") -;;; ("Po" . "other punctuation") -;;; ("Sm" . "math symbol") -;;; ("Sc" . "currency symbol") -;;; ("Sk" . "modifier symbol") -;;; ("So" . "other symbol"))))) -;;; (list "Combining class" -;;; (cdr (assoc -;;; (string-to-number (nth 2 fields)) -;;; '((0 . "Spacing") -;;; (1 . "Overlays and interior") -;;; (7 . "Nuktas") -;;; (8 . "Hiragana/Katakana voicing marks") -;;; (9 . "Viramas") -;;; (10 . "Start of fixed position classes") -;;; (199 . "End of fixed position classes") -;;; (200 . "Below left attached") -;;; (202 . "Below attached") -;;; (204 . "Below right attached") -;;; (208 . "Left attached (reordrant around \ -;;; single base character)") -;;; (210 . "Right attached") -;;; (212 . "Above left attached") -;;; (214 . "Above attached") -;;; (216 . "Above right attached") -;;; (218 . "Below left") -;;; (220 . "Below") -;;; (222 . "Below right") -;;; (224 . "Left (reordrant around single base \ -;;; character)") -;;; (226 . "Right") -;;; (228 . "Above left") -;;; (230 . "Above") -;;; (232 . "Above right") -;;; (233 . "Double below") -;;; (234 . "Double above") -;;; (240 . "Below (iota subscript)"))))) -;;; (list "Bidi category" -;;; (cdr (assoc -;;; (nth 3 fields) -;;; '(("L" . "Left-to-Right") -;;; ("LRE" . "Left-to-Right Embedding") -;;; ("LRO" . "Left-to-Right Override") -;;; ("R" . "Right-to-Left") -;;; ("AL" . "Right-to-Left Arabic") -;;; ("RLE" . "Right-to-Left Embedding") -;;; ("RLO" . "Right-to-Left Override") -;;; ("PDF" . "Pop Directional Format") -;;; ("EN" . "European Number") -;;; ("ES" . "European Number Separator") -;;; ("ET" . "European Number Terminator") -;;; ("AN" . "Arabic Number") -;;; ("CS" . "Common Number Separator") -;;; ("NSM" . "Non-Spacing Mark") -;;; ("BN" . "Boundary Neutral") -;;; ("B" . "Paragraph Separator") -;;; ("S" . "Segment Separator") -;;; ("WS" . "Whitespace") -;;; ("ON" . "Other Neutrals"))))) -;;; (list -;;; "Decomposition" -;;; (if (nth 4 fields) -;;; (let* ((parts (split-string (nth 4 fields))) -;;; (info (car parts))) -;;; (if (string-match "\\`<\\(.+\\)>\\'" info) -;;; (setq info (match-string 1 info)) -;;; (setq info nil)) -;;; (if info (setq parts (cdr parts))) -;;; ;; Maybe printing ? for unrepresentable unicodes -;;; ;; here and below should be changed? -;;; (setq parts (mapconcat -;;; (lambda (arg) -;;; (string (or (decode-char -;;; 'ucs -;;; (string-to-number arg 16)) -;;; ??))) -;;; parts " ")) -;;; (concat info parts)))) -;;; (list "Decimal digit value" -;;; (nth 5 fields)) -;;; (list "Digit value" -;;; (nth 6 fields)) -;;; (list "Numeric value" -;;; (nth 7 fields)) -;;; (list "Mirrored" -;;; (if (equal "Y" (nth 8 fields)) -;;; "yes")) -;;; (list "Old name" (nth 9 fields)) -;;; (list "ISO 10646 comment" (nth 10 fields)) -;;; (list "Uppercase" (and (nth 11 fields) -;;; (string (or (decode-char -;;; 'ucs -;;; (string-to-number -;;; (nth 11 fields) 16)) -;;; ??)))) -;;; (list "Lowercase" (and (nth 12 fields) -;;; (string (or (decode-char -;;; 'ucs -;;; (string-to-number -;;; (nth 12 fields) 16)) -;;; ??)))) -;;; (list "Titlecase" (and (nth 13 fields) -;;; (string (or (decode-char -;;; 'ucs -;;; (string-to-number -;;; (nth 13 fields) 16)) -;;; ??))))))))))) +;; Fixme: Check whether this needs updating for Unicode 4. +(defun describe-char-unicode-data (char) + "Return a list of Unicode data for unicode CHAR. +Each element is a list of a property description and the property value. +The list is null if CHAR isn't found in `describe-char-unicodedata-file'." + (when describe-char-unicodedata-file + (unless (file-exists-p describe-char-unicodedata-file) + (error "`unicodedata-file' %s not found" describe-char-unicodedata-file)) + (with-current-buffer + ;; Find file in fundamental mode to avoid, e.g. flyspell turned + ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. + (let ((auto-mode-alist)) + (find-file-noselect describe-char-unicodedata-file)) + (goto-char (point-min)) + (let ((hex (format "%04X" char)) + found first last) + (if (re-search-forward (concat "^" hex) nil t) + (setq found t) + ;; It's not listed explicitly. Look for ranges, e.g. CJK + ;; ideographs, and check whether it's in one of them. + (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) + (>= char (setq first + (string-to-number (match-string 1) 16))) + (progn + (forward-line 1) + (looking-at "^\\([^;]+\\);[^;]+Last>;") + (> char + (setq last + (string-to-number (match-string 1) 16)))))) + (if (and (>= char first) + (<= char last)) + (setq found t))) + (if found + (let ((fields (mapcar (lambda (elt) + (if (> (length elt) 0) + elt)) + (cdr (split-string + (buffer-substring + (line-beginning-position) + (line-end-position)) + ";"))))) + ;; The length depends on whether the last field was empty. + (unless (or (= 13 (length fields)) + (= 14 (length fields))) + (error "Invalid contents in %s" describe-char-unicodedata-file)) + ;; The field names and values lists are slightly + ;; modified from Mule-UCS unidata.el. + (list + (list "Name" (let ((name (nth 0 fields))) + ;; Check for <..., First>, <..., Last> + (if (string-match "\\`\\(<[^,]+\\)," name) + (concat (match-string 1 name) ">") + name))) + (list "Category" + (cdr (assoc + (nth 1 fields) + '(("Lu" . "uppercase letter") + ("Ll" . "lowercase letter") + ("Lt" . "titlecase letter") + ("Mn" . "non-spacing mark") + ("Mc" . "spacing-combining mark") + ("Me" . "enclosing mark") + ("Nd" . "decimal digit") + ("Nl" . "letter number") + ("No" . "other number") + ("Zs" . "space separator") + ("Zl" . "line separator") + ("Zp" . "paragraph separator") + ("Cc" . "other control") + ("Cf" . "other format") + ("Cs" . "surrogate") + ("Co" . "private use") + ("Cn" . "not assigned") + ("Lm" . "modifier letter") + ("Lo" . "other letter") + ("Pc" . "connector punctuation") + ("Pd" . "dash punctuation") + ("Ps" . "open punctuation") + ("Pe" . "close punctuation") + ("Pi" . "initial-quotation punctuation") + ("Pf" . "final-quotation punctuation") + ("Po" . "other punctuation") + ("Sm" . "math symbol") + ("Sc" . "currency symbol") + ("Sk" . "modifier symbol") + ("So" . "other symbol"))))) + (list "Combining class" + (cdr (assoc + (string-to-number (nth 2 fields)) + '((0 . "Spacing") + (1 . "Overlays and interior") + (7 . "Nuktas") + (8 . "Hiragana/Katakana voicing marks") + (9 . "Viramas") + (10 . "Start of fixed position classes") + (199 . "End of fixed position classes") + (200 . "Below left attached") + (202 . "Below attached") + (204 . "Below right attached") + (208 . "Left attached (reordrant around \ +single base character)") + (210 . "Right attached") + (212 . "Above left attached") + (214 . "Above attached") + (216 . "Above right attached") + (218 . "Below left") + (220 . "Below") + (222 . "Below right") + (224 . "Left (reordrant around single base \ +character)") + (226 . "Right") + (228 . "Above left") + (230 . "Above") + (232 . "Above right") + (233 . "Double below") + (234 . "Double above") + (240 . "Below (iota subscript)"))))) + (list "Bidi category" + (cdr (assoc + (nth 3 fields) + '(("L" . "Left-to-Right") + ("LRE" . "Left-to-Right Embedding") + ("LRO" . "Left-to-Right Override") + ("R" . "Right-to-Left") + ("AL" . "Right-to-Left Arabic") + ("RLE" . "Right-to-Left Embedding") + ("RLO" . "Right-to-Left Override") + ("PDF" . "Pop Directional Format") + ("EN" . "European Number") + ("ES" . "European Number Separator") + ("ET" . "European Number Terminator") + ("AN" . "Arabic Number") + ("CS" . "Common Number Separator") + ("NSM" . "Non-Spacing Mark") + ("BN" . "Boundary Neutral") + ("B" . "Paragraph Separator") + ("S" . "Segment Separator") + ("WS" . "Whitespace") + ("ON" . "Other Neutrals"))))) + (list + "Decomposition" + (if (nth 4 fields) + (let* ((parts (split-string (nth 4 fields))) + (info (car parts))) + (if (string-match "\\`<\\(.+\\)>\\'" info) + (setq info (match-string 1 info)) + (setq info nil)) + (if info (setq parts (cdr parts))) + ;; Maybe printing ? for unrepresentable unicodes + ;; here and below should be changed? + (setq parts (mapconcat + (lambda (arg) + (string (or (decode-char + 'ucs + (string-to-number arg 16)) + ??))) + parts " ")) + (concat info parts)))) + (list "Decimal digit value" + (nth 5 fields)) + (list "Digit value" + (nth 6 fields)) + (list "Numeric value" + (nth 7 fields)) + (list "Mirrored" + (if (equal "Y" (nth 8 fields)) + "yes")) + (list "Old name" (nth 9 fields)) + (list "ISO 10646 comment" (nth 10 fields)) + (list "Uppercase" (and (nth 11 fields) + (string (or (decode-char + 'ucs + (string-to-number + (nth 11 fields) 16)) + ??)))) + (list "Lowercase" (and (nth 12 fields) + (string (or (decode-char + 'ucs + (string-to-number + (nth 12 fields) 16)) + ??)))) + (list "Titlecase" (and (nth 13 fields) + (string (or (decode-char + 'ucs + (string-to-number + (nth 13 fields) 16)) + ??))))))))))) ;; Return information about how CHAR is displayed at the buffer ;; position POS. If the selected frame is on a graphic display, @@ -466,7 +458,6 @@ (error "No character follows specified position")) (let* ((char (char-after pos)) (charset (char-charset char)) - (buffer (current-buffer)) (composition (find-composition pos nil nil t)) (component-chars nil) (display-table (or (window-display-table)