Mercurial > emacs
comparison lisp/descr-text.el @ 51278:7192dc1bfcf4
(describe-char-unicode-data): New dummy definition.
Real definition commented out since we can't use UnicodeData.txt as is.
(describe-char-unicodedata-file): Variable commented out.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Wed, 28 May 2003 11:14:07 +0000 |
parents | d68739c97632 |
children | 67502df21b92 |
comparison
equal
deleted
inserted
replaced
51277:caaa4fda6808 | 51278:7192dc1bfcf4 |
---|---|
216 ;; Text properties | 216 ;; Text properties |
217 (when properties | 217 (when properties |
218 (newline) | 218 (newline) |
219 (widget-insert "There are text properties here:\n") | 219 (widget-insert "There are text properties here:\n") |
220 (describe-property-list properties))))) | 220 (describe-property-list properties))))) |
221 | 221 |
222 (defcustom unicodedata-file nil | 222 ;;; We cannot use the UnicodeData.txt file as such; it is not free. |
223 "Location of Unicode data file. | 223 ;;; We can turn that info a different format and release the result |
224 This is the UnicodeData.txt file from the Unicode consortium, used for | 224 ;;; as free data. When that is done, we could reinstate the code below. |
225 diagnostics. If it is non-nil `describe-char-after' will print data | 225 ;;; For the mean time, here is a dummy placeholder. |
226 looked up from it. This facility is mostly of use to people doing | 226 ;;; -- rms |
227 multilingual development. | 227 (defun describe-char-unicode-data (char) nil) |
228 | 228 |
229 This is a fairly large file, not typically present on GNU systems. At | 229 ;;; (defcustom describe-char-unicodedata-file nil |
230 the time of writing it is at | 230 ;;; "Location of Unicode data file. |
231 <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." | 231 ;;; This is the UnicodeData.txt file from the Unicode consortium, used for |
232 :group 'mule | 232 ;;; diagnostics. If it is non-nil `describe-char-after' will print data |
233 :version "21.5" | 233 ;;; looked up from it. This facility is mostly of use to people doing |
234 :type '(choice (const :tag "None" nil) | 234 ;;; multilingual development. |
235 file)) | 235 |
236 | 236 ;;; This is a fairly large file, not typically present on GNU systems. At |
237 ;; We could convert the unidata file into a Lispy form once-for-all | 237 ;;; the time of writing it is at |
238 ;; and distribute it for loading on demand. It might be made more | 238 ;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." |
239 ;; space-efficient by splitting strings word-wise and replacing them | 239 ;;; :group 'mule |
240 ;; with lists of symbols interned in a private obarray, e.g. | 240 ;;; :version "21.5" |
241 ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). | 241 ;;; :type '(choice (const :tag "None" nil) |
242 | 242 ;;; file)) |
243 ;; Fixme: Check whether this needs updating for Unicode 4. | 243 |
244 (defun unicode-data (char) | 244 ;;; ;; We could convert the unidata file into a Lispy form once-for-all |
245 "Return a list of Unicode data for unicode CHAR. | 245 ;;; ;; and distribute it for loading on demand. It might be made more |
246 Each element is a list of a property description and the property value. | 246 ;;; ;; space-efficient by splitting strings word-wise and replacing them |
247 The list is null if CHAR isn't found in `unicodedata-file'." | 247 ;;; ;; with lists of symbols interned in a private obarray, e.g. |
248 (when unicodedata-file | 248 ;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). |
249 (unless (file-exists-p unicodedata-file) | 249 |
250 (error "`unicodedata-file' %s not found" unicodedata-file)) | 250 ;;; ;; Fixme: Check whether this needs updating for Unicode 4. |
251 (save-excursion | 251 ;;; (defun describe-char-unicode-data (char) |
252 ;; Find file in fundamental mode to avoid, e.g. flyspell turned | 252 ;;; "Return a list of Unicode data for unicode CHAR. |
253 ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. | 253 ;;; Each element is a list of a property description and the property value. |
254 (set-buffer (let ((auto-mode-alist)) | 254 ;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'." |
255 (find-file-noselect unicodedata-file))) | 255 ;;; (when describe-char-unicodedata-file |
256 (goto-char (point-min)) | 256 ;;; (unless (file-exists-p describe-char-unicodedata-file) |
257 (let ((hex (format "%04X" char)) | 257 ;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file)) |
258 found first last) | 258 ;;; (save-excursion |
259 (if (re-search-forward (concat "^" hex) nil t) | 259 ;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned |
260 (setq found t) | 260 ;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. |
261 ;; It's not listed explicitly. Look for ranges, e.g. CJK | 261 ;;; (set-buffer (let ((auto-mode-alist)) |
262 ;; ideographs, and check whether it's in one of them. | 262 ;;; (find-file-noselect describe-char-unicodedata-file))) |
263 (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) | 263 ;;; (goto-char (point-min)) |
264 (>= char (setq first | 264 ;;; (let ((hex (format "%04X" char)) |
265 (string-to-number (match-string 1) 16))) | 265 ;;; found first last) |
266 (progn | 266 ;;; (if (re-search-forward (concat "^" hex) nil t) |
267 (forward-line 1) | 267 ;;; (setq found t) |
268 (looking-at "^\\([^;]+\\);[^;]+Last>;") | 268 ;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK |
269 (> char | 269 ;;; ;; ideographs, and check whether it's in one of them. |
270 (setq last | 270 ;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) |
271 (string-to-number (match-string 1) 16)))))) | 271 ;;; (>= char (setq first |
272 (if (and (>= char first) | 272 ;;; (string-to-number (match-string 1) 16))) |
273 (<= char last)) | 273 ;;; (progn |
274 (setq found t))) | 274 ;;; (forward-line 1) |
275 (if found | 275 ;;; (looking-at "^\\([^;]+\\);[^;]+Last>;") |
276 (let ((fields (mapcar (lambda (elt) | 276 ;;; (> char |
277 (if (> (length elt) 0) | 277 ;;; (setq last |
278 elt)) | 278 ;;; (string-to-number (match-string 1) 16)))))) |
279 (cdr (split-string | 279 ;;; (if (and (>= char first) |
280 (buffer-substring | 280 ;;; (<= char last)) |
281 (line-beginning-position) | 281 ;;; (setq found t))) |
282 (line-end-position)) | 282 ;;; (if found |
283 ";"))))) | 283 ;;; (let ((fields (mapcar (lambda (elt) |
284 ;; The length depends on whether the last field was empty. | 284 ;;; (if (> (length elt) 0) |
285 (unless (or (= 13 (length fields)) | 285 ;;; elt)) |
286 (= 14 (length fields))) | 286 ;;; (cdr (split-string |
287 (error "Invalid contents in %s" unicodedata-file)) | 287 ;;; (buffer-substring |
288 ;; The field names and values lists are slightly | 288 ;;; (line-beginning-position) |
289 ;; modified from Mule-UCS unidata.el. | 289 ;;; (line-end-position)) |
290 (list | 290 ;;; ";"))))) |
291 (list "Name" (let ((name (nth 0 fields))) | 291 ;;; ;; The length depends on whether the last field was empty. |
292 ;; Check for <..., First>, <..., Last> | 292 ;;; (unless (or (= 13 (length fields)) |
293 (if (string-match "\\`\\(<[^,]+\\)," name) | 293 ;;; (= 14 (length fields))) |
294 (concat (match-string 1 name) ">") | 294 ;;; (error "Invalid contents in %s" describe-char-unicodedata-file)) |
295 name))) | 295 ;;; ;; The field names and values lists are slightly |
296 (list "Category" | 296 ;;; ;; modified from Mule-UCS unidata.el. |
297 (cdr (assoc | 297 ;;; (list |
298 (nth 1 fields) | 298 ;;; (list "Name" (let ((name (nth 0 fields))) |
299 '(("Lu" . "uppercase letter") | 299 ;;; ;; Check for <..., First>, <..., Last> |
300 ("Ll" . "lowercase letter") | 300 ;;; (if (string-match "\\`\\(<[^,]+\\)," name) |
301 ("Lt" . "titlecase letter") | 301 ;;; (concat (match-string 1 name) ">") |
302 ("Mn" . "non-spacing mark") | 302 ;;; name))) |
303 ("Mc" . "spacing-combining mark") | 303 ;;; (list "Category" |
304 ("Me" . "enclosing mark") | 304 ;;; (cdr (assoc |
305 ("Nd" . "decimal digit") | 305 ;;; (nth 1 fields) |
306 ("Nl" . "letter number") | 306 ;;; '(("Lu" . "uppercase letter") |
307 ("No" . "other number") | 307 ;;; ("Ll" . "lowercase letter") |
308 ("Zs" . "space separator") | 308 ;;; ("Lt" . "titlecase letter") |
309 ("Zl" . "line separator") | 309 ;;; ("Mn" . "non-spacing mark") |
310 ("Zp" . "paragraph separator") | 310 ;;; ("Mc" . "spacing-combining mark") |
311 ("Cc" . "other control") | 311 ;;; ("Me" . "enclosing mark") |
312 ("Cf" . "other format") | 312 ;;; ("Nd" . "decimal digit") |
313 ("Cs" . "surrogate") | 313 ;;; ("Nl" . "letter number") |
314 ("Co" . "private use") | 314 ;;; ("No" . "other number") |
315 ("Cn" . "not assigned") | 315 ;;; ("Zs" . "space separator") |
316 ("Lm" . "modifier letter") | 316 ;;; ("Zl" . "line separator") |
317 ("Lo" . "other letter") | 317 ;;; ("Zp" . "paragraph separator") |
318 ("Pc" . "connector punctuation") | 318 ;;; ("Cc" . "other control") |
319 ("Pd" . "dash punctuation") | 319 ;;; ("Cf" . "other format") |
320 ("Ps" . "open punctuation") | 320 ;;; ("Cs" . "surrogate") |
321 ("Pe" . "close punctuation") | 321 ;;; ("Co" . "private use") |
322 ("Pi" . "initial-quotation punctuation") | 322 ;;; ("Cn" . "not assigned") |
323 ("Pf" . "final-quotation punctuation") | 323 ;;; ("Lm" . "modifier letter") |
324 ("Po" . "other punctuation") | 324 ;;; ("Lo" . "other letter") |
325 ("Sm" . "math symbol") | 325 ;;; ("Pc" . "connector punctuation") |
326 ("Sc" . "currency symbol") | 326 ;;; ("Pd" . "dash punctuation") |
327 ("Sk" . "modifier symbol") | 327 ;;; ("Ps" . "open punctuation") |
328 ("So" . "other symbol"))))) | 328 ;;; ("Pe" . "close punctuation") |
329 (list "Combining class" | 329 ;;; ("Pi" . "initial-quotation punctuation") |
330 (cdr (assoc | 330 ;;; ("Pf" . "final-quotation punctuation") |
331 (string-to-number (nth 2 fields)) | 331 ;;; ("Po" . "other punctuation") |
332 '((0 . "Spacing") | 332 ;;; ("Sm" . "math symbol") |
333 (1 . "Overlays and interior") | 333 ;;; ("Sc" . "currency symbol") |
334 (7 . "Nuktas") | 334 ;;; ("Sk" . "modifier symbol") |
335 (8 . "Hiragana/Katakana voicing marks") | 335 ;;; ("So" . "other symbol"))))) |
336 (9 . "Viramas") | 336 ;;; (list "Combining class" |
337 (10 . "Start of fixed position classes") | 337 ;;; (cdr (assoc |
338 (199 . "End of fixed position classes") | 338 ;;; (string-to-number (nth 2 fields)) |
339 (200 . "Below left attached") | 339 ;;; '((0 . "Spacing") |
340 (202 . "Below attached") | 340 ;;; (1 . "Overlays and interior") |
341 (204 . "Below right attached") | 341 ;;; (7 . "Nuktas") |
342 (208 . "Left attached (reordrant around \ | 342 ;;; (8 . "Hiragana/Katakana voicing marks") |
343 single base character)") | 343 ;;; (9 . "Viramas") |
344 (210 . "Right attached") | 344 ;;; (10 . "Start of fixed position classes") |
345 (212 . "Above left attached") | 345 ;;; (199 . "End of fixed position classes") |
346 (214 . "Above attached") | 346 ;;; (200 . "Below left attached") |
347 (216 . "Above right attached") | 347 ;;; (202 . "Below attached") |
348 (218 . "Below left") | 348 ;;; (204 . "Below right attached") |
349 (220 . "Below") | 349 ;;; (208 . "Left attached (reordrant around \ |
350 (222 . "Below right") | 350 ;;; single base character)") |
351 (224 . "Left (reordrant around single base \ | 351 ;;; (210 . "Right attached") |
352 character)") | 352 ;;; (212 . "Above left attached") |
353 (226 . "Right") | 353 ;;; (214 . "Above attached") |
354 (228 . "Above left") | 354 ;;; (216 . "Above right attached") |
355 (230 . "Above") | 355 ;;; (218 . "Below left") |
356 (232 . "Above right") | 356 ;;; (220 . "Below") |
357 (233 . "Double below") | 357 ;;; (222 . "Below right") |
358 (234 . "Double above") | 358 ;;; (224 . "Left (reordrant around single base \ |
359 (240 . "Below (iota subscript)"))))) | 359 ;;; character)") |
360 (list "Bidi category" | 360 ;;; (226 . "Right") |
361 (cdr (assoc | 361 ;;; (228 . "Above left") |
362 (nth 3 fields) | 362 ;;; (230 . "Above") |
363 '(("L" . "Left-to-Right") | 363 ;;; (232 . "Above right") |
364 ("LRE" . "Left-to-Right Embedding") | 364 ;;; (233 . "Double below") |
365 ("LRO" . "Left-to-Right Override") | 365 ;;; (234 . "Double above") |
366 ("R" . "Right-to-Left") | 366 ;;; (240 . "Below (iota subscript)"))))) |
367 ("AL" . "Right-to-Left Arabic") | 367 ;;; (list "Bidi category" |
368 ("RLE" . "Right-to-Left Embedding") | 368 ;;; (cdr (assoc |
369 ("RLO" . "Right-to-Left Override") | 369 ;;; (nth 3 fields) |
370 ("PDF" . "Pop Directional Format") | 370 ;;; '(("L" . "Left-to-Right") |
371 ("EN" . "European Number") | 371 ;;; ("LRE" . "Left-to-Right Embedding") |
372 ("ES" . "European Number Separator") | 372 ;;; ("LRO" . "Left-to-Right Override") |
373 ("ET" . "European Number Terminator") | 373 ;;; ("R" . "Right-to-Left") |
374 ("AN" . "Arabic Number") | 374 ;;; ("AL" . "Right-to-Left Arabic") |
375 ("CS" . "Common Number Separator") | 375 ;;; ("RLE" . "Right-to-Left Embedding") |
376 ("NSM" . "Non-Spacing Mark") | 376 ;;; ("RLO" . "Right-to-Left Override") |
377 ("BN" . "Boundary Neutral") | 377 ;;; ("PDF" . "Pop Directional Format") |
378 ("B" . "Paragraph Separator") | 378 ;;; ("EN" . "European Number") |
379 ("S" . "Segment Separator") | 379 ;;; ("ES" . "European Number Separator") |
380 ("WS" . "Whitespace") | 380 ;;; ("ET" . "European Number Terminator") |
381 ("ON" . "Other Neutrals"))))) | 381 ;;; ("AN" . "Arabic Number") |
382 (list | 382 ;;; ("CS" . "Common Number Separator") |
383 "Decomposition" | 383 ;;; ("NSM" . "Non-Spacing Mark") |
384 (if (nth 4 fields) | 384 ;;; ("BN" . "Boundary Neutral") |
385 (let* ((parts (split-string (nth 4 fields))) | 385 ;;; ("B" . "Paragraph Separator") |
386 (info (car parts))) | 386 ;;; ("S" . "Segment Separator") |
387 (if (string-match "\\`<\\(.+\\)>\\'" info) | 387 ;;; ("WS" . "Whitespace") |
388 (setq info (match-string 1 info)) | 388 ;;; ("ON" . "Other Neutrals"))))) |
389 (setq info nil)) | 389 ;;; (list |
390 (if info (setq parts (cdr parts))) | 390 ;;; "Decomposition" |
391 ;; Maybe printing ? for unrepresentable unicodes | 391 ;;; (if (nth 4 fields) |
392 ;; here and below should be changed? | 392 ;;; (let* ((parts (split-string (nth 4 fields))) |
393 (setq parts (mapconcat | 393 ;;; (info (car parts))) |
394 (lambda (arg) | 394 ;;; (if (string-match "\\`<\\(.+\\)>\\'" info) |
395 (string (or (decode-char | 395 ;;; (setq info (match-string 1 info)) |
396 'ucs | 396 ;;; (setq info nil)) |
397 (string-to-number arg 16)) | 397 ;;; (if info (setq parts (cdr parts))) |
398 ??))) | 398 ;;; ;; Maybe printing ? for unrepresentable unicodes |
399 parts " ")) | 399 ;;; ;; here and below should be changed? |
400 (concat info parts)))) | 400 ;;; (setq parts (mapconcat |
401 (list "Decimal digit value" | 401 ;;; (lambda (arg) |
402 (nth 5 fields)) | 402 ;;; (string (or (decode-char |
403 (list "Digit value" | 403 ;;; 'ucs |
404 (nth 6 fields)) | 404 ;;; (string-to-number arg 16)) |
405 (list "Numeric value" | 405 ;;; ??))) |
406 (nth 7 fields)) | 406 ;;; parts " ")) |
407 (list "Mirrored" | 407 ;;; (concat info parts)))) |
408 (if (equal "Y" (nth 8 fields)) | 408 ;;; (list "Decimal digit value" |
409 "yes")) | 409 ;;; (nth 5 fields)) |
410 (list "Old name" (nth 9 fields)) | 410 ;;; (list "Digit value" |
411 (list "ISO 10646 comment" (nth 10 fields)) | 411 ;;; (nth 6 fields)) |
412 (list "Uppercase" (and (nth 11 fields) | 412 ;;; (list "Numeric value" |
413 (string (or (decode-char | 413 ;;; (nth 7 fields)) |
414 'ucs | 414 ;;; (list "Mirrored" |
415 (string-to-number | 415 ;;; (if (equal "Y" (nth 8 fields)) |
416 (nth 11 fields) 16)) | 416 ;;; "yes")) |
417 ??)))) | 417 ;;; (list "Old name" (nth 9 fields)) |
418 (list "Lowercase" (and (nth 12 fields) | 418 ;;; (list "ISO 10646 comment" (nth 10 fields)) |
419 (string (or (decode-char | 419 ;;; (list "Uppercase" (and (nth 11 fields) |
420 'ucs | 420 ;;; (string (or (decode-char |
421 (string-to-number | 421 ;;; 'ucs |
422 (nth 12 fields) 16)) | 422 ;;; (string-to-number |
423 ??)))) | 423 ;;; (nth 11 fields) 16)) |
424 (list "Titlecase" (and (nth 13 fields) | 424 ;;; ??)))) |
425 (string (or (decode-char | 425 ;;; (list "Lowercase" (and (nth 12 fields) |
426 'ucs | 426 ;;; (string (or (decode-char |
427 (string-to-number | 427 ;;; 'ucs |
428 (nth 13 fields) 16)) | 428 ;;; (string-to-number |
429 ??))))))))))) | 429 ;;; (nth 12 fields) 16)) |
430 | 430 ;;; ??)))) |
431 ;;; (list "Titlecase" (and (nth 13 fields) | |
432 ;;; (string (or (decode-char | |
433 ;;; 'ucs | |
434 ;;; (string-to-number | |
435 ;;; (nth 13 fields) 16)) | |
436 ;;; ??))))))))))) | |
437 | |
431 ;;;###autoload | 438 ;;;###autoload |
432 (defun describe-char (pos) | 439 (defun describe-char (pos) |
433 "Describe the character after POS (interactively, the character after point). | 440 "Describe the character after POS (interactively, the character after point). |
434 The information includes character code, charset and code points in it, | 441 The information includes character code, charset and code points in it, |
435 syntax, category, how the character is encoded in a file, | 442 syntax, category, how the character is encoded in a file, |
515 (encoded (encode-coding-char char coding))) | 522 (encoded (encode-coding-char char coding))) |
516 (if encoded | 523 (if encoded |
517 (encoded-string-description encoded coding) | 524 (encoded-string-description encoded coding) |
518 "not encodable")))) | 525 "not encodable")))) |
519 ,@(let ((unicodedata (and unicode | 526 ,@(let ((unicodedata (and unicode |
520 (unicode-data unicode)))) | 527 (describe-char-unicode-data unicode)))) |
521 (if unicodedata | 528 (if unicodedata |
522 (cons (list "Unicode data" " ") unicodedata)))))) | 529 (cons (list "Unicode data" " ") unicodedata)))))) |
523 (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) | 530 (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) |
524 item-list))) | 531 item-list))) |
525 (when (eq (current-buffer) (get-buffer "*Help*")) | 532 (when (eq (current-buffer) (get-buffer "*Help*")) |