comparison lisp/descr-text.el @ 51278:7192dc1bfcf4

(describe-char-unicode-data): New dummy definition. Real definition commented out since we can't use UnicodeData.txt as is. (describe-char-unicodedata-file): Variable commented out.
author Richard M. Stallman <rms@gnu.org>
date Wed, 28 May 2003 11:14:07 +0000
parents d68739c97632
children 67502df21b92
comparison
equal deleted inserted replaced
51277:caaa4fda6808 51278:7192dc1bfcf4
216 ;; Text properties 216 ;; Text properties
217 (when properties 217 (when properties
218 (newline) 218 (newline)
219 (widget-insert "There are text properties here:\n") 219 (widget-insert "There are text properties here:\n")
220 (describe-property-list properties))))) 220 (describe-property-list properties)))))
221 221
222 (defcustom unicodedata-file nil 222 ;;; We cannot use the UnicodeData.txt file as such; it is not free.
223 "Location of Unicode data file. 223 ;;; We can turn that info a different format and release the result
224 This is the UnicodeData.txt file from the Unicode consortium, used for 224 ;;; as free data. When that is done, we could reinstate the code below.
225 diagnostics. If it is non-nil `describe-char-after' will print data 225 ;;; For the mean time, here is a dummy placeholder.
226 looked up from it. This facility is mostly of use to people doing 226 ;;; -- rms
227 multilingual development. 227 (defun describe-char-unicode-data (char) nil)
228 228
229 This is a fairly large file, not typically present on GNU systems. At 229 ;;; (defcustom describe-char-unicodedata-file nil
230 the time of writing it is at 230 ;;; "Location of Unicode data file.
231 <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>." 231 ;;; This is the UnicodeData.txt file from the Unicode consortium, used for
232 :group 'mule 232 ;;; diagnostics. If it is non-nil `describe-char-after' will print data
233 :version "21.5" 233 ;;; looked up from it. This facility is mostly of use to people doing
234 :type '(choice (const :tag "None" nil) 234 ;;; multilingual development.
235 file)) 235
236 236 ;;; This is a fairly large file, not typically present on GNU systems. At
237 ;; We could convert the unidata file into a Lispy form once-for-all 237 ;;; the time of writing it is at
238 ;; and distribute it for loading on demand. It might be made more 238 ;;; <URL:ftp://www.unicode.org/Public/UNIDATA/UnicodeData.txt>."
239 ;; space-efficient by splitting strings word-wise and replacing them 239 ;;; :group 'mule
240 ;; with lists of symbols interned in a private obarray, e.g. 240 ;;; :version "21.5"
241 ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). 241 ;;; :type '(choice (const :tag "None" nil)
242 242 ;;; file))
243 ;; Fixme: Check whether this needs updating for Unicode 4. 243
244 (defun unicode-data (char) 244 ;;; ;; We could convert the unidata file into a Lispy form once-for-all
245 "Return a list of Unicode data for unicode CHAR. 245 ;;; ;; and distribute it for loading on demand. It might be made more
246 Each element is a list of a property description and the property value. 246 ;;; ;; space-efficient by splitting strings word-wise and replacing them
247 The list is null if CHAR isn't found in `unicodedata-file'." 247 ;;; ;; with lists of symbols interned in a private obarray, e.g.
248 (when unicodedata-file 248 ;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A).
249 (unless (file-exists-p unicodedata-file) 249
250 (error "`unicodedata-file' %s not found" unicodedata-file)) 250 ;;; ;; Fixme: Check whether this needs updating for Unicode 4.
251 (save-excursion 251 ;;; (defun describe-char-unicode-data (char)
252 ;; Find file in fundamental mode to avoid, e.g. flyspell turned 252 ;;; "Return a list of Unicode data for unicode CHAR.
253 ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. 253 ;;; Each element is a list of a property description and the property value.
254 (set-buffer (let ((auto-mode-alist)) 254 ;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'."
255 (find-file-noselect unicodedata-file))) 255 ;;; (when describe-char-unicodedata-file
256 (goto-char (point-min)) 256 ;;; (unless (file-exists-p describe-char-unicodedata-file)
257 (let ((hex (format "%04X" char)) 257 ;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file))
258 found first last) 258 ;;; (save-excursion
259 (if (re-search-forward (concat "^" hex) nil t) 259 ;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned
260 (setq found t) 260 ;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings.
261 ;; It's not listed explicitly. Look for ranges, e.g. CJK 261 ;;; (set-buffer (let ((auto-mode-alist))
262 ;; ideographs, and check whether it's in one of them. 262 ;;; (find-file-noselect describe-char-unicodedata-file)))
263 (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) 263 ;;; (goto-char (point-min))
264 (>= char (setq first 264 ;;; (let ((hex (format "%04X" char))
265 (string-to-number (match-string 1) 16))) 265 ;;; found first last)
266 (progn 266 ;;; (if (re-search-forward (concat "^" hex) nil t)
267 (forward-line 1) 267 ;;; (setq found t)
268 (looking-at "^\\([^;]+\\);[^;]+Last>;") 268 ;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK
269 (> char 269 ;;; ;; ideographs, and check whether it's in one of them.
270 (setq last 270 ;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t)
271 (string-to-number (match-string 1) 16)))))) 271 ;;; (>= char (setq first
272 (if (and (>= char first) 272 ;;; (string-to-number (match-string 1) 16)))
273 (<= char last)) 273 ;;; (progn
274 (setq found t))) 274 ;;; (forward-line 1)
275 (if found 275 ;;; (looking-at "^\\([^;]+\\);[^;]+Last>;")
276 (let ((fields (mapcar (lambda (elt) 276 ;;; (> char
277 (if (> (length elt) 0) 277 ;;; (setq last
278 elt)) 278 ;;; (string-to-number (match-string 1) 16))))))
279 (cdr (split-string 279 ;;; (if (and (>= char first)
280 (buffer-substring 280 ;;; (<= char last))
281 (line-beginning-position) 281 ;;; (setq found t)))
282 (line-end-position)) 282 ;;; (if found
283 ";"))))) 283 ;;; (let ((fields (mapcar (lambda (elt)
284 ;; The length depends on whether the last field was empty. 284 ;;; (if (> (length elt) 0)
285 (unless (or (= 13 (length fields)) 285 ;;; elt))
286 (= 14 (length fields))) 286 ;;; (cdr (split-string
287 (error "Invalid contents in %s" unicodedata-file)) 287 ;;; (buffer-substring
288 ;; The field names and values lists are slightly 288 ;;; (line-beginning-position)
289 ;; modified from Mule-UCS unidata.el. 289 ;;; (line-end-position))
290 (list 290 ;;; ";")))))
291 (list "Name" (let ((name (nth 0 fields))) 291 ;;; ;; The length depends on whether the last field was empty.
292 ;; Check for <..., First>, <..., Last> 292 ;;; (unless (or (= 13 (length fields))
293 (if (string-match "\\`\\(<[^,]+\\)," name) 293 ;;; (= 14 (length fields)))
294 (concat (match-string 1 name) ">") 294 ;;; (error "Invalid contents in %s" describe-char-unicodedata-file))
295 name))) 295 ;;; ;; The field names and values lists are slightly
296 (list "Category" 296 ;;; ;; modified from Mule-UCS unidata.el.
297 (cdr (assoc 297 ;;; (list
298 (nth 1 fields) 298 ;;; (list "Name" (let ((name (nth 0 fields)))
299 '(("Lu" . "uppercase letter") 299 ;;; ;; Check for <..., First>, <..., Last>
300 ("Ll" . "lowercase letter") 300 ;;; (if (string-match "\\`\\(<[^,]+\\)," name)
301 ("Lt" . "titlecase letter") 301 ;;; (concat (match-string 1 name) ">")
302 ("Mn" . "non-spacing mark") 302 ;;; name)))
303 ("Mc" . "spacing-combining mark") 303 ;;; (list "Category"
304 ("Me" . "enclosing mark") 304 ;;; (cdr (assoc
305 ("Nd" . "decimal digit") 305 ;;; (nth 1 fields)
306 ("Nl" . "letter number") 306 ;;; '(("Lu" . "uppercase letter")
307 ("No" . "other number") 307 ;;; ("Ll" . "lowercase letter")
308 ("Zs" . "space separator") 308 ;;; ("Lt" . "titlecase letter")
309 ("Zl" . "line separator") 309 ;;; ("Mn" . "non-spacing mark")
310 ("Zp" . "paragraph separator") 310 ;;; ("Mc" . "spacing-combining mark")
311 ("Cc" . "other control") 311 ;;; ("Me" . "enclosing mark")
312 ("Cf" . "other format") 312 ;;; ("Nd" . "decimal digit")
313 ("Cs" . "surrogate") 313 ;;; ("Nl" . "letter number")
314 ("Co" . "private use") 314 ;;; ("No" . "other number")
315 ("Cn" . "not assigned") 315 ;;; ("Zs" . "space separator")
316 ("Lm" . "modifier letter") 316 ;;; ("Zl" . "line separator")
317 ("Lo" . "other letter") 317 ;;; ("Zp" . "paragraph separator")
318 ("Pc" . "connector punctuation") 318 ;;; ("Cc" . "other control")
319 ("Pd" . "dash punctuation") 319 ;;; ("Cf" . "other format")
320 ("Ps" . "open punctuation") 320 ;;; ("Cs" . "surrogate")
321 ("Pe" . "close punctuation") 321 ;;; ("Co" . "private use")
322 ("Pi" . "initial-quotation punctuation") 322 ;;; ("Cn" . "not assigned")
323 ("Pf" . "final-quotation punctuation") 323 ;;; ("Lm" . "modifier letter")
324 ("Po" . "other punctuation") 324 ;;; ("Lo" . "other letter")
325 ("Sm" . "math symbol") 325 ;;; ("Pc" . "connector punctuation")
326 ("Sc" . "currency symbol") 326 ;;; ("Pd" . "dash punctuation")
327 ("Sk" . "modifier symbol") 327 ;;; ("Ps" . "open punctuation")
328 ("So" . "other symbol"))))) 328 ;;; ("Pe" . "close punctuation")
329 (list "Combining class" 329 ;;; ("Pi" . "initial-quotation punctuation")
330 (cdr (assoc 330 ;;; ("Pf" . "final-quotation punctuation")
331 (string-to-number (nth 2 fields)) 331 ;;; ("Po" . "other punctuation")
332 '((0 . "Spacing") 332 ;;; ("Sm" . "math symbol")
333 (1 . "Overlays and interior") 333 ;;; ("Sc" . "currency symbol")
334 (7 . "Nuktas") 334 ;;; ("Sk" . "modifier symbol")
335 (8 . "Hiragana/Katakana voicing marks") 335 ;;; ("So" . "other symbol")))))
336 (9 . "Viramas") 336 ;;; (list "Combining class"
337 (10 . "Start of fixed position classes") 337 ;;; (cdr (assoc
338 (199 . "End of fixed position classes") 338 ;;; (string-to-number (nth 2 fields))
339 (200 . "Below left attached") 339 ;;; '((0 . "Spacing")
340 (202 . "Below attached") 340 ;;; (1 . "Overlays and interior")
341 (204 . "Below right attached") 341 ;;; (7 . "Nuktas")
342 (208 . "Left attached (reordrant around \ 342 ;;; (8 . "Hiragana/Katakana voicing marks")
343 single base character)") 343 ;;; (9 . "Viramas")
344 (210 . "Right attached") 344 ;;; (10 . "Start of fixed position classes")
345 (212 . "Above left attached") 345 ;;; (199 . "End of fixed position classes")
346 (214 . "Above attached") 346 ;;; (200 . "Below left attached")
347 (216 . "Above right attached") 347 ;;; (202 . "Below attached")
348 (218 . "Below left") 348 ;;; (204 . "Below right attached")
349 (220 . "Below") 349 ;;; (208 . "Left attached (reordrant around \
350 (222 . "Below right") 350 ;;; single base character)")
351 (224 . "Left (reordrant around single base \ 351 ;;; (210 . "Right attached")
352 character)") 352 ;;; (212 . "Above left attached")
353 (226 . "Right") 353 ;;; (214 . "Above attached")
354 (228 . "Above left") 354 ;;; (216 . "Above right attached")
355 (230 . "Above") 355 ;;; (218 . "Below left")
356 (232 . "Above right") 356 ;;; (220 . "Below")
357 (233 . "Double below") 357 ;;; (222 . "Below right")
358 (234 . "Double above") 358 ;;; (224 . "Left (reordrant around single base \
359 (240 . "Below (iota subscript)"))))) 359 ;;; character)")
360 (list "Bidi category" 360 ;;; (226 . "Right")
361 (cdr (assoc 361 ;;; (228 . "Above left")
362 (nth 3 fields) 362 ;;; (230 . "Above")
363 '(("L" . "Left-to-Right") 363 ;;; (232 . "Above right")
364 ("LRE" . "Left-to-Right Embedding") 364 ;;; (233 . "Double below")
365 ("LRO" . "Left-to-Right Override") 365 ;;; (234 . "Double above")
366 ("R" . "Right-to-Left") 366 ;;; (240 . "Below (iota subscript)")))))
367 ("AL" . "Right-to-Left Arabic") 367 ;;; (list "Bidi category"
368 ("RLE" . "Right-to-Left Embedding") 368 ;;; (cdr (assoc
369 ("RLO" . "Right-to-Left Override") 369 ;;; (nth 3 fields)
370 ("PDF" . "Pop Directional Format") 370 ;;; '(("L" . "Left-to-Right")
371 ("EN" . "European Number") 371 ;;; ("LRE" . "Left-to-Right Embedding")
372 ("ES" . "European Number Separator") 372 ;;; ("LRO" . "Left-to-Right Override")
373 ("ET" . "European Number Terminator") 373 ;;; ("R" . "Right-to-Left")
374 ("AN" . "Arabic Number") 374 ;;; ("AL" . "Right-to-Left Arabic")
375 ("CS" . "Common Number Separator") 375 ;;; ("RLE" . "Right-to-Left Embedding")
376 ("NSM" . "Non-Spacing Mark") 376 ;;; ("RLO" . "Right-to-Left Override")
377 ("BN" . "Boundary Neutral") 377 ;;; ("PDF" . "Pop Directional Format")
378 ("B" . "Paragraph Separator") 378 ;;; ("EN" . "European Number")
379 ("S" . "Segment Separator") 379 ;;; ("ES" . "European Number Separator")
380 ("WS" . "Whitespace") 380 ;;; ("ET" . "European Number Terminator")
381 ("ON" . "Other Neutrals"))))) 381 ;;; ("AN" . "Arabic Number")
382 (list 382 ;;; ("CS" . "Common Number Separator")
383 "Decomposition" 383 ;;; ("NSM" . "Non-Spacing Mark")
384 (if (nth 4 fields) 384 ;;; ("BN" . "Boundary Neutral")
385 (let* ((parts (split-string (nth 4 fields))) 385 ;;; ("B" . "Paragraph Separator")
386 (info (car parts))) 386 ;;; ("S" . "Segment Separator")
387 (if (string-match "\\`<\\(.+\\)>\\'" info) 387 ;;; ("WS" . "Whitespace")
388 (setq info (match-string 1 info)) 388 ;;; ("ON" . "Other Neutrals")))))
389 (setq info nil)) 389 ;;; (list
390 (if info (setq parts (cdr parts))) 390 ;;; "Decomposition"
391 ;; Maybe printing ? for unrepresentable unicodes 391 ;;; (if (nth 4 fields)
392 ;; here and below should be changed? 392 ;;; (let* ((parts (split-string (nth 4 fields)))
393 (setq parts (mapconcat 393 ;;; (info (car parts)))
394 (lambda (arg) 394 ;;; (if (string-match "\\`<\\(.+\\)>\\'" info)
395 (string (or (decode-char 395 ;;; (setq info (match-string 1 info))
396 'ucs 396 ;;; (setq info nil))
397 (string-to-number arg 16)) 397 ;;; (if info (setq parts (cdr parts)))
398 ??))) 398 ;;; ;; Maybe printing ? for unrepresentable unicodes
399 parts " ")) 399 ;;; ;; here and below should be changed?
400 (concat info parts)))) 400 ;;; (setq parts (mapconcat
401 (list "Decimal digit value" 401 ;;; (lambda (arg)
402 (nth 5 fields)) 402 ;;; (string (or (decode-char
403 (list "Digit value" 403 ;;; 'ucs
404 (nth 6 fields)) 404 ;;; (string-to-number arg 16))
405 (list "Numeric value" 405 ;;; ??)))
406 (nth 7 fields)) 406 ;;; parts " "))
407 (list "Mirrored" 407 ;;; (concat info parts))))
408 (if (equal "Y" (nth 8 fields)) 408 ;;; (list "Decimal digit value"
409 "yes")) 409 ;;; (nth 5 fields))
410 (list "Old name" (nth 9 fields)) 410 ;;; (list "Digit value"
411 (list "ISO 10646 comment" (nth 10 fields)) 411 ;;; (nth 6 fields))
412 (list "Uppercase" (and (nth 11 fields) 412 ;;; (list "Numeric value"
413 (string (or (decode-char 413 ;;; (nth 7 fields))
414 'ucs 414 ;;; (list "Mirrored"
415 (string-to-number 415 ;;; (if (equal "Y" (nth 8 fields))
416 (nth 11 fields) 16)) 416 ;;; "yes"))
417 ??)))) 417 ;;; (list "Old name" (nth 9 fields))
418 (list "Lowercase" (and (nth 12 fields) 418 ;;; (list "ISO 10646 comment" (nth 10 fields))
419 (string (or (decode-char 419 ;;; (list "Uppercase" (and (nth 11 fields)
420 'ucs 420 ;;; (string (or (decode-char
421 (string-to-number 421 ;;; 'ucs
422 (nth 12 fields) 16)) 422 ;;; (string-to-number
423 ??)))) 423 ;;; (nth 11 fields) 16))
424 (list "Titlecase" (and (nth 13 fields) 424 ;;; ??))))
425 (string (or (decode-char 425 ;;; (list "Lowercase" (and (nth 12 fields)
426 'ucs 426 ;;; (string (or (decode-char
427 (string-to-number 427 ;;; 'ucs
428 (nth 13 fields) 16)) 428 ;;; (string-to-number
429 ??))))))))))) 429 ;;; (nth 12 fields) 16))
430 430 ;;; ??))))
431 ;;; (list "Titlecase" (and (nth 13 fields)
432 ;;; (string (or (decode-char
433 ;;; 'ucs
434 ;;; (string-to-number
435 ;;; (nth 13 fields) 16))
436 ;;; ??)))))))))))
437
431 ;;;###autoload 438 ;;;###autoload
432 (defun describe-char (pos) 439 (defun describe-char (pos)
433 "Describe the character after POS (interactively, the character after point). 440 "Describe the character after POS (interactively, the character after point).
434 The information includes character code, charset and code points in it, 441 The information includes character code, charset and code points in it,
435 syntax, category, how the character is encoded in a file, 442 syntax, category, how the character is encoded in a file,
515 (encoded (encode-coding-char char coding))) 522 (encoded (encode-coding-char char coding)))
516 (if encoded 523 (if encoded
517 (encoded-string-description encoded coding) 524 (encoded-string-description encoded coding)
518 "not encodable")))) 525 "not encodable"))))
519 ,@(let ((unicodedata (and unicode 526 ,@(let ((unicodedata (and unicode
520 (unicode-data unicode)))) 527 (describe-char-unicode-data unicode))))
521 (if unicodedata 528 (if unicodedata
522 (cons (list "Unicode data" " ") unicodedata)))))) 529 (cons (list "Unicode data" " ") unicodedata))))))
523 (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) 530 (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x)))
524 item-list))) 531 item-list)))
525 (when (eq (current-buffer) (get-buffer "*Help*")) 532 (when (eq (current-buffer) (get-buffer "*Help*"))