comparison lisp/international/mule-cmds.el @ 23889:3b5045c64434

(find-coding-systems-for-charsets): Handle the case of unknown charset. (find-multibyte-characters): If invalid multibyte characters are found, return the corresponding strings instead of character codes. (find-multibyte-characters): Adjusted for the above change. (select-safe-coding-system): For a unibyte buffer, always returns DEFAULT-CODING-SYSTEM. (get-charset-property): Fix previous change. Make it a function. (put-charset-property): Make it a function.
author Kenichi Handa <handa@m17n.org>
date Tue, 15 Dec 1998 04:35:38 +0000
parents e7dfcab35c8b
children ccc00be328a1
comparison
equal deleted inserted replaced
23888:86cf240ba90e 23889:3b5045c64434
288 (if (or (null charsets) 288 (if (or (null charsets)
289 (and (= (length charsets) 1) 289 (and (= (length charsets) 1)
290 (eq 'ascii (car charsets)))) 290 (eq 'ascii (car charsets))))
291 '(undecided) 291 '(undecided)
292 (setq charsets (delq 'composition charsets)) 292 (setq charsets (delq 'composition charsets))
293 (let ((l coding-system-list) 293 (let ((l (coding-system-list 'base-only))
294 (charset-prefered-codings 294 (charset-prefered-codings
295 (mapcar (function 295 (mapcar (function
296 (lambda (x) 296 (lambda (x)
297 (get-charset-property x 'prefered-coding-system))) 297 (if (eq x 'unknown)
298 'raw-text
299 (get-charset-property x 'prefered-coding-system))))
298 charsets)) 300 charsets))
299 (priorities (mapcar (function (lambda (x) (symbol-value x))) 301 (priorities (mapcar (function (lambda (x) (symbol-value x)))
300 coding-category-list)) 302 coding-category-list))
301 codings coding safe) 303 codings coding safe)
304 (if (memq 'unknown charsets)
305 ;; The region contains invalid multibyte characters.
306 (setq l '(raw-text)))
302 (while l 307 (while l
303 (setq coding (car l) l (cdr l)) 308 (setq coding (car l) l (cdr l))
304 (if (and (eq coding (coding-system-base coding)) 309 (if (and (setq safe (coding-system-get coding 'safe-charsets))
305 (setq safe (coding-system-get coding 'safe-charsets))
306 (or (eq safe t) 310 (or (eq safe t)
307 (find-coding-systems-region-subset-p charsets safe))) 311 (find-coding-systems-region-subset-p charsets safe)))
308 ;; We put the higher priority to coding systems included 312 ;; We put the higher priority to coding systems included
309 ;; in CHARSET-PREFERED-CODINGS, and within them, put the 313 ;; in CHARSET-PREFERED-CODINGS, and within them, put the
310 ;; higher priority to coding systems which support smaller 314 ;; higher priority to coding systems which support smaller
328 where 332 where
329 CHARSET is a character set, 333 CHARSET is a character set,
330 COUNT is a number of characters, 334 COUNT is a number of characters,
331 CHARs are found characters of the character set. 335 CHARs are found characters of the character set.
332 Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. 336 Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list.
333 Optional 4th arg EXCLUDE is a list of character sets to be ignored." 337 Optional 4th arg EXCLUDE is a list of character sets to be ignored.
338
339 For invalid characters, CHARs are actually strings."
334 (let ((chars nil) 340 (let ((chars nil)
335 charset char) 341 charset char)
336 (if (stringp from) 342 (if (stringp from)
337 (let ((idx 0)) 343 (let ((idx 0))
338 (while (setq idx (string-match "[^\000-\177]" from idx)) 344 (while (setq idx (string-match "[^\000-\177]" from idx))
339 (setq char (aref from idx) 345 (setq char (aref from idx)
340 charset (char-charset char)) 346 charset (char-charset char))
341 (if (not (memq charset excludes)) 347 (if (eq charset 'unknown)
348 (setq char (match-string 0)))
349 (if (or (eq charset 'unknown)
350 (not (or (eq excludes t) (memq charset excludes))))
342 (let ((slot (assq charset chars))) 351 (let ((slot (assq charset chars)))
343 (if slot 352 (if slot
344 (if (not (memq char (nthcdr 2 slot))) 353 (if (not (memq char (nthcdr 2 slot)))
345 (let ((count (nth 1 slot))) 354 (let ((count (nth 1 slot)))
346 (setcar (cdr slot) (1+ count)) 355 (setcar (cdr slot) (1+ count))
351 (save-excursion 360 (save-excursion
352 (goto-char from) 361 (goto-char from)
353 (while (re-search-forward "[^\000-\177]" to t) 362 (while (re-search-forward "[^\000-\177]" to t)
354 (setq char (preceding-char) 363 (setq char (preceding-char)
355 charset (char-charset char)) 364 charset (char-charset char))
356 (if (not (memq charset excludes)) 365 (if (eq charset 'unknown)
366 (setq char (match-string 0)))
367 (if (or (eq charset 'unknown)
368 (not (or (eq excludes t) (memq charset excludes))))
357 (let ((slot (assq charset chars))) 369 (let ((slot (assq charset chars)))
358 (if slot 370 (if slot
359 (if (not (memq char (nthcdr 2 slot))) 371 (if (not (member char (nthcdr 2 slot)))
360 (let ((count (nth 1 slot))) 372 (let ((count (nth 1 slot)))
361 (setcar (cdr slot) (1+ count)) 373 (setcar (cdr slot) (1+ count))
362 (if (or (not maxcount) (< count maxcount)) 374 (if (or (not maxcount) (< count maxcount))
363 (nconc slot (list char))))) 375 (nconc slot (list char)))))
364 (setq chars (cons (list charset 1 char) chars)))))))) 376 (setq chars (cons (list charset 1 char) chars))))))))
388 (or default-coding-system 400 (or default-coding-system
389 (setq default-coding-system buffer-file-coding-system)) 401 (setq default-coding-system buffer-file-coding-system))
390 (let* ((charsets (if (stringp from) (find-charset-string from) 402 (let* ((charsets (if (stringp from) (find-charset-string from)
391 (find-charset-region from to))) 403 (find-charset-region from to)))
392 (safe-coding-systems (find-coding-systems-for-charsets charsets))) 404 (safe-coding-systems (find-coding-systems-for-charsets charsets)))
393 (if (or (eq (car safe-coding-systems) 'undecided) 405 (if (or (not enable-multibyte-characters)
406 (eq (car safe-coding-systems) 'undecided)
394 (eq default-coding-system 'no-conversion) 407 (eq default-coding-system 'no-conversion)
395 (and default-coding-system 408 (and default-coding-system
396 (memq (coding-system-base default-coding-system) 409 (memq (coding-system-base default-coding-system)
397 safe-coding-systems))) 410 safe-coding-systems)))
398 default-coding-system 411 default-coding-system
447 (when (> (length (car non-safe-chars)) 2) 460 (when (> (length (car non-safe-chars)) 2)
448 (setq shown (1+ shown)) 461 (setq shown (1+ shown))
449 (insert (format "%25s: " (car (car non-safe-chars)))) 462 (insert (format "%25s: " (car (car non-safe-chars))))
450 (let ((l (nthcdr 2 (car non-safe-chars)))) 463 (let ((l (nthcdr 2 (car non-safe-chars))))
451 (while l 464 (while l
452 (insert (car l)) 465 (if (or (stringp (car l)) (char-valid-p (car l)))
466 (insert (car l)))
453 (setq l (cdr l)))) 467 (setq l (cdr l))))
454 (if (> (nth 1 (car non-safe-chars)) 3) 468 (if (> (nth 1 (car non-safe-chars)) 3)
455 (insert "...")) 469 (insert "..."))
456 (insert "\n")) 470 (insert "\n"))
457 (setq non-safe-chars (cdr non-safe-chars))) 471 (setq non-safe-chars (cdr non-safe-chars)))
1323 (terpri))) 1337 (terpri)))
1324 (setq l (cdr l)))))))) 1338 (setq l (cdr l))))))))
1325 1339
1326 ;;; Charset property 1340 ;;; Charset property
1327 1341
1328 (defsubst get-charset-property (charset propname) 1342 (defun get-charset-property (charset propname)
1329 "Return the value of CHARSET's PROPNAME property. 1343 "Return the value of CHARSET's PROPNAME property.
1330 This is the last value stored with 1344 This is the last value stored with
1331 (put-charset-property CHARSET PROPNAME VALUE)." 1345 (put-charset-property CHARSET PROPNAME VALUE)."
1332 (or (eq charset 'composition) 1346 (and (not (eq charset 'composition))
1333 (plist-get (charset-plist charset) propname))) 1347 (plist-get (charset-plist charset) propname)))
1334 1348
1335 (defsubst put-charset-property (charset propname value) 1349 (defun put-charset-property (charset propname value)
1336 "Store CHARSETS's PROPNAME property with value VALUE. 1350 "Store CHARSETS's PROPNAME property with value VALUE.
1337 It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." 1351 It can be retrieved with `(get-charset-property CHARSET PROPNAME)'."
1338 (or (eq charset 'composition) 1352 (or (eq charset 'composition)
1339 (set-charset-plist charset 1353 (set-charset-plist charset
1340 (plist-put (charset-plist charset) propname value)))) 1354 (plist-put (charset-plist charset) propname value))))