Mercurial > emacs
comparison lisp/international/mule-cmds.el @ 23889:3b5045c64434
(find-coding-systems-for-charsets):
Handle the case of unknown charset.
(find-multibyte-characters): If invalid multibyte characters are
found, return the corresponding strings instead of character
codes.
(find-multibyte-characters): Adjusted for the above change.
(select-safe-coding-system): For a unibyte buffer, always returns
DEFAULT-CODING-SYSTEM.
(get-charset-property): Fix previous change. Make it a function.
(put-charset-property): Make it a function.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 15 Dec 1998 04:35:38 +0000 |
parents | e7dfcab35c8b |
children | ccc00be328a1 |
comparison
equal
deleted
inserted
replaced
23888:86cf240ba90e | 23889:3b5045c64434 |
---|---|
288 (if (or (null charsets) | 288 (if (or (null charsets) |
289 (and (= (length charsets) 1) | 289 (and (= (length charsets) 1) |
290 (eq 'ascii (car charsets)))) | 290 (eq 'ascii (car charsets)))) |
291 '(undecided) | 291 '(undecided) |
292 (setq charsets (delq 'composition charsets)) | 292 (setq charsets (delq 'composition charsets)) |
293 (let ((l coding-system-list) | 293 (let ((l (coding-system-list 'base-only)) |
294 (charset-prefered-codings | 294 (charset-prefered-codings |
295 (mapcar (function | 295 (mapcar (function |
296 (lambda (x) | 296 (lambda (x) |
297 (get-charset-property x 'prefered-coding-system))) | 297 (if (eq x 'unknown) |
298 'raw-text | |
299 (get-charset-property x 'prefered-coding-system)))) | |
298 charsets)) | 300 charsets)) |
299 (priorities (mapcar (function (lambda (x) (symbol-value x))) | 301 (priorities (mapcar (function (lambda (x) (symbol-value x))) |
300 coding-category-list)) | 302 coding-category-list)) |
301 codings coding safe) | 303 codings coding safe) |
304 (if (memq 'unknown charsets) | |
305 ;; The region contains invalid multibyte characters. | |
306 (setq l '(raw-text))) | |
302 (while l | 307 (while l |
303 (setq coding (car l) l (cdr l)) | 308 (setq coding (car l) l (cdr l)) |
304 (if (and (eq coding (coding-system-base coding)) | 309 (if (and (setq safe (coding-system-get coding 'safe-charsets)) |
305 (setq safe (coding-system-get coding 'safe-charsets)) | |
306 (or (eq safe t) | 310 (or (eq safe t) |
307 (find-coding-systems-region-subset-p charsets safe))) | 311 (find-coding-systems-region-subset-p charsets safe))) |
308 ;; We put the higher priority to coding systems included | 312 ;; We put the higher priority to coding systems included |
309 ;; in CHARSET-PREFERED-CODINGS, and within them, put the | 313 ;; in CHARSET-PREFERED-CODINGS, and within them, put the |
310 ;; higher priority to coding systems which support smaller | 314 ;; higher priority to coding systems which support smaller |
328 where | 332 where |
329 CHARSET is a character set, | 333 CHARSET is a character set, |
330 COUNT is a number of characters, | 334 COUNT is a number of characters, |
331 CHARs are found characters of the character set. | 335 CHARs are found characters of the character set. |
332 Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. | 336 Optional 3rd arg MAXCOUNT limits how many CHARs are put in the above list. |
333 Optional 4th arg EXCLUDE is a list of character sets to be ignored." | 337 Optional 4th arg EXCLUDE is a list of character sets to be ignored. |
338 | |
339 For invalid characters, CHARs are actually strings." | |
334 (let ((chars nil) | 340 (let ((chars nil) |
335 charset char) | 341 charset char) |
336 (if (stringp from) | 342 (if (stringp from) |
337 (let ((idx 0)) | 343 (let ((idx 0)) |
338 (while (setq idx (string-match "[^\000-\177]" from idx)) | 344 (while (setq idx (string-match "[^\000-\177]" from idx)) |
339 (setq char (aref from idx) | 345 (setq char (aref from idx) |
340 charset (char-charset char)) | 346 charset (char-charset char)) |
341 (if (not (memq charset excludes)) | 347 (if (eq charset 'unknown) |
348 (setq char (match-string 0))) | |
349 (if (or (eq charset 'unknown) | |
350 (not (or (eq excludes t) (memq charset excludes)))) | |
342 (let ((slot (assq charset chars))) | 351 (let ((slot (assq charset chars))) |
343 (if slot | 352 (if slot |
344 (if (not (memq char (nthcdr 2 slot))) | 353 (if (not (memq char (nthcdr 2 slot))) |
345 (let ((count (nth 1 slot))) | 354 (let ((count (nth 1 slot))) |
346 (setcar (cdr slot) (1+ count)) | 355 (setcar (cdr slot) (1+ count)) |
351 (save-excursion | 360 (save-excursion |
352 (goto-char from) | 361 (goto-char from) |
353 (while (re-search-forward "[^\000-\177]" to t) | 362 (while (re-search-forward "[^\000-\177]" to t) |
354 (setq char (preceding-char) | 363 (setq char (preceding-char) |
355 charset (char-charset char)) | 364 charset (char-charset char)) |
356 (if (not (memq charset excludes)) | 365 (if (eq charset 'unknown) |
366 (setq char (match-string 0))) | |
367 (if (or (eq charset 'unknown) | |
368 (not (or (eq excludes t) (memq charset excludes)))) | |
357 (let ((slot (assq charset chars))) | 369 (let ((slot (assq charset chars))) |
358 (if slot | 370 (if slot |
359 (if (not (memq char (nthcdr 2 slot))) | 371 (if (not (member char (nthcdr 2 slot))) |
360 (let ((count (nth 1 slot))) | 372 (let ((count (nth 1 slot))) |
361 (setcar (cdr slot) (1+ count)) | 373 (setcar (cdr slot) (1+ count)) |
362 (if (or (not maxcount) (< count maxcount)) | 374 (if (or (not maxcount) (< count maxcount)) |
363 (nconc slot (list char))))) | 375 (nconc slot (list char))))) |
364 (setq chars (cons (list charset 1 char) chars)))))))) | 376 (setq chars (cons (list charset 1 char) chars)))))))) |
388 (or default-coding-system | 400 (or default-coding-system |
389 (setq default-coding-system buffer-file-coding-system)) | 401 (setq default-coding-system buffer-file-coding-system)) |
390 (let* ((charsets (if (stringp from) (find-charset-string from) | 402 (let* ((charsets (if (stringp from) (find-charset-string from) |
391 (find-charset-region from to))) | 403 (find-charset-region from to))) |
392 (safe-coding-systems (find-coding-systems-for-charsets charsets))) | 404 (safe-coding-systems (find-coding-systems-for-charsets charsets))) |
393 (if (or (eq (car safe-coding-systems) 'undecided) | 405 (if (or (not enable-multibyte-characters) |
406 (eq (car safe-coding-systems) 'undecided) | |
394 (eq default-coding-system 'no-conversion) | 407 (eq default-coding-system 'no-conversion) |
395 (and default-coding-system | 408 (and default-coding-system |
396 (memq (coding-system-base default-coding-system) | 409 (memq (coding-system-base default-coding-system) |
397 safe-coding-systems))) | 410 safe-coding-systems))) |
398 default-coding-system | 411 default-coding-system |
447 (when (> (length (car non-safe-chars)) 2) | 460 (when (> (length (car non-safe-chars)) 2) |
448 (setq shown (1+ shown)) | 461 (setq shown (1+ shown)) |
449 (insert (format "%25s: " (car (car non-safe-chars)))) | 462 (insert (format "%25s: " (car (car non-safe-chars)))) |
450 (let ((l (nthcdr 2 (car non-safe-chars)))) | 463 (let ((l (nthcdr 2 (car non-safe-chars)))) |
451 (while l | 464 (while l |
452 (insert (car l)) | 465 (if (or (stringp (car l)) (char-valid-p (car l))) |
466 (insert (car l))) | |
453 (setq l (cdr l)))) | 467 (setq l (cdr l)))) |
454 (if (> (nth 1 (car non-safe-chars)) 3) | 468 (if (> (nth 1 (car non-safe-chars)) 3) |
455 (insert "...")) | 469 (insert "...")) |
456 (insert "\n")) | 470 (insert "\n")) |
457 (setq non-safe-chars (cdr non-safe-chars))) | 471 (setq non-safe-chars (cdr non-safe-chars))) |
1323 (terpri))) | 1337 (terpri))) |
1324 (setq l (cdr l)))))))) | 1338 (setq l (cdr l)))))))) |
1325 | 1339 |
1326 ;;; Charset property | 1340 ;;; Charset property |
1327 | 1341 |
1328 (defsubst get-charset-property (charset propname) | 1342 (defun get-charset-property (charset propname) |
1329 "Return the value of CHARSET's PROPNAME property. | 1343 "Return the value of CHARSET's PROPNAME property. |
1330 This is the last value stored with | 1344 This is the last value stored with |
1331 (put-charset-property CHARSET PROPNAME VALUE)." | 1345 (put-charset-property CHARSET PROPNAME VALUE)." |
1332 (or (eq charset 'composition) | 1346 (and (not (eq charset 'composition)) |
1333 (plist-get (charset-plist charset) propname))) | 1347 (plist-get (charset-plist charset) propname))) |
1334 | 1348 |
1335 (defsubst put-charset-property (charset propname value) | 1349 (defun put-charset-property (charset propname value) |
1336 "Store CHARSETS's PROPNAME property with value VALUE. | 1350 "Store CHARSETS's PROPNAME property with value VALUE. |
1337 It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." | 1351 It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." |
1338 (or (eq charset 'composition) | 1352 (or (eq charset 'composition) |
1339 (set-charset-plist charset | 1353 (set-charset-plist charset |
1340 (plist-put (charset-plist charset) propname value)))) | 1354 (plist-put (charset-plist charset) propname value)))) |