Mercurial > emacs
comparison lisp/international/mule-util.el @ 20730:42d729244a85
(find-safe-coding-system): Moved to
mule-cmds.el.
(detect-coding-with-priority): New macro.
(detect-coding-with-language-environment): New function.
(string-to-sequence): Adjusted for the change of
multibyte-form handling (byte-base to char-base).
(store-substring): Likewise.
(truncate-string-to-width): Likewise.
(decompose-region): Likewise.
(decompose-string): Likewise.
(decompose-composite-char): Call string instead of concat-chars.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Thu, 22 Jan 1998 01:42:20 +0000 |
parents | dc6f12ef4d47 |
children | ef5fd882ca63 |
comparison
equal
deleted
inserted
replaced
20729:821b2167b6c3 | 20730:42d729244a85 |
---|---|
29 | 29 |
30 ;;;###autoload | 30 ;;;###autoload |
31 (defun string-to-sequence (string type) | 31 (defun string-to-sequence (string type) |
32 "Convert STRING to a sequence of TYPE which contains characters in STRING. | 32 "Convert STRING to a sequence of TYPE which contains characters in STRING. |
33 TYPE should be `list' or `vector'." | 33 TYPE should be `list' or `vector'." |
34 (or (eq type 'list) (eq type 'vector) | 34 (let ((len (length string)) |
35 (error "Invalid type: %s" type)) | 35 (i 0) |
36 (let* ((len (length string)) | 36 val) |
37 (i 0) | 37 (cond ((eq type 'list) |
38 l ch) | 38 (setq val (make-list len 0)) |
39 (while (< i len) | 39 (let ((l val)) |
40 (setq ch (if enable-multibyte-characters | 40 (while (< i len) |
41 (sref string i) (aref string i))) | 41 (setcar l (aref string i)) |
42 (setq l (cons ch l)) | 42 (setq l (cdr l) i (1+ i))))) |
43 (setq i (+ i (char-bytes ch)))) | 43 ((eq type 'vector) |
44 (setq l (nreverse l)) | 44 (setq val (make-vector len 0)) |
45 (if (eq type 'list) | 45 (while (< i len) |
46 l | 46 (aset val i (aref string i)) |
47 (vconcat l)))) | 47 (setq i (1+ i)))) |
48 (t | |
49 (error "Invalid type: %s" type))) | |
50 val)) | |
48 | 51 |
49 ;;;###autoload | 52 ;;;###autoload |
50 (defsubst string-to-list (string) | 53 (defsubst string-to-list (string) |
51 "Return a list of characters in STRING." | 54 "Return a list of characters in STRING." |
52 (string-to-sequence string 'list)) | 55 (string-to-sequence string 'list)) |
57 (string-to-sequence string 'vector)) | 60 (string-to-sequence string 'vector)) |
58 | 61 |
59 ;;;###autoload | 62 ;;;###autoload |
60 (defun store-substring (string idx obj) | 63 (defun store-substring (string idx obj) |
61 "Embed OBJ (string or character) at index IDX of STRING." | 64 "Embed OBJ (string or character) at index IDX of STRING." |
62 (let* ((str (cond ((stringp obj) obj) | 65 (if (integerp obj) |
63 ((integerp obj) (char-to-string obj)) | 66 (aset string idx obj) |
64 (t (error | 67 (let ((len1 (length obj)) |
65 "Invalid argument (should be string or character): %s" | 68 (len2 (length string)) |
66 obj)))) | 69 (i 0)) |
67 (string-len (length string)) | 70 (while (< i len1) |
68 (len (length str)) | 71 (aset string (+ idx i) (aref obj i)) |
69 (i 0)) | 72 (setq i (1+ i))))) |
70 (while (and (< i len) (< idx string-len)) | 73 string) |
71 (aset string idx (aref str i)) | |
72 (setq idx (1+ idx) i (1+ i))) | |
73 string)) | |
74 | 74 |
75 ;;;###autoload | 75 ;;;###autoload |
76 (defun truncate-string-to-width (str end-column &optional start-column padding) | 76 (defun truncate-string-to-width (str end-column &optional start-column padding) |
77 "Truncate string STR to end at column END-COLUMN. | 77 "Truncate string STR to end at column END-COLUMN. |
78 The optional 2nd arg START-COLUMN, if non-nil, specifies | 78 The optional 2nd arg START-COLUMN, if non-nil, specifies |
94 (column 0) | 94 (column 0) |
95 (head-padding "") (tail-padding "") | 95 (head-padding "") (tail-padding "") |
96 ch last-column last-idx from-idx) | 96 ch last-column last-idx from-idx) |
97 (condition-case nil | 97 (condition-case nil |
98 (while (< column start-column) | 98 (while (< column start-column) |
99 (setq ch (sref str idx) | 99 (setq ch (aref str idx) |
100 column (+ column (char-width ch)) | 100 column (+ column (char-width ch)) |
101 idx (+ idx (char-bytes ch)))) | 101 idx (1+ idx))) |
102 (args-out-of-range (setq idx len))) | 102 (args-out-of-range (setq idx len))) |
103 (if (< column start-column) | 103 (if (< column start-column) |
104 (if padding (make-string end-column padding) "") | 104 (if padding (make-string end-column padding) "") |
105 (if (and padding (> column start-column)) | 105 (if (and padding (> column start-column)) |
106 (setq head-padding (make-string (- column start-column) ?\ ))) | 106 (setq head-padding (make-string (- column start-column) padding))) |
107 (setq from-idx idx) | 107 (setq from-idx idx) |
108 (if (< end-column column) | 108 (if (< end-column column) |
109 (setq idx from-idx) | 109 (setq idx from-idx) |
110 (condition-case nil | 110 (condition-case nil |
111 (while (< column end-column) | 111 (while (< column end-column) |
112 (setq last-column column | 112 (setq last-column column |
113 last-idx idx | 113 last-idx idx |
114 ch (sref str idx) | 114 ch (aref str idx) |
115 column (+ column (char-width ch)) | 115 column (+ column (char-width ch)) |
116 idx (+ idx (char-bytes ch)))) | 116 idx (1+ idx))) |
117 (args-out-of-range (setq idx len))) | 117 (args-out-of-range (setq idx len))) |
118 (if (> column end-column) | 118 (if (> column end-column) |
119 (setq column last-column idx last-idx)) | 119 (setq column last-column idx last-idx)) |
120 (if (and padding (< column end-column)) | 120 (if (and padding (< column end-column)) |
121 (setq tail-padding (make-string (- end-column column) padding)))) | 121 (setq tail-padding (make-string (- end-column column) padding)))) |
286 (eol-type-2 (coding-system-eol-type coding-system-2))) | 286 (eol-type-2 (coding-system-eol-type coding-system-2))) |
287 (or (eq eol-type-1 eol-type-2) | 287 (or (eq eol-type-1 eol-type-2) |
288 (and (vectorp eol-type-1) (vectorp eol-type-2))))))) | 288 (and (vectorp eol-type-1) (vectorp eol-type-2))))))) |
289 | 289 |
290 ;;;###autoload | 290 ;;;###autoload |
291 (defun find-safe-coding-system (from to) | 291 (defmacro detect-coding-with-priority (from to priority-list) |
292 "Return a list of proper coding systems to encode a text between FROM and TO. | 292 "Detect a coding system of the text between FROM and TO with PRIORITY-LIST. |
293 All coding systems in the list can safely encode any multibyte characters | 293 PRIORITY-LIST is an alist of coding categories vs the corresponding |
294 in the region. | 294 coding systems ordered by priority." |
295 | 295 `(let* ((prio-list ,priority-list) |
296 If the region contains no multibyte charcters, the returned list | 296 (coding-category-list coding-category-list) |
297 contains a single element `undecided'. | 297 ,@(mapcar (function (lambda (x) (list x x))) coding-category-list)) |
298 | 298 (mapcar (function (lambda (x) (set (car x) (cdr x)))) |
299 Kludgy feature: if FROM is a string, then that string is the target | 299 prio-list) |
300 for finding proper coding systems, and TO is ignored." | 300 (set-coding-priority (mapcar (function (lambda (x) (car x))) prio-list)) |
301 (let ((found (if (stringp from) | 301 (detect-coding-region ,from ,to))) |
302 (find-charset-string from) | 302 |
303 (find-charset-region from to))) | 303 ;;;###autoload |
304 (l coding-system-list) | 304 (defun detect-coding-with-language-environment (from to lang-env) |
305 codings coding safe) | 305 "Detect a coding system of the text between FROM and TO with LANG-ENV. |
306 (if (and (= (length found) 1) | 306 The detection takes into accont the coding system priorities for the |
307 (eq 'ascii (car found))) | 307 language environment LANG-ENV." |
308 '(undecided) | 308 (let ((coding-priority (get-language-info lang-env 'coding-priority))) |
309 (while l | 309 (if coding-priority |
310 (setq coding (car l) l (cdr l)) | 310 (detect-coding-with-priority |
311 (if (and (eq coding (coding-system-base coding)) | 311 from to |
312 (setq safe (coding-system-get coding 'safe-charsets)) | 312 (mapcar (function (lambda (x) |
313 (or (eq safe t) | 313 (cons (coding-system-get x 'coding-category) x))) |
314 (catch 'tag | 314 coding-priority)) |
315 (mapcar (function (lambda (x) | 315 (detect-coding-region from to)))) |
316 (if (not (memq x safe)) | |
317 (throw 'tag nil)))) | |
318 found)))) | |
319 (setq codings (cons coding codings)))) | |
320 codings))) | |
321 | 316 |
322 | 317 |
323 ;;; Composite charcater manipulations. | 318 ;;; Composite charcater manipulations. |
324 | 319 |
325 ;;;###autoload | 320 ;;;###autoload |
339 "Decompose all composite characters in the current region. | 334 "Decompose all composite characters in the current region. |
340 Composite characters are broken up into individual components. | 335 Composite characters are broken up into individual components. |
341 When called from a program, expects two arguments, | 336 When called from a program, expects two arguments, |
342 positions (integers or markers) specifying the region." | 337 positions (integers or markers) specifying the region." |
343 (interactive "r") | 338 (interactive "r") |
344 (save-restriction | 339 (save-excursion |
345 (narrow-to-region start end) | 340 (save-restriction |
346 (goto-char (point-min)) | 341 (narrow-to-region start end) |
347 (let ((enable-multibyte-characters nil) | 342 (goto-char (point-min)) |
348 ;; This matches the whole bytes of single composite character. | 343 (while (not (eobp)) |
349 (re-cmpchar "\200[\240-\377]+") | 344 (let ((ch (following-char))) |
350 p ch str) | 345 (if (>= ch min-composite-char) |
351 (while (re-search-forward re-cmpchar nil t) | 346 (progn |
352 (setq str (buffer-substring (match-beginning 0) (match-end 0))) | 347 (delete-char 1) |
353 (delete-region (match-beginning 0) (match-end 0)) | 348 (insert (decompose-composite-char ch))) |
354 (insert (decompose-composite-char (string-to-char str))))))) | 349 (forward-char 1))))))) |
355 | 350 |
356 ;;;###autoload | 351 ;;;###autoload |
357 (defun decompose-string (string) | 352 (defun decompose-string (string) |
358 "Decompose all composite characters in STRING." | 353 "Decompose all composite characters in STRING." |
359 (let* ((l (string-to-list string)) | 354 (let ((len (length string)) |
360 (tail l) | 355 (idx 0) |
361 ch) | 356 (i 0) |
362 (while tail | 357 (str-list nil) |
363 (setq ch (car tail)) | 358 ch) |
364 (setcar tail (if (cmpcharp ch) (decompose-composite-char ch) | 359 (while (< idx len) |
365 (char-to-string ch))) | 360 (setq ch (aref string idx)) |
366 (setq tail (cdr tail))) | 361 (if (>= ch min-composite-char) |
367 (apply 'concat l))) | 362 (progn |
363 (if (> idx i) | |
364 (setq str-list (cons (substring string i idx) str-list))) | |
365 (setq str-list (cons (decompose-composite-char ch) str-list)) | |
366 (setq i (1+ idx)))) | |
367 (setq idx (1+ idx))) | |
368 (if (not str-list) | |
369 (copy-sequence string) | |
370 (if (> idx i) | |
371 (setq str-list (cons (substring string i idx) str-list))) | |
372 (apply 'concat (nreverse str-list))))) | |
368 | 373 |
369 ;;;###autoload | 374 ;;;###autoload |
370 (defconst reference-point-alist | 375 (defconst reference-point-alist |
371 '((tl . 0) (tc . 1) (tr . 2) | 376 '((tl . 0) (tc . 1) (tr . 2) |
372 (ml . 3) (mc . 4) (mr . 5) | 377 (ml . 3) (mc . 4) (mr . 5) |
481 (let ((rule (- (composite-char-composition-rule char i) ?\xA0))) | 486 (let ((rule (- (composite-char-composition-rule char i) ?\xA0))) |
482 (setq l (cons (cons (/ rule 9) (% rule 9)) l)))) | 487 (setq l (cons (cons (/ rule 9) (% rule 9)) l)))) |
483 (setq i (1- i))) | 488 (setq i (1- i))) |
484 (setq l (cons (composite-char-component char 0) l)) | 489 (setq l (cons (composite-char-component char 0) l)) |
485 (cond ((eq type 'string) | 490 (cond ((eq type 'string) |
486 (apply 'concat-chars l)) | 491 (apply 'string l)) |
487 ((eq type 'list) | 492 ((eq type 'list) |
488 l) | 493 l) |
489 (t ; i.e. TYPE is vector | 494 (t ; i.e. TYPE is vector |
490 (vconcat l))))) | 495 (vconcat l))))) |
491 | 496 |