Mercurial > emacs
annotate lisp/international/mule-util.el @ 17323:15fa68d983e7
(ccl_driver): Fix bug of the case CCL_WriteArrayReadJump.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 07 Apr 1997 07:12:13 +0000 |
parents | e7920fdc4948 |
children | c913160e34a7 |
rev | line source |
---|---|
17052 | 1 ;;; mule-util.el --- Utility functions for multilingual environment (mule) |
2 | |
3 ;; Copyright (C) 1995 Free Software Foundation, Inc. | |
4 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
5 | |
6 ;; Keywords: mule, multilingual | |
7 | |
8 ;; This file is part of GNU Emacs. | |
9 | |
10 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 ;; it under the terms of the GNU General Public License as published by | |
12 ;; the Free Software Foundation; either version 2, or (at your option) | |
13 ;; any later version. | |
14 | |
15 ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 ;; GNU General Public License for more details. | |
19 | |
20 ;; You should have received a copy of the GNU General Public License | |
17071 | 21 ;; along with GNU Emacs; see the file COPYING. If not, write to the |
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 ;; Boston, MA 02111-1307, USA. | |
17052 | 24 |
25 ;;; Code: | |
26 | |
27 ;;; String manipulations while paying attention to multibyte | |
28 ;;; characters. | |
29 | |
30 ;;;###autoload | |
(defun string-to-sequence (string type)
  "Return the characters of STRING as a sequence of TYPE.
TYPE must be `list' or `vector'; any other value signals an error.
Multibyte characters are taken into account: STRING is walked with
`sref' and `char-bytes', so each element of the result is one
character."
  (if (not (memq type '(list vector)))
      (error "Invalid type: %s" type))
  (let ((limit (length string))
        (pos 0)
        (chars nil)
        c)
    ;; `char-bytes' gives the distance from the current character to
    ;; the start of the next one.
    (while (< pos limit)
      (setq c (sref string pos)
            chars (cons c chars)
            pos (+ pos (char-bytes c))))
    ;; The characters were collected back to front.
    (setq chars (nreverse chars))
    (cond ((eq type 'list) chars)
          (t (vconcat chars)))))
48 | |
49 ;;;###autoload | |
(defsubst string-to-list (string)
  "Convert STRING into a list of its characters.
This is `string-to-sequence' with TYPE fixed to `list'."
  (string-to-sequence string (quote list)))
53 | |
54 ;;;###autoload | |
(defsubst string-to-vector (string)
  "Convert STRING into a vector of its characters.
This is `string-to-sequence' with TYPE fixed to `vector'."
  (string-to-sequence string (quote vector)))
58 | |
59 ;;;###autoload | |
(defun store-substring (string idx obj)
  "Overwrite part of STRING, starting at index IDX, with OBJ.
OBJ is either a string or a character; anything else signals an error.
STRING is modified in place and returned.  Elements of OBJ that would
fall beyond the end of STRING are silently dropped."
  (let ((src (cond ((stringp obj) obj)
                   ((integerp obj) (char-to-string obj))
                   (t (error
                       "Invalid argument (should be string or character): %s"
                       obj))))
        (k 0))
    ;; Copy element by element, stopping at whichever end comes first.
    (while (and (< k (length src))
                (< (+ idx k) (length string)))
      (aset string (+ idx k) (aref src k))
      (setq k (1+ k)))
    string))
74 | |
75 ;;;###autoload | |
(defun truncate-string-to-width (str width &optional start-column padding)
  "Truncate string STR to fit in WIDTH columns.
Columns are counted from the beginning of STR, so the text returned is
the part of STR lying between START-COLUMN and WIDTH.
Optional 1st arg START-COLUMN if non-nil specifies the starting column;
it defaults to 0.
Optional 2nd arg PADDING if non-nil is a padding character to be padded at
the head and tail of the resulting string to fit in WIDTH if necessary.
Head padding is added when the character at START-COLUMN is wider than
one column and straddles it; tail padding is added when the text ends
before WIDTH.
If PADDING is nil, the resulting string may be narrower than WIDTH.
If STR ends before START-COLUMN, the result is WIDTH copies of PADDING,
or the empty string if PADDING is nil."
  (or start-column
      (setq start-column 0))
  (let ((len (length str))
        (idx 0)
        (column 0)
        (head-padding "") (tail-padding "")
        ch last-column last-idx from-idx)
    ;; Skip the characters lying before START-COLUMN.  Running off the
    ;; end of STR signals `args-out-of-range', which ends the scan.
    (condition-case nil
        (while (< column start-column)
          (setq ch (sref str idx)
                column (+ column (char-width ch))
                idx (+ idx (char-bytes ch))))
      (args-out-of-range (setq idx len)))
    (if (< column start-column)
        ;; STR is too short to reach START-COLUMN at all.
        (if padding (make-string width padding) "")
      ;; A wide character straddled START-COLUMN; fill the gap it
      ;; leaves with the caller's PADDING character.
      (if (and padding (> column start-column))
          (setq head-padding (make-string (- column start-column) padding)))
      ;; Seed LAST-COLUMN and LAST-IDX so that backing up below is
      ;; well defined even when the loop body never runs (that is,
      ;; when COLUMN has already reached WIDTH).
      (setq from-idx idx
            last-column column
            last-idx idx)
      (condition-case nil
          (while (< column width)
            (setq last-column column
                  last-idx idx
                  ch (sref str idx)
                  column (+ column (char-width ch))
                  idx (+ idx (char-bytes ch))))
        (args-out-of-range (setq idx len)))
      ;; The last character overshot WIDTH: back up to just before it.
      (if (> column width)
          (setq column last-column
                idx last-idx))
      (if (and padding (< column width))
          (setq tail-padding (make-string (- width column) padding)))
      (setq str (substring str from-idx idx))
      (if padding
          (concat head-padding str tail-padding)
        str))))
116 | |
117 ;;; For backward compatibility ... | |
118 ;;;###autoload | |
;; Keep the old name working, but mark it obsolete in favour of
;; `truncate-string-to-width'.
(defalias 'truncate-string #'truncate-string-to-width)
(make-obsolete 'truncate-string 'truncate-string-to-width)
121 | |
122 ;;; Nested alist handler.  A nested alist is an alist whose elements | |
123 ;;; are themselves nested alists. | |
124 | |
125 ;;;###autoload | |
(defsubst nested-alist-p (obj)
  "Return t if OBJ is a nested alist.

A nested alist is a list of the form (ENTRY . BRANCHES), where ENTRY
is any Lisp object and BRANCHES is a list of cons cells of the form
\(KEY-ELEMENT . NESTED-ALIST).

A nested alist stores a Lisp object (ENTRY) for a key sequence KEYSEQ,
where KEYSEQ is a sequence of KEY-ELEMENTs.  KEYSEQ can be a string,
a vector, or a list.

Only the outer shape is checked here: OBJ must be a cons whose cdr is
a list."
  (and (consp obj)
       (listp (cdr obj))))
137 | |
138 ;;;###autoload | |
(defun set-nested-alist (keyseq entry alist &optional len branches)
  "Store ENTRY under key sequence KEYSEQ in the nested alist ALIST.
ALIST is modified in place; missing intermediate levels are created.
Optional 4th arg LEN non-nil means only the first LEN elements of
KEYSEQ are considered.
Optional argument BRANCHES if non-nil is installed as the branches for
key sequences longer than KEYSEQ; it is an error if the node for
KEYSEQ already has branches.
See the documentation of `nested-alist-p' for more detail."
  (if (not (nested-alist-p alist))
      (error "Invalid arguement %s" alist))
  (let ((from-list (listp keyseq))
        (limit (or len (length keyseq)))
        (pos 0)
        (node alist)
        elt cell)
    ;; Descend one level per key element, creating levels as needed.
    (while (< pos limit)
      (or (nested-alist-p node)
          (error "Keyseq %s is too long for this nested alist" keyseq))
      (setq elt (if from-list (nth pos keyseq) (aref keyseq pos))
            cell (assoc elt (cdr node)))
      (if (null cell)
          (progn
            ;; A fresh level starts out with the placeholder entry t
            ;; and no branches.
            (setq cell (cons elt (list t)))
            (setcdr node (cons cell (cdr node)))))
      (setq node (cdr cell)
            pos (1+ pos)))
    (setcar node entry)
    (if branches
        (if (cdr node)
            (error "Can't set branches for keyseq %s" keyseq)
          (setcdr node branches)))))
168 | |
169 ;;;###autoload | |
(defun lookup-nested-alist (keyseq alist &optional len start nil-for-too-long)
  "Look up key sequence KEYSEQ in nested alist ALIST.  Return the definition.
Optional 1st argument LEN specifies the length of KEYSEQ.
Optional 2nd argument START specifies the index of the starting key.
The returned value is normally a nested alist whose car is the entry
for KEYSEQ.
If ALIST is not deep enough for KEYSEQ, return a number: the index of
the first key element in KEYSEQ for which no branch was found.
Optional 3rd argument NIL-FOR-TOO-LONG non-nil means return nil
instead of that number when ALIST is not deep enough."
  (if (not (nested-alist-p alist))
      (error "invalid arguement %s" alist))
  (let ((limit (or len (length keyseq)))
        (pos (or start 0))
        (from-list (listp keyseq))
        (too-long nil))
    ;; Follow one branch per key element until the keys run out or a
    ;; branch is missing.
    (while (and (not too-long) (< pos limit))
      (setq alist (cdr (assoc (if from-list
                                  (nth pos keyseq)
                                (aref keyseq pos))
                              (cdr alist))))
      (if alist
          (setq pos (1+ pos))
        (setq too-long t)))
    (cond ((not too-long) alist)
          (nil-for-too-long nil)
          (t pos))))
199 | |
200 ;; Coding system related functions. | |
201 | |
202 ;;;###autoload | |
(defun set-coding-system-alist (target-type regexp coding-system
                                            &optional operation)
  "Update `coding-system-alist' according to the arguments.
TARGET-TYPE specifies a type of the target: `file', `process', or `network'.
TARGET-TYPE tells which slots of coding-system-alist should be affected.
 If `file', it affects slots for insert-file-contents and write-region.
 If `process', it affects slots for call-process, call-process-region, and
    start-process.
 If `network', it affects a slot for open-network-stream.
REGEXP is a regular expression matching a target of I/O operation.
CODING-SYSTEM is a coding system to perform code conversion
 on the I/O operation, or a cons of coding systems for decoding and
 encoding respectively, or a function symbol which returns the cons.
Optional arg OPERATION if non-nil specifies directly one of slots above.
 The valid value is: insert-file-contents, write-region,
 call-process, call-process-region, start-process, or open-network-stream.
If OPERATION is specified, TARGET-TYPE is ignored (and not validated).
If a slot already has an entry for REGEXP, that entry is updated in
place; otherwise a new entry is added at the front of the slot.
See the documentation of `coding-system-alist' for more detail."
  (or (stringp regexp)
      (error "Invalid regular expression: %s" regexp))
  ;; TARGET-TYPE only selects the slots when OPERATION is absent, so it
  ;; is only validated in that case.
  (or operation
      (memq target-type '(file process network))
      (error "Invalid target type: %s" target-type))
  ;; Normalize CODING-SYSTEM: a plain coding system symbol becomes the
  ;; cons (CODING-SYSTEM . CODING-SYSTEM); a function symbol is stored
  ;; as is; a cons has both halves checked.
  (if (symbolp coding-system)
      (if (not (fboundp coding-system))
          (progn
            (check-coding-system coding-system)
            (setq coding-system (cons coding-system coding-system))))
    (check-coding-system (car coding-system))
    (check-coding-system (cdr coding-system)))
  (let ((op-list (if operation
                     (list operation)
                   (cond ((eq target-type 'file)
                          '(insert-file-contents write-region))
                         ((eq target-type 'process)
                          '(call-process call-process-region start-process))
                         (t             ; i.e. (eq target-type 'network)
                          '(open-network-stream)))))
        slot entry)
    (while op-list
      (setq slot (assq (car op-list) coding-system-alist))
      (if slot
          (progn
            (setq entry (assoc regexp (cdr slot)))
            (if entry
                ;; REGEXP is already registered for this operation.
                (setcdr entry coding-system)
              (setcdr slot (cons (cons regexp coding-system) (cdr slot)))))
        ;; No slot for this operation yet: create one.
        (setq coding-system-alist
              (cons (cons (car op-list) (list (cons regexp coding-system)))
                    coding-system-alist)))
      (setq op-list (cdr op-list)))))
257 | |
258 ;;;###autoload | |
(defun coding-system-list ()
  "Return a list of all existing coding systems.
The list holds every interned symbol whose `coding-system' property
is non-nil, in no particular order."
  (let ((found nil))
    (mapatoms
     (function
      (lambda (sym)
        (and (get sym 'coding-system)
             (setq found (cons sym found))))))
    found))
264 | |
265 | |
266 ;;; Composite character manipulations. | |
267 | |
268 ;;;###autoload | |
(defun compose-region (start end)
  "Compose all characters in the current region into one composite character.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region.
The region's text is replaced by the result of `compose-string'.  If
`compose-string' signals an error, the buffer is left unchanged."
  (interactive "r")
  (save-excursion
    ;; Compose before touching the buffer, so that an error from
    ;; `compose-string' cannot lose the original text.
    (let ((composed (compose-string (buffer-substring start end))))
      (goto-char start)
      (delete-region start end)
      (insert composed))))
279 | |
280 ;;;###autoload | |
(defun decompose-region (start end)
  "Decompose all composite characters in the current region.
Composite characters are broken up into individual components.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region start end)
    (goto-char (point-min))
    ;; The search runs with multibyte handling turned off, so the
    ;; regexp is matched against the raw bytes of the buffer text.
    (let ((enable-multibyte-characters nil)
          ;; This matches the whole bytes of single composite character.
          (cmpchar-bytes "\200[\240-\377]+")
          from to ch)
      (while (re-search-forward cmpchar-bytes nil t)
        (setq from (match-beginning 0)
              to (match-end 0)
              ch (string-to-char (buffer-substring from to)))
        ;; Replace the composite character by its components.
        (delete-region from to)
        (insert (decompose-composite-char ch))))))
298 | |
299 ;;;###autoload | |
(defconst reference-point-alist
  '(;; Two-letter abbreviations.
    (tl . 0) (tc . 1) (tr . 2)
    (ml . 3) (mc . 4) (mr . 5)
    (bl . 6) (bc . 7) (br . 8)
    ;; Spelled-out names.
    (top-left . 0) (top-center . 1) (top-right . 2)
    (mid-left . 3) (mid-center . 4) (mid-right . 5)
    (bottom-left . 6) (bottom-center . 7) (bottom-right . 8)
    ;; The codes themselves are accepted too.
    (0 . 0) (1 . 1) (2 . 2)
    (3 . 3) (4 . 4) (5 . 5)
    (6 . 6) (7 . 7) (8 . 8))
  "Alist of reference point symbols vs reference point codes.
Meanings of reference point codes are as follows:

    0----1----2 <-- ascent    0:tl or top-left
    |         |               1:tc or top-center
    |         |               2:tr or top-right
    |         |               3:ml or mid-left
    |    4 <--+---- center    4:mc or mid-center
    |         |               5:mr or mid-right
--- 3         5 <-- baseline  6:bl or bottom-left
    |         |               7:bc or bottom-center
    6----7----8 <-- descent   8:br or bottom-right

Reference point symbols are to be used to specify composition rule of
the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where GLOBAL-REF-POINT
is a reference point in the overall glyphs already composed, and
NEW-REF-POINT is a reference point in the new glyph to be added.

For instance, if GLOBAL-REF-POINT is 8 and NEW-REF-POINT is 1, the
overall glyph is updated as follows:

    +-------+--+ <--- new ascent
    |       |  |
    | global|  |
    | glyph |  |
--- |       |  | <--- baseline (doesn't change)
    +----+--+--+
    |    | new |
    |    |glyph|
    +----+-----+ <--- new descent
")
341 | |
;; Return the string that represents character CH inside the multibyte
;; form of a composite character.
;;  - For CH below 128, the result is the byte \240 followed by the
;;    character CH + 128.
;;  - For a composite character whose second byte is \xFF, the result
;;    is its string form with the first two bytes dropped; any other
;;    composite character signals an error.
;;  - For any other character, the result is its string form with
;;    \x20 added to the first byte.
(defun compose-chars-component (ch)
  (if (>= ch 128)
      (let ((bytes (char-to-string ch)))
        (cond ((not (cmpcharp ch))
               (aset bytes 0 (+ (aref bytes 0) ?\x20))
               bytes)
              ((= (aref bytes 1) ?\xFF)
               (substring bytes 2))
              (t
               (error "Char %c can't be composed" ch))))
    (format "\240%c" (+ ch 128))))
354 | |
;; Return the one-character string that represents composition rule
;; RULE inside the multibyte form of a composite character.  RULE is
;; a cons (GLOBAL-REF-POINT . NEW-REF-POINT); both halves are
;; translated to codes through `reference-point-alist' and packed as
;; \xA0 + GLOBAL * 9 + NEW.
(defsubst compose-chars-rule (rule)
  (let ((global-code (cdr (assq (car rule) reference-point-alist)))
        (new-code (cdr (assq (cdr rule) reference-point-alist))))
    (char-to-string (+ ?\xA0 (* global-code 9) new-code))))
361 | |
362 ;;;###autoload | |
(defun compose-chars (first-component &rest args)
  "Return one char string composed from the arguments.
Each argument is a character (including a composite character)
or a composition rule.
A composition rule has the form \(GLOBAL-REF-POINT . NEW-REF-POINT).
If the argument after FIRST-COMPONENT is a rule, the remaining
arguments must alternate rule, character, rule, character, ...;
otherwise they are all characters.
See the documentation of `reference-point-alist' for more detail."
  (if (null args)
      (char-to-string first-component)
    (let* ((with-rule (consp (car args)))
           ;; A rule-based composition is flagged by \xFF right after
           ;; the leading code.
           (header (if with-rule
                       (concat (vector leading-code-composition ?\xFF))
                     (char-to-string leading-code-composition)))
           ;; Pieces of the result, collected in reverse order.
           (pieces (list (compose-chars-component first-component) header)))
      (while args
        (if (not with-rule)
            (setq pieces (cons (compose-chars-component (car args)) pieces)
                  args (cdr args))
          (if (not (consp (car args)))
              (error "Invalid composition rule: %s" (car args)))
          (setq pieces (cons (compose-chars-rule (car args)) pieces))
          (setq pieces (cons (compose-chars-component (car (cdr args)))
                             pieces))
          (setq args (cdr (cdr args)))))
      (apply 'concat (nreverse pieces)))))
386 | |
387 ;;;###autoload | |
(defun decompose-composite-char (char &optional type with-composition-rule)
  "Convert composite character CHAR to a sequence of its components.
Optional 1st arg TYPE specifies the type of sequence returned.
It should be `string' (default), `list', or `vector'.
Optional 2nd arg WITH-COMPOSITION-RULE non-nil means the returned
sequence contains embedded composition rules if any.  In this case, the
order of elements in the sequence is the same as arguments for
`compose-chars' to create CHAR.
If TYPE is omitted or is `string', composition rules are omitted
even if WITH-COMPOSITION-RULE is t."
  (let* ((kind (or type 'string))
         (count (composite-char-component-count char))
         ;; Rules are only emitted when asked for, when the result is
         ;; not a string, and when CHAR actually has rules.
         (want-rules (and with-composition-rule
                          (not (eq kind 'string))
                          (composite-char-composition-rule-p char)))
         ;; Elements are collected in reverse order.
         (acc (list (composite-char-component char 0)))
         (n 1))
    (while (< n count)
      (if want-rules
          ;; Unpack the rule for component N into
          ;; (GLOBAL-REF-POINT . NEW-REF-POINT).
          (let ((code (- (composite-char-composition-rule char n) ?\xA0)))
            (setq acc (cons (cons (/ code 9) (% code 9)) acc))))
      (setq acc (cons (composite-char-component char n) acc)
            n (1+ n)))
    (setq acc (nreverse acc))
    (cond ((eq kind 'string)
           (apply 'concat-chars acc))
          ((eq kind 'list)
           acc)
          (t                            ; i.e. TYPE is vector
           (vconcat acc)))))
419 | |
420 ;;; mule-util.el ends here |