# HG changeset patch # User Eli Zaretskii # Date 1014385461 0 # Node ID 28a1416840948297f12b06b1481081081446b3db # Parent bfca0eb3e752e108fa12a12a39db69262ae6eef2 (non-standard-icccm-encodings-alist, non-standard-designations-alist): New variables. (ctext-post-read-conversion, ctext-pre-write-conversion): New functions. diff -r bfca0eb3e752 -r 28a141684094 lisp/international/mule.el --- a/lisp/international/mule.el Fri Feb 22 13:20:00 2002 +0000 +++ b/lisp/international/mule.el Fri Feb 22 13:44:21 2002 +0000 @@ -1284,6 +1284,161 @@ (setq coding-category-list (append arg current-list)) (set-coding-priority-internal))) +;;; X selections + +(defvar non-standard-icccm-encodings-alist + '(("ISO8859-15" . latin-iso8859-15) + ("ISO8859-14" . latin-iso8859-14) + ("KOI8-R" . koi8-r) + ("BIG5-0" . big5)) + "Alist of font charset names defined by XLFD, and the corresponding Emacs +charsets or coding systems.") + +;; Functions to support "Non-Standard Character Set Encodings" defined +;; by the ICCCM spec. We support that by converting the leading +;; sequence of the ``extended segment'' to the corresponding ISO-2022 +;; sequences (if the leading sequence names an Emacs charset), or decode +;; the segment (if it names a coding system). Encoding does the reverse. +(defun ctext-post-read-conversion (len) + "Decode LEN characters encoded as Compound Text with Extended Segments." + (buffer-disable-undo) ; minimize consing due to insertions and deletions + (narrow-to-region (point) (+ (point) len)) + (save-match-data + (let ((pt (point-marker)) + (oldpt (point-marker)) + (newpt (make-marker)) + (modified-p (buffer-modified-p)) + (case-fold-search nil) + last-coding-system-used + encoding textlen chset) + (while (re-search-forward + "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002" + nil 'move) + (set-marker newpt (point)) + (set-marker pt (match-beginning 0)) + (setq encoding (match-string 3)) + (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) + (- (aref (match-string 2) 1) 128)) + (1+ (length encoding)))) + (setq + chset (cdr (assoc-ignore-case encoding + non-standard-icccm-encodings-alist))) + (cond ((null chset) + ;; This charset is not supported--leave this extended + ;; segment unaltered and skip over it. + (goto-char (+ (point) textlen))) + ((charsetp chset) + ;; If it's a charset, replace the leading escape sequence + ;; with a standard ISO-2022 sequence. We will decode all + ;; such segments later, in one go, when we exit the loop + ;; or find an extended segment that names a coding + ;; system, not a charset. + (replace-match + (concat "\\1" + (if (= 0 (charset-iso-graphic-plane chset)) + ;; GL charsets + (if (= 1 (charset-dimension chset)) "(" "$(") + ;; GR charsets + (if (= 96 (charset-chars chset)) + "-" + (if (= 1 (charset-dimension chset)) ")" "$)"))) + (string (charset-iso-final-char chset))) + t) + (goto-char (+ (point) textlen))) + ((coding-system-p chset) + ;; If it's a coding system, we need to decode the segment + ;; right away. But first, decode what we've skipped + ;; across until now. + (when (> pt oldpt) + (decode-coding-region oldpt pt 'ctext-no-compositions)) + (delete-region pt newpt) + (set-marker newpt (+ newpt textlen)) + (decode-coding-region pt newpt chset) + (goto-char newpt) + (set-marker oldpt newpt)))) + ;; Decode what's left. + (when (> (point) oldpt) + (decode-coding-region oldpt (point) 'ctext-no-compositions)) + ;; This buffer started as unibyte, because the string we get from + ;; the X selection is a unibyte string. We must now make it + ;; multibyte, so that the decoded text is inserted as multibyte + ;; into its buffer. + (set-buffer-multibyte t) + (set-buffer-modified-p modified-p) + (- (point-max) (point-min))))) + +(defvar non-standard-designations-alist + '(("$(0" . (big5 "big5-0" 2)) + ("$(1" . (big5 "big5-0" 2)) + ("-V" . (t "iso8859-10" 1)) + ("-Y" . (t "iso8859-13" 1)) + ("-_" . (t "iso8859-14" 1)) + ("-b" . (t "iso8859-15" 1)) + ("-f" . (t "iso8859-16" 1))) + "Alist of ctext control sequences that introduce character sets which +are not in the list of approved ICCCM encodings, and the corresponding +coding system, identifier string, and number of octets per encoded +character. + +Each element has the form (CTLSEQ . (ENCODING CHARSET NOCTETS)). CTLSEQ +is the control sequence (sans the leading ESC) that introduces the character +set in the text encoded by compound-text. ENCODING is a coding system +symbol; if it is t, it means that the ctext coding system already encodes +the text correctly, and only the leading control sequence needs to be altered. +If ENCODING is a coding system, we need to re-encode the text with that +coding system. CHARSET is the ICCCM name of the charset we need to put into +the leading control sequence. NOCTETS is the number of octets (bytes) that +encode each character in this charset. NOCTETS can be 0 (meaning the number +of octets per character is variable), 1, 2, 3, or 4.") + +(defun ctext-pre-write-conversion (from to) + "Encode characters between FROM and TO as Compound Text w/Extended Segments." + (buffer-disable-undo) ; minimize consing due to insertions and deletions + (narrow-to-region from to) + (encode-coding-region from to 'ctext-no-compositions) + ;; Replace ISO-2022 charset designations with extended segments, for + ;; those charsets that are not part of the official X registry. + (save-match-data + (goto-char (point-min)) + (let ((newpt (make-marker)) + (case-fold-search nil) + pt desig encode-info encoding chset noctets textlen) + (set-buffer-multibyte nil) + (while (re-search-forward "\e\\(\$([01]\\|-[VY_bf]\\)" nil 'move) + (setq desig (match-string 1) + pt (point-marker) + encode-info (cdr (assoc desig non-standard-designations-alist)) + encoding (car encode-info) + chset (cadr encode-info) + noctets (car (cddr encode-info))) + (skip-chars-forward "^\e") + (set-marker newpt (point)) + (cond + ((eq encoding t) ; only the leading sequence needs to be changed + (setq textlen (+ (- newpt pt) (length chset) 1)) + (replace-match (format "\e%%/%d%c%c%s" + noctets + (+ (/ textlen 128) 128) + (+ (% textlen 128) 128) + chset) + t t)) + ((coding-system-p encoding) ; need to recode the entire segment... + (set-marker pt (match-beginning 0)) + (decode-coding-region pt newpt 'ctext-no-compositions) + (set-buffer-multibyte t) + (encode-coding-region pt newpt encoding) + (set-buffer-multibyte nil) + (setq textlen (+ (- newpt pt) (length chset) 1)) + (goto-char pt) + (insert (format "\e%%/%d%c%c%s" + noctets + (+ (/ textlen 128) 128) + (+ (% textlen 128) 128) + chset)))) + (goto-char newpt)))) + (set-buffer-multibyte t) + nil) + ;;; FILE I/O (defcustom auto-coding-alist