Mercurial > emacs
changeset 91103:5b63818330eb
(selection-coding-system)
(next-selection-coding-system): Declaration moded from xselect.c.
(x-get-selection): Decode by selection-coding-system if it is
non-nil. If it is nil, decode by a proper coding system. Handle
C_STRING.
(ccl-check-utf-8, string-utf-8-p): Delete them.
(xselect-convert-to-string): Fix determining data-type in the case
that TEXT is requested. Don't use selection-coding-system if it's
not proper for the data-type.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Sun, 18 Nov 2007 09:12:14 +0000 |
parents | 63ffdb0bbe19 |
children | 84f80c052155 |
files | lisp/select.el |
diffstat | 1 files changed, 89 insertions(+), 81 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/select.el Sun Nov 18 09:05:14 2007 +0000 +++ b/lisp/select.el Sun Nov 18 09:12:14 2007 +0000 @@ -28,6 +28,37 @@ ;;; Code: +(defvar selection-coding-system nil + "Coding system for communicating with other X clients. + +When sending text via selection and clipboard, if the target +data-type matches with the type of this coding system, it is used +for encoding the text. Otherwise (including the case that this +variable is nil), a proper coding system is used as below: + +data-type coding system +--------- ------------- +UTF8_STRING utf-8 +COMPOUND_TEXT compound-text-with-extensions +STRING iso-latin-1 +C_STRING no-conversion + +When receiving text, if this coding system is non-nil, it is used +for decoding regardless of the data-type. If this is nil, a +proper coding system is used according to the data-type as above. + +See also the documentation of the variable `x-select-request-type' how +to control which data-type to request for receiving text. + +The default value is nil.") + +(defvar next-selection-coding-system nil + "Coding system for the next communication with other X clients. +Usually, `selection-coding-system' is used for communicating with +other X clients. But, if this variable is set, it is used for +the next communication only. After the communication, this +variable is set to nil.") + ;; This is for temporary compatibility with pre-release Emacs 19. (defalias 'x-selection 'x-get-selection) (defun x-get-selection (&optional type data-type) @@ -48,11 +79,21 @@ coding) (when (and (stringp data) (setq data-type (get-text-property 0 'foreign-selection data))) - (setq coding (if (eq data-type 'UTF8_STRING) - 'utf-8 - (or next-selection-coding-system - selection-coding-system)) - data (decode-coding-string data coding)) + (setq coding (or next-selection-coding-system + selection-coding-system + (cond ((eq data-type 'UTF8_STRING) + 'utf-8) + ((eq data-type 'COMPOUND-TEXT) + 'compound-text-with-extensions) + ((eq data-type 'C_STRING) + nil) + ((eq data-type 'STRING) + 'iso-8859-1) + (t + (error "Unknow selection data type: %S" type)))) + data (if coding (decode-coding-string data coding) + (string-to-multibyte data))) + (setq next-selection-coding-system nil) (put-text-property 0 (length data) 'foreign-selection data-type data)) data)) @@ -152,41 +193,6 @@ ;;; Every selection type that Emacs handles is implemented this way, except ;;; for TIMESTAMP, which is a special case. -(eval-when-compile (require 'ccl)) - -(define-ccl-program ccl-check-utf-8 - '(0 - ((r0 = 1) - (loop - (read-if (r1 < #x80) (repeat) - ((r0 = 0) - (if (r1 < #xC2) (end)) - (read r2) - (if ((r2 & #xC0) != #x80) (end)) - (if (r1 < #xE0) ((r0 = 1) (repeat))) - (read r2) - (if ((r2 & #xC0) != #x80) (end)) - (if (r1 < #xF0) ((r0 = 1) (repeat))) - (read r2) - (if ((r2 & #xC0) != #x80) (end)) - (if (r1 < #xF8) ((r0 = 1) (repeat))) - (read r2) - (if ((r2 & #xC0) != #x80) (end)) - (if (r1 == #xF8) ((r0 = 1) (repeat))) - (end)))))) - "Check if the input unibyte string is a valid UTF-8 sequence or not. -If it is valid, set the register `r0' to 1, else set it to 0.") - -(defun string-utf-8-p (string) - "Return non-nil if STRING is a unibyte string of valid UTF-8 sequence." - (if (or (not (stringp string)) - (multibyte-string-p string)) - (error "Not a unibyte string: %s" string)) - (let ((status (make-vector 9 0))) - (ccl-execute-on-string ccl-check-utf-8 status string) - (= (aref status 0) 1))) - - (defun xselect-convert-to-string (selection type value) (let (str coding) ;; Get the actual string from VALUE. @@ -219,52 +225,54 @@ str (setq coding (or next-selection-coding-system selection-coding-system)) (if coding - (setq coding (coding-system-base coding)) - (setq coding 'raw-text)) + (setq coding (coding-system-base coding))) (let ((inhibit-read-only t)) ;; Suppress producing escape sequences for compositions. (remove-text-properties 0 (length str) '(composition nil) str) - (cond - ((eq type 'TEXT) - (if (not (multibyte-string-p str)) - ;; Don't have to encode unibyte string. - (setq type 'STRING) - ;; If STR contains only ASCII, Latin-1, and raw bytes, - ;; encode STR by iso-latin-1, and return it as type - ;; `STRING'. Otherwise, encode STR by CODING. In that - ;; case, the returing type depends on CODING. - (let ((charsets (find-charset-string str))) - (setq charsets - (delq 'ascii - (delq 'latin-iso8859-1 - (delq 'eight-bit-control - (delq 'eight-bit-graphic charsets))))) - (if charsets - (setq str (encode-coding-string str coding) - type (if (memq coding '(compound-text - compound-text-with-extensions)) - 'COMPOUND_TEXT - 'STRING)) - (setq type 'STRING - str (encode-coding-string str 'iso-latin-1)))))) + (if (not (multibyte-string-p str)) + ;; Don't have to encode unibyte string. + (setq type 'C_STRING) + (if (eq type 'TEXT) + ;; TEXT is a polimorphic target. We must select the + ;; actual type from `UTF8_STRING', `COMPOUND_TEXT', + ;; `STRING', and `C_STRING'. + (let (non-latin-1 non-unicode eight-bit) + (mapc #'(lambda (x) + (if (>= x #x100) + (if (< x #x110000) + (setq non-latin-1 t) + (if (< x #x3FFF80) + (setq non-unicode t) + (setq eight-bit t))))) + str) + (setq type (if non-unicode 'COMPOUND_TEXT + (if non-latin-1 'UTF8_STRING + (if eight-bit 'C_STRING 'STRING)))))) + (cond + ((eq type 'UTF8_STRING) + (if (or (not coding) + (not (eq (coding-system-type coding) 'utf-8))) + (setq coding 'utf-8)) + (setq str (encode-coding-string str coding))) - ((eq type 'COMPOUND_TEXT) - (setq str (encode-coding-string str coding))) - - ((eq type 'STRING) - (if (memq coding '(compound-text - compound-text-with-extensions)) - (setq str (string-make-unibyte str)) - (setq str (encode-coding-string str coding)))) + ((eq type 'STRING) + (if (or (not coding) + (not (eq (coding-system-type coding) 'charset))) + (setq coding 'iso-8859-1)) + (setq str (encode-coding-string str coding))) - ((eq type 'UTF8_STRING) - (if (multibyte-string-p str) - (setq str (encode-coding-string str 'utf-8))) - (if (not (string-utf-8-p str)) - (setq str nil))) ;; Decline request as we don't have UTF-8 data. - (t - (error "Unknow selection type: %S" type)) - ))) + ((eq type 'COMPOUND_TEXT) + (if (or (not coding) + (not (eq (coding-system-type coding) 'iso-2022))) + (setq coding 'compound-text-with-extensions)) + (setq str (encode-coding-string str coding))) + + ((eq type 'C_STRING) + (setq str (string-make-unibyte str))) + + (t + (error "Unknow selection type: %S" type)) + )))) (setq next-selection-coding-system nil) (cons type str))))