# HG changeset patch # User Jan Dj¸«£rv # Date 1161242187 0 # Node ID 50ad827e063d7b7873c6bc1a9f970cca96639e20 # Parent 82e81b1ee1270ea7898698a8701ad6520087df95 * select.el (ccl-check-utf-8, string-utf-8-p): New functions (by Kenichi Handa). (xselect-convert-to-string): Decline requests for UTF8_STRING if the selection is not UTF-8. diff -r 82e81b1ee127 -r 50ad827e063d lisp/ChangeLog --- a/lisp/ChangeLog Thu Oct 19 04:56:46 2006 +0000 +++ b/lisp/ChangeLog Thu Oct 19 07:16:27 2006 +0000 @@ -1,3 +1,10 @@ +2006-10-19 Jan Dj,Ad(Brv + + * select.el (ccl-check-utf-8, string-utf-8-p): New functions + (by Kenichi Handa). + (xselect-convert-to-string): Decline requests for UTF8_STRING if + the selection is not UTF-8. + 2006-10-18 Juanma Barranquero * progmodes/ada-mode.el (ada-83-string-keywords) diff -r 82e81b1ee127 -r 50ad827e063d lisp/select.el --- a/lisp/select.el Thu Oct 19 04:56:46 2006 +0000 +++ b/lisp/select.el Thu Oct 19 07:16:27 2006 +0000 @@ -152,6 +152,41 @@ ;;; Every selection type that Emacs handles is implemented this way, except ;;; for TIMESTAMP, which is a special case. +(eval-when-compile (require 'ccl)) + +(define-ccl-program ccl-check-utf-8 + '(0 + ((r0 = 1) + (loop + (read-if (r1 < #x80) (repeat) + ((r0 = 0) + (if (r1 < #xC2) (end)) + (read r2) + (if ((r2 & #xC0) != #x80) (end)) + (if (r1 < #xE0) ((r0 = 1) (repeat))) + (read r2) + (if ((r2 & #xC0) != #x80) (end)) + (if (r1 < #xF0) ((r0 = 1) (repeat))) + (read r2) + (if ((r2 & #xC0) != #x80) (end)) + (if (r1 < #xF8) ((r0 = 1) (repeat))) + (read r2) + (if ((r2 & #xC0) != #x80) (end)) + (if (r1 == #xF8) ((r0 = 1) (repeat))) + (end)))))) + "Check if the input unibyte string is a valid UTF-8 sequence or not. +If it is valid, set the register `r0' to 1, else set it to 0.") + +(defun string-utf-8-p (string) + "Return non-nil iff STRING is a unibyte string of valid UTF-8 sequence." + (if (or (not (stringp string)) + (multibyte-string-p string)) + (error "Not a unibyte string: %s" string)) + (let ((status (make-vector 9 0))) + (ccl-execute-on-string ccl-check-utf-8 status string) + (= (aref status 0) 1))) + + (defun xselect-convert-to-string (selection type value) (let (str coding) ;; Get the actual string from VALUE. @@ -223,11 +258,10 @@ (setq str (encode-coding-string str coding)))) ((eq type 'UTF8_STRING) - (let ((charsets (find-charset-string str))) - (if (or (memq 'eight-bit-control charsets) - (memq 'eight-bit-graphic charsets)) - (setq type 'STRING) - (setq str (encode-coding-string str 'utf-8))))) + (if (multibyte-string-p str) + (setq str (encode-coding-string str 'utf-8))) + (if (not (string-utf-8-p str)) + (setq str nil))) ;; Decline request as we don't have UTF-8 data. (t (error "Unknow selection type: %S" type)) )))