changeset 73426:50ad827e063d

* select.el (ccl-check-utf-8, string-utf-8-p): New functions (by Kenichi Handa). (xselect-convert-to-string): Decline requests for UTF8_STRING if the selection is not UTF-8.
author Jan Djärv <jan.h.d@swipnet.se>
date Thu, 19 Oct 2006 07:16:27 +0000
parents 82e81b1ee127
children 842c20ea5853
files lisp/ChangeLog lisp/select.el
diffstat 2 files changed, 46 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Thu Oct 19 04:56:46 2006 +0000
+++ b/lisp/ChangeLog	Thu Oct 19 07:16:27 2006 +0000
@@ -1,3 +1,10 @@
+2006-10-19  Jan Djärv  <jan.h.d@swipnet.se>
+
+	* select.el (ccl-check-utf-8, string-utf-8-p): New functions
+	(by Kenichi Handa).
+	(xselect-convert-to-string): Decline requests for UTF8_STRING if
+	the selection is not UTF-8.
+
 2006-10-18  Juanma Barranquero  <lekktu@gmail.com>
 
 	* progmodes/ada-mode.el (ada-83-string-keywords)
--- a/lisp/select.el	Thu Oct 19 04:56:46 2006 +0000
+++ b/lisp/select.el	Thu Oct 19 07:16:27 2006 +0000
@@ -152,6 +152,41 @@
 ;;; Every selection type that Emacs handles is implemented this way, except
 ;;; for TIMESTAMP, which is a special case.
 
+(eval-when-compile (require 'ccl))
+
+(define-ccl-program ccl-check-utf-8
+  '(0
+    ((r0 = 1)  ; r0 is the result flag: start out "valid"
+     (loop
+      (read-if (r1 < #x80) (repeat)  ; read next byte into r1; a byte below #x80 needs no further checks
+	((r0 = 0)  ; multibyte lead: flag "invalid" until the whole sequence has been checked
+	 (if (r1 < #xC2) (end))  ; bytes #x80..#xC1 are rejected as a lead byte
+	 (read r2)  ; 1st trailing byte; NOTE(review): presumably the program stops here with r0 = 0 if input runs out -- confirm against CCL `read' semantics
+	 (if ((r2 & #xC0) != #x80) (end))  ; trailing byte must look like 10xxxxxx
+	 (if (r1 < #xE0) ((r0 = 1) (repeat)))  ; lead below #xE0: 2-byte sequence done, flag valid again
+	 (read r2)  ; 2nd trailing byte
+	 (if ((r2 & #xC0) != #x80) (end))  ; same 10xxxxxx check
+	 (if (r1 < #xF0) ((r0 = 1) (repeat)))  ; lead below #xF0: 3-byte sequence done
+	 (read r2)  ; 3rd trailing byte
+	 (if ((r2 & #xC0) != #x80) (end))  ; same 10xxxxxx check
+	 (if (r1 < #xF8) ((r0 = 1) (repeat)))  ; lead below #xF8: 4-byte sequence done
+	 (read r2)  ; 4th trailing byte
+	 (if ((r2 & #xC0) != #x80) (end))  ; same 10xxxxxx check
+	 (if (r1 == #xF8) ((r0 = 1) (repeat)))  ; a 5-byte sequence is accepted only with lead byte #xF8
+	 (end))))))  ; any other lead byte: stop with r0 still 0
+  "Check if the input unibyte string is a valid UTF-8 sequence or not.
+If it is valid, set the register `r0' to 1, else set it to 0.")
+
+(defun string-utf-8-p (string)
+  "Return non-nil iff STRING is a unibyte string of valid UTF-8 sequence."
+  (if (or (not (stringp string))  ; reject non-strings ...
+	  (multibyte-string-p string))  ; ... and multibyte strings
+      (error "Not a unibyte string: %s" string))  ; signals an error rather than returning nil
+  (let ((status (make-vector 9 0)))  ; 9-slot status vector handed to the CCL interpreter; slot 0 is read back below
+    (ccl-execute-on-string ccl-check-utf-8 status string)  ; called for its effect on STATUS; its return value is discarded
+    (= (aref status 0) 1)))  ; slot 0 equal to 1 means the checker reported valid UTF-8
+
+
+
 (defun xselect-convert-to-string (selection type value)
   (let (str coding)
     ;; Get the actual string from VALUE.
@@ -223,11 +258,10 @@
 	      (setq str (encode-coding-string str coding))))
 
 	   ((eq type 'UTF8_STRING)
-	    (let ((charsets (find-charset-string str)))
-	      (if (or (memq 'eight-bit-control charsets)
-		      (memq 'eight-bit-graphic charsets))
-		  (setq type 'STRING)
-		(setq str (encode-coding-string str 'utf-8)))))
+	    (if (multibyte-string-p str)
+		(setq str (encode-coding-string str 'utf-8)))
+	    (if (not (string-utf-8-p str))
+		(setq str nil))) ;; Decline request as we don't have UTF-8 data.
 	   (t
 	    (error "Unknow selection type: %S" type))
 	   )))