changeset 95953:bdf59d1e1bee

(encoded-kbd-self-insert-utf-8): Catch and recover from case when the bytes we thought we were reading turn out to be something else entirely, such as latin-1 chars from quail. See bug#396.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Sun, 15 Jun 2008 04:43:35 +0000
parents c0051a1e40a2
children 184aae567806
files lisp/ChangeLog lisp/international/encoded-kb.el
diffstat 2 files changed, 26 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Sun Jun 15 03:06:50 2008 +0000
+++ b/lisp/ChangeLog	Sun Jun 15 04:43:35 2008 +0000
@@ -1,3 +1,10 @@
+2008-06-15  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+	* international/encoded-kb.el (encoded-kbd-self-insert-utf-8):
+	Catch and recover from case when the bytes we thought we were reading
+	turn out to be something else entirely, such as latin-1 chars from
+	quail.  See bug#396.
+
 2008-06-15  Dan Nicolaescu  <dann@ics.uci.edu>
 
 	* vc.el (vc-deduce-fileset): Check if the buffer has a file.
--- a/lisp/international/encoded-kb.el	Sun Jun 15 03:06:50 2008 +0000
+++ b/lisp/international/encoded-kb.el	Sun Jun 15 04:43:35 2008 +0000
@@ -219,8 +219,9 @@
 
 (defun encoded-kbd-self-insert-utf-8 (arg)
   (interactive "p")
-  (let ((char (encoded-kbd-last-key))
-	len)
+  (let* ((lead (encoded-kbd-last-key))
+         (char lead)
+         len event)
     (cond ((< char #xE0)
 	   (setq len 1 char (logand char #x1F)))
 	  ((< char #xF0)
@@ -230,8 +231,22 @@
 	  (t
 	   (setq len 4 char 0)))
     (while (> len 0)
-      (setq char (logior (lsh char 6) (logand (read-char-exclusive) #x3F))
-	    len (1- len)))
+      (setq event (read-char-exclusive))
+      (if (and (>= event #x80) (< event #xc0))
+          ;; Valid utf-8 sequence.
+          (setq char (logior (lsh char 6) (- event #x80))
+                len (1- len))
+        ;; Invalid utf-8 sequence.  Might be because Quail got involved
+        ;; in-between and the bytes we thought we were reading were actually
+        ;; latin-1 chars.  Let's presume that `event' is the second "byte",
+        ;; i.e. there weren't any "apparently correct" bytes between `lead' and
+        ;; `event': it's easy to recover in this case, and the more general
+        ;; case seems pretty unlikely.
+        ;; FIXME: We should really do encoded-kbd decoding before processing
+        ;; input-methods.
+        (push event unread-command-events)
+        (setq char lead)
+        (setq len 0)))
     (vector char)))
 
 (defun encoded-kbd-setup-keymap (keymap coding)