changeset 106474:40531d6992e6

(ucs-names): Weed out at compile-time the chars that don't have names, so the table can be built much faster at run-time.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Mon, 07 Dec 2009 16:12:47 +0000
parents dfb27937da01
children 5354cbc99ef1
files lisp/ChangeLog lisp/international/mule-cmds.el
diffstat 2 files changed, 48 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Mon Dec 07 16:09:05 2009 +0000
+++ b/lisp/ChangeLog	Mon Dec 07 16:12:47 2009 +0000
@@ -1,3 +1,9 @@
+2009-12-07  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+	* international/mule-cmds.el (ucs-names): Weed out at compile-time the
+	chars that don't have names, so the table can be built much faster at
+	run-time.
+
 2009-12-07  Chong Yidong  <cyd@stupidchicken.com>
 
 	* simple.el (compose-mail): Check for incompatibilities and warn.
--- a/lisp/international/mule-cmds.el	Mon Dec 07 16:09:05 2009 +0000
+++ b/lisp/international/mule-cmds.el	Mon Dec 07 16:12:47 2009 +0000
@@ -2889,21 +2889,48 @@
 (defun ucs-names ()
   "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'."
   (or ucs-names
-      (setq ucs-names
-	    (let (name names)
-	      (dotimes-with-progress-reporter (c #xEFFFF)
-		  "Loading Unicode character names..."
-		(unless (or
-			 (and (>= c #x3400 ) (<= c #x4dbf )) ; CJK Ideograph Extension A
-			 (and (>= c #x4e00 ) (<= c #x9fff )) ; CJK Ideograph
-			 (and (>= c #xd800 ) (<= c #xfaff )) ; Private/Surrogate
-			 (and (>= c #x20000) (<= c #x2ffff)) ; CJK Ideograph Extensions B, C
-			 )
-		  (if (setq name (get-char-code-property c 'name))
-		      (setq names (cons (cons name c) names)))
-		  (if (setq name (get-char-code-property c 'old-name))
-		      (setq names (cons (cons name c) names)))))
-	      names))))
+      (let ((ranges
+             (purecopy
+              ;; We precompute at compile-time the ranges of chars
+              ;; that have names, so that at runtime, building the
+              ;; table can be done faster, since most of the time is
+              ;; spent looking for the chars that do have a name.
+              (eval-when-compile
+                (let ((ranges ())
+                      (first 0)
+                      (last 0))
+                  (dotimes-with-progress-reporter (c #xEFFFF)
+                      "Finding Unicode characters with names..."
+                    (unless (or
+                             ;; CJK Ideograph Extension Arch
+                             (and (>= c #x3400 ) (<= c #x4dbf ))
+                             ;; CJK Ideograph
+                             (and (>= c #x4e00 ) (<= c #x9fff ))
+                             ;; Private/Surrogate
+                             (and (>= c #xd800 ) (<= c #xfaff ))
+                             ;; CJK Ideograph Extensions B, C
+                             (and (>= c #x20000) (<= c #x2ffff))
+                             (null (get-char-code-property c 'name)))
+                      ;; This char has a name.
+                      (if (<= c (1+ last))
+                          ;; Extend the current range.
+                          (setq last c)
+                        ;; We have to split the range.
+                        (push (cons first last) ranges)
+                        (setq first (setq last c)))))
+                  (cons (cons first last) ranges))))
+             name names)
+            (dolist (range ranges)
+              (let ((c (car range))
+                    (end (cdr range)))
+                (while (<= c end)
+                  (if (setq name (get-char-code-property c 'name))
+                      (push (cons name c) names)
+                    (error "Wrong range"))
+                  (if (setq name (get-char-code-property c 'old-name))
+                      (push (cons name c) names))
+                  (setq c (1+ c)))))
+            (setq ucs-names names)))))
 
 (defvar ucs-completions (lazy-completion-table ucs-completions ucs-names)
   "Lazy completion table for completing on Unicode character names.")