changeset 37746:0204bb9ccc1e

(auto-coding-regexp-alist): New user-option. (auto-coding-from-file-contents): New function. (set-auto-coding): Use it to determine a coding system.
author Gerd Moellmann <gerd@gnu.org>
date Wed, 16 May 2001 10:36:54 +0000
parents c05ae69405d1
children e5c590c9fe66
files lisp/international/mule.el
diffstat 1 files changed, 117 insertions(+), 86 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/international/mule.el	Wed May 16 10:18:36 2001 +0000
+++ b/lisp/international/mule.el	Wed May 16 10:36:54 2001 +0000
@@ -1237,6 +1237,20 @@
   :type '(repeat (cons (regexp :tag "File name regexp")
 		       (symbol :tag "Coding system"))))
 
+(defcustom auto-coding-regexp-alist
+  '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
+  "Alist of patterns vs corresponding coding systems.
+Each element looks like (REGEXP . CODING-SYSTEM).
+A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
+
+The settings in this alist take priority over `coding:' tags
+in the file (see the function `set-auto-coding')
+and the contents of `file-coding-system-alist'."
+  :group 'files
+  :group 'mule
+  :type '(repeat (cons (regexp :tag "Regexp")
+		       (symbol :tag "Coding system"))))
+
 (defvar set-auto-coding-for-load nil
   "Non-nil means look for `load-coding' property instead of `coding'.
 This is used for loading and byte-compiling Emacs Lisp files.")
@@ -1252,106 +1266,123 @@
 	(setq alist (cdr alist))))
     coding-system))
 
+
+(defun auto-coding-from-file-contents (size)
+  "Determine a coding system from the contents of the current buffer.
+The current buffer contains SIZE bytes starting at point.
+Value is either a coding system or nil."
+  (save-excursion
+    (let ((alist auto-coding-regexp-alist)
+	  coding-system)
+      (while (and alist (not coding-system))
+	(let ((regexp (car (car alist))))
+	  (when (re-search-forward regexp (+ (point) size) t)
+	    (setq coding-system (cdr (car alist)))))
+	(setq alist (cdr alist)))
+      coding-system)))
+		
+
 (defun set-auto-coding (filename size)
   "Return coding system for a file FILENAME of which SIZE bytes follow point.
 These bytes should include at least the first 1k of the file
 and the last 3k of the file, but the middle may be omitted.
 
-It checks FILENAME against the variable `auto-coding-alist'.
-If FILENAME doesn't match any entries in the variable,
-it checks for a `coding:' tag in the first one or two lines following
-point.  If no `coding:' tag is found, it checks for local variables
-list in the last 3K bytes out of the SIZE bytes.
+It checks FILENAME against the variable `auto-coding-alist'.  If
+FILENAME doesn't match any entries in the variable, it checks the
+contents of the current buffer following point against
+`auto-coding-regexp-alist'.  If no match is found, it checks for a
+`coding:' tag in the first one or two lines following point.  If no
+`coding:' tag is found, it checks for local variables list in the last
+3K bytes out of the SIZE bytes.
 
 The return value is the specified coding system,
 or nil if nothing specified.
 
 The variable `set-auto-coding-function' (which see) is set to this
 function by default."
-  (let ((coding-system (auto-coding-alist-lookup filename)))
+  (or (auto-coding-alist-lookup filename)
+      (auto-coding-from-file-contents size)
+      (let* ((case-fold-search t)
+	     (head-start (point))
+	     (head-end (+ head-start (min size 1024)))
+	     (tail-start (+ head-start (max (- size 3072) 0)))
+	     (tail-end (+ head-start size))
+	     coding-system head-found tail-found pos)
+	;; Try a short cut by searching for the string "coding:"
+	;; and for "unibyte:" at the head and tail of SIZE bytes.
+	(setq head-found (or (search-forward "coding:" head-end t)
+			     (search-forward "unibyte:" head-end t)))
+	(if (and head-found (> head-found tail-start))
+	    ;; Head and tail are overlapped.
+	    (setq tail-found head-found)
+	  (goto-char tail-start)
+	  (setq tail-found (or (search-forward "coding:" tail-end t)
+			       (search-forward "unibyte:" tail-end t))))
 
-    (or coding-system
-	(let* ((case-fold-search t)
-	       (head-start (point))
-	       (head-end (+ head-start (min size 1024)))
-	       (tail-start (+ head-start (max (- size 3072) 0)))
-	       (tail-end (+ head-start size))
-	       coding-system head-found tail-found pos)
-	  ;; Try a short cut by searching for the string "coding:"
-	  ;; and for "unibyte:" at the head and tail of SIZE bytes.
-	  (setq head-found (or (search-forward "coding:" head-end t)
-			       (search-forward "unibyte:" head-end t)))
-	  (if (and head-found (> head-found tail-start))
-	      ;; Head and tail are overlapped.
-	      (setq tail-found head-found)
-	    (goto-char tail-start)
-	    (setq tail-found (or (search-forward "coding:" tail-end t)
-				 (search-forward "unibyte:" tail-end t))))
-
-	  ;; At first check the head.
-	  (when head-found
+	;; At first check the head.
+	(when head-found
+	  (goto-char head-start)
+	  (setq pos (re-search-forward "[\n\r]" head-end t))
+	  (if (and pos
+		   (= (char-after head-start) ?#)
+		   (= (char-after (1+ head-start)) ?!))
+	     ;; If the file begins with "#!" (exec interpreter magic),
+	  ;; look for coding frobs in the first two lines.  You cannot
+	      ;; necessarily put them in the first line of such a file
+	      ;; without screwing up the interpreter invocation.
+	      (setq pos (search-forward "\n" head-end t)))
+	  (if pos (setq head-end pos))
+	  (when (< head-found head-end)
 	    (goto-char head-start)
-	    (setq pos (re-search-forward "[\n\r]" head-end t))
-	    (if (and pos
-		     (= (char-after head-start) ?#)
-		     (= (char-after (1+ head-start)) ?!))
-		;; If the file begins with "#!" (exec interpreter magic),
-		;; look for coding frobs in the first two lines.  You cannot
-		;; necessarily put them in the first line of such a file
-		;; without screwing up the interpreter invocation.
-		(setq pos (search-forward "\n" head-end t)))
-	    (if pos (setq head-end pos))
-	    (when (< head-found head-end)
-	      (goto-char head-start)
-	      (when (and set-auto-coding-for-load
-			 (re-search-forward
-			  "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
-			  head-end t))
-		(setq coding-system 'raw-text))
-	      (when (and (not coding-system)
-			 (re-search-forward
-			  "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
-			  head-end t))
-		(setq coding-system (intern (match-string 2)))
-		(or (coding-system-p coding-system)
-		    (setq coding-system nil)))))
+	    (when (and set-auto-coding-for-load
+		       (re-search-forward
+			"-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
+			head-end t))
+	      (setq coding-system 'raw-text))
+	    (when (and (not coding-system)
+		       (re-search-forward
+			"-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
+			head-end t))
+	      (setq coding-system (intern (match-string 2)))
+	      (or (coding-system-p coding-system)
+		  (setq coding-system nil)))))
 
-	  ;; If no coding: tag in the head, check the tail.
-	  (when (and tail-found (not coding-system))
-	    (goto-char tail-start)
-	    (search-forward "\n\^L" nil t)
-	    (if (re-search-forward
-		 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
-		;; The prefix is what comes before "local variables:" in its
-		;; line.  The suffix is what comes after "local variables:"
-		;; in its line.
-		(let* ((prefix (regexp-quote (match-string 1)))
-		       (suffix (regexp-quote (match-string 2)))
-		       (re-coding
-			(concat
-			 "^" prefix
-			 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
-			 suffix "$"))
-		       (re-unibyte
-			(concat
-			 "^" prefix
-			 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
-			 suffix "$"))
-		       (re-end
-			(concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
-		       (pos (point)))
-		  (re-search-forward re-end tail-end 'move)
-		  (setq tail-end (point))
-		  (goto-char pos)
-		  (when (and set-auto-coding-for-load
-			     (re-search-forward re-unibyte tail-end t))
-		    (setq coding-system 'raw-text))
-		  (when (and (not coding-system)
-			     (re-search-forward re-coding tail-end t))
-		    (setq coding-system (intern (match-string 1)))
-		    (or (coding-system-p coding-system)
-			(setq coding-system nil))))))
-	  coding-system))))
+	;; If no coding: tag in the head, check the tail.
+	(when (and tail-found (not coding-system))
+	  (goto-char tail-start)
+	  (search-forward "\n\^L" nil t)
+	  (if (re-search-forward
+	       "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
+	  ;; The prefix is what comes before "local variables:" in its
+	   ;; line.  The suffix is what comes after "local variables:"
+	      ;; in its line.
+	      (let* ((prefix (regexp-quote (match-string 1)))
+		     (suffix (regexp-quote (match-string 2)))
+		     (re-coding
+		      (concat
+		       "^" prefix
+		       "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
+		       suffix "$"))
+		     (re-unibyte
+		      (concat
+		       "^" prefix
+		       "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
+		       suffix "$"))
+		     (re-end
+		      (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
+		     (pos (point)))
+		(re-search-forward re-end tail-end 'move)
+		(setq tail-end (point))
+		(goto-char pos)
+		(when (and set-auto-coding-for-load
+			   (re-search-forward re-unibyte tail-end t))
+		  (setq coding-system 'raw-text))
+		(when (and (not coding-system)
+			   (re-search-forward re-coding tail-end t))
+		  (setq coding-system (intern (match-string 1)))
+		  (or (coding-system-p coding-system)
+		      (setq coding-system nil))))))
+	coding-system)))
 
 (setq set-auto-coding-function 'set-auto-coding)