# HG changeset patch # User Thien-Thi Nguyen # Date 1114688907 0 # Node ID b5cd4f1a1855beb8c0d7fcd29f333f185870459c # Parent 100e0fab95e5671314209c4a4a60facb3f8fe176 Initial revision. diff -r 100e0fab95e5 -r b5cd4f1a1855 lisp/international/latexenc.el --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/international/latexenc.el Thu Apr 28 11:48:27 2005 +0000 @@ -0,0 +1,170 @@ +;;; latexenc.el --- guess correct coding system in LaTeX files + +;; Copyright (C) 2005 Free Software Foundation, Inc. + +;; Author: Arne J,Ax(Brgensen +;; Keywords: mule, coding system, latex + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This code tries to guess the correct coding system of a LaTeX file. + +;; First it searches for a \inputencoding{...} or +;; \usepackage[...]{inputenc} line in the file and looks up the ... in +;; `latex-inputenc-coding-alist' to find the corresponding coding +;; system. + +;; If this fails it will search for AUCTeX's TeX-master or tex-mode's +;; tex-main-file variable in the local variables section and visit +;; that file to get the coding system from the master file. This check +;; can be disabled by setting `latexenc-dont-use-TeX-master-flag' to +;; t. + +;; If we have still not found a coding system we will try to use the +;; standard tex-mode's `tex-guess-main-file' and get the coding system +;; from the main file. This check can be disabled by setting +;; `latexenc-dont-use-tex-guess-main-file-flag' to t. + +;; The functionality is enabled by adding the function +;; `latexenc-find-file-coding-system' to `file-coding-system-alist' +;; like this + +;; (add-to-list 'file-coding-system-alist +;; '("\\.tex\\|\\.ltx\\|\\.dtx\\|\\.drv\\'" . latexenc-find-file-coding-system)) + +;;; Code: + +;;;###autoload +(defcustom latex-inputenc-coding-alist + '(("ansinew" . windows-1252) ; MS Windows ANSI encoding, extension of Latin-1 + ("applemac" . mac-roman) + ("ascii" . us-ascii) + ("cp1250" . windows-1250) ; MS Windows encoding, codepage 1250 + ("cp1252" . windows-1252) ; synonym of ansinew + ("cp1257" . cp1257) + ("cp437de" . cp437) ; IBM code page 437 (German version): 225 is \ss + ("cp437" . cp437) ; IBM code page 437: 225 is \beta + ("cp850" . cp850) ; IBM code page 850 + ("cp852" . cp852) ; IBM code page 852 + ;; ("cp858" . undecided) ; IBM code page 850 but with a euro symbol + ("cp865" . cp865) ; IBM code page 865 + ;; The DECMultinational charaterset used by the OpenVMS system + ;; ("decmulti" . undecided) + ("latin1" . iso-8859-1) + ("latin2" . iso-8859-2) + ("latin3" . iso-8859-3) + ("latin4" . iso-8859-4) + ("latin5" . iso-8859-5) + ("latin9" . iso-8859-15) + ;; ("latin10" . undecided) + ;; ("macce" . undecided) ; Apple Central European + ("next" . next) ; The Next encoding + ("utf8" . utf-8) + ("utf8x" . utf-8)) ; used by the Unicode LaTeX package + "Mapping from encoding names used by LaTeX's \"inputenc.sty\" to Emacs coding systems. +Used by the function `latexenc-find-file-coding-system'." + :group 'files + :group 'mule + :type '(alist :key-type (string :tag "LaTeX input encoding") + :value-type (coding-system :tag "Coding system"))) + +;;;###autoload +(defun latexenc-inputenc-to-coding-system (inputenc) + "Return the corresponding coding-system for the specified input encoding. +Return nil if no matching coding system can be found." + (cdr (assoc inputenc latex-inputenc-coding-alist))) + +;;;###autoload +(defun latexenc-coding-system-to-inputenc (cs) + "Return the corresponding input encoding for the specified coding system. +Return nil if no matching input encoding can be found." + (let (result) + (catch 'result + (dolist (elem latex-inputenc-coding-alist result) + (let ((elem-cs (cdr elem))) + (when (and (coding-system-p elem-cs) + (coding-system-p cs) + (eq (coding-system-base cs) (coding-system-base elem-cs))) + (setq result (car elem)) + (throw 'result result))))))) + +(defvar latexenc-dont-use-TeX-master-flag nil + "Non-nil means don't follow TeX-master to find the coding system.") + +(defvar latexenc-dont-use-tex-guess-main-file-flag nil + "Non-nil means don't use tex-guessmain-file to find the coding system.") + +;;;###autoload +(defun latexenc-find-file-coding-system (arg-list) + "Determine the coding system of a LaTeX file if it uses \"inputenc.sty\". +The mapping from LaTeX's \"inputenc.sty\" encoding names to Emacs +coding system names is determined from `latex-inputenc-coding-alist'." + (if (eq (car arg-list) 'insert-file-contents) + (save-excursion + ;; try to find the coding system in this file + (goto-char (point-min)) + (if (or + (re-search-forward "^[^%$]*\\inputencoding{\\(.*\\)}" nil t) + (re-search-forward "^[^%$]*\\usepackage\\[\\(.*\\)\\]{inputenc}" nil t)) + (let* ((match (match-string 1)) + (sym (intern match))) + (when (latexenc-inputenc-to-coding-system match) + (setq sym (latexenc-inputenc-to-coding-system match)) + (when (coding-system-p sym) + sym + (if (and (require 'code-pages nil t) (coding-system-p sym)) + sym + 'undecided)))) + ;; else try to find it in the master/main file + (let (latexenc-main-file) + ;; is there a TeX-master or tex-main-file in the local variable section + (unless latexenc-dont-use-TeX-master-flag + (goto-char (point-max)) + (when (re-search-backward "^%+ *\\(TeX-master\\|tex-main-file\\): *\"\\(.+\\)\"" nil t) + (let ((file (concat (file-name-directory (nth 1 arg-list)) (match-string 2)))) + (if (file-exists-p file) + (setq latexenc-main-file file) + (if (boundp 'TeX-default-extension) + (when (file-exists-p (concat file "." TeX-default-extension)) + (setq latexenc-main-file (concat file "." TeX-default-extension))) + (dolist (ext '("drv" "dtx" "ltx" "tex")) + (if (file-exists-p (concat file "." ext)) + (setq latexenc-main-file (concat file "." ext))))))))) + ;; try tex-modes tex-guess-main-file + (when (and (not latexenc-dont-use-tex-guess-main-file-flag) + (not latexenc-main-file)) + (when (fboundp 'tex-guess-main-file) + (let ((tex-start-of-header "\\\\document\\(style\\|class\\)") + (default-directory (file-name-directory (nth 1 arg-list)))) + (setq latexenc-main-file (tex-guess-main-file))))) + ;; if we found a master/main file get the coding system from it + (if (and latexenc-main-file + (file-readable-p latexenc-main-file)) + (let* ((latexenc-dont-use-tex-guess-main-file-flag t) + (latexenc-dont-use-TeX-master-flag t) + (latexenc-main-buffer (find-file-noselect latexenc-main-file t))) + (or (buffer-local-value 'coding-system-for-write latexenc-main-buffer) + (buffer-local-value 'buffer-file-coding-system latexenc-main-buffer))) + 'undecided)))) + 'undecided)) + +(provide 'latexenc) + +;;; latexenc.el ends here