88155
|
1 ;;; latexenc.el --- guess correct coding system in LaTeX files
|
|
2
|
|
3 ;; Copyright (C) 2005 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: Arne Jørgensen <arne@arnested.dk>
|
|
6 ;; Keywords: mule, coding system, latex
|
|
7
|
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
23 ;; Boston, MA 02110-1301, USA.
|
|
24
|
|
25 ;;; Commentary:
|
|
26
|
|
27 ;; This code tries to guess the correct coding system of a LaTeX file.
|
|
28
|
|
29 ;; First it searches for a \inputencoding{...} or
|
|
30 ;; \usepackage[...]{inputenc} line in the file and looks up the ... in
|
|
31 ;; `latex-inputenc-coding-alist' to find the corresponding coding
|
|
32 ;; system.
|
|
33
|
|
34 ;; If this fails it will search for AUCTeX's TeX-master or tex-mode's
|
|
35 ;; tex-main-file variable in the local variables section and visit
|
|
36 ;; that file to get the coding system from the master file. This check
|
|
37 ;; can be disabled by setting `latexenc-dont-use-TeX-master-flag' to
|
|
38 ;; t.
|
|
39
|
|
40 ;; If we have still not found a coding system we will try to use the
|
|
41 ;; standard tex-mode's `tex-guess-main-file' and get the coding system
|
|
42 ;; from the main file. This check can be disabled by setting
|
|
43 ;; `latexenc-dont-use-tex-guess-main-file-flag' to t.
|
|
44
|
|
45 ;; The functionality is enabled by adding the function
|
|
46 ;; `latexenc-find-file-coding-system' to `file-coding-system-alist'
|
|
47 ;; like this
|
|
48
|
|
49 ;; (add-to-list 'file-coding-system-alist
|
|
50 ;; '("\\.\\(tex\\|ltx\\|dtx\\|drv\\)\\'" . latexenc-find-file-coding-system))
|
|
51
|
|
52 ;;; Code:
|
|
53
|
|
54 ;;;###autoload
|
|
(defcustom latex-inputenc-coding-alist
  ;; Each element is (INPUTENC . CODING-SYSTEM): INPUTENC is an encoding
  ;; name accepted by LaTeX's "inputenc.sty", CODING-SYSTEM the Emacs
  ;; coding system used for it.  Encodings without a known Emacs
  ;; counterpart are kept below as commented-out entries.
  '(("ansinew" . windows-1252) ; MS Windows ANSI encoding, extension of Latin-1
    ("applemac" . mac-roman)
    ("ascii" . us-ascii)
    ("cp1250" . windows-1250) ; MS Windows encoding, codepage 1250
    ("cp1252" . windows-1252) ; synonym of ansinew
    ("cp1257" . cp1257)
    ("cp437de" . cp437) ; IBM code page 437 (German version): 225 is \ss
    ("cp437" . cp437) ; IBM code page 437: 225 is \beta
    ("cp850" . cp850) ; IBM code page 850
    ("cp852" . cp852) ; IBM code page 852
    ;; ("cp858" . undecided) ; IBM code page 850 but with a euro symbol
    ("cp865" . cp865) ; IBM code page 865
    ;; The DECMultinational character set used by the OpenVMS system
    ;; ("decmulti" . undecided)
    ("latin1" . iso-8859-1)
    ("latin2" . iso-8859-2)
    ("latin3" . iso-8859-3)
    ("latin4" . iso-8859-4)
    ("latin5" . iso-8859-9) ; ISO Latin-5 is ISO 8859-9 (Turkish), not 8859-5
    ("latin9" . iso-8859-15)
    ;; ("latin10" . undecided)
    ;; ("macce" . undecided) ; Apple Central European
    ("next" . next) ; The Next encoding
    ("utf8" . utf-8)
    ("utf8x" . utf-8)) ; used by the Unicode LaTeX package
  "Mapping from LaTeX encodings in \"inputenc.sty\" to Emacs coding systems.
LaTeX encodings are specified with \"\\usepackage[encoding]{inputenc}\".
Used by the function `latexenc-find-file-coding-system'.

Each element has the form (INPUTENC . CODING-SYSTEM), where INPUTENC
is a string and CODING-SYSTEM is a symbol.  The order of the elements
matters to `latexenc-coding-system-to-inputenc', which returns the
first INPUTENC whose coding system matches."
  :group 'files
  :group 'mule
  :type '(alist :key-type (string :tag "LaTeX input encoding")
                :value-type (coding-system :tag "Coding system")))
|
|
88
|
|
89 ;;;###autoload
|
|
(defun latexenc-inputenc-to-coding-system (inputenc)
  "Look up the Emacs coding system for the LaTeX input encoding INPUTENC.
INPUTENC is an encoding name as used with \"inputenc.sty\"; it is
searched for among the keys of `latex-inputenc-coding-alist'.
The value is nil when INPUTENC has no entry there."
  (let ((entry (assoc inputenc latex-inputenc-coding-alist)))
    ;; `cdr' of nil is nil, so a failed lookup yields nil.
    (cdr entry)))
|
|
94
|
|
95 ;;;###autoload
|
|
(defun latexenc-coding-system-to-inputenc (cs)
  "Find the LaTeX input encoding name that corresponds to coding system CS.
The entries of `latex-inputenc-coding-alist' are examined in order and
the name of the first one whose coding system has the same base as CS
is returned.  The value is nil if CS is not a coding system or if no
entry matches."
  (catch 'found
    (dolist (pair latex-inputenc-coding-alist)
      (let ((candidate (cdr pair)))
        ;; Compare bases so that EOL variants and aliases of a coding
        ;; system are treated as the same encoding.
        (and (coding-system-p candidate)
             (coding-system-p cs)
             (eq (coding-system-base cs) (coding-system-base candidate))
             (throw 'found (car pair)))))
    ;; Every entry was examined without a match.
    nil))
|
|
108
|
|
(defvar latexenc-dont-use-TeX-master-flag nil
  "Non-nil means don't follow TeX-master to find the coding system.
When this is nil, `latexenc-find-file-coding-system' looks for a
TeX-master or tex-main-file entry in the local variables section of
a file that has no input encoding declaration of its own, and takes
the coding system from the file named there.")

(defvar latexenc-dont-use-tex-guess-main-file-flag nil
  "Non-nil means don't use `tex-guess-main-file' to find the coding system.
When this is nil and no master file has been found by other means,
`latexenc-find-file-coding-system' calls `tex-guess-main-file' (if
that function is defined) and takes the coding system from the file
it returns.")
|
|
114
|
|
115 ;;;###autoload
|
|
(defun latexenc-find-file-coding-system (arg-list)
  "Determine the coding system of a LaTeX file if it uses \"inputenc.sty\".
The mapping from LaTeX's \"inputenc.sty\" encoding names to Emacs
coding system names is determined from `latex-inputenc-coding-alist'.

ARG-LIST is a list whose first element is the I/O operation and whose
second element is its target.  Only the operation
`insert-file-contents' is handled; for any other operation the value
is `undecided'.

The current buffer is searched for an uncommented line that loads the
inputenc package with an option or uses the inputencoding command.  If
one is found, the value is the corresponding coding system, or
`undecided' if the encoding name does not denote a coding system.

Otherwise a master file is looked for, first through a TeX-master or
tex-main-file entry in the local variables section (unless
`latexenc-dont-use-TeX-master-flag' is non-nil), then through
`tex-guess-main-file' (unless
`latexenc-dont-use-tex-guess-main-file-flag' is non-nil).  If a
readable regular master file is found, it is visited and the base of
its coding system is returned; otherwise the value is `undecided'."
  (if (eq (car arg-list) 'insert-file-contents)
      (save-excursion
        ;; Try to find the coding system in this file.
        (goto-char (point-min))
        (if (catch 'cs
              ;; Match the LaTeX command names case-sensitively.
              (let ((case-fold-search nil))
                (while (search-forward "inputenc" nil t)
                  (goto-char (match-beginning 0))
                  (beginning-of-line)
                  ;; `[^%\n]*' cannot cross a `%', so a declaration that
                  ;; follows a comment character on its line is ignored.
                  (if (or (looking-at "[^%\n]*\\\\usepackage\\[\\([^]]*\\)\\]{\\([^}]*,\\)?inputenc\\(,[^}]*\\)?}")
                          (looking-at "[^%\n]*\\\\inputencoding{\\([^}]*\\)}"))
                      (throw 'cs t)
                    ;; Resume after this occurrence.  This relies on the
                    ;; failed `looking-at' calls leaving the match data
                    ;; of `search-forward' intact.
                    (goto-char (match-end 0))))))
            ;; Group 1 of the successful `looking-at' is the encoding name.
            (let* ((match (match-string 1))
                   (sym (or (latexenc-inputenc-to-coding-system match)
                            (intern match))))
              (cond
               ((coding-system-p sym) sym)
               ;; Loading `code-pages' (if available) may define SYM.
               ((and (require 'code-pages nil t) (coding-system-p sym)) sym)
               (t 'undecided)))
          ;; Else try to find it in the master/main file.
          ;; Relative master file names are resolved against the directory
          ;; of the target file.  The target need not be a string (it can
          ;; be e.g. a cons of file name and buffer) and a bare file name
          ;; has no directory part; keep `default-directory' in both cases
          ;; instead of signaling an error or binding it to nil.
          (let ((default-directory
                  (or (and (stringp (nth 1 arg-list))
                           (file-name-directory (nth 1 arg-list)))
                      default-directory))
                latexenc-main-file)
            ;; Is there a TeX-master or tex-main-file in the local variables
            ;; section?
            (unless latexenc-dont-use-TeX-master-flag
              (goto-char (point-max))
              ;; Only the last 3000 characters of the buffer, after the
              ;; last form feed in that range, are searched.
              (search-backward "\n\^L" (max (- (point-max) 3000) (point-min))
                               'move)
              (search-forward "Local Variables:" nil t)
              (when (re-search-forward
                     "^%+ *\\(TeX-master\\|tex-main-file\\): *\"\\(.+\\)\""
                     nil t)
                (let ((file (match-string 2)))
                  ;; Try the name as given, then with each known extension.
                  (dolist (ext `("" ,(if (boundp 'TeX-default-extension)
                                         (concat "." TeX-default-extension)
                                       "")
                                 ".tex" ".ltx" ".dtx" ".drv"))
                    (if (and (null latexenc-main-file) ;Stop at first.
                             (file-exists-p (concat file ext)))
                        (setq latexenc-main-file (concat file ext)))))))
            ;; Try tex-mode's `tex-guess-main-file'.
            (when (and (not latexenc-dont-use-tex-guess-main-file-flag)
                       (not latexenc-main-file))
              ;; Use a separate `when' so the byte-compiler sees the fboundp.
              (when (fboundp 'tex-guess-main-file)
                (let ((tex-start-of-header "\\\\document\\(style\\|class\\)"))
                  (setq latexenc-main-file (tex-guess-main-file)))))
            ;; If we found a master/main file get the coding system from it.
            ;; A directory is readable too, so also require a regular file;
            ;; otherwise `find-file-noselect' would be handed a directory.
            (if (and latexenc-main-file
                     (file-regular-p latexenc-main-file)
                     (file-readable-p latexenc-main-file))
                ;; Disable both master-file lookups while visiting the
                ;; master file, so that this function does not chase
                ;; further master files from there.
                (let* ((latexenc-dont-use-tex-guess-main-file-flag t)
                       (latexenc-dont-use-TeX-master-flag t)
                       (latexenc-main-buffer
                        (find-file-noselect latexenc-main-file t)))
                  (coding-system-base   ;Disregard the EOL part of the CS.
                   (with-current-buffer latexenc-main-buffer
                     (or coding-system-for-write buffer-file-coding-system))))
              'undecided))))
    'undecided))
|
|
180
|
|
181
|
|
182 (provide 'latexenc)
|
|
183
|
|
184 ;; arch-tag: f971bc3e-1fec-4609-8f2f-73dd41ab22e1
|
|
185 ;;; latexenc.el ends here
|