43926
|
1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
|
|
2
|
64751
|
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
|
79719
|
4 ;; 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
43926
|
5
|
|
6 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
|
|
7 ;; Greg McGary <gkm@magilla.cichlid.com>,
|
|
8 ;; Bruno Haible <bruno@clisp.org>.
|
|
9 ;; Keywords: i18n, files
|
|
10
|
|
11 ;; This file is part of GNU Emacs.
|
|
12
|
94670
|
13 ;; GNU Emacs is free software: you can redistribute it and/or modify
|
43926
|
14 ;; it under the terms of the GNU General Public License as published by
|
94670
|
15 ;; the Free Software Foundation, either version 3 of the License, or
|
|
16 ;; (at your option) any later version.
|
43926
|
17
|
|
18 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
21 ;; GNU General Public License for more details.
|
|
22
|
|
23 ;; You should have received a copy of the GNU General Public License
|
94670
|
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
|
43926
|
25
|
|
26 ;;; Commentary:
|
|
27
|
|
28 ;; This package makes sure visiting PO files decodes them correctly,
|
|
29 ;; according to the Charset= header in the PO file. For more support
|
|
30 ;; for editing PO files, see po-mode.el.
|
|
31
|
|
32 ;;; Code:
|
|
33
|
|
(defconst po-content-type-charset-alist
  ;; Every name listed here maps to `undecided'.
  (mapcar (lambda (charset-name) (cons charset-name 'undecided))
          '("ASCII" "ANSI_X3.4-1968" "US-ASCII"))
  "Alist mapping canonical charset names to Emacs coding systems.
Each element has the form (CHARSET-NAME . CODING-SYSTEM).  It holds
GNU libc/libiconv canonical charset names that don't correspond to
a coding system of the same name.")
|
43926
|
40
|
|
(defun po-find-charset (filename)
  "Return PO charset value for FILENAME, or nil if none is found.
The value is the text following \"charset=\" in the Content-Type
line of the PO header.

If FILENAME is a cons cell, its CDR is a buffer that already contains
the PO file (but not yet decoded).  That buffer is made current and
searched from its beginning; nothing is read from disk.

Otherwise the file is read literally, piece by piece, into the
current buffer, starting at the byte offset implied by the end of
that buffer."
  (let ((header-re
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (source-buffer (and (consp filename) (cdr filename)))
        (hit-eof nil))
    (when source-buffer
      (set-buffer source-buffer)
      (goto-char (point-min)))
    ;; Read 4096 bytes at a time until the first "msgid" shows up or the
    ;; file is exhausted.  The PO file might start with a long comment,
    ;; so one chunk is not always enough to reach the header entry.
    ;; With a caller-supplied buffer there is nothing to read: the
    ;; search for "msgid" still runs once (and may move point), then
    ;; the loop stops.
    (while (and (not hit-eof)
                (not (re-search-forward "^msgid" nil t))
                (not source-buffer))
      (save-excursion
        (goto-char (point-max))
        (let ((offset (1- (point))))
          (setq hit-eof
                (< (cadr (insert-file-contents-literally
                          filename nil offset (+ offset 4096)))
                   4096)))))
    (if (re-search-forward header-re nil t)
        (match-string 1)
      ;; Nothing more can be loaded after a short read or when working
      ;; from a caller-supplied buffer.
      (unless (or hit-eof source-buffer)
        ;; We've found the first msgid; maybe only a part of the msgstr
        ;; value was loaded.  Load the next 1024 bytes; if the charset
        ;; still isn't available, give up.
        (save-excursion
          (goto-char (point-max))
          (let ((offset (1- (point))))
            (insert-file-contents-literally
             filename nil offset (+ offset 1024))))
        (when (re-search-forward header-re nil t)
          (match-string 1))))))
|
|
75
|
|
(defun po-lookup-coding-system (charset)
  "Return the coding system whose name matches CHARSET, or nil.
CHARSET is a string.  It is looked up in `coding-system-alist',
ignoring case, first as given and then with every underscore
replaced by a hyphen.  The value is the interned name of the
matching entry."
  (let ((entry (or (assoc-string charset coding-system-alist t)
                   (assoc-string (subst-char-in-string ?_ ?- charset)
                                 coding-system-alist t))))
    (and entry (intern (car entry)))))

(defun po-find-file-coding-system-guts (operation filename)
  "Return a list (CODING-SYSTEM) for OPERATION on PO file FILENAME.
Do so according to FILENAME's declared charset.
FILENAME may be a cons (NAME . BUFFER).  In that case, detect charset
in BUFFER.

Return nil unless OPERATION is `insert-file-contents' and the PO
data is available, that is, BUFFER is live or the file FILENAME
exists.  A PO file that declares no charset is treated as \"ascii\".
A charset for which no coding system is found yields `raw-text'."
  (and
   (eq operation 'insert-file-contents)
   ;; Only a plain file name may reach `file-exists-p'.  A cons whose
   ;; buffer has been killed must yield nil, not be passed on as if it
   ;; were a file name (which would signal a wrong-type-argument error).
   (if (consp filename)
       (buffer-live-p (cdr filename))
     (file-exists-p filename))
   (with-temp-buffer
     (let* ((coding-system-for-read 'no-conversion)
            (charset (or (po-find-charset filename) "ascii"))
            (special (assoc-string charset
                                   po-content-type-charset-alist
                                   t)))
       (list (cond
              ;; Canonical names that are not coding system names.
              (special (cdr special))
              ((po-lookup-coding-system charset))
              ;; In principle we should also check the `mime-charset'
              ;; property of everything in the base coding system
              ;; list, but there should always be a coding system
              ;; corresponding to the MIME name.
              ((featurep 'code-pages)
               ;; Give up.
               'raw-text)
              (t
               ;; Try again with code-pages loaded.  Maybe it's best
               ;; to require it initially?
               (require 'code-pages nil t)
               (or (po-lookup-coding-system charset)
                   'raw-text))))))))
|
43926
|
119
|
|
120 ;;;###autoload
|
|
(defun po-find-file-coding-system (arg-list)
  "Return the coding system to use for a PO file, per its charset.
ARG-LIST is a list whose first element is the I/O operation and
whose second element is the file name (or a cons of name and buffer);
both are handed to `po-find-file-coding-system-guts', whose value
is returned.
Called through `file-coding-system-alist', before the file is visited for real."
  (let ((operation (car arg-list))
        (filename (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation filename)))
|
|
125 ;; This is for XEmacs.
|
|
126 ;(defun po-find-file-coding-system (operation filename)
|
|
127 ; "\
|
|
128 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
|
|
129 ;Called through file-coding-system-alist, before the file is visited for real."
|
|
130 ; (po-find-file-coding-system-guts operation filename))
|
48092
|
131
|
|
132 (provide 'po)
|
|
133
|
93975
|
134 ;; arch-tag: 56748a57-d64c-4200-8f6b-c3a70496eb8c
|
48092
|
135 ;;; po.el ends here
|