comparison lisp/textmodes/po.el @ 43926:11fdbf69b362

New file.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 15 Mar 2002 13:22:13 +0000
parents
children e572fbd0d2c8
comparison
equal deleted inserted replaced
43925:c770b7554015 43926:11fdbf69b362
1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
2
3 ;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc.
4
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
7 ;; Bruno Haible <bruno@clisp.org>.
8 ;; Keywords: i18n, files
9
10 ;; This file is part of GNU Emacs.
11
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
13 ;; it under the terms of the GNU General Public License as published by
14 ;; the Free Software Foundation; either version 2, or (at your option)
15 ;; any later version.
16
17 ;; GNU Emacs is distributed in the hope that it will be useful,
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ;; GNU General Public License for more details.
21
22 ;; You should have received a copy of the GNU General Public License
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25 ;; Boston, MA 02111-1307, USA.
26
27 ;;; Commentary:
28
29 ;; This package makes sure visiting PO files decodes them correctly,
30 ;; according to the Charset= header in the PO file. For more support
31 ;; for editing PO files, see po-mode.el.
32
33 ;;; Code:
34
35 ; Make the cpnnn codesets available.
36 (if (not (string-match "XEmacs\\|Lucid" emacs-version))
37 (mapc #'codepage-setup (mapcar #'car (cp-supported-codepages))))
38
39 (defconst po-content-type-charset-alist
40 '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
41 (ASCII . undecided)
42 (ANSI_X3.4-1968 . undecided)
43 (US-ASCII . undecided)
44 (ISO-8859-1 . iso-8859-1)
45 (ISO_8859-1 . iso-8859-1)
46 (ISO-8859-2 . iso-8859-2)
47 (ISO_8859-2 . iso-8859-2)
48 (ISO-8859-3 . iso-8859-3)
49 (ISO_8859-3 . iso-8859-3)
50 (ISO-8859-4 . iso-8859-4)
51 (ISO_8859-4 . iso-8859-4)
52 (ISO-8859-5 . iso-8859-5)
53 (ISO_8859-5 . iso-8859-5)
54 ;(ISO-8859-6 . ??)
55 ;(ISO_8859-6 . ??)
56 (ISO-8859-7 . iso-8859-7)
57 (ISO_8859-7 . iso-8859-7)
58 (ISO-8859-8 . iso-8859-8)
59 (ISO_8859-8 . iso-8859-8)
60 (ISO-8859-9 . iso-8859-9)
61 (ISO_8859-9 . iso-8859-9)
62 ;(ISO-8859-13 . ??)
63 ;(ISO_8859-13 . ??)
64 (ISO-8859-15 . iso-8859-15) ; requires Emacs 21
65 (ISO_8859-15 . iso-8859-15) ; requires Emacs 21
66 (KOI8-R . koi8-r)
67 ;(KOI8-U . ??)
68 (CP437 . cp437) ; requires Emacs 20
69 (CP775 . cp775) ; requires Emacs 20
70 (CP850 . cp850) ; requires Emacs 20
71 (CP852 . cp852) ; requires Emacs 20
72 (CP855 . cp855) ; requires Emacs 20
73 ;(CP856 . ??)
74 (CP857 . cp857) ; requires Emacs 20
75 (CP861 . cp861) ; requires Emacs 20
76 (CP862 . cp862) ; requires Emacs 20
77 (CP864 . cp864) ; requires Emacs 20
78 (CP865 . cp865) ; requires Emacs 20
79 (CP866 . cp866) ; requires Emacs 21
80 (CP869 . cp869) ; requires Emacs 20
81 ;(CP874 . ??)
82 ;(CP922 . ??)
83 ;(CP932 . ??)
84 ;(CP943 . ??)
85 ;(CP949 . ??)
86 ;(CP950 . ??)
87 ;(CP1046 . ??)
88 ;(CP1124 . ??)
89 ;(CP1129 . ??)
90 (CP1250 . cp1250) ; requires Emacs 20
91 (CP1251 . cp1251) ; requires Emacs 20
92 (CP1252 . iso-8859-1) ; approximation
93 (CP1253 . cp1253) ; requires Emacs 20
94 (CP1254 . iso-8859-9) ; approximation
95 (CP1255 . iso-8859-8) ; approximation
96 ;(CP1256 . ??)
97 (CP1257 . cp1257) ; requires Emacs 20
98 (GB2312 . cn-gb-2312) ; also named 'gb2312' in XEmacs 21 or Emacs 21
99 ; also named 'euc-cn' in Emacs 20 or Emacs 21
100 (EUC-JP . euc-jp)
101 (EUC-KR . euc-kr)
102 ;(EUC-TW . ??)
103 (BIG5 . big5)
104 ;(BIG5-HKSCS . ??)
105 ;(GBK . ??)
106 ;(GB18030 . ??)
107 (SHIFT_JIS . shift_jis)
108 ;(JOHAB . ??)
109 (TIS-620 . tis-620) ; requires Emacs 20 or Emacs 21
110 (VISCII . viscii) ; requires Emacs 20 or Emacs 21
111 (UTF-8 . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
112 )
113 "How to convert a GNU libc/libiconv canonical charset name as seen in
114 Content-Type into a Mule coding system.")
115
116 (defun po-find-charset (filename)
117 "Return PO file charset value."
118 (interactive)
119 (let ((charset-regexp
120 "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
121 (short-read nil))
122 ;; Try the first 4096 bytes. In case we cannot find the charset value
123 ;; within the first 4096 bytes (the PO file might start with a long
124 ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
125 ;; we've checked the empty header entry entirely.
126 (while (not (or short-read (re-search-forward "^msgid" nil t)))
127 (save-excursion
128 (goto-char (point-max))
129 (let ((pair (insert-file-contents-literally filename nil
130 (1- (point))
131 (1- (+ (point) 4096)))))
132 (setq short-read (< (nth 1 pair) 4096)))))
133 (cond (short-read nil)
134 ((re-search-forward charset-regexp nil t) (match-string 1))
135 ;; We've found the first msgid; maybe, only a part of the msgstr
136 ;; value was loaded. Load the next 1024 bytes; if charset still
137 ;; isn't available, give up.
138 (t (save-excursion
139 (goto-char (point-max))
140 (insert-file-contents-literally filename nil
141 (1- (point))
142 (1- (+ (point) 1024))))
143 (if (re-search-forward charset-regexp nil t)
144 (match-string 1))))))
145
146 (defun po-find-file-coding-system-guts (operation filename)
147 "\
148 Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
149 Called through file-coding-system-alist, before the file is visited for real."
150 (and (eq operation 'insert-file-contents)
151 (file-exists-p filename)
152 (po-with-temp-buffer
153 (let* ((coding-system-for-read 'no-conversion)
154 (charset (or (po-find-charset filename) "ascii"))
155 (charset-upper (intern (upcase charset)))
156 (charset-lower (intern (downcase charset))))
157 (list (or (cdr (assq charset-upper po-content-type-charset-alist))
158 (if (memq charset-lower (coding-system-list))
159 charset-lower
160 'no-conversion)))))))
161
162 ;;;###autoload
163 (defun po-find-file-coding-system (arg-list)
164 "\
165 Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
166 Called through file-coding-system-alist, before the file is visited for real."
167 (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
168 ;; This is for XEmacs.
169 ;(defun po-find-file-coding-system (operation filename)
170 ; "\
171 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
172 ;Called through file-coding-system-alist, before the file is visited for real."
173 ; (po-find-file-coding-system-guts operation filename))