Mercurial > emacs
annotate lisp/textmodes/po.el @ 43954:d97aaaffa849
<top-level>: Remove the setup for all known codepages: it seems
to cause crashes in the CCL driver.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Sat, 16 Mar 2002 16:27:36 +0000 |
parents | bd36495e6ade |
children | 2479ec7d435b |
rev | line source |
---|---|
43926 | 1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*- |
2 | |
3 ;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc. | |
4 | |
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>, | |
6 ;; Greg McGary <gkm@magilla.cichlid.com>, | |
7 ;; Bruno Haible <bruno@clisp.org>. | |
8 ;; Keywords: i18n, files | |
9 | |
10 ;; This file is part of GNU Emacs. | |
11 | |
12 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
13 ;; it under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; GNU Emacs is distributed in the hope that it will be useful, | |
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 ;; GNU General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
29 ;; This package makes sure visiting PO files decodes them correctly, | |
30 ;; according to the Charset= header in the PO file. For more support | |
31 ;; for editing PO files, see po-mode.el. | |
32 | |
33 ;;; Code: | |
34 | |
(defconst po-content-type-charset-alist
  ;; Emacs 21 doesn't support every encoding; charsets without a usable
  ;; coding system are kept below as commented-out `??' placeholders.
  '(;; Plain ASCII and its aliases.
    (ASCII          . undecided)
    (ANSI_X3.4-1968 . undecided)
    (US-ASCII       . undecided)
    ;; ISO 8859 family, in both the `-' and the `_' spelling.
    (ISO-8859-1     . iso-8859-1)
    (ISO_8859-1     . iso-8859-1)
    (ISO-8859-2     . iso-8859-2)
    (ISO_8859-2     . iso-8859-2)
    (ISO-8859-3     . iso-8859-3)
    (ISO_8859-3     . iso-8859-3)
    (ISO-8859-4     . iso-8859-4)
    (ISO_8859-4     . iso-8859-4)
    (ISO-8859-5     . iso-8859-5)
    (ISO_8859-5     . iso-8859-5)
    ;;(ISO-8859-6   . ??)
    ;;(ISO_8859-6   . ??)
    (ISO-8859-7     . iso-8859-7)
    (ISO_8859-7     . iso-8859-7)
    (ISO-8859-8     . iso-8859-8)
    (ISO_8859-8     . iso-8859-8)
    (ISO-8859-9     . iso-8859-9)
    (ISO_8859-9     . iso-8859-9)
    ;;(ISO-8859-13  . ??)
    ;;(ISO_8859-13  . ??)
    (ISO-8859-15    . iso-8859-15)      ; requires Emacs 21
    (ISO_8859-15    . iso-8859-15)      ; requires Emacs 21
    ;; KOI8.
    (KOI8-R         . koi8-r)
    ;;(KOI8-U       . ??)
    ;; Numbered codepages.
    (CP437          . cp437)            ; requires Emacs 20
    (CP775          . cp775)            ; requires Emacs 20
    (CP850          . cp850)            ; requires Emacs 20
    (CP852          . cp852)            ; requires Emacs 20
    (CP855          . cp855)            ; requires Emacs 20
    ;;(CP856        . ??)
    (CP857          . cp857)            ; requires Emacs 20
    (CP861          . cp861)            ; requires Emacs 20
    (CP862          . cp862)            ; requires Emacs 20
    (CP864          . cp864)            ; requires Emacs 20
    (CP865          . cp865)            ; requires Emacs 20
    (CP866          . cp866)            ; requires Emacs 21
    (CP869          . cp869)            ; requires Emacs 20
    ;;(CP874        . ??)
    ;;(CP922        . ??)
    ;;(CP932        . ??)
    ;;(CP943        . ??)
    ;;(CP949        . ??)
    ;;(CP950        . ??)
    ;;(CP1046       . ??)
    ;;(CP1124       . ??)
    ;;(CP1129       . ??)
    (CP1250         . cp1250)           ; requires Emacs 20
    (CP1251         . cp1251)           ; requires Emacs 20
    (CP1252         . iso-8859-1)       ; approximation
    (CP1253         . cp1253)           ; requires Emacs 20
    (CP1254         . iso-8859-9)       ; approximation
    (CP1255         . iso-8859-8)       ; approximation
    ;;(CP1256       . ??)
    (CP1257         . cp1257)           ; requires Emacs 20
    ;; East Asian encodings.
    (GB2312         . cn-gb-2312)       ; also named 'gb2312' in XEmacs 21
                                        ; or Emacs 21; also named 'euc-cn'
                                        ; in Emacs 20 or Emacs 21
    (EUC-JP         . euc-jp)
    (EUC-KR         . euc-kr)
    ;;(EUC-TW       . ??)
    (BIG5           . big5)
    ;;(BIG5-HKSCS   . ??)
    ;;(GBK          . ??)
    ;;(GB18030      . ??)
    (SHIFT_JIS      . shift_jis)
    ;;(JOHAB        . ??)
    ;; Thai, Vietnamese and Unicode.
    (TIS-620        . tis-620)          ; requires Emacs 20 or Emacs 21
    (VISCII         . viscii)           ; requires Emacs 20 or Emacs 21
    (UTF-8          . utf-8)            ; requires Mule-UCS in Emacs 20,
                                        ; or Emacs 21
    )
  "Alist mapping PO charset names to Mule coding systems.
Each element has the form (CHARSET . CODING-SYSTEM).  CHARSET is a
symbol spelling, in upper case, a GNU libc/libiconv canonical charset
name as seen in the Content-Type header; CODING-SYSTEM is the Mule
coding system that charset is converted into.")
111 | |
(defun po-find-charset (filename)
  "Return the charset named in the header of PO file FILENAME, or nil.
The value is the text following \"charset=\" on the Content-Type line,
as a string spelled exactly as in the file.  Return nil when no such
line is found in the portion of the file that gets examined.

This function inserts leading portions of FILENAME, read literally, at
the end of the current buffer and searches forward from point.  It
derives file offsets from buffer positions, so it expects the current
buffer to hold nothing but what it has inserted itself; callers should
make an empty scratch buffer current first.

It is deliberately not a command: it requires FILENAME and modifies the
current buffer."
  (let ((charset-regexp
         ;; Whitespace after \"Content-Type:\" is optional and may be
         ;; any run of blanks or tabs.
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        (chunk-size 4096)               ; bytes read per loop iteration
        (tail-size 1024)                ; final read past the first msgid
        (short-read nil))               ; non-nil once end of file was hit
    ;; Try the first `chunk-size' bytes.  In case we cannot find the
    ;; first msgid within them (the PO file might start with a long
    ;; comment), read the next chunk repeatedly until we know for sure
    ;; we've reached the empty header entry or the end of the file.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        ;; Buffer positions start at 1 and file offsets at 0, hence
        ;; the `1-' on both ends of the byte range.
        (let ((pair (insert-file-contents-literally
                     filename nil
                     (1- (point))
                     (1- (+ (point) chunk-size)))))
          ;; Fewer bytes than requested means the file is exhausted.
          (setq short-read (< (nth 1 pair) chunk-size)))))
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ;; The whole file is in the buffer; there is nothing more to
          ;; read, so the header has no charset.
          (short-read nil)
          ;; We've found the first msgid; maybe only a part of the
          ;; msgstr value was loaded.  Load the next `tail-size' bytes;
          ;; if the charset still isn't available, give up.
          (t (save-excursion
               (goto-char (point-max))
               (insert-file-contents-literally
                filename nil
                (1- (point))
                (1- (+ (point) tail-size))))
             (if (re-search-forward charset-regexp nil t)
                 (match-string 1))))))
141 | |
(defun po-find-file-coding-system-guts (operation filename)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
The pair is built as a one-element list, so only DECODING is set and
ENCODING is nil.  Return nil unless OPERATION is `insert-file-contents'
and FILENAME exists.
Called through file-coding-system-alist, before the file is visited for real."
  (if (and (eq operation 'insert-file-contents)
           (file-exists-p filename))
      (with-temp-buffer
        ;; The header is scanned in a scratch buffer with conversion
        ;; turned off; a file without a charset counts as \"ascii\".
        (let* ((coding-system-for-read 'no-conversion)
               (name (or (po-find-charset filename) "ascii"))
               (table-entry (assq (intern (upcase name))
                                  po-content-type-charset-alist))
               (same-named (intern (downcase name))))
          (list
           (cond
            ;; First choice: the explicit charset table.
            ((cdr table-entry))
            ;; Second choice: a coding system named like the charset.
            ((memq same-named (coding-system-list)) same-named)
            ;; Otherwise leave the bytes alone.
            (t 'no-conversion)))))))
157 | |
158 ;;;###autoload | |
(defun po-find-file-coding-system (arg-list)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
ARG-LIST holds the operation as its first element and the file name as
its second; both are handed to `po-find-file-coding-system-guts'.
Called through file-coding-system-alist, before the file is visited for real."
  (let ((operation (nth 0 arg-list))
        (filename (nth 1 arg-list)))
    (po-find-file-coding-system-guts operation filename)))
164 ;; This is for XEmacs. | |
165 ;(defun po-find-file-coding-system (operation filename) | |
166 ; "\ | |
167 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset. | |
168 ;Called through file-coding-system-alist, before the file is visited for real." | |
169 ; (po-find-file-coding-system-guts operation filename)) |