Mercurial > emacs
annotate lisp/textmodes/po.el @ 67410:103eae0b4ac7
*** empty log message ***
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Fri, 09 Dec 2005 00:15:56 +0000 |
parents | 5b1a238fcbb4 |
children | 067115a6e738 2d92f5c9d6ae |
rev | line source |
---|---|
43926 | 1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*- |
2 | |
64751
5b1a238fcbb4
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64084
diff
changeset
|
3 ;; Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, |
5b1a238fcbb4
Update years in copyright notice; nfc.
Thien-Thi Nguyen <ttn@gnuvola.org>
parents:
64084
diff
changeset
|
4 ;; 2004, 2005 Free Software Foundation, Inc. |
43926 | 5 |
6 ;; Authors: François Pinard <pinard@iro.umontreal.ca>, | |
7 ;; Greg McGary <gkm@magilla.cichlid.com>, | |
8 ;; Bruno Haible <bruno@clisp.org>. | |
9 ;; Keywords: i18n, files | |
10 | |
11 ;; This file is part of GNU Emacs. | |
12 | |
13 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
14 ;; it under the terms of the GNU General Public License as published by | |
15 ;; the Free Software Foundation; either version 2, or (at your option) | |
16 ;; any later version. | |
17 | |
18 ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 ;; GNU General Public License for more details. | |
22 | |
23 ;; You should have received a copy of the GNU General Public License | |
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
64084 | 25 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
26 ;; Boston, MA 02110-1301, USA. | |
43926 | 27 |
28 ;;; Commentary: | |
29 | |
30 ;; This package makes sure visiting PO files decodes them correctly, | |
31 ;; according to the Charset= header in the PO file. For more support | |
32 ;; for editing PO files, see po-mode.el. | |
33 | |
34 ;;; Code: | |
35 | |
36 (defconst po-content-type-charset-alist | |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
37 '(("ASCII" . undecided) |
43958
2479ec7d435b
(po-content-type-charset-alist): Convert the
Eli Zaretskii <eliz@gnu.org>
parents:
43954
diff
changeset
|
38 ("ANSI_X3.4-1968" . undecided) |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
39 ("US-ASCII" . undecided)) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
40 "Alist of coding system versus GNU libc/libiconv canonical charset name. |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
41 Contains canonical charset names that don't correspond to coding systems.") |
43926 | 42 |
43 (defun po-find-charset (filename) | |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
44 "Return PO charset value for FILENAME." |
43926 | 45 (let ((charset-regexp |
51890
9b78d0da1a28
(po-find-charset): White space at the start of the Content-Type field body is
Juanma Barranquero <lekktu@gmail.com>
parents:
48092
diff
changeset
|
46 "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"") |
43926 | 47 (short-read nil)) |
48 ;; Try the first 4096 bytes. In case we cannot find the charset value | |
49 ;; within the first 4096 bytes (the PO file might start with a long | |
50 ;; comment) try the next 4096 bytes repeatedly until we know for sure | |
51 ;; we've checked the empty header entry entirely. | |
52 (while (not (or short-read (re-search-forward "^msgid" nil t))) | |
53 (save-excursion | |
54 (goto-char (point-max)) | |
55 (let ((pair (insert-file-contents-literally filename nil | |
56 (1- (point)) | |
57 (1- (+ (point) 4096))))) | |
58 (setq short-read (< (nth 1 pair) 4096))))) | |
43952
bd36495e6ade
(po-find-charset): Search for Charset= header even if we've read less than
Eli Zaretskii <eliz@gnu.org>
parents:
43937
diff
changeset
|
59 (cond ((re-search-forward charset-regexp nil t) (match-string 1)) |
bd36495e6ade
(po-find-charset): Search for Charset= header even if we've read less than
Eli Zaretskii <eliz@gnu.org>
parents:
43937
diff
changeset
|
60 (short-read nil) |
43926 | 61 ;; We've found the first msgid; maybe only part of the msgstr |
62 ;; value was loaded. Load the next 1024 bytes; if charset still | |
63 ;; isn't available, give up. | |
64 (t (save-excursion | |
65 (goto-char (point-max)) | |
66 (insert-file-contents-literally filename nil | |
67 (1- (point)) | |
68 (1- (+ (point) 1024)))) | |
69 (if (re-search-forward charset-regexp nil t) | |
70 (match-string 1)))))) | |
71 | |
72 (defun po-find-file-coding-system-guts (operation filename) | |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
73 "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME. |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
74 Do so according to FILENAME's declared charset." |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
75 (and |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
76 (eq operation 'insert-file-contents) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
77 (file-exists-p filename) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
78 (with-temp-buffer |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
79 (let* ((coding-system-for-read 'no-conversion) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
80 (charset (or (po-find-charset filename) "ascii")) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
81 assoc) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
82 (list (cond |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
83 ((setq assoc |
62120
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
84 (assoc-string charset |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
85 po-content-type-charset-alist |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
86 t)) |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
87 (cdr assoc)) |
62120
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
88 ((or (setq assoc (assoc-string charset coding-system-alist t)) |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
89 (setq assoc |
62120
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
90 (assoc-string (subst-char-in-string ?_ ?- |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
91 charset) |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
92 coding-system-alist t))) |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
93 (intern (car assoc))) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
94 ;; In principle we should also check the `mime-charset' |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
95 ;; property of everything in the base coding system |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
96 ;; list, but there should always be a coding system |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
97 ;; corresponding to the MIME name. |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
98 ((featurep 'code-pages) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
99 ;; Give up. |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
100 'raw-text) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
101 (t |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
102 ;; Try again with code-pages loaded. Maybe it's best |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
103 ;; to require it initially? |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
104 (require 'code-pages nil t) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
105 (if (or |
62120
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
106 (setq assoc (assoc-string charset coding-system-alist t)) |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
107 (setq assoc (assoc-string (subst-char-in-string |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
108 ?_ ?- charset) |
912c2cf79005
(po-find-file-coding-system-guts): Replace `assoc-ignore-case' by
Juanma Barranquero <lekktu@gmail.com>
parents:
52401
diff
changeset
|
109 coding-system-alist t))) |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
110 (intern (car assoc)) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
111 'raw-text)))))))) |
43926 | 112 |
113 ;;;###autoload | |
114 (defun po-find-file-coding-system (arg-list) | |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
115 "Return a (DECODING . ENCODING) pair, according to PO file's charset. |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
116 Called through `file-coding-system-alist', before the file is visited for real." |
43926 | 117 (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list)))) |
118 ;; This is for XEmacs. | |
119 ;(defun po-find-file-coding-system (operation filename) | |
120 ; "\ | |
121 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset. | |
122 ;Called through file-coding-system-alist, before the file is visited for real." | |
123 ; (po-find-file-coding-system-guts operation filename)) | |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
124 |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
125 (provide 'po) |
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
126 |
52401 | 127 ;;; arch-tag: 56748a57-d64c-4200-8f6b-c3a70496eb8c |
48092
10d4bf044393
(po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents:
47655
diff
changeset
|
128 ;;; po.el ends here |