annotate lisp/textmodes/po.el @ 89852:b636ae1109c6

(MAX_ANNOTATION_LENGTH): Adjusted for the change of annotation data format. (ADD_ANNOTATION_DATA, ADD_COMPOSITION_DATA, ADD_CHARSET_DATA): Change arguments FROM and TO to single argument NCHARS. Caller changed. (decode_coding_utf_8): Pay attention to coding->charbuf_used. (decode_coding_utf_16, decode_coding_emacs_mule) (decode_coding_iso_2022, decode_coding_sjis, decode_coding_big5) (decode_coding_ccl, decode_coding_charset): Likewise. (get_translation): New function. (produce_chars): New arguments translation_table and last_block. Translate characters here. Return number of carryover chars. Caller changed. (produce_composition): New argument pos. Caller changed. Adjusted for the change of annotation data format. (produce_charset, produce_annotation): Likewise. (decode_coding): Don't call translate_chars.
author Kenichi Handa <handa@m17n.org>
date Wed, 10 Mar 2004 23:11:18 +0000
parents 375f2633d815
children 68c22ea6027c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
1 ;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
2
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
3 ;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
4
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
5 ;; Authors: François Pinard <pinard@iro.umontreal.ca>,
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
6 ;; Greg McGary <gkm@magilla.cichlid.com>,
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
7 ;; Bruno Haible <bruno@clisp.org>.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
8 ;; Keywords: i18n, files
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
9
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
10 ;; This file is part of GNU Emacs.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
11
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
12 ;; GNU Emacs is free software; you can redistribute it and/or modify
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
13 ;; it under the terms of the GNU General Public License as published by
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
14 ;; the Free Software Foundation; either version 2, or (at your option)
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
15 ;; any later version.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
16
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
17 ;; GNU Emacs is distributed in the hope that it will be useful,
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
18 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
20 ;; GNU General Public License for more details.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
21
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
22 ;; You should have received a copy of the GNU General Public License
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
23 ;; along with GNU Emacs; see the file COPYING. If not, write to the
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
25 ;; Boston, MA 02111-1307, USA.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
26
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
27 ;;; Commentary:
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
28
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
29 ;; This package makes sure visiting PO files decodes them correctly,
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
30 ;; according to the Charset= header in the PO file. For more support
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
31 ;; for editing PO files, see po-mode.el.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
32
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
33 ;;; Code:
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
34
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
35 (defconst po-content-type-charset-alist
48092
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
36 '(("ASCII" . undecided)
43958
2479ec7d435b (po-content-type-charset-alist): Convert the
Eli Zaretskii <eliz@gnu.org>
parents: 43954
diff changeset
37 ("ANSI_X3.4-1968" . undecided)
48092
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
38 ("US-ASCII" . undecided))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
39 "Alist of coding system versus GNU libc/libiconv canonical charset name.
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
40 Contains canonical charset names that don't correspond to coding systems.")
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
41
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
42 (defun po-find-charset (filename)
48092
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
43 "Return PO charset value for FILENAME."
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
44 (let ((charset-regexp
51890
9b78d0da1a28 (po-find-charset): White space at the start of the Content-Type field body is
Juanma Barranquero <lekktu@gmail.com>
parents: 48092
diff changeset
45 "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
46 (short-read nil))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
47 ;; Try the first 4096 bytes. In case we cannot find the charset value
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
48 ;; within the first 4096 bytes (the PO file might start with a long
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
49 ;; comment), try the next 4096 bytes repeatedly until we know for sure
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
50 ;; we've checked the empty header entry entirely.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
51 (while (not (or short-read (re-search-forward "^msgid" nil t)))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
52 (save-excursion
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
53 (goto-char (point-max))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
54 (let ((pair (insert-file-contents-literally filename nil
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
55 (1- (point))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
56 (1- (+ (point) 4096)))))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
57 (setq short-read (< (nth 1 pair) 4096)))))
43952
bd36495e6ade (po-find-charset): Search for Charset= header even if we've read less than
Eli Zaretskii <eliz@gnu.org>
parents: 43937
diff changeset
58 (cond ((re-search-forward charset-regexp nil t) (match-string 1))
bd36495e6ade (po-find-charset): Search for Charset= header even if we've read less than
Eli Zaretskii <eliz@gnu.org>
parents: 43937
diff changeset
59 (short-read nil)
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
60 ;; We've found the first msgid; maybe only a part of the msgstr
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
61 ;; value was loaded. Load the next 1024 bytes; if charset still
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
62 ;; isn't available, give up.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
63 (t (save-excursion
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
64 (goto-char (point-max))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
65 (insert-file-contents-literally filename nil
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
66 (1- (point))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
67 (1- (+ (point) 1024))))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
68 (if (re-search-forward charset-regexp nil t)
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
69 (match-string 1))))))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
70
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
71 (defun po-find-file-coding-system-guts (operation filename)
48092
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
72 "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
73 Do so according to FILENAME's declared charset."
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
74 (and
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
75 (eq operation 'insert-file-contents)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
76 (file-exists-p filename)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
77 (with-temp-buffer
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
78 (let* ((coding-system-for-read 'no-conversion)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
79 (charset (or (po-find-charset filename) "ascii"))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
80 assoc)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
81 (list (cond
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
82 ((setq assoc
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
83 (assoc-ignore-case charset
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
84 po-content-type-charset-alist))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
85 (cdr assoc))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
86 ((or (setq assoc (assoc-ignore-case charset coding-system-alist))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
87 (setq assoc
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
88 (assoc-ignore-case (subst-char-in-string ?_ ?-
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
89 charset)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
90 coding-system-alist)))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
91 (intern (car assoc)))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
92 ;; In principle we should also check the `mime-charset'
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
93 ;; property of everything in the base coding system
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
94 ;; list, but there should always be a coding system
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
95 ;; corresponding to the MIME name.
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
96 ((featurep 'code-pages)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
97 ;; Give up.
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
98 'raw-text)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
99 (t
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
100 ;; Try again with code-pages loaded. Maybe it's best
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
101 ;; to require it initially?
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
102 (require 'code-pages nil t)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
103 (if (or
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
104 (setq assoc (assoc-ignore-case charset coding-system-alist))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
105 (setq assoc (assoc-ignore-case (subst-char-in-string
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
106 ?_ ?- charset)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
107 coding-system-alist)))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
108 (intern (car assoc))
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
109 'raw-text))))))))
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
110
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
111 ;;;###autoload
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
112 (defun po-find-file-coding-system (arg-list)
48092
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
113 "Return a (DECODING . ENCODING) pair, according to PO file's charset.
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
114 Called through `file-coding-system-alist', before the file is visited for real."
43926
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
115 (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
116 ;; This is for XEmacs.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
117 ;(defun po-find-file-coding-system (operation filename)
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
118 ; "\
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
119 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
120 ;Called through file-coding-system-alist, before the file is visited for real."
11fdbf69b362 New file.
Eli Zaretskii <eliz@gnu.org>
parents:
diff changeset
121 ; (po-find-file-coding-system-guts operation filename))
48092
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
122
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
123 (provide 'po)
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
124
10d4bf044393 (po-content-type-charset-alist): Delete most
Dave Love <fx@gnu.org>
parents: 47655
diff changeset
125 ;;; po.el ends here