Mercurial > emacs
changeset 43926:11fdbf69b362
New file.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Fri, 15 Mar 2002 13:22:13 +0000 |
parents | c770b7554015 |
children | 98c84c2295f0 |
files | lisp/textmodes/po.el |
diffstat | 1 files changed, 173 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
;;; po.el --- basic support of PO translation files -*- coding: latin-1; -*-

;; Copyright (C) 1995-1998, 2000-2002 Free Software Foundation, Inc.

;; Authors: François Pinard <pinard@iro.umontreal.ca>,
;;          Greg McGary <gkm@magilla.cichlid.com>,
;;          Bruno Haible <bruno@clisp.org>.
;; Keywords: i18n, files

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; This package makes sure visiting PO files decodes them correctly,
;; according to the Charset= header in the PO file.  For more support
;; for editing PO files, see po-mode.el.

;;; Code:

;; Make the cpNNN codesets available.  This is skipped on XEmacs/Lucid,
;; and also wherever the codepage functions are not defined, so that
;; merely loading this file never signals a void-function error.
(if (and (not (string-match "XEmacs\\|Lucid" emacs-version))
         (fboundp 'codepage-setup)
         (fboundp 'cp-supported-codepages))
    (mapc #'codepage-setup (mapcar #'car (cp-supported-codepages))))

(defconst po-content-type-charset-alist
  '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
    (ASCII . undecided)
    (ANSI_X3.4-1968 . undecided)
    (US-ASCII . undecided)
    (ISO-8859-1 . iso-8859-1)
    (ISO_8859-1 . iso-8859-1)
    (ISO-8859-2 . iso-8859-2)
    (ISO_8859-2 . iso-8859-2)
    (ISO-8859-3 . iso-8859-3)
    (ISO_8859-3 . iso-8859-3)
    (ISO-8859-4 . iso-8859-4)
    (ISO_8859-4 . iso-8859-4)
    (ISO-8859-5 . iso-8859-5)
    (ISO_8859-5 . iso-8859-5)
    ;(ISO-8859-6 . ??)
    ;(ISO_8859-6 . ??)
    (ISO-8859-7 . iso-8859-7)
    (ISO_8859-7 . iso-8859-7)
    (ISO-8859-8 . iso-8859-8)
    (ISO_8859-8 . iso-8859-8)
    (ISO-8859-9 . iso-8859-9)
    (ISO_8859-9 . iso-8859-9)
    ;(ISO-8859-13 . ??)
    ;(ISO_8859-13 . ??)
    (ISO-8859-15 . iso-8859-15) ; requires Emacs 21
    (ISO_8859-15 . iso-8859-15) ; requires Emacs 21
    (KOI8-R . koi8-r)
    ;(KOI8-U . ??)
    (CP437 . cp437)             ; requires Emacs 20
    (CP775 . cp775)             ; requires Emacs 20
    (CP850 . cp850)             ; requires Emacs 20
    (CP852 . cp852)             ; requires Emacs 20
    (CP855 . cp855)             ; requires Emacs 20
    ;(CP856 . ??)
    (CP857 . cp857)             ; requires Emacs 20
    (CP861 . cp861)             ; requires Emacs 20
    (CP862 . cp862)             ; requires Emacs 20
    (CP864 . cp864)             ; requires Emacs 20
    (CP865 . cp865)             ; requires Emacs 20
    (CP866 . cp866)             ; requires Emacs 21
    (CP869 . cp869)             ; requires Emacs 20
    ;(CP874 . ??)
    ;(CP922 . ??)
    ;(CP932 . ??)
    ;(CP943 . ??)
    ;(CP949 . ??)
    ;(CP950 . ??)
    ;(CP1046 . ??)
    ;(CP1124 . ??)
    ;(CP1129 . ??)
    (CP1250 . cp1250)           ; requires Emacs 20
    (CP1251 . cp1251)           ; requires Emacs 20
    (CP1252 . iso-8859-1)       ; approximation
    (CP1253 . cp1253)           ; requires Emacs 20
    (CP1254 . iso-8859-9)       ; approximation
    (CP1255 . iso-8859-8)       ; approximation
    ;(CP1256 . ??)
    (CP1257 . cp1257)           ; requires Emacs 20
    (GB2312 . cn-gb-2312)       ; also named 'gb2312' in XEmacs 21 or Emacs 21
                                ; also named 'euc-cn' in Emacs 20 or Emacs 21
    (EUC-JP . euc-jp)
    (EUC-KR . euc-kr)
    ;(EUC-TW . ??)
    (BIG5 . big5)
    ;(BIG5-HKSCS . ??)
    ;(GBK . ??)
    ;(GB18030 . ??)
    (SHIFT_JIS . shift_jis)
    ;(JOHAB . ??)
    (TIS-620 . tis-620)         ; requires Emacs 20 or Emacs 21
    (VISCII . viscii)           ; requires Emacs 20 or Emacs 21
    (UTF-8 . utf-8)             ; requires Mule-UCS in Emacs 20, or Emacs 21
    )
  "Alist mapping PO charset names to Mule coding systems.
Each element is (CHARSET . CODING-SYSTEM), where CHARSET is a symbol
naming a GNU libc/libiconv canonical charset as seen in the
Content-Type header of a PO file, in upper case, and CODING-SYSTEM is
the Mule coding system used to decode it.")

(defun po-find-charset (filename)
  "Return the charset named in the header entry of PO file FILENAME.
The value is the string that follows \"charset=\" on the header's
Content-Type line, or nil if no such line is found.

FILENAME is read literally, in chunks, into the current buffer.  File
offsets are derived from the buffer size, so the current buffer should
be empty on entry; callers normally use a temporary buffer."
  (let ((charset-regexp
         ;; Tolerate any amount of blanks after "Content-Type:".
         "^\"Content-Type:[ \t]*text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
        ;; Non-nil once a read returned fewer bytes than requested,
        ;; i.e. the end of the file has been reached.
        (short-read nil))
    ;; Try the first 4096 bytes.  In case we cannot find the charset value
    ;; within the first 4096 bytes (the PO file might start with a long
    ;; comment) try the next 4096 bytes repeatedly until we'll know for sure
    ;; we've checked the empty header entry entirely.
    (while (not (or short-read (re-search-forward "^msgid" nil t)))
      (save-excursion
        (goto-char (point-max))
        (let ((pair (insert-file-contents-literally filename nil
                                                    (1- (point))
                                                    (1- (+ (point) 4096)))))
          (setq short-read (< (nth 1 pair) 4096)))))
    ;; Always look for the charset in what has been loaded, even when the
    ;; end of the file was hit: a PO file shorter than one chunk is read
    ;; in a single short read and must still be examined.
    (cond ((re-search-forward charset-regexp nil t) (match-string 1))
          ((not short-read)
           ;; We've found the first msgid; maybe, only a part of the msgstr
           ;; value was loaded.  Load the next 1024 bytes; if charset still
           ;; isn't available, give up.
           (save-excursion
             (goto-char (point-max))
             (insert-file-contents-literally filename nil
                                             (1- (point))
                                             (1- (+ (point) 1024))))
           (if (re-search-forward charset-regexp nil t)
               (match-string 1))))))

(defun po-find-file-coding-system-guts (operation filename)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through `file-coding-system-alist', before the file is visited
for real.

OPERATION is the I/O primitive being performed; anything other than
`insert-file-contents' yields nil, as does a FILENAME that does not
exist.  Otherwise the value is a one-element list (DECODING), that is,
a pair whose ENCODING part is nil.  DECODING is looked up in
`po-content-type-charset-alist' under the upper-cased charset name;
failing that, the lower-cased charset name is used if it names a known
coding system, and `no-conversion' otherwise.  A file without a
charset declaration is treated as \"ascii\"."
  (and (eq operation 'insert-file-contents)
       (file-exists-p filename)
       ;; Use the standard macro: `po-with-temp-buffer' belongs to
       ;; po-mode.el and is not defined when only this file is loaded.
       (with-temp-buffer
         (let* ((coding-system-for-read 'no-conversion)
                (charset (or (po-find-charset filename) "ascii"))
                (charset-upper (intern (upcase charset)))
                (charset-lower (intern (downcase charset))))
           (list (or (cdr (assq charset-upper po-content-type-charset-alist))
                     (if (memq charset-lower (coding-system-list))
                         charset-lower
                       'no-conversion)))))))

;;;###autoload
(defun po-find-file-coding-system (arg-list)
  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
Called through `file-coding-system-alist', before the file is visited
for real.  ARG-LIST is a list whose first element is the I/O operation
and whose second element is the file name; both are passed on to
`po-find-file-coding-system-guts'."
  (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
;; This is for XEmacs.
;(defun po-find-file-coding-system (operation filename)
;  "\
;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
;Called through file-coding-system-alist, before the file is visited for real."
;  (po-find-file-coding-system-guts operation filename))

(provide 'po)

;;; po.el ends here