changeset 48092:10d4bf044393

(po-content-type-charset-alist): Delete most entries. (po-find-charset): Doc fix. Remove interactive spec. (po): Provide. (po-find-file-coding-system-guts): Rewritten to use coding-system-alist and the code-pages feature instead of cp-supported-codepages and codepage-setup.
author Dave Love <fx@gnu.org>
date Wed, 30 Oct 2002 19:47:53 +0000
parents 21fa9981d2ba
children b7cdece1cebf
files lisp/textmodes/po.el
diffstat 1 files changed, 49 insertions(+), 113 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/textmodes/po.el	Wed Oct 30 19:26:23 2002 +0000
+++ b/lisp/textmodes/po.el	Wed Oct 30 19:47:53 2002 +0000
@@ -33,89 +33,14 @@
 ;;; Code:
 
 (defconst po-content-type-charset-alist
-  '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
-    ("ASCII" . undecided)
+  '(("ASCII" . undecided)
     ("ANSI_X3.4-1968" . undecided)
-    ("US-ASCII" . undecided)
-    ("ISO-8859-1" . iso-8859-1)
-    ("ISO_8859-1" . iso-8859-1)
-    ("ISO-8859-2" . iso-8859-2)
-    ("ISO_8859-2" . iso-8859-2)
-    ("ISO-8859-3" . iso-8859-3)
-    ("ISO_8859-3" . iso-8859-3)
-    ("ISO-8859-4" . iso-8859-4)
-    ("ISO_8859-4" . iso-8859-4)
-    ("ISO-8859-5" . iso-8859-5)
-    ("ISO_8859-5" . iso-8859-5)
-    ;("ISO-8859-6" . ??)
-    ;("ISO_8859-6" . ??)
-    ("ISO-8859-7" . iso-8859-7)
-    ("ISO_8859-7" . iso-8859-7)
-    ("ISO-8859-8" . iso-8859-8)
-    ("ISO_8859-8" . iso-8859-8)
-    ("ISO-8859-9" . iso-8859-9)
-    ("ISO_8859-9" . iso-8859-9)
-    ;("ISO-8859-13" . ??)
-    ;("ISO_8859-13" . ??)
-    ;("ISO-8859-14" . ??)
-    ;("ISO_8859-14" . ??)
-    ("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
-    ("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
-    ("KOI8-R" . koi8-r)
-    ;("KOI8-U" . ??)
-    ;("KOI8-T" . ??)
-    ("CP437" . cp437) ; requires Emacs 20
-    ("CP775" . cp775) ; requires Emacs 20
-    ("CP850" . cp850) ; requires Emacs 20
-    ("CP852" . cp852) ; requires Emacs 20
-    ("CP855" . cp855) ; requires Emacs 20
-    ;("CP856" . ??)
-    ("CP857" . cp857) ; requires Emacs 20
-    ("CP861" . cp861) ; requires Emacs 20
-    ("CP862" . cp862) ; requires Emacs 20
-    ("CP864" . cp864) ; requires Emacs 20
-    ("CP865" . cp865) ; requires Emacs 20
-    ("CP866" . cp866) ; requires Emacs 21
-    ("CP869" . cp869) ; requires Emacs 20
-    ;("CP874" . ??)
-    ;("CP922" . ??)
-    ;("CP932" . ??)
-    ;("CP943" . ??)
-    ;("CP949" . ??)
-    ;("CP950" . ??)
-    ;("CP1046" . ??)
-    ;("CP1124" . ??)
-    ;("CP1129" . ??)
-    ("CP1250" . cp1250) ; requires Emacs 20
-    ("CP1251" . cp1251) ; requires Emacs 20
-    ("CP1252" . iso-8859-1) ; approximation
-    ("CP1253" . cp1253) ; requires Emacs 20
-    ("CP1254" . iso-8859-9) ; approximation
-    ("CP1255" . iso-8859-8) ; approximation
-    ;("CP1256" . ??)
-    ("CP1257" . cp1257) ; requires Emacs 20
-    ("GB2312" . cn-gb-2312)  ; also named 'gb2312' in XEmacs 21 or Emacs 21
-                           ; also named 'euc-cn' in Emacs 20 or Emacs 21
-    ("EUC-JP" . euc-jp)
-    ("EUC-KR" . euc-kr)
-    ;("EUC-TW" . ??)
-    ("BIG5" . big5)
-    ;("BIG5-HKSCS" . ??)
-    ;("GBK" . ??)
-    ;("GB18030" . ??)
-    ("SHIFT_JIS" . shift_jis)
-    ;("JOHAB" . ??)
-    ("TIS-620" . tis-620)    ; requires Emacs 20 or Emacs 21
-    ("VISCII" . viscii)      ; requires Emacs 20 or Emacs 21
-    ;("GEORGIAN-PS" . ??)
-    ("UTF-8" . utf-8)        ; requires Mule-UCS in Emacs 20, or Emacs 21
-    )
-  "How to convert a GNU libc/libiconv canonical charset name as seen in
-Content-Type into a Mule coding system.")
+    ("US-ASCII" . undecided))
+  "Alist mapping GNU libc/libiconv canonical charset names to coding systems.
+Contains canonical charset names that don't correspond to coding systems.")
 
 (defun po-find-charset (filename)
-  "Return PO file charset value."
-  (interactive)
+  "Return PO charset value for FILENAME."
   (let ((charset-regexp
 	 "^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
 	(short-read nil))
@@ -144,42 +69,49 @@
 		 (match-string 1))))))
 
 (defun po-find-file-coding-system-guts (operation filename)
-  "\
-Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
-Called through file-coding-system-alist, before the file is visited for real."
-  (and (eq operation 'insert-file-contents)
-       (file-exists-p filename)
-       (with-temp-buffer
-	 (let* ((coding-system-for-read 'no-conversion)
-                (charset (or (po-find-charset filename) "ascii"))
-                (charset-upper (upcase charset))
-                (charset-lower (downcase charset))
-		(candidate
-		 (cdr (assoc charset-upper po-content-type-charset-alist)))
-		(try (or candidate (intern-soft charset-lower))))
-           (list (cond ((and try (coding-system-p try))
-			try)
-		       ((and try
-			     (string-match "\\`cp[1-9][0-9][0-9]?\\'"
-					   (symbol-name try))
-			     (assoc (substring (symbol-name try) 2)
-				    (cp-supported-codepages)))
-			(codepage-setup (substring (symbol-name try) 2))
-			try)
-		       ((and (string-match "\\`cp[1-9][0-9][0-9]?\\'"
-					   charset-lower)
-			     (assoc (substring charset-lower 2)
-				    (cp-supported-codepages)))
-			(codepage-setup (substring charset-lower 2))
-			(intern charset-lower))
-		       (t
-			'no-conversion)))))))
+  "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
+Do so according to FILENAME's declared charset."
+  (and
+   (eq operation 'insert-file-contents)
+   (file-exists-p filename)
+   (with-temp-buffer
+     (let* ((coding-system-for-read 'no-conversion)
+	    (charset (or (po-find-charset filename) "ascii"))
+	    assoc)
+       (list (cond
+	      ((setq assoc
+		     (assoc-ignore-case charset
+					po-content-type-charset-alist))
+	       (cdr assoc))
+	      ((or (setq assoc (assoc-ignore-case charset coding-system-alist))
+		   (setq assoc
+			 (assoc-ignore-case (subst-char-in-string ?_ ?-
+								  charset)
+					    coding-system-alist)))
+	       (intern (car assoc)))
+	      ;; In principle we should also check the `mime-charset'
+	      ;; property of everything in the base coding system
+	      ;; list, but there should always be a coding system
+	      ;; corresponding to the MIME name.
+	      ((featurep 'code-pages)
+	       ;; Give up.
+	       'raw-text)
+	      (t
+	       ;; Try again with code-pages loaded.  Maybe it's best
+	       ;; to require it initially?
+	       (require 'code-pages nil t)
+	       (if (or
+		    (setq assoc (assoc-ignore-case charset coding-system-alist))
+		    (setq assoc (assoc-ignore-case (subst-char-in-string
+						    ?_ ?- charset)
+						   coding-system-alist)))
+		   (intern (car assoc))
+		 'raw-text))))))))
 
 ;;;###autoload
 (defun po-find-file-coding-system (arg-list)
-  "\
-Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
-Called through file-coding-system-alist, before the file is visited for real."
+  "Return a (DECODING . ENCODING) pair, according to PO file's charset.
+Called through `file-coding-system-alist', before the file is visited for real."
   (po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list))))
 ;; This is for XEmacs.
 ;(defun po-find-file-coding-system (operation filename)
@@ -187,3 +119,7 @@
 ;Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
 ;Called through file-coding-system-alist, before the file is visited for real."
 ;  (po-find-file-coding-system-guts operation filename))
+
+(provide 'po)
+
+;;; po.el ends here