emacs: lisp/international/mule-cmds.el comparison

comparison lisp/international/mule-cmds.el @ 83269:48ba3f89c89f

Merged from miles@gnu.org--gnu-2005 (patch 37-38, 162-182) Patches applied: * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-162 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-163 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-164 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-165 Merge from gnus--rel--5.10 * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-166 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-167 Tweak obsolete function/variable warning message * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-168 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-169 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-170 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-171 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-172 Merge from gnus--rel--5.10 * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-173 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-174 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-175 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-176 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-177 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-178 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-179 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-180 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-181 Update from CVS * miles@gnu.org--gnu-2005/emacs--cvs-trunk--0--patch-182 Update from CVS * miles@gnu.org--gnu-2005/gnus--rel--5.10--patch-37 Update from CVS * miles@gnu.org--gnu-2005/gnus--rel--5.10--patch-38 Update from CVS git-archimport-id: lorentey@elte.hu--2004/emacs--multi-tty--0--patch-309

author	Karoly Lorentey <lorentey@elte.hu>
date	Wed, 16 Mar 2005 16:06:15 +0000
parents	9684495d72bc 10307e6c7baa
children	886073e54ccb

comparison

equal deleted inserted replaced

-:7ea3d7198adc
+:48ba3f89c89f
 ;; uses. In most cases the first two letters are the same, so
 ;; most of the regexps in locale-language-names work. Japanese
 ;; and Chinese are exceptions, which are listed in the
 ;; non-standard section at the bottom of locale-language-names.
-; aa Afar
+("aa_DJ" . "Latin-1") ; Afar
-; ab Abkhazian
+("aa" . "UTF-8")
+;; ab Abkhazian
 ("af" . "Latin-1") ; Afrikaans
-("am" . "Ethiopic") ; Amharic
+("am" "Ethiopic" utf-8) ; Amharic
+("an" . "Latin-9") ; Aragonese
 ; ar Arabic glibc uses 8859-6
 ; as Assamese
 ; ay Aymara
-; az Azerbaijani
+("az" . "UTF-8") ; Azerbaijani
 ; ba Bashkir
-("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
+("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
-("bg" . "Bulgarian") ; Bulgarian
+("bg" "Bulgarian" cp1251) ; Bulgarian
 ; bh Bihari
 ; bi Bislama
-; bn Bengali, Bangla
+("bn" . "UTF-8") ; Bengali, Bangla
 ("bo" . "Tibetan")
 ("br" . "Latin-1") ; Breton
 ("bs" . "Latin-2") ; Bosnian
+("byn" . "UTF-8")  ; Bilin; Blin
 ("ca" . "Latin-1") ; Catalan
 ; co Corsican
-("cs" . "Czech")
+("cs" "Czech" iso-8859-2)
-("cy" . "Welsh") ; Welsh [glibc uses Latin-8.  Did this change?]
+("cy" "Welsh" iso-8859-14)
 ("da" . "Latin-1") ; Danish
-("de" . "German")
+("de" "German" iso-8859-1)
 ; dz Bhutani
-("el" . "Greek")
+("el" "Greek" iso-8859-7)
 ;; Users who specify "en" explicitly typically want Latin-1, not ASCII.
 ;; That's actually what the GNU locales define, modulo things like
 ;; en_IN -- fx.
+("en_IN" "English" utf-8) ; glibc uses utf-8 for English in India
 ("en" . "Latin-1") ; English
 ("eo" . "Latin-3") ; Esperanto
-("es" . "Spanish")
+("es" "Spanish" iso-8859-1)
-("et" . "Latin-4") ; Estonian
+("et" . "Latin-1") ; Estonian
 ("eu" . "Latin-1") ; Basque
-; fa Persian glibc uses utf-8
+("fa" . "UTF-8") ; Persian
 ("fi" . "Latin-1") ; Finnish
-; fj Fiji
+("fj" . "Latin-1") ; Fiji
 ("fo" . "Latin-1") ; Faroese
-("fr" . "French") ; French
+("fr" "French" iso-8859-1) ; French
 ("fy" . "Latin-1") ; Frisian
 ("ga" . "Latin-1") ; Irish Gaelic (new orthography)
-("gd" . "Latin-1") ; Scots Gaelic
+("gd" . "Latin-9") ; Scots Gaelic
-("gl" . "Latin-1") ; Galician
+("gez" "Ethiopic" utf-8) ; Geez
+("gl" . "Latin-1") ; Gallegan; Galician
 ; gn Guarani
-; gu Gujarati
+("gu" . "UTF-8") ; Gujarati
-("gv" . "Latin-8") ; Manx Gaelic  glibc uses 8859-1
+("gv" . "Latin-1") ; Manx Gaelic
 ; ha Hausa
-("he" . "Hebrew")
+("he" "Hebrew" iso-8859-8)
-("hi" . "Devanagari") ; Hindi  glibc uses utf-8
+("hi" "Devanagari" utf-8) ; Hindi
-("hr" . "Croatian") ; Croatian
+("hr" "Croatian" iso-8859-2) ; Croatian
 ("hu" . "Latin-2") ; Hungarian
 ; hy Armenian
 ; ia Interlingua
 ("id" . "Latin-1") ; Indonesian
 ; ie Interlingue
 ; ik Inupiak
 ("is" . "Latin-1") ; Icelandic
-("it" . "Italian") ; Italian
+("it" "Italian" iso-8859-1) ; Italian
 ; iu Inuktitut
-("ja" . "Japanese")
+("iw" "Hebrew" iso-8859-8)
+("ja" "Japanese" euc-jp)
 ; jw Javanese
-("ka" . "Georgian") ; Georgian
+("ka" "Georgian" georgian-ps) ; Georgian
 ; kk Kazakh
 ("kl" . "Latin-1") ; Greenlandic
 ; km Cambodian
-; kn Kannada
+("kn" "Kannada" utf-8)
-("ko" . "Korean")
+("ko" "Korean" euc-kr)
 ; ks Kashmiri
 ; ku Kurdish
 ("kw" . "Latin-1") ; Cornish
 ; ky Kirghiz
 ("la" . "Latin-1") ; Latin
 ("lb" . "Latin-1") ; Luxemburgish
+("lg" . "Latin-6") ; Ganda
 ; ln Lingala
-("lo" . "Lao") ; Laothian
+("lo" "Lao" utf-8) ; Laothian
-("lt" . "Lithuanian")
+("lt" "Lithuanian" iso-8859-13)
 ("lv" . "Latvian") ; Latvian, Lettish
 ; mg Malagasy
 ("mi" . "Latin-7") ; Maori
-("mk" . "Cyrillic-ISO") ; Macedonian
+("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
-; ml Malayalam
+("ml" "Malayalam" utf-8)
-; mn Mongolian
+("mn" . "UTF-8") ; Mongolian
 ; mo Moldavian
-("mr" . "Devanagari") ; Marathi  glibc uses utf-8
+("mr" "Devanagari" utf-8) ; Marathi
 ("ms" . "Latin-1") ; Malay
 ("mt" . "Latin-3") ; Maltese
 ; my Burmese
 ; na Nauru
-("ne" . "Devanagari") ; Nepali
+("nb" . "Latin-1") ; Norwegian
-("nl" . "Dutch")
+("ne" "Devanagari" utf-8) ; Nepali
+("nl" "Dutch" iso-8859-1)
 ("no" . "Latin-1") ; Norwegian
 ("oc" . "Latin-1") ; Occitan
-; om (Afan) Oromo
+("om_ET" . "UTF-8") ; (Afan) Oromo
+("om" . "Latin-1") ; (Afan) Oromo
 ; or Oriya
-; pa Punjabi
+("pa" . "UTF-8") ; Punjabi
 ("pl" . "Latin-2") ; Polish
 ; ps Pashto, Pushto
 ("pt" . "Latin-1") ; Portuguese
 ; qu Quechua
 ("rm" . "Latin-1") ; Rhaeto-Romanic
 ; rn Kirundi
-("ro" . "Romanian")
+("ro" "Romanian" iso-8859-2)
-("ru.*[_.]koi8" . "Russian")
+("ru_RU" "Russian" iso-8859-5)
-("ru" . "Cyrillic-ISO") ; Russian
+("ru_UA" "Russian" koi8-u)
 ; rw Kinyarwanda
 ("sa" . "Devanagari") ; Sanskrit
 ; sd Sindhi
-; se   Northern Sami
+("se" . "UTF-8") ; Northern Sami
 ; sg Sangho
 ("sh" . "Latin-2") ; Serbo-Croatian
 ; si Sinhalese
-("sk" . "Slovak")
+("sid" . "UTF-8") ; Sidamo
-("sl" . "Slovenian")
+("sk" "Slovak" iso-8859-2)
+("sl" "Slovenian" iso-8859-2)
 ; sm Samoan
 ; sn Shona
-; so Somali
+("so_ET" "UTF-8") ; Somali
+("so" "Latin-1") ; Somali
 ("sq" . "Latin-1") ; Albanian
+("sr_YU@cyrillic" . "Cyrillic-ISO")	; Serbian (Cyrillic alphabet)
 ("sr" . "Latin-2") ; Serbian (Latin alphabet)
-("sr_YU@cyrillic" . "Cyrillic-ISO")	; per glibc
 ; ss Siswati
-; st Sesotho
+("st" . "Latin-1") ;  Sesotho
 ; su Sundanese
-("sv" . "Swedish") ; Swedish
+("sv" "Swedish" iso-8859-1)		; Swedish
 ("sw" . "Latin-1") ; Swahili
-; ta Tamil  glibc uses utf-8
+("ta" "Tamil" utf-8)
-; te Telugu  glibc uses utf-8
+("te" . "UTF-8") ; Telugu
-("tg" . "Tajik")
+("tg" "Tajik" koi8-t)
-("th" . "Thai")
+("th" "Thai" tis-620)
-; ti Tigrinya
+("ti" "Ethiopic" utf-8) ; Tigrinya
+("tig_ER" . "UTF-8") ; Tigre
 ; tk Turkmen
 ("tl" . "Latin-1") ; Tagalog
 ; tn Setswana
 ; to Tonga
-("tr" . "Turkish")
+("tr" "Turkish" iso-8859-9)
 ; ts Tsonga
-; tt Tatar
+("tt" . "UTF-8") ; Tatar
 ; tw Twi
 ; ug Uighur
-("uk" . "Ukrainian") ; Ukrainian
+("uk" "Ukrainian" koi8-u)
-; ur Urdu  glibc uses utf-8
+("ur" . "UTF-8") ; Urdu
+("uz_UZ@cyrillic" . "UTF-8"); Uzbek
 ("uz" . "Latin-1") ; Uzbek
-("vi" . "Vietnamese") ;  glibc uses utf-8
+("vi" "Vietnamese" utf-8)
 ; vo Volapuk
 ("wa" . "Latin-1") ; Walloon
 ; wo Wolof
-; xh Xhosa
+("xh" . "Latin-1") ; Xhosa
 ("yi" . "Windows-1255") ; Yiddish
 ; yo Yoruba
 ; za Zhuang
+("zh_HK" . "Chinese-Big5")
-; glibc:
+("zh_TW" . "Chinese-Big5")
+("zh_CN" . "Chinese-GB")
+("zh" . "Chinese-GB")
 ; zh_CN.GB18030/GB18030 \
 ; zh_CN.GBK/GBK \
 ; zh_HK/BIG5-HKSCS \
+("zu" . "Latin-1") ; Zulu
-("zh.*[._]big5" . "Chinese-BIG5")
-("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0
-("zh_tw" . "Chinese-CNS") ; glibc uses big5
-("zh_tw[._]euc-tw" . "Chinese-EUC-TW")
-("zh" . "Chinese-GB")
-; zu Zulu
 ;; ISO standard locales
 ("c$" . "ASCII")
 ("posix$" . "ASCII")
 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
 ("jp" . "Japanese") ; e.g. MS Windows
 ("chs" . "Chinese-GB") ; MS Windows Chinese Simplified
 ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
 ))
-"List of pairs of locale regexps and language names.
+"Alist of locale regexps vs the corresponding languages and coding systems.
-The first element whose locale regexp matches the start of a downcased locale
+Each element has this form:
-specifies the language name corresponding to that locale.
+\(LOCALE-REGEXP LANG-ENV CODING-SYSTEM)
-If the language name is nil, there is no corresponding language environment.")
+The first element whose LOCALE-REGEXP matches the start of a
+downcased locale specifies the LANG-ENV \(language environment)
+and CODING-SYSTEM corresponding to that locale.  If there is no
+appropriate language environment, the element may have this form:
+\(LOCALE-REGEXP . LANG-ENV)
+In this case, LANG-ENV is one of the generic language environments for a
+specific encoding such as \"Latin-1\" and \"UTF-8\".")
 (defconst locale-charset-language-names
 (purecopy
 '((".*8859[-_]?1\\>" . "Latin-1")
 (".*8859[-_]?2\\>" . "Latin-2")
 ;; the currency, rather than the charset.)
 (".*@euro\\>" . "Latin-9")))
 "List of pairs of locale regexps and charset language names.
 The first element whose locale regexp matches the start of a downcased locale
 specifies the language name whose charset corresponds to that locale.
-This language name is used if its charsets disagree with the charsets of
+This language name is used if the locale is not listed in
-the language name that would otherwise be used for this locale.")
+`locale-language-names'.")
 (defconst locale-preferred-coding-systems
 (purecopy
-'(("ja.*[._]euc" . japanese-iso-8bit)
+'((".*8859[-_]?1\\>" . iso-8859-1)
+(".*8859[-_]?2\\>" . iso-8859-2)
+(".*8859[-_]?3\\>" . iso-8859-3)
+(".*8859[-_]?4\\>" . iso-8859-4)
+(".*8859[-_]?9\\>" . iso-8859-9)
+(".*8859[-_]?14\\>" . iso-8859-14)
+(".*8859[-_]?15\\>" . iso-8859-15)
+(".*utf\\(?:-?8\\)?" . utf-8)
+;; utf-8@euro exists, so put this after utf-8.  (@euro really
+;; specifies the currency, rather than the charset.)
+(".*@euro" . iso-8859-15)
+("koi8-?r" . koi8-r)
+("koi8-?u" . koi8-u)
+("tcvn" . tcvn)
+("big5" . big5)
+("euc-?tw" . euc-tw)
+;; We don't support GBK, but as it is upward compatible with
+;; GB-2312, we set the default coding system to gb2312.
+("gbk" . gb2312)
+;; We don't support BIG5-HKSCS, but as it is upward compatible with
+;; BIG5, we set the default coding system to big5.
+("big5hkscs" . big5)
+("ja.*[._]euc" . japanese-iso-8bit)
 ("ja.*[._]jis7" . iso-2022-jp)
 ("ja.*[._]pck" . japanese-shift-jis)
 ("ja.*[._]sjis" . japanese-shift-jis)
 ("jpn" . japanese-shift-jis)   ; MS-Windows uses this.
-(".*[._]utf" . utf-8)))
+))
 "List of pairs of locale regexps and preferred coding systems.
 The first element whose locale regexp matches the start of a downcased locale
-specifies the coding system to prefer when using that locale.")
+specifies the coding system to prefer when using that locale.
+This coding system is used if the locale specifies a specific charset.")
 (defun locale-name-match (key alist)
 "Search for KEY in ALIST, which should be a list of regexp-value pairs.
 Return the value corresponding to the first regexp that matches the
 start of KEY, or nil if there is no match."
 	    (charset-language-name
 	     (locale-name-match locale locale-charset-language-names))
 	    (coding-system
 	     (get-locale-coding-system locale)))
-	;; Give preference to charset-language-name over language-name.
+	(if (consp language-name)
-	(if (and charset-language-name
+	    ;; locale-language-names specify both lang-env and coding.
-		 (not
+	    ;; But what is specified in locale-preferred-coding-systems
-		  (equal (get-language-info language-name 'charset)
+	    ;; has higher priority.
-			 (get-language-info charset-language-name 'charset))))
+	    (setq coding-system (or coding-system
-	    (setq language-name charset-language-name))
+				    (nth 1 language-name))
+		  language-name (car language-name))
+	  ;; Otherwise, if locale is not listed in locale-language-names,
+	  ;; use what is listed in locale-charset-language-names.
+	  (if (not language-name)
+	      (setq language-name charset-language-name)))
 	(when language-name
 	  ;; Set up for this character set.  This is now the right way
 	  ;; to do it for both unibyte and multibyte modes.
 	  (set-language-environment language-name)
 	  (setq locale-coding-system
 		(car (get-language-info language-name 'coding-priority))))
-	(when coding-system
+	(when (and coding-system
+		   (not (coding-system-equal coding-system
+					     locale-coding-system)))
 	  (prefer-coding-system coding-system)
 	  (setq locale-coding-system coding-system))))
 ;; On Windows, override locale-coding-system,
 ;; keyboard-coding-system with system codepage.  Note:

Mercurial > emacs

comparison lisp/international/mule-cmds.el @ 83269:48ba3f89c89f