# HG changeset patch # User Paul Eggert # Date 940632693 0 # Node ID ca31ffbed3185e339b3a3bad760d71645bc36212 # Parent 7d8f9a678c698743c5262d93f78d52eb0715dfbb * (locale-language-names): Use Latin-1 (not Latin-3) for Afrikaans, Galician. Use Latin-5 (not Cyrillic-ISO) for Byelorussian, Bulgarian, Macedonian, Russian, Ukrainian, Serbian (Cyrillic alphabet). Use Latin-8 for Welsh. Use Latin-1 for English if "en" is explicitly specified. Use Latin-1 for Scots Gaelic, Tagalog. Use Latin-1 (not Latin-4) for Greenlandic. Use Latin-1 (not Latin-2) for Albanian. (locale-preferred-coding-systems, locale-language-names): Remove generic ISO 8859 locales; locale-charset-language-names now does this. (locale-charset-language-names): New variable. (set-locale-environment): Use language name specified by locale-charset-language-names if its charsets disagree with the language name specified by locale-language-names. diff -r 7d8f9a678c69 -r ca31ffbed318 lisp/international/mule-cmds.el --- a/lisp/international/mule-cmds.el Fri Oct 22 22:51:11 1999 +0000 +++ b/lisp/international/mule-cmds.el Fri Oct 22 22:51:33 1999 +0000 @@ -1471,15 +1471,15 @@ ;; ; aa Afar ; ab Abkhazian - ("af" . "Latin-3") ; Afrikaans + ("af" . "Latin-1") ; Afrikaans ("am" . "Ethiopic") ; Amharic ; ar Arabic ; as Assamese ; ay Aymara ; az Azerbaijani ; ba Bashkir - ("be" . "Cyrillic-ISO") ; Byelorussian - ("bg" . "Cyrillic-ISO") ; Bulgarian + ("be" . "Latin-5") ; Byelorussian + ("bg" . "Latin-5") ; Bulgarian ; bh Bihari ; bi Bislama ; bn Bengali, Bangla @@ -1488,12 +1488,13 @@ ("ca" . "Latin-1") ; Catalan ; co Corsican ("cs" . "Czech") - ; cy Welsh + ("cy" . "Latin-8") ; Welsh ("da" . "Latin-1") ; Danish ("de" . "German") ; dz Bhutani ("el" . "Greek") - ("en" . "English") + ;; Users who specify "en" explicitly typically want Latin-1, not ASCII. + ("en" . "Latin-1") ; English ("eo" . "Latin-3") ; Esperanto ("es" . "Latin-1") ; Spanish ("et" . "Latin-4") ; Estonian @@ -1504,9 +1505,9 @@ ("fo" . 
"Latin-1") ; Faroese ("fr" . "Latin-1") ; French ("fy" . "Latin-1") ; Frisian - ("ga" . "Latin-1") ; Irish - ; gd Scots Gaelic - ("gl" . "Latin-3") ; Galician + ("ga" . "Latin-1") ; Irish Gaelic (new orthography) + ("gd" . "Latin-1") ; Scots Gaelic + ("gl" . "Latin-1") ; Galician ; gn Guarani ; gu Gujarati ; ha Hausa @@ -1526,7 +1527,7 @@ ; jw Javanese ; ka Georgian ; kk Kazakh - ("kl" . "Latin-4") ; Greenlandic + ("kl" . "Latin-1") ; Greenlandic ; km Cambodian ; kn Kannada ("ko" . "Korean") @@ -1540,7 +1541,7 @@ ("lv" . "Latin-4") ; Latvian, Lettish ; mg Malagasy ; mi Maori - ("mk" . "Cyrillic-ISO") ; Macedonian + ("mk" . "Latin-5") ; Macedonian ; ml Malayalam ; mn Mongolian ; mo Moldavian @@ -1560,11 +1561,11 @@ ; ps Pashto, Pushto ("pt" . "Latin-1") ; Portuguese ; qu Quechua - ("rm" . "Latin-1") ; Rhaeto-Romance + ("rm" . "Latin-1") ; Rhaeto-Romanic ; rn Kirundi ("ro" . "Romanian") ("ru.*[_.]koi8" . "Cyrillic-KOI8") ; Russian - ("ru" . "Cyrillic-ISO") ; Russian + ("ru" . "Latin-5") ; Russian ; rw Kinyarwanda ("sa" . "Devanagari") ; Sanskrit ; sd Sindhi @@ -1576,7 +1577,7 @@ ; sm Samoan ; sn Shona ; so Somali - ("sq" . "Latin-2") ; Albanian + ("sq" . "Latin-1") ; Albanian ("sr" . "Latin-2") ; Serbian (Latin alphabet) ; ss Siswati ; st Sesotho @@ -1589,7 +1590,7 @@ ("th" . "Thai") ; ti Tigrinya ; tk Turkmen - ; tl Tagalog + ("tl" . "Latin-1") ; Tagalog ; tn Setswana ; to Tonga ("tr" . "Latin-5") ; Turkish @@ -1597,7 +1598,7 @@ ; tt Tatar ; tw Twi ; ug Uighur - ("uk" . "Cyrillic-ISO") ; Ukrainian + ("uk" . "Latin-5") ; Ukrainian ; ur Urdu ; uz Uzbek ("vi" . "Vietnamese") @@ -1617,15 +1618,6 @@ ("c$" . "ASCII") ("posix$" . "ASCII") - ;; generic ISO 8859 locales - (".*8859[-_]?1" . "Latin-1") - (".*8859[-_]?2" . "Latin-2") - (".*8859[-_]?3" . "Latin-3") - (".*8859[-_]?4" . "Latin-4") - (".*8859[-_]?9" . "Latin-5") - (".*8859[-_]?14" . "Latin-8") - (".*8859[-_]?15" . 
"Latin-9") - ;; The "IPA" Emacs language environment does not correspond ;; to any ISO 639 code, so let it stand for itself. ("ipa$" . "IPA") @@ -1634,32 +1626,38 @@ ("cz" . "Czech") ; e.g. Solaris 2.6 ("ee" . "Latin-4") ; Estonian, e.g. X11R6.4 ("iw" . "Hebrew") ; e.g. X11R6.4 - ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4 + ("sp" . "Latin-5") ; Serbian (Cyrillic alphabet), e.g. X11R6.4 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6 ) "List of pairs of locale regexps and language names. -The first element whose locale regexp matches the start of a downcased -locale specifies the language name corresponding to that locale. +The first element whose locale regexp matches the start of a downcased locale +specifies the language name corresponding to that locale. If the language name is nil, there is no corresponding language environment.") +(defvar locale-charset-language-names + '((".*8859[-_]?1\\>" . "Latin-1") + (".*8859[-_]?2\\>" . "Latin-2") + (".*8859[-_]?3\\>" . "Latin-3") + (".*8859[-_]?4\\>" . "Latin-4") + (".*8859[-_]?9\\>" . "Latin-5") + (".*8859[-_]?14\\>" . "Latin-8") + (".*8859[-_]?15\\>" . "Latin-9") + ) + "List of pairs of locale regexps and charset language names. +The first element whose locale regexp matches the start of a downcased locale +specifies the language name whose charsets correspond to that locale. +This language name is used if its charsets disagree with the charsets of +the language name that would otherwise be used for this locale.") + (defvar locale-preferred-coding-systems '(("ja.*[._]euc" . japanese-iso-8bit) ("ja.*[._]jis7" . iso-2022-jp) ("ja.*[._]pck" . japanese-shift-jis) ("ja.*[._]sjis" . japanese-shift-jis) - (".*[._].*8859[-_]?1" . iso-8859-1) - (".*[._].*8859[-_]?2" . iso-8859-2) - (".*[._].*8859[-_]?3" . iso-8859-3) - (".*[._].*8859[-_]?4" . iso-8859-4) - (".*[._].*8859[-_]?5" . iso-8859-5) - (".*[._].*8859[-_]?7" . iso-8859-7) - (".*[._].*8859[-_]?8" . iso-8859-8) - (".*[._].*8859[-_]?9" . 
iso-8859-9) ) - "List of pairs of locale regexps and coding systems. -The first element whose locale regexp matches the start of a downcased -locale specifies the coding system to prefer when using that locale. -If the coding system is nil, there is no special preference.") + "List of pairs of locale regexps and preferred coding systems. +The first element whose locale regexp matches the start of a downcased locale +specifies the coding system to prefer when using that locale.") (defun locale-name-match (key alist) "Search for KEY in ALIST, which should be a list of regexp-value pairs. @@ -1707,10 +1705,19 @@ (setq locale-name (downcase locale-name)) - (let ((language-name (locale-name-match - locale-name locale-language-names)) - (coding-system (locale-name-match - locale-name locale-preferred-coding-systems))) + (let ((language-name + (locale-name-match locale-name locale-language-names)) + (charset-language-name + (locale-name-match locale-name locale-charset-language-names)) + (coding-system + (locale-name-match locale-name locale-preferred-coding-systems))) + + (if (and charset-language-name + (not + (equal (get-language-info language-name 'charset) + (get-language-info charset-language-name 'charset)))) + (setq language-name charset-language-name)) + (when language-name ;; Set up for this character set. This is now the right way