Mercurial > emacs
annotate lisp/language/indian.el @ 112241:98276eb8b0ed
Fix handling of unknown MIME type (bug#7651).
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Tue, 04 Jan 2011 16:10:30 +0900 |
parents | 376148b31b5e |
children | ef719132ddfa |
rev | line source |
---|---|
92067 | 1 ;;; indian.el --- Indian languages support -*- coding: utf-8; -*- |
17052 | 2 |
112218
376148b31b5e
Add 2011 to FSF/AIST copyright years.
Glenn Morris <rgm@gnu.org>
parents:
108331
diff
changeset
|
3 ;; Copyright (C) 1997, 1999, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
74544 | 4 ;; Free Software Foundation, Inc. |
112218
376148b31b5e
Add 2011 to FSF/AIST copyright years.
Glenn Morris <rgm@gnu.org>
parents:
108331
diff
changeset
|
5 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
62396 | 6 ;; National Institute of Advanced Industrial Science and Technology (AIST) |
7 ;; Registration Number H14PRO021 | |
17052 | 8 |
92067 | 9 ;; Maintainer: Kenichi Handa <handa@m17n.org> |
10 ;; KAWABATA, Taichi <kawabata@m17n.org> | |
42312 | 11 ;; Keywords: multilingual, i18n, Indian |
17052 | 12 |
13 ;; This file is part of GNU Emacs. | |
14 | |
94665
55b7f25d920a
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
15 ;; GNU Emacs is free software: you can redistribute it and/or modify |
17052 | 16 ;; it under the terms of the GNU General Public License as published by |
94665
55b7f25d920a
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
17 ;; the Free Software Foundation, either version 3 of the License, or |
55b7f25d920a
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
18 ;; (at your option) any later version. |
17052 | 19 |
20 ;; GNU Emacs is distributed in the hope that it will be useful, | |
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
23 ;; GNU General Public License for more details. | |
24 | |
25 ;; You should have received a copy of the GNU General Public License | |
94665
55b7f25d920a
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
93975
diff
changeset
|
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
17052 | 27 |
28 ;;; Commentary: | |
29 | |
92067 | 30 ;; This file contains definitions of Indian language environments, and |
31 ;; setups for displaying the scripts used there. | |
17052 | 32 |
33 ;;; Code: | |
34 | |
88414
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
35 (define-coding-system 'in-is13194-devanagari |
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
36 "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." |
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
37 :coding-type 'iso-2022 |
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
38 :mnemonic ?D |
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
39 :designation [ascii indian-is13194 nil nil] |
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
40 :charset-list '(ascii indian-is13194) |
89239
976a610d28e1
(in-is13194-devanagari): Fix
Kenichi Handa <handa@m17n.org>
parents:
89095
diff
changeset
|
41 :post-read-conversion 'in-is13194-post-read-conversion |
976a610d28e1
(in-is13194-devanagari): Fix
Kenichi Handa <handa@m17n.org>
parents:
89095
diff
changeset
|
42 :pre-write-conversion 'in-is13194-pre-write-conversion) |
17052 | 43 |
88414
fad0f879877f
Call define-coding-system instead of make-coding-system. All CCL program deleted.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
44 (define-coding-system-alias 'devanagari 'in-is13194-devanagari) |
46743
d341cac00baf
(in-is13194): Give correct `safe-chars' property.
Kenichi Handa <handa@m17n.org>
parents:
42312
diff
changeset
|
45 |
92067 | 46 (set-language-info-alist |
47 "Devanagari" '((charset unicode) | |
48 (coding-system utf-8) | |
49 (coding-priority utf-8) | |
50 (input-method . "dev-aiba") | |
51 (documentation . "\ | |
52 Such languages using Devanagari script as Hindi and Marathi | |
53 are supported in this language environment.")) | |
54 '("Indian")) | |
50256
104cf4fa2a8e
(indian-font-foundry): New variable.
Kenichi Handa <handa@m17n.org>
parents:
49704
diff
changeset
|
55 |
92067 | 56 (set-language-info-alist |
57 "Bengali" '((charset unicode) | |
58 (coding-system utf-8) | |
59 (coding-priority utf-8) | |
60 (input-method . "bengali-itrans") | |
61 (documentation . "\ | |
62 Such languages using Bengali script as Bengali and Assamese | |
63 are supported in this language environment.")) | |
64 '("Indian")) | |
50256
104cf4fa2a8e
(indian-font-foundry): New variable.
Kenichi Handa <handa@m17n.org>
parents:
49704
diff
changeset
|
65 |
92067 | 66 (set-language-info-alist |
67 "Punjabi" '((charset unicode) | |
68 (coding-system utf-8) | |
69 (coding-priority utf-8) | |
70 (input-method . "punjabi-itrans") | |
71 (documentation . "\ | |
72 North Indian language Punjabi is supported in this language environment.")) | |
73 '("Indian")) | |
74 | |
75 (set-language-info-alist | |
76 "Gujarati" '((charset unicode) | |
77 (coding-system utf-8) | |
78 (coding-priority utf-8) | |
79 (input-method . "gujarati-itrans") | |
80 (documentation . "\ | |
81 North Indian language Gujarati is supported in this language environment.")) | |
82 '("Indian")) | |
50256
104cf4fa2a8e
(indian-font-foundry): New variable.
Kenichi Handa <handa@m17n.org>
parents:
49704
diff
changeset
|
83 |
92067 | 84 (set-language-info-alist |
85 "Oriya" '((charset unicode) | |
86 (coding-system utf-8) | |
87 (coding-priority utf-8) | |
88 (input-method . "oriya-itrans") | |
89 (documentation . "\ | |
90 Such languages using Oriya script as Oriya, Khonti, and Santali | |
91 are supported in this language environment.")) | |
92 '("Indian")) | |
57537
b9687fdf909d
(indian-script-language-alist): Swap value and doc.
Kim F. Storm <storm@cua.dk>
parents:
52401
diff
changeset
|
93 |
92067 | 94 (set-language-info-alist |
95 "Tamil" '((charset unicode) | |
96 (coding-system utf-8) | |
97 (coding-priority utf-8) | |
98 (input-method . "tamil-itrans") | |
99 (documentation . "\ | |
100 South Indian Language Tamil is supported in this language environment.")) | |
101 '("Indian")) | |
57537
b9687fdf909d
(indian-script-language-alist): Swap value and doc.
Kim F. Storm <storm@cua.dk>
parents:
52401
diff
changeset
|
102 |
92067 | 103 (set-language-info-alist |
104 "Telugu" '((charset unicode) | |
105 (coding-system utf-8) | |
106 (coding-priority utf-8) | |
107 (input-method . "telugu-itrans") | |
108 (documentation . "\ | |
109 South Indian Language Telugu is supported in this language environment.")) | |
110 '("Indian")) | |
17052 | 111 |
92067 | 112 (set-language-info-alist |
113 "Kannada" '((charset unicode) | |
114 (coding-system utf-8) | |
115 (coding-priority utf-8) | |
116 (input-method . "kannada-itrans") | |
117 (sample-text . "Kannada (ಕನ್ನಡ) ನಮಸ್ಕಾರ") | |
118 (documentation . "\ | |
119 Kannada language and script is supported in this language | |
120 environment.")) | |
121 '("Indian")) | |
17052 | 122 |
92067 | 123 (set-language-info-alist |
124 "Malayalam" '((charset unicode) | |
125 (coding-system utf-8) | |
126 (coding-priority utf-8) | |
127 (input-method . "malayalam-itrans") | |
128 (documentation . "\ | |
129 South Indian language Malayalam is supported in this language environment.")) | |
130 '("Indian")) | |
17052 | 131 |
106541
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
132 ;; Replace mnemonic characters in REGEXP according to TABLE. TABLE is |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
133 ;; an alist of (MNEMONIC-STRING . REPLACEMENT-STRING). |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
134 |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
135 (defun indian-compose-regexp (regexp table) |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
136 (let ((case-fold-search nil)) |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
137 (dolist (elt table) |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
138 (setq regexp (replace-regexp-in-string (car elt) (cdr elt) regexp t t))) |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
139 regexp)) |
0697cb4477c8
(indian-compose-regexp): New function.
Kenichi Handa <handa@m17n.org>
parents:
106374
diff
changeset
|
140 |
97841 | 141 (defconst devanagari-composable-pattern |
106634
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
142 (let ((table |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
143 '(("a" . "[\u0900-\u0902]") ; vowel modifier (above) |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
144 ("A" . "\u0903") ; vowel modifier (post) |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
145 ("V" . "[\u0904-\u0914\u0960-\u0961\u0972]") ; independent vowel |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
146 ("C" . "[\u0915-\u0939\u0958-\u095F\u0979-\u097F]") ; consonant |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
147 ("R" . "\u0930") ; RA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
148 ("n" . "\u093C") ; NUKTA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
149 ("v" . "[\u093E-\u094C\u094E\u0955\u0962-\u0963]") ; vowel sign |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
150 ("H" . "\u094D") ; HALANT |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
151 ("s" . "[\u0951-\u0952]") ; stress sign |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
152 ("t" . "[\u0953-\u0954]") ; accent |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
153 ("N" . "\u200C") ; ZWNJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
154 ("J" . "\u200D") ; ZWJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
155 ("X" . "[\u0900-\u097F]")))) ; all coverage |
106634
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
156 (indian-compose-regexp |
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
157 (concat |
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
158 ;; syllables with an independent vowel, or |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
159 "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?s?t?A?\\|" |
106634
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
160 ;; consonant-based syllables, or |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
161 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?s?t?A?\\)\\|" |
106634
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
162 ;; special consonant form, or |
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
163 "JHR\\|" |
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
164 ;; any other singleton characters |
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
165 "X") |
1f0768e4ad7e
(devanagari-composable-pattern): Fixed to
Kenichi Handa <handa@m17n.org>
parents:
106543
diff
changeset
|
166 table)) |
97841 | 167 "Regexp matching a composable sequence of Devanagari characters.") |
168 | |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
169 (defconst bengali-composable-pattern |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
170 (let ((table |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
171 '(("a" . "\u0981") ; SIGN CANDRABINDU |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
172 ("A" . "[\u0982-\u0983]") ; SIGN ANUSVARA .. VISARGA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
173 ("V" . "[\u0985-\u0994\u09E0-\u09E1]") ; independent vowel |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
174 ("C" . "[\u0995-\u09B9\u09DC-\u09DF\u09F1]") ; consonant |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
175 ("B" . "[\u09AC\u09AF-\u09B0\u09F0]") ; BA, YA, RA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
176 ("R" . "[\u09B0\u09F0]") ; RA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
177 ("n" . "\u09BC") ; NUKTA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
178 ("v" . "[\u09BE-\u09CC\u09D7\u09E2-\u09E3]") ; vowel sign |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
179 ("H" . "\u09CD") ; HALANT |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
180 ("T" . "\u09CE") ; KHANDA TA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
181 ("N" . "\u200C") ; ZWNJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
182 ("J" . "\u200D") ; ZWJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
183 ("X" . "[\u0980-\u09FF]")))) ; all coverage |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
184 (indian-compose-regexp |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
185 (concat |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
186 ;; syllables with an independent vowel, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
187 "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
188 ;; consonant-based syllables, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
189 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*[NJ]?v?a?A?\\)\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
190 ;; KHANDA TA, optionally preceded by RA + HALANT, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
191 "\\(?:RH\\)?T\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
192 ;; special consonant form, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
193 "JHB\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
194 ;; any other singleton characters |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
195 "X") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
196 table)) |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
197 "Regexp matching a composable sequence of Bengali characters.") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
198 |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
199 (defconst gurmukhi-composable-pattern |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
200 (let ((table |
108329
b2c0ce51877e
language/indian.el (gurmukhi-composable-pattern): Fix typo.
Kenichi Handa <handa@etlken>
parents:
108325
diff
changeset
|
201 '(("a" . "[\u0A01-\u0A02\u0A70]") ; SIGN ADAK BINDI .. BINDI, TIPPI |
b2c0ce51877e
language/indian.el (gurmukhi-composable-pattern): Fix typo.
Kenichi Handa <handa@etlken>
parents:
108325
diff
changeset
|
202 ("A" . "\u0A03") ; SIGN VISARGA |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
203 ("V" . "[\u0A05-\u0A14]") ; independent vowel |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
204 ("C" . "[\u0A15-\u0A39\u0A59-\u0A5E]") ; consonant |
108329
b2c0ce51877e
language/indian.el (gurmukhi-composable-pattern): Fix typo.
Kenichi Handa <handa@etlken>
parents:
108325
diff
changeset
|
205 ("Y" . "[\u0A2F-\u0A30\u0A35\u0A39]") ; YA, RA, VA, HA |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
206 ("n" . "\u0A3C") ; NUKTA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
207 ("v" . "[\u0A3E-\u0A4C]") ; vowel sign |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
208 ("H" . "\u0A4D") ; VIRAMA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
209 ("N" . "\u200C") ; ZWNJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
210 ("J" . "\u200D") ; ZWJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
211 ("X" . "[\u0A00-\u0A7F]")))) ; all coverage |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
212 (indian-compose-regexp |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
213 (concat |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
214 ;; consonant-based syllables, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
215 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
216 ;; syllables with an independent vowel, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
217 "Vn?\\(?:J?HY\\)?v*n?a?A?\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
218 ;; special consonant form, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
219 "JHY\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
220 ;; any other singleton characters |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
221 "X") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
222 table)) |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
223 "Regexp matching a composable sequence of Gurmukhi characters.") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
224 |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
225 (defconst gujarati-composable-pattern |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
226 (let ((table |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
227 '(("a" . "[\u0A81-\u0A82]") ; SIGN CANDRABINDU .. ANUSVARA |
108331
f21947b4bcff
language/indian.el (gujarati-composable-pattern): Fix typo.
Kenichi Handa <handa@etlken>
parents:
108329
diff
changeset
|
228 ("A" . "\u0A83") ; SIGN VISARGA |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
229 ("V" . "[\u0A85-\u0A94\u0AE0-\u0AE1]") ; independent vowel |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
230 ("C" . "[\u0A95-\u0AB9]") ; consonant |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
231 ("R" . "\u0AB0") ; RA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
232 ("n" . "\u0ABC") ; NUKTA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
233 ("v" . "[\u0ABE-\u0ACC\u0AE2-\u0AE3]") ; vowel sign |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
234 ("H" . "\u0ACD") ; VIRAMA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
235 ("N" . "\u200C") ; ZWNJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
236 ("J" . "\u200D") ; ZWJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
237 ("X" . "[\u0A80-\u0AFF]")))) ; all coverage |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
238 (indian-compose-regexp |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
239 (concat |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
240 ;; syllables with an independent vowel, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
241 "\\(?:RH\\)?Vn?\\(?:J?HR\\)?v*n?a?A?\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
242 ;; consonant-based syllables, or |
107923
64dd8cad5b43
Fix previous change of *-composable-pattern).
Kenichi Handa <handa@m17n.org>
parents:
107703
diff
changeset
|
243 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
244 ;; special consonant form, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
245 "JHR\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
246 ;; any other singleton characters |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
247 "X") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
248 table)) |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
249 "Regexp matching a composable sequence of Gujarati characters.") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
250 |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
251 (defconst oriya-composable-pattern |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
252 (let ((table |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
253 '(("a" . "\u0B01") ; SIGN CANDRABINDU |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
254 ("A" . "[\u0B02-\u0B03]") ; SIGN ANUSVARA .. VISARGA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
255 ("V" . "[\u0B05-\u0B14\u0B60-\u0B61]") ; independent vowel |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
256 ("C" . "[\u0B15-\u0B39\u0B5C-\u0B5D\u0B71]") ; consonant |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
257 ("B" . "[\u0B15-\u0B17\u0B1B-\u0B1D\u0B1F-\u0B21\u0B23-\u0B24\u0B27-\u0B30\u0B32-\u0B35\u0B38-\u0B39]") ; consonant with below form |
107923
64dd8cad5b43
Fix previous change of *-composable-pattern).
Kenichi Handa <handa@m17n.org>
parents:
107703
diff
changeset
|
258 ("R" . "\u0B30") ; RA |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
259 ("n" . "\u0B3C") ; NUKTA |
108325
6a2e327273fe
language/indian.el (oriya-composable-pattern, tamil-composable-pattern, malayalam-composable-pattern): Add two-part vowels to "v" (vowel sign).
Kenichi Handa <handa@etlken>
parents:
107923
diff
changeset
|
260 ("v" . "[\u0B3E-\u0B4C\u0B56-\u0B57\u0B62-\u0B63]") ; vowel sign |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
261 ("H" . "\u0B4D") ; VIRAMA |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
262 ("N" . "\u200C") ; ZWNJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
263 ("J" . "\u200D") ; ZWJ |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
264 ("X" . "[\u0B00-\u0B7F]")))) ; all coverage |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
265 (indian-compose-regexp |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
266 (concat |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
267 ;; syllables with an independent vowel, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
268 "\\(?:RH\\)?Vn?\\(?:J?HB\\)?v*n?a?A?\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
269 ;; consonant-based syllables, or |
107923
64dd8cad5b43
Fix previous change of *-composable-pattern).
Kenichi Handa <handa@m17n.org>
parents:
107703
diff
changeset
|
270 "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?a?A?\\)\\|" |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
271 ;; special consonant form, or |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
272 "JHB\\|" |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
273 ;; any other singleton characters |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
274 "X") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
275 table)) |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
276 "Regexp matching a composable sequence of Oriya characters.") |
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
277 |
(defconst tamil-composable-pattern
  ;; The syllable template is written with one-letter class names;
  ;; CLASS-ALIST pairs each of those letters with the Tamil (or
  ;; ZWNJ/ZWJ) characters it stands for.  Both are handed to
  ;; `indian-compose-regexp', which is defined elsewhere in this file
  ;; and presumably replaces each letter by its character set.
  (let* ((class-alist
          '(("a" . "\u0B82")                ; SIGN ANUSVARA
            ("V" . "[\u0B85-\u0B94]")       ; independent vowel
            ("C" . "[\u0B95-\u0BB9]")       ; consonant
            ("v" . "[\u0BBE-\u0BCC\u0BD7]") ; vowel sign
            ("H" . "\u0BCD")                ; VIRAMA
            ("N" . "\u200C")                ; ZWNJ
            ("J" . "\u200D")                ; ZWJ
            ("X" . "[\u0B80-\u0BFF]")))     ; all coverage
         (syllable-template
          (concat
           ;; consonant-based syllables, or
           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
           ;; syllables with an independent vowel, or
           "Vv*a?\\|"
           ;; any other singleton characters
           "X")))
    (indian-compose-regexp syllable-template class-alist))
  "Regexp matching a composable sequence of Tamil characters.")
298 | |
107703
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
(defconst telugu-composable-pattern
  ;; The syllable template is written with one-letter class names;
  ;; CLASS-ALIST pairs each of those letters with the Telugu (or
  ;; ZWNJ/ZWJ) characters it stands for.  Both are handed to
  ;; `indian-compose-regexp', which is defined elsewhere in this file
  ;; and presumably replaces each letter by its character set.
  (let* ((class-alist
          '(("a" . "[\u0C01-\u0C03]")       ; SIGN CANDRABINDU .. VISARGA
            ("V" . "[\u0C05-\u0C14\u0C60-\u0C61]") ; independent vowel
            ("C" . "[\u0C15-\u0C39\u0C58-\u0C59]") ; consonant
            ("v" . "[\u0C3E-\u0C4C\u0C55-\u0C56\u0C62-\u0C63]") ; vowel sign
            ("H" . "\u0C4D")                ; VIRAMA
            ("N" . "\u200C")                ; ZWNJ
            ("J" . "\u200D")                ; ZWJ
            ("X" . "[\u0C00-\u0C7F]")))     ; all coverage
         (syllable-template
          (concat
           ;; consonant-based syllables, or
           "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v*a?\\)\\|"
           ;; syllables with an independent vowel, or
           "V\\(?:J?HC\\)?v*a?\\|"
           ;; special consonant form, or
           "JHC\\|"
           ;; any other singleton characters
           "X")))
    (indian-compose-regexp syllable-template class-alist))
  "Regexp matching a composable sequence of Telugu characters.")
9898924b98ed
Fix Indic composable patterns for the new Unicode specification.
Kenichi Handa <handa@m17n.org>
parents:
107508
diff
changeset
|
321 |
(defconst kannada-composable-pattern
  ;; The syllable template is written with one-letter class names;
  ;; CLASS-ALIST pairs each of those letters with the Kannada (or
  ;; ZWNJ/ZWJ) characters it stands for.  Both are handed to
  ;; `indian-compose-regexp', which is defined elsewhere in this file
  ;; and presumably replaces each letter by its character set.
  (let* ((class-alist
          '(("A" . "[\u0C82-\u0C83]")       ; SIGN ANUSVARA .. VISARGA
            ("V" . "[\u0C85-\u0C94\u0CE0-\u0CE1]") ; independent vowel
            ("C" . "[\u0C95-\u0CB9\u0CDE]") ; consonant
            ("R" . "\u0CB0")                ; RA
            ("n" . "\u0CBC")                ; NUKTA
            ("v" . "[\u0CBE-\u0CCC\u0CD5-\u0CD6\u0CE2-\u0CE3]") ; vowel sign
            ("H" . "\u0CCD")                ; VIRAMA
            ("N" . "\u200C")                ; ZWNJ
            ("J" . "\u200D")                ; ZWJ
            ("X" . "[\u0C80-\u0CFF]")))     ; all coverage
         (syllable-template
          (concat
           ;; syllables with an independent vowel, or
           "\\(?:RH\\)?Vn?\\(?:J?HC\\)?v?A?\\|"
           ;; consonant-based syllables, or
           "Cn?\\(?:J?HJ?Cn?\\)*\\(?:H[NJ]?\\|v*n?A?\\)\\|"
           ;; special consonant form, or
           "JHC\\|"
           ;; any other singleton characters
           "X")))
    (indian-compose-regexp syllable-template class-alist))
  "Regexp matching a composable sequence of Kannada characters.")
346 | |
(defconst malayalam-composable-pattern
  ;; The syllable template below is written with one-letter class
  ;; names; TABLE pairs each of those letters with the Malayalam (or
  ;; ZWNJ/ZWJ) characters it stands for.  Both are handed to
  ;; `indian-compose-regexp', which is defined elsewhere in this file
  ;; and presumably replaces each letter by its character set.
  (let ((table
         '(("A" . "[\u0D02-\u0D03]")        ; SIGN ANUSVARA .. VISARGA
           ("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
           ("C" . "[\u0D15-\u0D39]")        ; consonant
           ("Y" . "[\u0D2F-\u0D30\u0D32\u0D35]") ; YA, RA, LA, VA
           ("v" . "[\u0D3E-\u0D4C\u0D57\u0D62-\u0D63]") ; postbase matra
           ("H" . "\u0D4D")                 ; SIGN VIRAMA
           ("N" . "\u200C")                 ; ZWNJ
           ("J" . "\u200D")                 ; ZWJ
           ("X" . "[\u0D00-\u0D7F]"))))     ; all coverage
    (indian-compose-regexp
     (concat
      ;; consonant-based syllables, or
      "C\\(?:J?HJ?C\\)*\\(?:H[NJ]?\\|v?A?\\)\\|"
      ;; syllables with an independent vowel, or
      ;; (The vowel-sign term is the greedy "v*".  It used to be "v*?",
      ;; the non-greedy star; because everything following it in this
      ;; alternative is optional, that form always matched zero vowel
      ;; signs, so a vowel sign after an independent vowel was never
      ;; included in the match.)
      "V\\(?:J?HY\\)?v*A?\\|"
      ;; special consonant form, or
      "JHY\\|"
      ;; any other singleton characters
      "X")
     table))
  "Regexp matching a composable sequence of Malayalam characters.")
370 | |
91322
fc05151881d9
Set font-shape-text for all Indic characters
Kenichi Handa <handa@m17n.org>
parents:
90996
diff
changeset
|
;; Install the composable patterns: walk `char-script-table' and, for
;; every entry whose value is one of the scripts listed below, store a
;; one-element rule list [PATTERN 0 font-shape-gstring] for that entry's
;; key in `composition-function-table' (see that variable's
;; documentation for the rule format).  Entries for any other script are
;; left untouched.
(let ((pattern-by-script
       (list (cons 'devanagari devanagari-composable-pattern)
             (cons 'bengali bengali-composable-pattern)
             (cons 'gurmukhi gurmukhi-composable-pattern)
             (cons 'gujarati gujarati-composable-pattern)
             (cons 'oriya oriya-composable-pattern)
             (cons 'tamil tamil-composable-pattern)
             (cons 'telugu telugu-composable-pattern)
             (cons 'kannada kannada-composable-pattern)
             (cons 'malayalam malayalam-composable-pattern))))
  (map-char-table
   (lambda (chars script)
     (let ((entry (assq script pattern-by-script)))
       (when entry
         (set-char-table-range
          composition-function-table chars
          (list (vector (cdr entry) 0 'font-shape-gstring))))))
   char-script-table))
50256
104cf4fa2a8e
(indian-font-foundry): New variable.
Kenichi Handa <handa@m17n.org>
parents:
49704
diff
changeset
|
389 |
33778 | 390 (provide 'indian) |
41469 | 391 |
93975
1e3a407766b9
Fix up comment convention on the arch-tag lines.
Stefan Monnier <monnier@iro.umontreal.ca>
parents:
92067
diff
changeset
|
392 ;; arch-tag: 83aa8fc7-7ee2-4364-a6e5-498f5e3b8c2f |
17052 | 393 ;;; indian.el ends here |