Mercurial > emacs
comparison lisp/international/codepage.el @ 24454:fe0089dd2d2f
(cp1250-decode-table, cp1251-decode-table, cp1253-decode-table,
cp1257-decode-table): New translation tables for MS Windows
codepages.
(cp-make-coding-systems-for-codepage): Accept 4-digit
codepages.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Mon, 08 Mar 1999 12:06:07 +0000 |
parents | 766feaa457a9 |
children | 5d61c3793c06 |
comparison
equal
deleted
inserted
replaced
24453:3b8991c22dcd | 24454:fe0089dd2d2f |
---|---|
414 nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil] | 414 nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil] |
415 "Table for converting ISO-8859-4 characters into codepage 775 glyphs.") | 415 "Table for converting ISO-8859-4 characters into codepage 775 glyphs.") |
416 (setplist 'cp775-decode-table | 416 (setplist 'cp775-decode-table |
417 '(charset latin-iso8859-4 language "Latin-4" offset 160)) | 417 '(charset latin-iso8859-4 language "Latin-4" offset 160)) |
418 | 418 |
419 ;; Support for the Windows 12xx series of codepages that MS has | |
420 ;; butchered from the ISO-8859 specs. This does not add support for | |
421 ;; the extended characters that MS has added in the 128 - 159 coding | |
422 ;; range, only translates those characters that can be expressed in | |
423 ;; the corresponding iso-8859 codepage. | |
424 | |
425 ;; Codepage Mapping: | |
426 ;; | |
427 ;; Windows-1250: ISO-8859-2 (Central Europe) - differs in some positions | |
428 ;; Windows-1251: ISO-8859-5 (Cyrillic) - differs wildly | |
429 ;; Windows-1252: ISO-8859-1 (West Europe) - exact match | |
430 ;; Windows-1253: ISO-8859-7 (Greek) - differs in some positions | |
431 ;; Windows-1254: ISO-8859-9 (Turkish) - exact match | |
432 ;; Windows-1255: ISO-8859-8 (Hebrew) - exact match | |
433 ;; Windows-1256: ISO-8859-6 (Arabic) - half match | |
434 ;; Windows-1257: ISO-8859-4 (Baltic) - differs, future Latin-7 | |
435 ;; Windows-1258: VISCII (Vietnamese) - Completely different | |
436 | |
437 (defvar cp1250-decode-table | |
438 [ | |
439 160 165 162 163 164 188 140 167 168 138 170 141 143 173 142 175 | |
440 176 185 178 179 180 190 156 161 184 154 186 157 159 189 158 191 | |
441 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | |
442 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | |
443 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | |
444 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 ] | |
445 "ISO-8859-2 to Windows-1250 (Central Europe) codepage decoding table") | |
446 (setplist 'cp1250-decode-table | |
447 '(charset latin-iso8859-2 language "Latin-2" offset 160)) | |
448 | |
449 (defvar cp1251-decode-table | |
450 [ | |
451 160 168 128 129 170 189 178 175 163 138 140 142 141 173 161 143 | |
452 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | |
453 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 | |
454 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | |
455 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 | |
456 185 184 144 131 186 190 179 191 188 154 156 158 157 167 162 159 ] | |
457 "ISO-8859-5 to Windows-1251 (Cyrillic) codepage decoding table") | |
458 (setplist 'cp1251-decode-table | |
459 '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160)) | |
460 | |
461 ;; cp1253 is missing nbsp so we cannot quite translate perfectly. It | |
462 ;; also has two micro/mu characters which would require more complex | |
463 ;; processing to accommodate. | |
464 (defvar cp1253-decode-table | |
465 [ | |
466 nil 145 146 163 nil nil 166 167 168 169 nil 171 172 173 nil 151 | |
467 176 177 178 179 180 161 162 183 184 185 186 187 188 189 190 191 | |
468 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 | |
469 208 209 nil 211 212 213 214 215 216 217 218 219 220 221 222 223 | |
470 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | |
471 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 nil ] | |
472 "ISO-8859-7 to Windows-1253 (Greek) codepage decoding table") | |
473 (setplist 'cp1253-decode-table | |
474 '(charset greek-iso8859-7 language "Greek" offset 160)) | |
475 | |
476 ;; Since Latin-7 is not yet official, and Emacs does not support it, | |
477 ;; provide translation between Windows-1257 and Latin-4 the best we | |
478 ;; can. | |
479 (defvar cp1257-decode-table | |
480 [ | |
481 160 192 nil 170 164 nil 207 167 nil 208 199 204 nil 173 222 nil | |
482 176 224 nil 186 nil nil 239 nil nil 240 231 236 nil nil 254 nil | |
483 194 nil nil nil 196 197 175 193 200 201 198 nil 203 nil nil 206 | |
484 nil 210 212 205 nil 213 214 215 168 216 nil nil 220 nil 219 223 | |
485 226 nil nil nil 228 229 191 225 232 233 230 nil 235 nil nil 238 | |
486 nil 242 244 237 nil 245 246 247 184 248 nil nil 252 nil 251 nil ] | |
487 "ISO-8859-4 to Windows-1257 (Baltic) codepage decoding table") | |
488 (setplist 'cp1257-decode-table | |
489 '(charset latin-iso8859-4 language "Latin-4" offset 160)) | |
490 | |
419 ;;;###autoload | 491 ;;;###autoload |
420 (defun cp-make-coding-systems-for-codepage (codepage iso-name offset) | 492 (defun cp-make-coding-systems-for-codepage (codepage iso-name offset) |
421 "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME | 493 "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME |
422 whose first character is at offset OFFSET from the beginning of 8-bit | 494 whose first character is at offset OFFSET from the beginning of 8-bit |
423 ASCII table. | 495 ASCII table. |
512 (let (alist chset sname) | 584 (let (alist chset sname) |
513 (mapatoms | 585 (mapatoms |
514 (function | 586 (function |
515 (lambda (sym) | 587 (lambda (sym) |
516 (if (and (boundp sym) | 588 (if (and (boundp sym) |
517 (string-match "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'" | 589 (string-match "\\`cp\\([1-9][0-9][0-9][0-9]?\\)-decode-table\\'" |
518 (setq sname (symbol-name sym))) | 590 (setq sname (symbol-name sym))) |
519 (vectorp (symbol-value sym)) | 591 (vectorp (symbol-value sym)) |
520 (setq chset (get sym 'charset))) | 592 (setq chset (get sym 'charset))) |
521 (setq alist | 593 (setq alist |
522 (cons (cons (match-string 1 sname) chset) alist)))))) | 594 (cons (cons (match-string 1 sname) chset) alist)))))) |