comparison lisp/international/codepage.el @ 24454:fe0089dd2d2f

(cp1250-decode-table, cp1251-decode-table, cp1253-decode-table, cp1257-decode-table): New translation tables for MS Windows codepages. (cp-make-coding-systems-for-codepage): Accept 4 digit codepages.
author Eli Zaretskii <eliz@gnu.org>
date Mon, 08 Mar 1999 12:06:07 +0000
parents 766feaa457a9
children 5d61c3793c06
comparison
equal deleted inserted replaced
24453:3b8991c22dcd 24454:fe0089dd2d2f
414 nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil] 414 nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil]
415 "Table for converting ISO-8859-4 characters into codepage 775 glyphs.") 415 "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
416 (setplist 'cp775-decode-table 416 (setplist 'cp775-decode-table
417 '(charset latin-iso8859-4 language "Latin-4" offset 160)) 417 '(charset latin-iso8859-4 language "Latin-4" offset 160))
418 418
419 ;; Support for the Windows 12xx series of codepages that MS has
420 ;; butchered from the ISO-8859 specs. This does not add support for
421 ;; the extended characters that MS has added in the 128 - 159 coding
422 ;; range, only translates those characters that can be expressed in
423 ;; the corresponding iso-8859 codepage.
424
425 ;; Codepage Mapping:
426 ;;
427 ;; Windows-1250: ISO-8859-2 (Central Europe) - differs in some positions
428 ;; Windows-1251: ISO-8859-5 (Cyrillic) - differs wildly
429 ;; Windows-1252: ISO-8859-1 (West Europe) - exact match
430 ;; Windows-1253: ISO-8859-7 (Greek) - differs in some positions
431 ;; Windows-1254: ISO-8859-9 (Turkish) - exact match
432 ;; Windows-1255: ISO-8859-8 (Hebrew) - exact match
433 ;; Windows-1256: ISO-8859-6 (Arabic) - half match
434 ;; Windows-1257: ISO-8859-4 (Baltic) - differs, future Latin-7
435 ;; Windows-1258: VISCII (Vietnamese) - Completely different
436
437 (defvar cp1250-decode-table
438 [
439 160 165 162 163 164 188 140 167 168 138 170 141 143 173 142 175
440 176 185 178 179 180 190 156 161 184 154 186 157 159 189 158 191
441 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
442 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
443 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
444 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 ]
445 "ISO-8859-2 to Windows-1250 (Central Europe) codepage decoding table")
446 (setplist 'cp1250-decode-table
447 '(charset latin-iso8859-2 language "Latin-2" offset 160))
448
449 (defvar cp1251-decode-table
450 [
451 160 168 128 129 170 189 178 175 163 138 140 142 141 173 161 143
452 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
453 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
454 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
455 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255
456 185 184 144 131 186 190 179 191 188 154 156 158 157 167 162 159 ]
457 "ISO-8859-5 to Windows-1251 (Cyrillic) codepage decoding table")
458 (setplist 'cp1251-decode-table
459 '(charset cyrillic-iso8859-5 language "Cyrillic-ISO" offset 160))
460
461 ;; cp1253 is missing nbsp so we cannot quite translate perfectly. It
462 ;; also has two micro/mu characters which would require more complex
463 ;; processing to accommodate.
464 (defvar cp1253-decode-table
465 [
466 nil 145 146 163 nil nil 166 167 168 169 nil 171 172 173 nil 151
467 176 177 178 179 180 161 162 183 184 185 186 187 188 189 190 191
468 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
469 208 209 nil 211 212 213 214 215 216 217 218 219 220 221 222 223
470 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
471 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 nil ]
472 "ISO-8859-7 to Windows-1253 (Greek) codepage decoding table")
473 (setplist 'cp1253-decode-table
474 '(charset greek-iso8859-7 language "Greek" offset 160))
475
476 ;; Since Latin-7 is not yet official, and Emacs does not support it,
477 ;; provide translation between Windows-1257 and Latin-4 the best we
478 ;; can.
479 (defvar cp1257-decode-table
480 [
481 160 192 nil 170 164 nil 207 167 nil 208 199 204 nil 173 222 nil
482 176 224 nil 186 nil nil 239 nil nil 240 231 236 nil nil 254 nil
483 194 nil nil nil 196 197 175 193 200 201 198 nil 203 nil nil 206
484 nil 210 212 205 nil 213 214 215 168 216 nil nil 220 nil 219 223
485 226 nil nil nil 228 229 191 225 232 233 230 nil 235 nil nil 238
486 nil 242 244 237 nil 245 246 247 184 248 nil nil 252 nil 251 nil ]
487 "ISO-8859-4 to Windows-1257 (Baltic) codepage decoding table")
488 (setplist 'cp1257-decode-table
489 '(charset latin-iso8859-4 language "Latin-4" offset 160))
490
419 ;;;###autoload 491 ;;;###autoload
420 (defun cp-make-coding-systems-for-codepage (codepage iso-name offset) 492 (defun cp-make-coding-systems-for-codepage (codepage iso-name offset)
421 "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME 493 "Create a coding system to convert IBM CODEPAGE into charset ISO-NAME
422 whose first character is at offset OFFSET from the beginning of 8-bit 494 whose first character is at offset OFFSET from the beginning of 8-bit
423 ASCII table. 495 ASCII table.
512 (let (alist chset sname) 584 (let (alist chset sname)
513 (mapatoms 585 (mapatoms
514 (function 586 (function
515 (lambda (sym) 587 (lambda (sym)
516 (if (and (boundp sym) 588 (if (and (boundp sym)
517 (string-match "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'" 589 (string-match "\\`cp\\([1-9][0-9][0-9][0-9]?\\)-decode-table\\'"
518 (setq sname (symbol-name sym))) 590 (setq sname (symbol-name sym)))
519 (vectorp (symbol-value sym)) 591 (vectorp (symbol-value sym))
520 (setq chset (get sym 'charset))) 592 (setq chset (get sym 'charset)))
521 (setq alist 593 (setq alist
522 (cons (cons (match-string 1 sname) chset) alist)))))) 594 (cons (cons (match-string 1 sname) chset) alist))))))