comparison lisp/international/mule.el @ 37746:0204bb9ccc1e

(auto-coding-regexp-alist): New user-option. (auto-coding-from-file-contents): New function. (set-auto-coding): Use it to determine a coding system.
author Gerd Moellmann <gerd@gnu.org>
date Wed, 16 May 2001 10:36:54 +0000
parents 42097c412119
children 0a94387bba65
comparison
equal deleted inserted replaced
37745:c05ae69405d1 37746:0204bb9ccc1e
1235 :group 'files 1235 :group 'files
1236 :group 'mule 1236 :group 'mule
1237 :type '(repeat (cons (regexp :tag "File name regexp") 1237 :type '(repeat (cons (regexp :tag "File name regexp")
1238 (symbol :tag "Coding system")))) 1238 (symbol :tag "Coding system"))))
1239 1239
1240 (defcustom auto-coding-regexp-alist
1241 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion))
1242 "Alist of patterns vs corresponding coding systems.
1243 Each element looks like (REGEXP . CODING-SYSTEM).
1244 A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading.
1245
1246 The settings in this alist take priority over `coding:' tags
1247 in the file (see the function `set-auto-coding')
1248 and the contents of `file-coding-system-alist'."
1249 :group 'files
1250 :group 'mule
1251 :type '(repeat (cons (regexp :tag "Regexp")
1252 (symbol :tag "Coding system"))))
1253
1240 (defvar set-auto-coding-for-load nil 1254 (defvar set-auto-coding-for-load nil
1241 "Non-nil means look for `load-coding' property instead of `coding'. 1255 "Non-nil means look for `load-coding' property instead of `coding'.
1242 This is used for loading and byte-compiling Emacs Lisp files.") 1256 This is used for loading and byte-compiling Emacs Lisp files.")
1243 1257
1244 (defun auto-coding-alist-lookup (filename) 1258 (defun auto-coding-alist-lookup (filename)
1250 (if (string-match (car (car alist)) filename) 1264 (if (string-match (car (car alist)) filename)
1251 (setq coding-system (cdr (car alist))) 1265 (setq coding-system (cdr (car alist)))
1252 (setq alist (cdr alist)))) 1266 (setq alist (cdr alist))))
1253 coding-system)) 1267 coding-system))
1254 1268
1269
1270 (defun auto-coding-from-file-contents (size)
1271 "Determine a coding system from the contents of the current buffer.
1272 The current buffer contains SIZE bytes starting at point.
1273 Value is either a coding system or nil."
1274 (save-excursion
1275 (let ((alist auto-coding-regexp-alist)
1276 coding-system)
1277 (while (and alist (not coding-system))
1278 (let ((regexp (car (car alist))))
1279 (when (re-search-forward regexp (+ (point) size) t)
1280 (setq coding-system (cdr (car alist)))))
1281 (setq alist (cdr alist)))
1282 coding-system)))
1283
1284
1255 (defun set-auto-coding (filename size) 1285 (defun set-auto-coding (filename size)
1256 "Return coding system for a file FILENAME of which SIZE bytes follow point. 1286 "Return coding system for a file FILENAME of which SIZE bytes follow point.
1257 These bytes should include at least the first 1k of the file 1287 These bytes should include at least the first 1k of the file
1258 and the last 3k of the file, but the middle may be omitted. 1288 and the last 3k of the file, but the middle may be omitted.
1259 1289
1260 It checks FILENAME against the variable `auto-coding-alist'. 1290 It checks FILENAME against the variable `auto-coding-alist'. If
1261 If FILENAME doesn't match any entries in the variable, 1291 FILENAME doesn't match any entries in the variable, it checks the
1262 it checks for a `coding:' tag in the first one or two lines following 1292 contents of the current buffer following point against
1263 point. If no `coding:' tag is found, it checks for local variables 1293 `auto-coding-regexp-alist'. If no match is found, it checks for a
1264 list in the last 3K bytes out of the SIZE bytes. 1294 `coding:' tag in the first one or two lines following point. If no
1295 `coding:' tag is found, it checks for local variables list in the last
1296 3K bytes out of the SIZE bytes.
1265 1297
1266 The return value is the specified coding system, 1298 The return value is the specified coding system,
1267 or nil if nothing specified. 1299 or nil if nothing specified.
1268 1300
1269 The variable `set-auto-coding-function' (which see) is set to this 1301 The variable `set-auto-coding-function' (which see) is set to this
1270 function by default." 1302 function by default."
1271 (let ((coding-system (auto-coding-alist-lookup filename))) 1303 (or (auto-coding-alist-lookup filename)
1272 1304 (auto-coding-from-file-contents size)
1273 (or coding-system 1305 (let* ((case-fold-search t)
1274 (let* ((case-fold-search t) 1306 (head-start (point))
1275 (head-start (point)) 1307 (head-end (+ head-start (min size 1024)))
1276 (head-end (+ head-start (min size 1024))) 1308 (tail-start (+ head-start (max (- size 3072) 0)))
1277 (tail-start (+ head-start (max (- size 3072) 0))) 1309 (tail-end (+ head-start size))
1278 (tail-end (+ head-start size)) 1310 coding-system head-found tail-found pos)
1279 coding-system head-found tail-found pos) 1311 ;; Try a short cut by searching for the string "coding:"
1280 ;; Try a short cut by searching for the string "coding:" 1312 ;; and for "unibyte:" at the head and tail of SIZE bytes.
1281 ;; and for "unibyte:" at the head and tail of SIZE bytes. 1313 (setq head-found (or (search-forward "coding:" head-end t)
1282 (setq head-found (or (search-forward "coding:" head-end t) 1314 (search-forward "unibyte:" head-end t)))
1283 (search-forward "unibyte:" head-end t))) 1315 (if (and head-found (> head-found tail-start))
1284 (if (and head-found (> head-found tail-start)) 1316 ;; Head and tail are overlapped.
1285 ;; Head and tail are overlapped. 1317 (setq tail-found head-found)
1286 (setq tail-found head-found) 1318 (goto-char tail-start)
1287 (goto-char tail-start) 1319 (setq tail-found (or (search-forward "coding:" tail-end t)
1288 (setq tail-found (or (search-forward "coding:" tail-end t) 1320 (search-forward "unibyte:" tail-end t))))
1289 (search-forward "unibyte:" tail-end t)))) 1321
1290 1322 ;; At first check the head.
1291 ;; At first check the head. 1323 (when head-found
1292 (when head-found 1324 (goto-char head-start)
1325 (setq pos (re-search-forward "[\n\r]" head-end t))
1326 (if (and pos
1327 (= (char-after head-start) ?#)
1328 (= (char-after (1+ head-start)) ?!))
1329 ;; If the file begins with "#!" (exec interpreter magic),
1330 ;; look for coding frobs in the first two lines. You cannot
1331 ;; necessarily put them in the first line of such a file
1332 ;; without screwing up the interpreter invocation.
1333 (setq pos (search-forward "\n" head-end t)))
1334 (if pos (setq head-end pos))
1335 (when (< head-found head-end)
1293 (goto-char head-start) 1336 (goto-char head-start)
1294 (setq pos (re-search-forward "[\n\r]" head-end t)) 1337 (when (and set-auto-coding-for-load
1295 (if (and pos 1338 (re-search-forward
1296 (= (char-after head-start) ?#) 1339 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)"
1297 (= (char-after (1+ head-start)) ?!)) 1340 head-end t))
1298 ;; If the file begins with "#!" (exec interpreter magic), 1341 (setq coding-system 'raw-text))
1299 ;; look for coding frobs in the first two lines. You cannot 1342 (when (and (not coding-system)
1300 ;; necessarily put them in the first line of such a file 1343 (re-search-forward
1301 ;; without screwing up the interpreter invocation. 1344 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)"
1302 (setq pos (search-forward "\n" head-end t))) 1345 head-end t))
1303 (if pos (setq head-end pos)) 1346 (setq coding-system (intern (match-string 2)))
1304 (when (< head-found head-end) 1347 (or (coding-system-p coding-system)
1305 (goto-char head-start) 1348 (setq coding-system nil)))))
1306 (when (and set-auto-coding-for-load 1349
1307 (re-search-forward 1350 ;; If no coding: tag in the head, check the tail.
1308 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" 1351 (when (and tail-found (not coding-system))
1309 head-end t)) 1352 (goto-char tail-start)
1310 (setq coding-system 'raw-text)) 1353 (search-forward "\n\^L" nil t)
1311 (when (and (not coding-system) 1354 (if (re-search-forward
1312 (re-search-forward 1355 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t)
1313 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" 1356 ;; The prefix is what comes before "local variables:" in its
1314 head-end t)) 1357 ;; line. The suffix is what comes after "local variables:"
1315 (setq coding-system (intern (match-string 2))) 1358 ;; in its line.
1316 (or (coding-system-p coding-system) 1359 (let* ((prefix (regexp-quote (match-string 1)))
1317 (setq coding-system nil))))) 1360 (suffix (regexp-quote (match-string 2)))
1318 1361 (re-coding
1319 ;; If no coding: tag in the head, check the tail. 1362 (concat
1320 (when (and tail-found (not coding-system)) 1363 "^" prefix
1321 (goto-char tail-start) 1364 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
1322 (search-forward "\n\^L" nil t) 1365 suffix "$"))
1323 (if (re-search-forward 1366 (re-unibyte
1324 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) 1367 (concat
1325 ;; The prefix is what comes before "local variables:" in its 1368 "^" prefix
1326 ;; line. The suffix is what comes after "local variables:" 1369 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*"
1327 ;; in its line. 1370 suffix "$"))
1328 (let* ((prefix (regexp-quote (match-string 1))) 1371 (re-end
1329 (suffix (regexp-quote (match-string 2))) 1372 (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$"))
1330 (re-coding 1373 (pos (point)))
1331 (concat 1374 (re-search-forward re-end tail-end 'move)
1332 "^" prefix 1375 (setq tail-end (point))
1333 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" 1376 (goto-char pos)
1334 suffix "$")) 1377 (when (and set-auto-coding-for-load
1335 (re-unibyte 1378 (re-search-forward re-unibyte tail-end t))
1336 (concat 1379 (setq coding-system 'raw-text))
1337 "^" prefix 1380 (when (and (not coding-system)
1338 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" 1381 (re-search-forward re-coding tail-end t))
1339 suffix "$")) 1382 (setq coding-system (intern (match-string 1)))
1340 (re-end 1383 (or (coding-system-p coding-system)
1341 (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$")) 1384 (setq coding-system nil))))))
1342 (pos (point))) 1385 coding-system)))
1343 (re-search-forward re-end tail-end 'move)
1344 (setq tail-end (point))
1345 (goto-char pos)
1346 (when (and set-auto-coding-for-load
1347 (re-search-forward re-unibyte tail-end t))
1348 (setq coding-system 'raw-text))
1349 (when (and (not coding-system)
1350 (re-search-forward re-coding tail-end t))
1351 (setq coding-system (intern (match-string 1)))
1352 (or (coding-system-p coding-system)
1353 (setq coding-system nil))))))
1354 coding-system))))
1355 1386
1356 (setq set-auto-coding-function 'set-auto-coding) 1387 (setq set-auto-coding-function 'set-auto-coding)
1357 1388
1358 (defun after-insert-file-set-buffer-file-coding-system (inserted) 1389 (defun after-insert-file-set-buffer-file-coding-system (inserted)
1359 "Set `buffer-file-coding-system' of current buffer after text is inserted." 1390 "Set `buffer-file-coding-system' of current buffer after text is inserted."