Mercurial > emacs
comparison lisp/international/mule.el @ 37746:0204bb9ccc1e
(auto-coding-regexp-alist): New user-option.
(auto-coding-from-file-contents): New function.
(set-auto-coding): Use it to determine a coding system.
author | Gerd Moellmann <gerd@gnu.org> |
---|---|
date | Wed, 16 May 2001 10:36:54 +0000 |
parents | 42097c412119 |
children | 0a94387bba65 |
comparison
equal
deleted
inserted
replaced
37745:c05ae69405d1 | 37746:0204bb9ccc1e |
---|---|
1235 :group 'files | 1235 :group 'files |
1236 :group 'mule | 1236 :group 'mule |
1237 :type '(repeat (cons (regexp :tag "File name regexp") | 1237 :type '(repeat (cons (regexp :tag "File name regexp") |
1238 (symbol :tag "Coding system")))) | 1238 (symbol :tag "Coding system")))) |
1239 | 1239 |
1240 (defcustom auto-coding-regexp-alist | |
1241 '(("^BABYL OPTIONS:[ \t]*-\\*-[ \t]*rmail[ \t]*-\\*-" . no-conversion)) | |
1242 "Alist of patterns vs corresponding coding systems. | |
1243 Each element looks like (REGEXP . CODING-SYSTEM). | |
1244 A file whose first bytes match REGEXP is decoded by CODING-SYSTEM on reading. | |
1245 | |
1246 The settings in this alist take priority over `coding:' tags | |
1247 in the file (see the function `set-auto-coding') | |
1248 and the contents of `file-coding-system-alist'." | |
1249 :group 'files | |
1250 :group 'mule | |
1251 :type '(repeat (cons (regexp :tag "Regexp") | |
1252 (symbol :tag "Coding system")))) | |
1253 | |
1240 (defvar set-auto-coding-for-load nil | 1254 (defvar set-auto-coding-for-load nil |
1241 "Non-nil means look for `load-coding' property instead of `coding'. | 1255 "Non-nil means look for `load-coding' property instead of `coding'. |
1242 This is used for loading and byte-compiling Emacs Lisp files.") | 1256 This is used for loading and byte-compiling Emacs Lisp files.") |
1243 | 1257 |
1244 (defun auto-coding-alist-lookup (filename) | 1258 (defun auto-coding-alist-lookup (filename) |
1250 (if (string-match (car (car alist)) filename) | 1264 (if (string-match (car (car alist)) filename) |
1251 (setq coding-system (cdr (car alist))) | 1265 (setq coding-system (cdr (car alist))) |
1252 (setq alist (cdr alist)))) | 1266 (setq alist (cdr alist)))) |
1253 coding-system)) | 1267 coding-system)) |
1254 | 1268 |
1269 | |
1270 (defun auto-coding-from-file-contents (size) | |
1271 "Determine a coding system from the contents of the current buffer. | |
1272 The current buffer contains SIZE bytes starting at point. | |
1273 Value is either a coding system or nil." | |
1274 (save-excursion | |
1275 (let ((alist auto-coding-regexp-alist) | |
1276 coding-system) | |
1277 (while (and alist (not coding-system)) | |
1278 (let ((regexp (car (car alist)))) | |
1279 (when (re-search-forward regexp (+ (point) size) t) | |
1280 (setq coding-system (cdr (car alist))))) | |
1281 (setq alist (cdr alist))) | |
1282 coding-system))) | |
1283 | |
1284 | |
1255 (defun set-auto-coding (filename size) | 1285 (defun set-auto-coding (filename size) |
1256 "Return coding system for a file FILENAME of which SIZE bytes follow point. | 1286 "Return coding system for a file FILENAME of which SIZE bytes follow point. |
1257 These bytes should include at least the first 1k of the file | 1287 These bytes should include at least the first 1k of the file |
1258 and the last 3k of the file, but the middle may be omitted. | 1288 and the last 3k of the file, but the middle may be omitted. |
1259 | 1289 |
1260 It checks FILENAME against the variable `auto-coding-alist'. | 1290 It checks FILENAME against the variable `auto-coding-alist'. If |
1261 If FILENAME doesn't match any entries in the variable, | 1291 FILENAME doesn't match any entries in the variable, it checks the |
1262 it checks for a `coding:' tag in the first one or two lines following | 1292 contents of the current buffer following point against |
1263 point. If no `coding:' tag is found, it checks for local variables | 1293 `auto-coding-regexp-alist'. If no match is found, it checks for a |
1264 list in the last 3K bytes out of the SIZE bytes. | 1294 `coding:' tag in the first one or two lines following point. If no |
1295 `coding:' tag is found, it checks for local variables list in the last | |
1296 3K bytes out of the SIZE bytes. | |
1265 | 1297 |
1266 The return value is the specified coding system, | 1298 The return value is the specified coding system, |
1267 or nil if nothing specified. | 1299 or nil if nothing specified. |
1268 | 1300 |
1269 The variable `set-auto-coding-function' (which see) is set to this | 1301 The variable `set-auto-coding-function' (which see) is set to this |
1270 function by default." | 1302 function by default." |
1271 (let ((coding-system (auto-coding-alist-lookup filename))) | 1303 (or (auto-coding-alist-lookup filename) |
1272 | 1304 (auto-coding-from-file-contents size) |
1273 (or coding-system | 1305 (let* ((case-fold-search t) |
1274 (let* ((case-fold-search t) | 1306 (head-start (point)) |
1275 (head-start (point)) | 1307 (head-end (+ head-start (min size 1024))) |
1276 (head-end (+ head-start (min size 1024))) | 1308 (tail-start (+ head-start (max (- size 3072) 0))) |
1277 (tail-start (+ head-start (max (- size 3072) 0))) | 1309 (tail-end (+ head-start size)) |
1278 (tail-end (+ head-start size)) | 1310 coding-system head-found tail-found pos) |
1279 coding-system head-found tail-found pos) | 1311 ;; Try a short cut by searching for the string "coding:" |
1280 ;; Try a short cut by searching for the string "coding:" | 1312 ;; and for "unibyte:" at the head and tail of SIZE bytes. |
1281 ;; and for "unibyte:" at the head and tail of SIZE bytes. | 1313 (setq head-found (or (search-forward "coding:" head-end t) |
1282 (setq head-found (or (search-forward "coding:" head-end t) | 1314 (search-forward "unibyte:" head-end t))) |
1283 (search-forward "unibyte:" head-end t))) | 1315 (if (and head-found (> head-found tail-start)) |
1284 (if (and head-found (> head-found tail-start)) | 1316 ;; Head and tail are overlapped. |
1285 ;; Head and tail are overlapped. | 1317 (setq tail-found head-found) |
1286 (setq tail-found head-found) | 1318 (goto-char tail-start) |
1287 (goto-char tail-start) | 1319 (setq tail-found (or (search-forward "coding:" tail-end t) |
1288 (setq tail-found (or (search-forward "coding:" tail-end t) | 1320 (search-forward "unibyte:" tail-end t)))) |
1289 (search-forward "unibyte:" tail-end t)))) | 1321 |
1290 | 1322 ;; At first check the head. |
1291 ;; At first check the head. | 1323 (when head-found |
1292 (when head-found | 1324 (goto-char head-start) |
1325 (setq pos (re-search-forward "[\n\r]" head-end t)) | |
1326 (if (and pos | |
1327 (= (char-after head-start) ?#) | |
1328 (= (char-after (1+ head-start)) ?!)) | |
1329 ;; If the file begins with "#!" (exec interpreter magic), | |
1330 ;; look for coding frobs in the first two lines. You cannot | |
1331 ;; necessarily put them in the first line of such a file | |
1332 ;; without screwing up the interpreter invocation. | |
1333 (setq pos (search-forward "\n" head-end t))) | |
1334 (if pos (setq head-end pos)) | |
1335 (when (< head-found head-end) | |
1293 (goto-char head-start) | 1336 (goto-char head-start) |
1294 (setq pos (re-search-forward "[\n\r]" head-end t)) | 1337 (when (and set-auto-coding-for-load |
1295 (if (and pos | 1338 (re-search-forward |
1296 (= (char-after head-start) ?#) | 1339 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" |
1297 (= (char-after (1+ head-start)) ?!)) | 1340 head-end t)) |
1298 ;; If the file begins with "#!" (exec interpreter magic), | 1341 (setq coding-system 'raw-text)) |
1299 ;; look for coding frobs in the first two lines. You cannot | 1342 (when (and (not coding-system) |
1300 ;; necessarily put them in the first line of such a file | 1343 (re-search-forward |
1301 ;; without screwing up the interpreter invocation. | 1344 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" |
1302 (setq pos (search-forward "\n" head-end t))) | 1345 head-end t)) |
1303 (if pos (setq head-end pos)) | 1346 (setq coding-system (intern (match-string 2))) |
1304 (when (< head-found head-end) | 1347 (or (coding-system-p coding-system) |
1305 (goto-char head-start) | 1348 (setq coding-system nil))))) |
1306 (when (and set-auto-coding-for-load | 1349 |
1307 (re-search-forward | 1350 ;; If no coding: tag in the head, check the tail. |
1308 "-\\*-\\(.*;\\)?[ \t]*unibyte:[ \t]*\\([^ ;]+\\)" | 1351 (when (and tail-found (not coding-system)) |
1309 head-end t)) | 1352 (goto-char tail-start) |
1310 (setq coding-system 'raw-text)) | 1353 (search-forward "\n\^L" nil t) |
1311 (when (and (not coding-system) | 1354 (if (re-search-forward |
1312 (re-search-forward | 1355 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) |
1313 "-\\*-\\(.*;\\)?[ \t]*coding:[ \t]*\\([^ ;]+\\)" | 1356 ;; The prefix is what comes before "local variables:" in its |
1314 head-end t)) | 1357 ;; line. The suffix is what comes after "local variables:" |
1315 (setq coding-system (intern (match-string 2))) | 1358 ;; in its line. |
1316 (or (coding-system-p coding-system) | 1359 (let* ((prefix (regexp-quote (match-string 1))) |
1317 (setq coding-system nil))))) | 1360 (suffix (regexp-quote (match-string 2))) |
1318 | 1361 (re-coding |
1319 ;; If no coding: tag in the head, check the tail. | 1362 (concat |
1320 (when (and tail-found (not coding-system)) | 1363 "^" prefix |
1321 (goto-char tail-start) | 1364 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" |
1322 (search-forward "\n\^L" nil t) | 1365 suffix "$")) |
1323 (if (re-search-forward | 1366 (re-unibyte |
1324 "^\\(.*\\)[ \t]*Local Variables:[ \t]*\\(.*\\)$" tail-end t) | 1367 (concat |
1325 ;; The prefix is what comes before "local variables:" in its | 1368 "^" prefix |
1326 ;; line. The suffix is what comes after "local variables:" | 1369 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" |
1327 ;; in its line. | 1370 suffix "$")) |
1328 (let* ((prefix (regexp-quote (match-string 1))) | 1371 (re-end |
1329 (suffix (regexp-quote (match-string 2))) | 1372 (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$")) |
1330 (re-coding | 1373 (pos (point))) |
1331 (concat | 1374 (re-search-forward re-end tail-end 'move) |
1332 "^" prefix | 1375 (setq tail-end (point)) |
1333 "[ \t]*coding[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" | 1376 (goto-char pos) |
1334 suffix "$")) | 1377 (when (and set-auto-coding-for-load |
1335 (re-unibyte | 1378 (re-search-forward re-unibyte tail-end t)) |
1336 (concat | 1379 (setq coding-system 'raw-text)) |
1337 "^" prefix | 1380 (when (and (not coding-system) |
1338 "[ \t]*unibyte[ \t]*:[ \t]*\\([^ \t]+\\)[ \t]*" | 1381 (re-search-forward re-coding tail-end t)) |
1339 suffix "$")) | 1382 (setq coding-system (intern (match-string 1))) |
1340 (re-end | 1383 (or (coding-system-p coding-system) |
1341 (concat "^" prefix "[ \t]*end *:[ \t]*" suffix "$")) | 1384 (setq coding-system nil)))))) |
1342 (pos (point))) | 1385 coding-system))) |
1343 (re-search-forward re-end tail-end 'move) | |
1344 (setq tail-end (point)) | |
1345 (goto-char pos) | |
1346 (when (and set-auto-coding-for-load | |
1347 (re-search-forward re-unibyte tail-end t)) | |
1348 (setq coding-system 'raw-text)) | |
1349 (when (and (not coding-system) | |
1350 (re-search-forward re-coding tail-end t)) | |
1351 (setq coding-system (intern (match-string 1))) | |
1352 (or (coding-system-p coding-system) | |
1353 (setq coding-system nil)))))) | |
1354 coding-system)))) | |
1355 | 1386 |
1356 (setq set-auto-coding-function 'set-auto-coding) | 1387 (setq set-auto-coding-function 'set-auto-coding) |
1357 | 1388 |
1358 (defun after-insert-file-set-buffer-file-coding-system (inserted) | 1389 (defun after-insert-file-set-buffer-file-coding-system (inserted) |
1359 "Set `buffer-file-coding-system' of current buffer after text is inserted." | 1390 "Set `buffer-file-coding-system' of current buffer after text is inserted." |