view lisp/thingatpt.el @ 18260:a642c99198ec

(PTR_TO_OFFSET): New macro. (POS_AS_IN_BUFFER): New macro. (SYNTAX_ENTRY_VIA_PROPERTY): Set to take `syntax-table' text property into account when doing SYNTAX (c). (re_compile_fastmap): disable fastmap if any of wordbound notwordbound wordbeg wordend notsyntaxspec syntaxspec are seen. (re_search_2): SETUP_SYNTAX_TABLE_FOR_OBJECT at the start. (re_match_object): New variable. (re_match_2): SETUP_SYNTAX_TABLE_FOR_OBJECT at the start. (re_match_2_internal): For any of wordbound notwordbound wordbeg wordend notsyntaxspec syntaxspec call UPDATE_SYNTAX_TABLE before doing SYNTAX (c). [emacs]: Include charset.h and category.h [!emacs] (BASE_LEADING_CODE_P, WORD_BOUNDARY_P, CHAR_HEAD_P, SINGLE_BYTE_CHAR_P, SAME_CHARSET_P, MULTIBYTE_FORM_LENGTH, STRING_CHAR_AND_LENGTH, GET_CHAR_AFTER_2, GET_CHAR_BEFORE_2): New dummy macros. (enum re_opcode_t): New member categoryspec and notcategoryspec. (STORE_CHARACTER_AND_INCR, EXTRACT_CHARACTER, CHARSET_LOOKUP_RANGE_TABLE_WITH_COUNT, CHARSET_LOOKUP_RANGE_TABLE, CHARSET_BITMAP_SIZE, CHARSET_RANGE_TABLE_EXISTS_P, CHARSET_RANGE_TABLE CHARSET_PAST_RANGE_TABLE): New macros. (TRANSLATE): Cast return value to unsigned char, not char. (struct range_table_work_area): New structure. (EXTEND_RANGE_TABLE_WORK_AREA, SET_RANGE_TABLE_WORK_AREA, FREE_RANGE_TABLE_WORK_AREA, CLEAR_RANGE_TABLE_WORK_USED, RANGE_TABLE_WORK_USED, RANGE_TABLE_WORK_ELT): New macros. (FREE_STACK_RETURN): Call FREE_RANGE_TABLE_WORK_AREA. (regex_compile): Declare `c' and `c1' as int to store multibyte characters. Declare range_table_work and initialize it. Initialize bufp->multibyte to 0 if not emacs. For case '[' and `default', code re-written to handle multibyte characters. Add code for case 'c' and 'C' to handle category spec. (re_compile_fastmap): New local variables k, simple_char_max, and match_any_multibyte_characters. Use macro CHARSET_BITMAP_SIZE. Handle multibyte characters in cases charset, charset_not, wordchar, notwordchar, anychar, syntaxspec, notsyntaxspec, categoryspec, notcategoryspec. (STOP_ADDR_VSTRING, POS_ADDR_VSTRING): New macros. (re_search_2): Code re-written to handle multibyte characters. (AT_WORD_BOUNDARY): Macro disabled. (re_match_2_internal): New local variable multibyte. `d' is incremented while paying attention to multibyte characters if necessary. For case charset, charsetnot, wordbound, notwordbound, wordbeg, wordend, matchsyntax, and matchnotsyntax, code re-written to handle multibyte characters. Add code for case categoryspec and notcategoryspec. Declare c, c1 as unsigned int, not unsigned char.
author Richard M. Stallman <rms@gnu.org>
date Sun, 15 Jun 1997 19:00:12 +0000
parents 3ae7560f0959
children b2bc7438b6b0
line wrap: on
line source

;;; thingatpt.el --- Get the `thing' at point

;; Copyright (C) 1991,92,93,94,95,1996 Free Software Foundation, Inc.

;; Author: Mike Williams <mikew@gopher.dosli.govt.nz>
;; Keywords: extensions, matching, mouse
;; Created: Thu Mar 28 13:48:23 1991

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;;; Commentary:

;; This file provides routines for getting the "thing" at the location of
;; point, whatever that "thing" happens to be.  The "thing" is defined by
;; its beginning and end positions in the buffer.
;;
;; The function bounds-of-thing-at-point finds the beginning and end
;; positions by moving first forward to the end of the "thing", and then
;; backwards to the beginning.  By default, it uses the corresponding
;; forward-"thing" operator (eg. forward-word, forward-line).
;;
;; Special cases are allowed for using properties associated with the named
;; "thing": 
;;
;;   forward-op		Function to call to skip forward over a "thing" (or
;;                      with a negative argument, backward).
;;                      
;;   beginning-op	Function to call to skip to the beginning of a "thing".
;;   end-op		Function to call to skip to the end of a "thing".
;;
;; Reliance on existing operators means that many `things' can be accessed
;; without further code:  eg.
;;     (thing-at-point 'line)
;;     (thing-at-point 'page)

;;; Code:

(provide 'thingatpt)

;; Basic movement

;;;###autoload
(defun forward-thing (thing &optional n)
  "Move forward to the end of the next THING."
  (let ((forward-op (or (get thing 'forward-op)
			(intern-soft (format "forward-%s" thing)))))
    (if (fboundp forward-op)
	(funcall forward-op (or n 1))
      (error "Can't determine how to move over a %s" thing))))

;; General routines

;;;###autoload
(defun bounds-of-thing-at-point (thing)
  "Determine the start and end buffer locations for the THING at point.
THING is a symbol which specifies the kind of syntactic entity you want.
Possibilities include `symbol', `list', `sexp', `defun', `filename', `url',
`word', `sentence', `whitespace', `line', `page' and others.

See the file `thingatpt.el' for documentation on how to define
a symbol as a valid THING.

The value is a cons cell (START . END) giving the start and end positions
of the textual entity that was found."
  (let ((orig (point)))
    (condition-case nil
	(save-excursion
	  ;; Try moving forward, then back.
	  (let ((end (progn 
		       (funcall 
			(or (get thing 'end-op) 
			    (function (lambda () (forward-thing thing 1)))))
		       (point)))
		(beg (progn 
		       (funcall 
			(or (get thing 'beginning-op) 
			    (function (lambda () (forward-thing thing -1)))))
		       (point))))
	    (if (not (and beg (> beg orig)))
		;; If that brings us all the way back to ORIG,
		;; it worked.  But END may not be the real end.
		;; So find the real end that corresponds to BEG.
		(let ((real-end
		       (progn 
			 (funcall 
			  (or (get thing 'end-op) 
			      (function (lambda () (forward-thing thing 1)))))
			 (point))))
		  (if (and beg real-end (<= beg orig) (<= orig real-end))
		      (cons beg real-end)))
	      (goto-char orig)
	      ;; Try a second time, moving backward first and then forward,
	      ;; so that we can find a thing that ends at ORIG.
	      (let ((beg (progn 
			   (funcall 
			    (or (get thing 'beginning-op) 
				(function (lambda () (forward-thing thing -1)))))
			   (point)))
		    (end (progn 
			   (funcall 
			    (or (get thing 'end-op) 
				(function (lambda () (forward-thing thing 1)))))
			   (point)))
		    (real-beg
		     (progn 
		       (funcall 
			(or (get thing 'end-op) 
			    (function (lambda () (forward-thing thing -1)))))
		       (point))))
		(if (and real-beg end (<= real-beg orig) (<= orig end))
		    (cons real-beg end))))))
      (error nil))))

;;;###autoload
(defun thing-at-point (thing)
  "Return the THING at point.
THING is a symbol which specifies the kind of syntactic entity you want.
Possibilities include `symbol', `list', `sexp', `defun', `filename', `url',
`word', `sentence', `whitespace', `line', `page' and others.

See the file `thingatpt.el' for documentation on how to define
a symbol as a valid THING."
  (let ((bounds (bounds-of-thing-at-point thing)))
    (if bounds 
	(buffer-substring (car bounds) (cdr bounds)))))

;; Go to beginning/end

(defun beginning-of-thing (thing)
  (let ((bounds (bounds-of-thing-at-point thing)))
    (or bounds (error "No %s here" thing))
    (goto-char (car bounds))))

(defun end-of-thing (thing)
  (let ((bounds (bounds-of-thing-at-point thing)))
    (or bounds (error "No %s here" thing))
    (goto-char (cdr bounds))))

;;  Special cases 

;;  Lines 

;; bolp will be false when you click on the last line in the buffer
;; and it has no final newline.

(put 'line 'beginning-op
     (function (lambda () (if (bolp) (forward-line -1) (beginning-of-line)))))

;;  Sexps 

(defun in-string-p ()
  (let ((orig (point)))
    (save-excursion
      (beginning-of-defun)
      (nth 3 (parse-partial-sexp (point) orig)))))

(defun end-of-sexp ()
  (let ((char-syntax (char-syntax (char-after (point)))))
    (if (or (eq char-syntax ?\))
	    (and (eq char-syntax ?\") (in-string-p)))
	(forward-char 1)
      (forward-sexp 1))))

(put 'sexp 'end-op 'end-of-sexp)

;;  Lists 

(put 'list 'end-op (function (lambda () (up-list 1))))
(put 'list 'beginning-op 'backward-sexp)

;;  Filenames and URLs

(defvar thing-at-point-file-name-chars "~/A-Za-z0-9---_.${}#%,:"
  "Characters allowable in filenames.")

(put 'filename 'end-op    
     '(lambda () (skip-chars-forward thing-at-point-file-name-chars)))
(put 'filename 'beginning-op
     '(lambda () (skip-chars-backward thing-at-point-file-name-chars)))

(defvar thing-at-point-url-chars "~/A-Za-z0-9---_@$%&=.,"
  "Characters allowable in a URL.")

(put 'url 'end-op    
     '(lambda () (skip-chars-forward (concat ":" thing-at-point-url-chars))
	(skip-chars-backward ".,:")))
(put 'url 'beginning-op
     '(lambda ()
	(skip-chars-backward thing-at-point-url-chars)
	(or (= (preceding-char) ?:)
	    (error "No URL here"))
	(forward-char -1)
	(skip-chars-backward "a-zA-Z")))

;;  Whitespace 

(defun forward-whitespace (arg)
  (interactive "p")
  (if (natnump arg) 
      (re-search-forward "[ \t]+\\|\n" nil 'move arg)
    (while (< arg 0)
      (if (re-search-backward "[ \t]+\\|\n" nil 'move)
	  (or (eq (char-after (match-beginning 0)) 10)
	      (skip-chars-backward " \t")))
      (setq arg (1+ arg)))))

;;  Buffer 

(put 'buffer 'end-op 'end-of-buffer)
(put 'buffer 'beginning-op 'beginning-of-buffer)

;;  Symbols 

(defun forward-symbol (arg)
  (interactive "p")
  (if (natnump arg) 
      (re-search-forward "\\(\\sw\\|\\s_\\)+" nil 'move arg)
    (while (< arg 0)
      (if (re-search-backward "\\(\\sw\\|\\s_\\)+" nil 'move)
	  (skip-syntax-backward "w_"))
      (setq arg (1+ arg)))))

;;  Syntax blocks 

(defun forward-same-syntax (&optional arg)
  (interactive "p")
  (while (< arg 0)
    (skip-syntax-backward 
     (char-to-string (char-syntax (char-after (1- (point))))))
    (setq arg (1+ arg)))
  (while (> arg 0)
    (skip-syntax-forward (char-to-string (char-syntax (char-after (point)))))
    (setq arg (1- arg))))

;;  Aliases 

(defun word-at-point () (thing-at-point 'word))
(defun sentence-at-point () (thing-at-point 'sentence))

(defun read-from-whole-string (str)
  "Read a lisp expression from STR.
Signal an error if the entire string was not used."
  (let* ((read-data (read-from-string str))
	 (more-left 
	  (condition-case nil
	      (progn (read-from-string (substring str (cdr read-data)))
		     t)
	    (end-of-file nil))))
    (if more-left
	(error "Can't read whole string")
      (car read-data))))

(defun form-at-point (&optional thing pred) 
  (let ((sexp (condition-case nil 
		  (read-from-whole-string (thing-at-point (or thing 'sexp)))
		(error nil))))
    (if (or (not pred) (funcall pred sexp)) sexp)))

(defun sexp-at-point ()   (form-at-point 'sexp))
(defun symbol-at-point () (form-at-point 'sexp 'symbolp))
(defun number-at-point () (form-at-point 'sexp 'numberp))
(defun list-at-point ()   (form-at-point 'list 'listp))

;; thingatpt.el ends here.