view lisp/case-table.el @ 18260:a642c99198ec

(PTR_TO_OFFSET): New macro. (POS_AS_IN_BUFFER): New macro. (SYNTAX_ENTRY_VIA_PROPERTY): Set to take `syntax-table' text property into account when doing SYNTAX (c). (re_compile_fastmap): disable fastmap if any of wordbound notwordbound wordbeg wordend notsyntaxspec syntaxspec are seen. (re_search_2): SETUP_SYNTAX_TABLE_FOR_OBJECT at the start. (re_match_object): New variable. (re_match_2): SETUP_SYNTAX_TABLE_FOR_OBJECT at the start. (re_match_2_internal): For any of wordbound notwordbound wordbeg wordend notsyntaxspec syntaxspec call UPDATE_SYNTAX_TABLE before doing SYNTAX (c). [emacs]: Include charset.h and category.h [!emacs] (BASE_LEADING_CODE_P, WORD_BOUNDARY_P, CHAR_HEAD_P, SINGLE_BYTE_CHAR_P, SAME_CHARSET_P, MULTIBYTE_FORM_LENGTH, STRING_CHAR_AND_LENGTH, GET_CHAR_AFTER_2, GET_CHAR_BEFORE_2): New dummy macros. (enum re_opcode_t): New member categoryspec and notcategoryspec. (STORE_CHARACTER_AND_INCR, EXTRACT_CHARACTER, CHARSET_LOOKUP_RANGE_TABLE_WITH_COUNT, CHARSET_LOOKUP_RANGE_TABLE, CHARSET_BITMAP_SIZE, CHARSET_RANGE_TABLE_EXISTS_P, CHARSET_RANGE_TABLE CHARSET_PAST_RANGE_TABLE): New macros. (TRANSLATE): Cast return value to unsigned char, not char. (struct range_table_work_area): New structure. (EXTEND_RANGE_TABLE_WORK_AREA, SET_RANGE_TABLE_WORK_AREA, FREE_RANGE_TABLE_WORK_AREA, CLEAR_RANGE_TABLE_WORK_USED, RANGE_TABLE_WORK_USED, RANGE_TABLE_WORK_ELT): New macros. (FREE_STACK_RETURN): Call FREE_RANGE_TABLE_WORK_AREA. (regex_compile): Declare `c' and `c1' as int to store multibyte characters. Declare range_table_work and initialize it. Initialize bufp->multibyte to 0 if not emacs. For case '[' and `default', code re-written to handle multibyte characters. Add code for case 'c' and 'C' to handle category spec. (re_compile_fastmap): New local variables k, simple_char_max, and match_any_multibyte_characters. Use macro CHARSET_BITMAP_SIZE. Handle multibyte characters in cases charset, charset_not, wordchar, notwordchar, anychar, syntaxspec, notsyntaxspec, categoryspec, notcategoryspec. (STOP_ADDR_VSTRING, POS_ADDR_VSTRING): New macros. (re_search_2): Code re-written to handle multibyte characters. (AT_WORD_BOUNDARY): Macro disabled. (re_match_2_internal): New local variable multibyte. `d' is incremented while paying attention to multibyte characters if necessary. For case charset, charsetnot, wordbound, notwordbound, wordbeg, wordend, matchsyntax, and matchnotsyntax, code re-written to handle multibyte characters. Add code for case categoryspec and notcategoryspec. Declare c, c1 as unsigned int, not unsigned char.
author Richard M. Stallman <rms@gnu.org>
date Sun, 15 Jun 1997 19:00:12 +0000
parents 43fa316b0eca
children 90c5343cf243
line wrap: on
line source

;;; case-table.el --- code to extend the character set and support case tables.

;; Copyright (C) 1988, 1994 Free Software Foundation, Inc.

;; Author: Howard Gayle
;; Maintainer: FSF
;; Keywords: i18n

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; Written by:
;; TN/ETX/TX/UMG Howard Gayle        UUCP : seismo!enea!erix!howard
;; Telefonaktiebolaget L M Ericsson  Phone: +46 8 719 55 65
;; Ericsson Telecom     	     Telex: 14910 ERIC S
;; S-126 25 Stockholm                FAX  : +46 8 719 64 82
;; Sweden

;;; Code:

(defvar set-case-syntax-offset 0)

(defvar set-case-syntax-set-multibyte nil)

;;;###autoload
(defun describe-buffer-case-table ()
  "Describe the case table of the current buffer."
  (interactive)
  (let ((description (make-char-table 'case-table)))
    (map-char-table
     (function (lambda (key value)
		 (aset
		  description key
		  (cond ((not (natnump value))
			 "case-invariant")
			((/= key (downcase key))
			 (concat "uppercase, matches "
				 (char-to-string (downcase key))))
			((/= key (upcase key))
			 (concat "lowercase, matches "
				 (char-to-string (upcase key))))
			(t "case-invariant")))))
     (current-case-table))
    (save-excursion
     (with-output-to-temp-buffer "*Help*"
       (set-buffer standard-output)
       (describe-vector description)
       (help-mode)))))

;;;###autoload
(defun copy-case-table (case-table)
  (let ((copy (copy-sequence case-table)))
    ;; Clear out the extra slots so that they will be
    ;; recomputed from the main (downcase) table.
    (set-char-table-extra-slot copy 0 nil)
    (set-char-table-extra-slot copy 1 nil)
    (set-char-table-extra-slot copy 2 nil)
    copy))

(defsubst set-case-syntax-1 (char)
  "Offset CHAR by `set-case-syntax-offset' if CHAR is a non-ASCII 8-bit char."
  (if (and (>= char 128) (< char 256))
      (+ char set-case-syntax-offset)
    char))

;;;###autoload
(defun set-case-syntax-delims (l r table)
  "Make characters L and R a matching pair of non-case-converting delimiters.
This sets the entries for L and R in TABLE, which is a string
that will be used as the downcase part of a case table.
It also modifies `standard-syntax-table' to
indicate left and right delimiters."
  (setq l (set-case-syntax-1 l))
  (setq r (set-case-syntax-1 r))
  (aset table l l)
  (aset table r r)
  ;; Clear out the extra slots so that they will be
  ;; recomputed from the main (downcase) table.
  (set-char-table-extra-slot table 0 nil)
  (set-char-table-extra-slot table 1 nil)
  (set-char-table-extra-slot table 2 nil)
  (modify-syntax-entry l (concat "(" (char-to-string r) "  ")
		       (standard-syntax-table))
  (modify-syntax-entry r (concat ")" (char-to-string l) "  ")
		       (standard-syntax-table)))

;;;###autoload
(defun set-case-syntax-pair (uc lc table)
  "Make characters UC and LC a pair of inter-case-converting letters.
This sets the entries for characters UC and LC in TABLE, which is a string
that will be used as the downcase part of a case table.
It also modifies `standard-syntax-table' to give them the syntax of
word constituents."
  (setq uc (set-case-syntax-1 uc))
  (setq lc (set-case-syntax-1 lc))
  (aset table uc lc)
  (aset table lc lc)
  (set-char-table-extra-slot table 0 nil)
  (set-char-table-extra-slot table 1 nil)
  (set-char-table-extra-slot table 2 nil)
  (modify-syntax-entry lc "w   " (standard-syntax-table))
  (modify-syntax-entry uc "w   " (standard-syntax-table)))

;;;###autoload
(defun set-case-syntax (c syntax table)
  "Make character C case-invariant with syntax SYNTAX.
This sets the entry for character C in TABLE, which is a string
that will be used as the downcase part of a case table.
It also modifies `standard-syntax-table'.
SYNTAX should be \" \", \"w\", \".\" or \"_\"."
  (setq c (set-case-syntax-1 c))
  (aset table c c)
  (set-char-table-extra-slot table 0 nil)
  (set-char-table-extra-slot table 1 nil)
  (set-char-table-extra-slot table 2 nil)
  (modify-syntax-entry c syntax (standard-syntax-table)))

(provide 'case-table)

;;; case-table.el ends here