# HG changeset patch # User Francesco Potorti` # Date 784990849 0 # Node ID 2869e501c2dea1a04783049611bbfabe2aa15acd # Parent e21b0674f37c198e200bfd53972ca1ababaafa15 * etags.c [ETAGS_REGEXPS]: All the new code that deals with regexps is compiled if this is defined. The new functions and variables added under #ifdef ETAGS_REGEXPS are not listed in this ChangeLog. [VMS]: All VMS specific code previously contained in etags-vmslib.c is now included here, modified for dealing with language and regex options intermixed with filenames. (header_file): Global variable deleted. (Lang_function): New typedef. All language parser functions changed to this new type. (string_numeric_p, substr, prestr): Functions deleted. (readline_internal): Does the job that readline did previously. (longopts): --language, --regex and --no-regex options added. (lang_names, lang_extensions, lang_func, print_language_names): New structures, variables and functions for choosing languages. (print_help): Help strings updated. Calls print_language_names. (argument_type, ARGUMENT): New enum and typedef for dealing with language and regex options intermixed with filenames. (main): Changed the way of dealing with arguments on the command line to deal with language and regex options intermixed with filenames. (get_language, default_C_entries, Cplusplus_entries, Cstar_entries, Yacc_entries, just_read_file): New functions. (find_entries): Use the new method for choosing the language. (Pascal_functions): Allow intermixing of comment styles. (prolog_getit, skip_comment): Rewritten for speed. (readline): Rewritten to deal with regexps. diff -r e21b0674f37c -r 2869e501c2de lib-src/etags.c --- a/lib-src/etags.c Wed Nov 16 10:29:33 1994 +0000 +++ b/lib-src/etags.c Wed Nov 16 13:00:49 1994 +0000 @@ -1,6 +1,5 @@ /* Tags file maker to go with GNU Emacs Copyright (C) 1984,87,88,89,93,94 Free Software Foundation, Inc. and Ken Arnold - This file is not considered part of GNU Emacs. 
This program is free software; you can redistribute it and/or modify @@ -24,11 +23,15 @@ * Ed Pelegri-Llopart added C typedefs. * Gnu Emacs TAGS format and modifications by RMS? * Sam Kendall added C++. + * Francesco Potorti` reorganised C and C++ based on work by Joe Wells. +#ifdef ETAGS_REGEXPS + * Regexp tags by Tom Tromey. +#endif * * Francesco Potorti` (pot@cnuce.cnr.it) is the current maintainer. */ -char pot_etags_version[] = "@(#) pot revision number is 10.32"; +char pot_etags_version[] = "@(#) pot revision number is 11.8"; #ifdef MSDOS #include @@ -43,7 +46,7 @@ #endif #ifdef HAVE_CONFIG_H -#include <../src/config.h> +#include <config.h> /* On some systems, Emacs defines static as nothing for the sake of unexec. We don't want that here since we don't use unexec. */ @@ -63,7 +66,11 @@ # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) #endif -#include "getopt.h" +#include <getopt.h> + +#ifdef ETAGS_REGEXPS +#include <regex.h> +#endif /* ETAGS_REGEXPS */ extern char *getenv (); @@ -111,11 +118,17 @@ */ #ifndef DEBUG +#define DEBUG FALSE + /* real implementation */ typedef long FILEPOS; #define GET_CHARNO(pos) ((pos) + 0) #define SET_FILEPOS(pos, fp, cno) ((void) ((pos) = (cno))) + #else +#undef DEBUG +#define DEBUG TRUE + /* debugging implementation */ typedef struct { @@ -158,7 +171,6 @@ typedef struct nd_st NODE; -logical header_file; /* TRUE if .h file, FALSE o.w. 
*/ /* boolean "functions" (see init) */ logical _wht[0177], _etk[0177], _itk[0177], _btk[0177]; @@ -171,20 +183,29 @@ char *etags_getcwd (); char *relative_filename (), *absolute_filename (), *absolute_dirname (); char *xmalloc (), *xrealloc (); + +typedef void Lang_function (); +Lang_function Asm_labels; +Lang_function default_C_entries; +Lang_function C_entries; +Lang_function Cplusplus_entries; +Lang_function Cstar_entries; +Lang_function Fortran_functions; +Lang_function Yacc_entries; +Lang_function Lisp_functions; +Lang_function Pascal_functions; +Lang_function Prolog_functions; +Lang_function Scheme_functions; +Lang_function TeX_functions; +Lang_function just_read_file; + +logical get_language (); int total_size_of_entries (); -int string_numeric_p (); -logical substr (); -logical prestr (); long readline (); - -void Asm_labels (); -void C_entries (); -int Fortran_functions (); -void Lisp_functions (); -void Pascal_functions (); -void Prolog_functions (); -void Scheme_functions (); -void TeX_functions (); +long readline_internal (); +#ifdef ETAGS_REGEXPS +void add_regex (); +#endif void add_node (); void error (); void fatal (); @@ -193,7 +214,6 @@ void getit (); void init (); void initbuffer (); -void initbuffer (); void pfnote (); void process_file (); void put_entries (); @@ -280,9 +300,12 @@ { "help", no_argument, NULL, 'H' }, { "ignore-indentation", no_argument, NULL, 'S' }, { "include", required_argument, NULL, 'i' }, + { "language", required_argument, NULL, 'l' }, { "no-defines", no_argument, NULL, 'D' }, + { "no-regex", no_argument, NULL, 'R' }, { "no-warn", no_argument, NULL, 'w' }, { "output", required_argument, NULL, 'o' }, + { "regex", required_argument, NULL, 'r' }, { "typedefs", no_argument, NULL, 't' }, { "typedefs-and-c++", no_argument, NULL, 'T' }, { "update", no_argument, NULL, 'u' }, @@ -312,8 +335,156 @@ FILEPOS linepos; struct linebuffer lb; /* used by C_entries instead of lb */ } lbs[2]; + +#ifdef ETAGS_REGEXPS +/* Structure defining a 
regular expression. Elements are + the compiled pattern, and the name string. */ +struct pattern +{ + struct re_pattern_buffer *pattern; + struct re_registers regs; + char *name_pattern; + logical error_signaled; +}; + +/* Number of regexps found. */ +int num_patterns = 0; + +/* Array of all regexps. */ +struct pattern *patterns = NULL; +#endif /* ETAGS_REGEXPS */ + +/* Language stuff. */ +struct lang_entry +{ + char *extension; + Lang_function *function; +}; + +/* Table of language names and corresponding functions. */ +/* It is ok for a given function to be listed under more than one + name. I just didn't. */ +/* "auto" language reverts to default behavior. */ +struct lang_entry lang_names[] = +{ + { "asm", Asm_labels }, + { "c", default_C_entries }, + { "c++", Cplusplus_entries }, + { "c*", Cstar_entries }, + { "fortran", Fortran_functions }, + { "lisp", Lisp_functions }, + { "none", just_read_file }, + { "pascal", Pascal_functions }, + { "scheme" , Scheme_functions }, + { "tex", TeX_functions }, + { "auto", NULL }, + { NULL, NULL } +}; + +/* Table of file extensions and corresponding language functions. */ +struct lang_entry lang_extensions[] = +{ + /* Assume that ".s" or ".a" is assembly code. -wolfgang. + Or even ".sa". */ + { "a", Asm_labels }, /* Unix assembler */ + { "asm", Asm_labels }, /* Microcontroller assembly */ + { "def", Asm_labels }, /* BSO/Tasking definition includes */ + { "inc", Asm_labels }, /* Microcontroller include files */ + { "ins", Asm_labels }, /* Microcontroller include files */ + { "s", Asm_labels }, + { "sa", Asm_labels }, /* Unix assembler */ + { "src", Asm_labels }, /* BSO/Tasking C compiler output */ + + /* .aux, .bbl, .clo, .cls, .dtx or .tex implies LaTeX source code. */ + { "aux", TeX_functions }, + { "bbl", TeX_functions }, + { "clo", TeX_functions }, + { "cls", TeX_functions }, + { "dtx", TeX_functions }, + { "sty", TeX_functions }, + { "tex", TeX_functions }, + + /* .l or .el or .lisp (or .cl or .clisp or ...) 
implies lisp source code */ + { "cl", Lisp_functions }, + { "clisp", Lisp_functions }, + { "el", Lisp_functions }, + { "l", Lisp_functions }, + { "lisp", Lisp_functions }, + { "lsp", Lisp_functions }, + + /* .scm or .sm or .scheme implies scheme source code */ + { "SCM", Scheme_functions }, + { "SM", Scheme_functions }, + { "oak", Scheme_functions }, + { "sch", Scheme_functions }, + { "scheme", Scheme_functions }, + { "scm", Scheme_functions }, + { "sm", Scheme_functions }, + { "t", Scheme_functions }, + /* FIXME Can't do the `SCM' or `scm' prefix with a version number */ + + /* Note that ".c" and ".h" can be considered C++, if the --c++ + flag was given. That is why default_C_entries is called here. */ + { "c", default_C_entries }, + { "h", default_C_entries }, + + /* .C or .H or .cpp or .cxx or .hxx or .hh or .cc or .cpp: a C++ file */ + { "C", Cplusplus_entries }, + { "H", Cplusplus_entries }, + { "cc", Cplusplus_entries }, + { "cpp", Cplusplus_entries }, + { "cxx", Cplusplus_entries }, + { "hh", Cplusplus_entries }, + { "hxx", Cplusplus_entries }, + + /* .y: a yacc file */ + { "y", Yacc_entries }, + + /* .cs or .hs: a C* file */ + { "cs", Cstar_entries }, + { "hs", Cstar_entries }, + + /* .f and .for are FORTRAN. */ + { "F", Fortran_functions }, + { "f", Fortran_functions }, + { "for", Fortran_functions }, + + /* .pl implies prolog source code */ + { "pl", Prolog_functions }, + + /* .p or .pas: a Pascal file */ + { "p", Pascal_functions }, + { "pas", Pascal_functions }, + + { NULL, NULL } +}; + +/* Non-NULL if language fixed. 
*/ +Lang_function *lang_func = NULL; + void +print_language_names () +{ + struct lang_entry *name, *ext; + + puts ("\nThese are the currently supported languages, along with the\n\ +default extensions for files:"); + for (name = lang_names; name->extension; ++name) + { + printf ("\t%s\t", name->extension); + for (ext = lang_extensions; ext->extension; ++ext) + if (name->function == ext->function) + printf (" .%s", ext->extension); + puts (""); + } + puts ("Where `auto' means use default language for files based on filename\n\ +extension, and `none' means only do regexp processing on files.\n\ +If no language is specified and no extension is found for some file,\n\ +Fortran is tried first; if no tags are found, C is tried next."); +} + +void print_version () { #ifdef VERSION @@ -329,7 +500,7 @@ print_help () { printf ("These are the options accepted by %s. You may use unambiguous\n\ -abbreviations for the long option names. A - as file name means read file\n\ +abbreviations for the long option names. A - as file name means read\n\ names from stdin.\n\n", progname); puts ("-a, --append\n\ @@ -341,9 +512,7 @@ backward-search command instead of '/', the forward-search command."); puts ("-C, --c++\n\ - Treat files with `.c' and `.h' extensions as C++ code, not C\n\ - code. 
Files with `.C', `.H', `.cxx', `.hxx', or `.cc'\n\ - extensions are always assumed to be C++ code."); + Treat files whose extension defaults to C language as C++ files."); if (CTAGS) puts ("-d, --defines\n\ @@ -354,10 +523,25 @@ the tags file smaller."); if (!CTAGS) - puts ("-i FILE, --include=FILE\n\ + { + puts ("-i FILE, --include=FILE\n\ Include a note in tag file indicating that, when searching for\n\ a tag, one should also consult the tags file FILE after\n\ checking the current file."); + puts ("-l LANG, --language=LANG\n\ + Force the following files to be considered as written in the\n\ + named language up to the next --language=LANG option."); +#ifdef ETAGS_REGEXPS + puts ("-r /REGEXP/, --regex=/REGEXP/\n\ + Make a tag for each line matching pattern REGEXP in the\n\ + following files. REGEXP is anchored (as if preceded by ^).\n\ + The form /REGEXP/NAME/ creates a named tag. For example Tcl\n\ + named tags can be created with:\n\ + --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/."); + puts ("-R, --no-regex\n\ + Don't create tags from regexps for the following files."); +#endif /* ETAGS_REGEXPS */ + } puts ("-o FILE, --output=FILE\n\ Write the tags to FILE."); @@ -385,14 +569,14 @@ Generates an index of items intended for human consumption,\n\ similar to the output of vgrind. 
The index is sorted, and\n\ gives the page number of each item."); + puts ("-w, --no-warn\n\ + Suppress warning messages about entries defined in multiple\n\ + files."); puts ("-x, --cxref\n\ Like --vgrind, but in the style of cxref, rather than vgrind.\n\ The output uses line numbers instead of page numbers, but\n\ beyond that the differences are cosmetic; try both to see\n\ which you like."); - puts ("-w, --no-warn\n\ - Suppress warning messages about entries defined in multiple\n\ - files."); } puts ("-V, --version\n\ @@ -400,10 +584,152 @@ -H, --help\n\ Print this help message."); + print_language_names (); + exit (GOOD); } +enum argument_type +{ + at_language, + at_regexp, + at_filename +}; + +/* This structure helps us allow mixing of --lang and filenames. */ +typedef struct +{ + enum argument_type arg_type; + char *what; + Lang_function *function; +} ARGUMENT; + +#ifdef VMS /* VMS specific functions */ + +#define EOS '\0' + +/* This is a BUG! ANY arbitrary limit is a BUG! + Won't someone please fix this? */ +#define MAX_FILE_SPEC_LEN 255 +typedef struct { + short curlen; + char body[MAX_FILE_SPEC_LEN + 1]; +} vspec; + +/* + v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names + returning in each successive call the next filename matching the input + spec. The function expects that each in_spec passed + to it will be processed to completion; in particular, up to and + including the call following that in which the last matching name + is returned, the function ignores the value of in_spec, and will + only start processing a new spec with the following call. + If an error occurs, on return out_spec contains the value + of in_spec when the error occurred. + + With each successive filename returned in out_spec, the + function's return value is one. When there are no more matching + names the function returns zero. If on the first call no file + matches in_spec, or there is any other error, -1 is returned. 
+*/ + +#include +#include +#define OUTSIZE MAX_FILE_SPEC_LEN +short +fn_exp (out, in) + vspec *out; + char *in; +{ + static long context = 0; + static struct dsc$descriptor_s o; + static struct dsc$descriptor_s i; + static logical pass1 = TRUE; + long status; + short retval; + + if (pass1) + { + pass1 = FALSE; + o.dsc$a_pointer = (char *) out; + o.dsc$w_length = (short)OUTSIZE; + i.dsc$a_pointer = in; + i.dsc$w_length = (short)strlen(in); + i.dsc$b_dtype = DSC$K_DTYPE_T; + i.dsc$b_class = DSC$K_CLASS_S; + o.dsc$b_dtype = DSC$K_DTYPE_VT; + o.dsc$b_class = DSC$K_CLASS_VS; + } + if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL) + { + out->body[out->curlen] = EOS; + return 1; + } + else if (status == RMS$_NMF) + retval = 0; + else + { + strcpy(out->body, in); + retval = -1; + } + lib$find_file_end(&context); + pass1 = TRUE; + return retval; +} + +/* + v1.01 nmm 19-Aug-85 gfnames - return in successive calls the + name of each file specified by the provided arg expanding wildcards. +*/ +char * +gfnames (arg, p_error) + char *arg; + logical *p_error; +{ + static vspec filename = {MAX_FILE_SPEC_LEN, "\0"}; + + switch (fn_exp (&filename, arg)) + { + case 1: + *p_error = FALSE; + return filename.body; + case 0: + *p_error = FALSE; + return NULL; + default: + *p_error = TRUE; + return filename.body; + } +} + +#ifndef OLD /* Newer versions of VMS do provide `system'. 
*/ +system (cmd) + char *cmd; +{ + fprintf (stderr, "system() function not implemented under VMS\n"); +} +#endif + +#define VERSION_DELIM ';' +char *massage_name (s) + char *s; +{ + char *start = s; + + for ( ; *s; s++) + if (*s == VERSION_DELIM) + { + *s = EOS; + break; + } + else + *s = tolower(*s); + return start; +} +#endif /* VMS */ + + void main (argc, argv) int argc; @@ -414,11 +740,10 @@ unsigned int nincluded_files = 0; char **included_files = xnew (argc, char *); char *this_file; + ARGUMENT *argbuffer; + int current_arg = 0, file_count = 0; #ifdef VMS - char got_err; - - extern char *gfnames (); - extern char *massage_name (); + logical got_err; #endif #ifdef DOS_NT @@ -427,6 +752,15 @@ progname = argv[0]; + /* Allocate enough no matter what happens. Overkill, but each one + is small. */ + argbuffer = xnew (argc, ARGUMENT); + +#ifdef ETAGS_REGEXPS + /* Set syntax for regular expression routines. */ + re_set_syntax (RE_SYNTAX_EMACS); +#endif /* ETAGS_REGEXPS */ + /* * If etags, always find typedefs and structure tags. Why not? * Also default is to find macro constants. @@ -436,8 +770,8 @@ while (1) { - int opt; - opt = getopt_long (argc, argv, "aCdDf:o:StTi:BuvxwVH", longopts, 0); + int opt = getopt_long (argc, argv, + "-aCdDf:l:o:r:RStTi:BuvxwVH", longopts, 0); if (opt == EOF) break; @@ -449,6 +783,14 @@ long-named option. We should do nothing. */ break; + case 1: + /* This means that a filename has been seen. Record it. */ + argbuffer[current_arg].arg_type = at_filename; + argbuffer[current_arg].what = optarg; + ++current_arg; + ++file_count; + break; + /* Common options. 
*/ case 'a': append_to_tagfile++; @@ -472,6 +814,28 @@ } tagfile = optarg; break; + case 'l': + if (!get_language (optarg, &argbuffer[current_arg].function)) + { + fprintf (stderr, "%s: language \"%s\" not recognized.\n", + progname, optarg); + goto usage; + } + argbuffer[current_arg].arg_type = at_language; + ++current_arg; + break; +#ifdef ETAGS_REGEXPS + case 'r': + argbuffer[current_arg].arg_type = at_regexp; + argbuffer[current_arg].what = optarg; + ++current_arg; + break; + case 'R': + argbuffer[current_arg].arg_type = at_regexp; + argbuffer[current_arg].what = NULL; + ++current_arg; + break; +#endif /* ETAGS_REGEXPS */ case 'S': noindentypedefs++; break; @@ -524,7 +888,15 @@ } } - if (optind == argc && nincluded_files == 0) + for (; optind < argc; ++optind) + { + argbuffer[current_arg].arg_type = at_filename; + argbuffer[current_arg].what = argv[optind]; + ++current_arg; + ++file_count; + } + + if (nincluded_files == 0 && file_count == 0) { fprintf (stderr, "%s: No input files specified.\n", progname); @@ -549,15 +921,13 @@ tagfiledir = absolute_dirname (tagfile, cwd); } - init (); /* set up boolean "functions" */ + init (); /* set up boolean "functions" */ initbuffer (&lb); initbuffer (&lbs[0].lb); initbuffer (&lbs[1].lb); initbuffer (&filename_lb); - /* - * loop through files finding functions - */ + if (!CTAGS) { if (streq (tagfile, "-")) @@ -571,40 +941,57 @@ } } -#ifdef VMS - argc -= optind; - argv += optind; - while (gfnames (&argc, &argv, &got_err) != NULL) + /* + * Loop through files finding functions. 
+ */ + for (i = 0; i < current_arg; ++i) { - if (got_err) + switch (argbuffer[i].arg_type) { - error ("Can't find file %s\n", this_file); - argc--, argv++; - } - else - { - this_file = massage_name (this_file); -#if 0 - } - } /* solely to balance out the ifdef'd parens above */ + case at_language: + lang_func = argbuffer[i].function; + break; +#ifdef ETAGS_REGEXPS + case at_regexp: + add_regex (argbuffer[i].what); + break; #endif + case at_filename: +#ifdef VMS + while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL) + { + if (got_err) + { + error ("Can't find file %s\n", this_file); + argc--, argv++; + } + else + { + this_file = massage_name (this_file); + } #else - for (; optind < argc; optind++) - { - this_file = argv[optind]; + this_file = argbuffer[i].what; #endif - /* Input file named "-" means read file names from stdin and use them. */ - if (streq (this_file, "-")) - { - while (!feof (stdin)) - { - (void) readline (&filename_lb, stdin); - if (strlen (filename_lb.buffer) > 0) - process_file (filename_lb.buffer); + /* Input file named "-" means read file names from stdin + and use them. */ + if (streq (this_file, "-")) + { + while (!feof (stdin)) + { + /* Use readline_internal so that regexp matching */ + /* is not done on filenames. */ + (void) readline_internal (&filename_lb, stdin); + if (strlen (filename_lb.buffer) > 0) + process_file (filename_lb.buffer); + } + } + else + process_file (this_file); +#ifdef VMS } +#endif + break; } - else - process_file (this_file); } if (!CTAGS) @@ -623,13 +1010,13 @@ } if (update) { - /* update cannot be set under VMS, so we may assume that argc - and argv have not been munged. 
*/ - for (i = optind; i < argc; i++) + for (i = 0; i < current_arg; ++i) { + if (argbuffer[i].arg_type == at_language) + continue; sprintf (cmd, "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", - tagfile, argv[i], tagfile); + tagfile, argbuffer[i].what, tagfile); (void) system (cmd); } append_to_tagfile++; @@ -652,6 +1039,29 @@ /* + * Set the language, given the name. + */ +logical +get_language (language, func) + char *language; + Lang_function **func; +{ + struct lang_entry *lang; + + for (lang = lang_names; lang->extension; ++lang) + { + if (streq (language, lang->extension)) + { + *func = lang->function; + return TRUE; + } + } + + return FALSE; +} + + +/* * This routine is called on each file argument. */ void @@ -734,8 +1144,11 @@ find_entries (file) char *file; { - char *cp, *cp1; FILE *inf; + char *cp; + struct lang_entry *lang; + NODE *old_last_node; + extern NODE *last_node; inf = fopen (file, "r"); if (inf == NULL) @@ -745,133 +1158,39 @@ } curfile = savestr (file); cp = etags_strrchr (file, '.'); - cp1 = cp + 1; - - header_file = (cp && (streq (cp1, "h"))); - - /* .tex, .aux or .bbl implies LaTeX source code */ - if (cp && (streq (cp1, "tex") || streq (cp1, "aux") - || streq (cp1, "bbl"))) + + /* If user specified a language, use it. */ + if (lang_func != NULL) { - TeX_functions (inf); - goto close_and_return; - } - /* .l or .el or .lisp (or .cl or .clisp or ...) implies lisp source code */ - if (cp && (streq (cp1, "l") - || streq (cp1, "el") - || streq (cp1, "lsp") - || streq (cp1, "lisp") - || streq (cp1, "cl") - || streq (cp1, "clisp"))) - { - Lisp_functions (inf); - goto close_and_return; + lang_func (inf); + fclose (inf); + return TRUE; } - /* .scm or .sm or .scheme or ... 
implies scheme source code */ - if (cp && (streq (cp1, "sm") - || streq (cp1, "scm") - || streq (cp1, "scheme") - || streq (cp1, "t") - || streq (cp1, "sch") - || streq (cp1, "ss") - || streq (cp1, "SM") - || streq (cp1, "SCM") - /* The `SCM' or `scm' prefix with a version number */ - || (cp[-1] == 'm' && cp[-2] == 'c' && cp[-3] == 's' - && string_numeric_p (cp1)) - || (cp[-1] == 'M' && cp[-2] == 'C' && cp[-3] == 'S' - && string_numeric_p (cp1)))) - { - Scheme_functions (inf); - goto close_and_return; - } - /* Assembly code */ - if (cp && (streq (cp1, "s") - || streq (cp1, "a") /* Unix assembler */ - || streq (cp1, "sa") /* Unix assembler */ - || streq (cp1, "asm") /* Microcontroller assembly */ - || streq (cp1, "src") /* BSO/Tasking C compiler output */ - || streq (cp1, "def") /* BSO/Tasking definition includes */ - || streq (cp1, "ins") /* Microcontroller include files */ - || streq (cp1, "inc")))/* Microcontroller include files */ - { - Asm_labels (inf); - goto close_and_return; - } - /* .C or .H or .cxx or .hxx or .cc or .cpp: a C++ file */ - if (cp && (streq (cp1, "C") - || streq (cp1, "H") - || streq (cp1, "cpp") - || streq (cp1, "cxx") - || streq (cp1, "hxx") - || streq (cp1, "cc"))) + + if (cp) { - C_entries (C_PLPL, inf); /* C++ */ - goto close_and_return; - } - /* .cs or .hs: a C* file */ - if (cp && (streq (cp1, "cs") - || streq (cp1, "hs"))) - { - C_entries (C_STAR, inf); - goto close_and_return; - } - /* .y: a yacc file */ - if (cp && (streq (cp1, "y"))) - { - C_entries (YACC, inf); - goto close_and_return; - } - /* .pl implies prolog source code */ - if (cp && streq (cp1, "pl")) - { - Prolog_functions (inf); - goto close_and_return; - } - /* .p or .pas: a Pascal file */ - if (cp && (streq (cp1, "p") - || streq (cp1, "pas"))) - { - Pascal_functions (inf); - goto close_and_return; + ++cp; + for (lang = lang_extensions; lang->extension; ++lang) + { + if (streq (cp, lang->extension)) + { + lang->function (inf); + fclose (inf); + return TRUE; + } + } } - 
/* If .f or .for, assume it is fortran or nothing. */ - if (cp && (streq (cp1, "f") - || streq (cp1, "for"))) - { - (void) Fortran_functions (inf); - goto close_and_return; - } - /* if not a .c or .h or .y file, try fortran */ - if (cp && ((cp[1] != 'c' - && cp[1] != 'h' - && cp[1] != 'y') - || (cp[1] != 0 && cp[2] != 0))) - { - if (Fortran_functions (inf) != 0) - goto close_and_return; - rewind (inf); /* no fortran tags found, try C */ - } - C_entries (cplusplus ? C_PLPL : 0, inf); - -close_and_return: - (void) fclose (inf); + + /* Try Fortran. */ + old_last_node = last_node; + Fortran_functions (inf); + + /* No Fortran entries found. Try C. */ + if (old_last_node == last_node) + default_C_entries (inf); + fclose (inf); return TRUE; } - -/* Nonzero if string STR is composed of digits. */ - -int -string_numeric_p (str) - char *str; -{ - while (*str) - { - if (*str < '0' || *str > '9') - return 0; - } - return 1; -} /* Record a tag. */ /* Should take a TOKEN* instead!! */ @@ -1424,7 +1743,7 @@ { definedef = dignorerest; } - return (FALSE); + return FALSE; case ddefineseen: /* * Make a tag for any macro, unless it is a constant @@ -1433,11 +1752,11 @@ definedef = dignorerest; *is_func = (c == '('); if (!*is_func && !constantypedefs) - return (FALSE); + return FALSE; else - return (TRUE); + return TRUE; case dignorerest: - return (FALSE); + return FALSE; default: error ("internal error: definedef value.", 0); } @@ -1453,7 +1772,7 @@ if (typedefs) typdef = ttypedseen; funcdef = fnone; - return (FALSE); + return FALSE; } break; case ttypedseen: @@ -1475,9 +1794,9 @@ case st_C_typespec: case st_C_struct: case st_C_enum: - return (FALSE); + return FALSE; } - return (TRUE); + return TRUE; } /* @@ -1492,7 +1811,7 @@ * * This if statement deals with the typdef state machine as * follows: if typdef==ttypedseen and token is struct/union/class/enum, - * return (FALSE). All the other code here is for the structdef + * return FALSE. 
All the other code here is for the structdef * state machine. */ switch (toktype) @@ -1505,7 +1824,7 @@ structdef = skeyseen; structtype = toktype; } - return (FALSE); + return FALSE; } if (structdef == skeyseen) { @@ -1519,14 +1838,14 @@ structtag[0] = '\0'; /* for enum (why is it treated differently?) */ } structdef = stagseen; - return (TRUE); + return TRUE; } /* Avoid entering funcdef stuff if typdef is going on. */ if (typdef != tnone) { definedef = dnone; - return (FALSE); + return FALSE; } /* Detect GNU macros. */ @@ -1544,14 +1863,14 @@ { next_token_is_func = TRUE; - return (FALSE); + return FALSE; } if (next_token_is_func) { next_token_is_func = FALSE; funcdef = fignore; *is_func = TRUE; - return (TRUE); + return TRUE; } /* A function? */ @@ -1560,17 +1879,17 @@ case st_C_typespec: if (funcdef != finlist && funcdef != fignore) funcdef = fnone; /* should be useless */ - return (FALSE); + return FALSE; default: if (funcdef == fnone) { funcdef = ftagseen; *is_func = TRUE; - return (TRUE); + return TRUE; } } - return (FALSE); + return FALSE; } /* @@ -1604,7 +1923,7 @@ lp = curlb.buffer; \ quotednl = FALSE; \ newndx = curndx; \ -} while (FALSE) +} while (0) #define CNL \ do { \ @@ -1612,7 +1931,7 @@ if (TOKEN_SAVED_P) \ RESTORE_TOKEN; \ definedef = dnone; \ -} while (FALSE) +} while (0) #define MAKE_TAG_FROM_NEW_LB(isfun) pfnote (nameb, isfun, tok.named, \ newlb.buffer, tokoff + toklen + 1, tok.lineno, GET_CHARNO (newlinepos)) @@ -1621,8 +1940,8 @@ void C_entries (c_ext, inf) - int c_ext; /* extension of C? */ - FILE *inf; + int c_ext; /* extension of C */ + FILE *inf; /* input file */ { register char c; /* latest char read; '\0' for end of line */ register char *lp; /* pointer one beyond the character `c' */ @@ -2098,6 +2417,41 @@ } /* while not eof */ } + +/* + * Process either a C++ file or a C file depending on the setting + * of a global flag. + */ +void +default_C_entries (inf) + FILE *inf; +{ + C_entries (cplusplus ? 
C_PLPL : 0, inf); +} + +/* Always do C++. */ +void +Cplusplus_entries (inf) + FILE *inf; +{ + C_entries (C_PLPL, inf); +} + +/* Always do C*. */ +void +Cstar_entries (inf) + FILE *inf; +{ + C_entries (C_STAR, inf); +} + +/* Always do Yacc. */ +void +Yacc_entries (inf) + FILE *inf; +{ + C_entries (YACC, inf); +} /* Fortran parsing */ @@ -2115,9 +2469,9 @@ if (*cp == 0) { dbp += len; - return (TRUE); + return TRUE; } - return (FALSE); + return FALSE; } void @@ -2182,7 +2536,7 @@ pfcnt++; } -int +void Fortran_functions (inf) FILE *inf; { @@ -2262,7 +2616,6 @@ continue; } } - return (pfcnt); } /* @@ -2340,8 +2693,7 @@ char nambuf[BUFSIZ]; logical /* each of these flags is TRUE iff: */ - incomm1, /* point is inside {..} comment */ - incomm2, /* point is inside (*..*) comment */ + incomment, /* point is inside a comment */ inquote, /* point is inside '..' string */ get_tagname, /* point is after PROCEDURE/FUNCTION */ /* keyword, so next item = potential tag */ @@ -2358,7 +2710,7 @@ *dbp = 0; initbuffer (&tline); - incomm1 = incomm2 = inquote = FALSE; + incomment = inquote = FALSE; found_tag = FALSE; /* have a proc name; check if extern */ get_tagname = FALSE; /* have found "procedure" keyword */ inparms = FALSE; /* found '(' after "proc" */ @@ -2379,22 +2731,14 @@ /* to the beginning of the name of */ /* the procedure or function */ } - if (incomm1) /* within { - } comments */ - { - if (c == '}') - incomm1 = FALSE; - continue; - } - else if (incomm2) /* within (* - *) comments */ + if (incomment) { - if (c == '*') + if (c == '}') /* within { - } comments */ + incomment = FALSE; + else if (c == '*' && dbp[1] == ')') /* within (* - *) comments */ { - while ((c = *dbp++) == '*') - continue; - if (c == 0) - GET_NEW_LINE; - if (c == ')') - incomm2 = FALSE; + dbp++; + incomment = FALSE; } continue; } @@ -2404,19 +2748,19 @@ inquote = FALSE; continue; } - else + else switch (c) { case '\'': inquote = TRUE; /* found first quote */ continue; case '{': /* found open-{-comment */ - 
incomm1 = TRUE; + incomment = TRUE; continue; case '(': if (*dbp == '*') /* found open-(*-comment */ { - incomm2 = TRUE; + incomment = TRUE; dbp++; } else if (found_tag) /* found '(' after tag, i.e., parm-list */ @@ -2490,8 +2834,7 @@ /* and proceed to check for "extern" */ } - if ((!incomm1) && (!incomm2) && (!inquote) && - (!found_tag) && (!get_tagname)) + if (!incomment && !inquote && !found_tag && !get_tagname) { /* check for proc/fn keywords */ switch (c | ' ') @@ -2515,24 +2858,24 @@ */ int -L_isdef (dbp) - register char *dbp; +L_isdef (tokp) + register char *tokp; { - return ((dbp[1] == 'd' || dbp[1] == 'D') - && (dbp[2] == 'e' || dbp[2] == 'E') - && (dbp[3] == 'f' || dbp[3] == 'F')); + return ((tokp[1] == 'd' || tokp[1] == 'D') + && (tokp[2] == 'e' || tokp[2] == 'E') + && (tokp[3] == 'f' || tokp[3] == 'F')); } int -L_isquote (dbp) - register char *dbp; +L_isquote (tokp) + register char *tokp; { - return ((*(++dbp) == 'q' || *dbp == 'Q') - && (*(++dbp) == 'u' || *dbp == 'U') - && (*(++dbp) == 'o' || *dbp == 'O') - && (*(++dbp) == 't' || *dbp == 'T') - && (*(++dbp) == 'e' || *dbp == 'E') - && isspace(*(++dbp))); + return ((*(++tokp) == 'q' || *tokp == 'Q') + && (*(++tokp) == 'u' || *tokp == 'U') + && (*(++tokp) == 'o' || *tokp == 'O') + && (*(++tokp) == 't' || *tokp == 'T') + && (*(++tokp) == 'e' || *tokp == 'E') + && isspace(*(++tokp))); } void @@ -2906,10 +3249,8 @@ is gotten in compound term. */ void -prolog_getit (s, lineno, linecharno) +prolog_getit (s) char *s; - int lineno; - long linecharno; { char nambuf[BUFSIZ], *save_s, tmpc; int insquote, npar; @@ -2981,7 +3322,7 @@ else if (dbp[0] == '/' && dbp[1] == '*') /* comment. */ skip_comment (&lb, inf, &lineno, &linecharno); else /* found. 
*/ - prolog_getit (dbp, lineno, linecharno); + prolog_getit (dbp); } } @@ -2992,45 +3333,198 @@ int *plineno; /* result */ long *plinecharno; /* result */ { - while (!substr ("*/", plb->buffer)) + char *cp; + + do { + for (cp = plb->buffer; *cp != '\0'; cp++) + if (cp[0] == '*' && cp[1] == '/') + return; (*plineno)++; - *plinecharno += readline (plb, inf) + 1; - } /* 1 for newline. */ -} - -/* Return TRUE if 'sub' exists somewhere in 's'. */ - -logical -substr (sub, s) - char *sub; - char *s; -{ - while (*s && (s = etags_strchr (s, *sub))) - if (prestr (sub, s)) - return (TRUE); - else - s++; - return (FALSE); -} - -/* Return TRUE if 'pre' is prefix of string 's'. */ - -logical -prestr (pre, s) - char *pre; - char *s; -{ - if (*pre == '\0') - return (TRUE); - else if (*pre == *s) - return (prestr (pre + 1, s + 1)); - else - return (FALSE); + *plinecharno += readline (plb, inf) + 1; /* 1 for newline. */ + } + while (!feof(inf)); } +#ifdef ETAGS_REGEXPS +/* Take a string like "/blah/" and turn it into "blah", making sure + that the first and last characters are the same, and handling + quoted separator characters. Actually, stops on the occurence of + an unquoted separator. Also turns "\t" into a Tab character. + Returns pointer to terminating separator. Works in place. Null + terminates name string. */ +char * +scan_separators (name) + char *name; +{ + char sep = name[0]; + char *copyto = name; + logical quoted = FALSE; + + for (++name; *name != '\0'; ++name) + { + if (quoted) + { + if (*name == 't') + *copyto++ = '\t'; + else if (*name == sep) + *copyto++ = sep; + else + { + /* Something else is quoted, so preserve the quote. */ + *copyto++ = '\\'; + *copyto++ = *name; + } + quoted = FALSE; + } + else if (*name == '\\') + quoted = TRUE; + else if (*name == sep) + break; + else + *copyto++ = *name; + } + + /* Terminate copied string. 
*/ + *copyto = '\0'; + return name; +} + +/* Turn a name, which is an ed-style (but Emacs syntax) regular + expression, into a real regular expression by compiling it. */ +void +add_regex (regexp_pattern) + char *regexp_pattern; +{ + char *name; + const char *err; + struct re_pattern_buffer *patbuf; + + if (regexp_pattern == NULL) + { + /* Remove existing regexps. */ + num_patterns = 0; + patterns = NULL; + return; + } + + if (regexp_pattern[0] == '\0') + { + error ("missing regexp", 0); + return; + } + if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0]) + { + error ("%s: unterminated regexp", regexp_pattern); + return; + } + name = scan_separators (regexp_pattern); + if (regexp_pattern[0] == '\0') + { + error ("null regexp", 0); + return; + } + (void) scan_separators (name); + + patbuf = xnew (1, struct re_pattern_buffer); + patbuf->translate = NULL; + patbuf->fastmap = NULL; + patbuf->buffer = NULL; + patbuf->allocated = 0; + + err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf); + if (err != NULL) + { + error ("%s while compiling pattern", err); + return; + } + + num_patterns += 1; + if (num_patterns == 1) + patterns = xnew (1, struct pattern); + else + patterns = ((struct pattern *) + xrealloc (patterns, + (num_patterns * sizeof (struct pattern)))); + patterns[num_patterns - 1].pattern = patbuf; + patterns[num_patterns - 1].name_pattern = savestr (name); + patterns[num_patterns - 1].error_signaled = FALSE; +} + +/* + * Do the subtitutions indicated by the regular expression and + * arguments. + */ +char * +substitute (in, out, regs) + char *in, *out; + struct re_registers *regs; +{ + char *result = NULL, *t; + int size = 0; + + /* Pass 1: figure out how much size to allocate. 
*/ + for (t = out; *t; ++t) + { + if (*t == '\\') + { + ++t; + if (!*t) + { + fprintf (stderr, "%s: pattern subtitution ends prematurely\n", + progname); + return NULL; + } + if (isdigit (*t)) + { + int dig = *t - '0'; + size += regs->end[dig] - regs->start[dig]; + } + } + } + + /* Allocate space and do the substitutions. */ + result = xnew (size + 1, char); + size = 0; + for (; *out; ++out) + { + if (*out == '\\') + { + ++out; + if (isdigit (*out)) + { + /* Using "dig2" satisfies my debugger. Bleah. */ + int dig2 = *out - '0'; + strncpy (result + size, in + regs->start[dig2], + regs->end[dig2] - regs->start[dig2]); + size += regs->end[dig2] - regs->start[dig2]; + } + else + { + switch (*out) + { + case '\t': + result[size++] = '\t'; + break; + case '\\': + *out = '\\'; + break; + default: + result[size++] = *out; + break; + } + } + } + else + result[size++] = *out; + } + result[size] = '\0'; + + return result; +} + +#endif /* ETAGS_REGEXPS */ /* Initialize a linebuffer for use */ - void initbuffer (linebuffer) struct linebuffer *linebuffer; @@ -3045,14 +3539,14 @@ * which is the length of the line including the newline, if any. */ long -readline (linebuffer, stream) +readline_internal (linebuffer, stream) struct linebuffer *linebuffer; register FILE *stream; { char *buffer = linebuffer->buffer; register char *p = linebuffer->buffer; register char *pend; - int newline; /* 1 if ended with newline, 0 if ended with EOF */ + int newline; /* 1 if ended with '\n', 0 if ended with EOF */ pend = p + linebuffer->size; /* Separate to avoid 386/IX compiler bug. */ @@ -3078,6 +3572,81 @@ return p - buffer + newline; } + +/* + * Like readline_internal, above, but try to match the input + * line against any existing regular expressions. + */ +long +readline (linebuffer, stream) + struct linebuffer *linebuffer; + FILE *stream; +{ + /* Read new line. */ + int i; + long result = readline_internal (linebuffer, stream); + +#ifdef ETAGS_REGEXPS + /* Match against all listed patterns. 
*/ + for (i = 0; i < num_patterns; ++i) + { + int match = re_match (patterns[i].pattern, linebuffer->buffer, + (int)result, 0, &patterns[i].regs); + switch (match) + { + case -2: + /* Some error. */ + if (!patterns[i].error_signaled) + { + error ("error while matching pattern %d", i); + patterns[i].error_signaled = TRUE; + } + break; + case -1: + /* No match. */ + break; + default: + /* Match occurred. Construct a tag. */ + if (patterns[i].name_pattern[0] != '\0') + { + /* Make a named tag. */ + char *name = substitute (linebuffer->buffer, + patterns[i].name_pattern, + &patterns[i].regs); + if (name != NULL) + pfnote (name, TRUE, TRUE, linebuffer->buffer, + match, lineno, linecharno); + } + else + { + /* Make an unnamed tag. */ + pfnote (NULL, TRUE, FALSE, linebuffer->buffer, + match, lineno, linecharno); + } + break; + } + } +#endif /* ETAGS_REGEXPS */ + + return result; +} + +/* + * Read a file, but do no processing. This is used to do regexp + * matching on files that have no language defined. + */ +void +just_read_file (inf) + FILE *inf; +{ + while (!feof (inf)) + { + ++lineno; + linecharno = charno; + charno += readline (&lb, inf) + 1; + } +} + char * savestr (cp) @@ -3118,7 +3687,7 @@ if (*sp == c) r = sp; } while (*sp++); - return (r); + return r; } @@ -3136,9 +3705,9 @@ do { if (*sp == c) - return (sp); - } while (*sp++); - return (NULL); + return sp; + } while (*sp++); + return NULL; } /* Print error message and exit. */