view lispref/tindex.pl @ 89061:9a9b54d06f3d

* regex.c (RE_TARGET_MULTIBYTE_P): New macro. (GET_CHAR_BEFORE_2): Check target_multibyte, not multibyte. If that is zero, convert an eight-bit char to multibyte. (MAKE_CHAR_MULTIBYTE, CHAR_LEADING_CODE): New dummy macros for non-emacs case. (PATFETCH): Convert an eight-bit char to multibyte. (HANDLE_UNIBYTE_RANGE): New macro. (regex_compile): Set up the compiled pattern for multibyte chars even if the given regex string is unibyte. Use PATFETCH_RAW instead of PATFETCH in many places. To handle `charset' specification of unibyte, call HANDLE_UNIBYTE_RANGE. Use bitmap only for ASCII chars. (analyse_first) <exactn>: Simplified because the compiled pattern is multibyte. <charset_not>: Set up fastmap from bitmap only for ASCII chars. <charset>: Use CHAR_LEADING_CODE to get leading codes. <categoryspec>: If multibyte, set up fastmap only for ASCII chars here. (re_compile_fastmap) [emacs]: Call analyse_first with the arg multibyte always 1. (re_search_2): In emacs, set the local variable multibyte to 1, otherwise to 0. New local variable target_multibyte. Check it to decide the multibyteness of STR1 and STR2. If target_multibyte is zero, convert unibyte chars to multibyte before translating and checking fastmap. (TARGET_CHAR_AND_LENGTH): New macro. (re_match_2_internal): In emacs, set the local variable multibyte to 1, otherwise to 0. New local variable target_multibyte. Check it to decide the multibyteness of STR1 and STR2. Use TARGET_CHAR_AND_LENGTH to fetch a character from D. <charset, charset_not>: If multibyte is nonzero, check fastmap only for ASCII chars. Call bcmp_translate with target_multibyte, not with multibyte. <begline>: Declare the local variable C as `unsigned'. (bcmp_translate): Change the last arg name to target_multibyte.
author Kenichi Handa <handa@m17n.org>
date Tue, 03 Sep 2002 04:09:40 +0000
parents 8cd1df305566
children 23a1cea22d13
line wrap: on
line source

#! /usr/bin/perl

# Copyright (C) 2000 Free Software Foundation, Inc.
#
# This file is part of GNU Emacs.
#
# GNU Emacs is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# GNU Emacs is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

require 5;
use Getopt::Long;

# Help text printed for --help and for any command-line error.  The
# heredoc interpolates, so $0 expands to the program name and literal
# at-signs have to be written as \@.  The synopsis shows --old=DIR as
# the only operand because the option is mandatory and stray
# arguments are rejected by the option check.
my $USAGE = <<ENDUSAGE;
Remove \@tindex lines from files that were already present in previous
versions.

Usage: $0 --old=DIR
       $0 --help
       $0 --version

  --help	display this help and exit
  --version	print version and exit
  --old=DIR	find old files in DIR

The script performs two passes.  In the first pass, Texinfo files from
DIR are scanned for \@tindex lines, and identifiers in them are
recorded.  In a second pass, Texinfo files in the current directory
are scanned, and \@tindex lines for identifiers that were recorded in
the first pass are removed.  Old file contents are saved in files
with extension ".orig".  A list of modified files and removed \@tindex
identifiers is printed to stdout at the end.
ENDUSAGE

# Report a fatal error and terminate.  The arguments are concatenated
# into one message, which is written to stderr prefixed with the
# program name and followed by a period and a newline.  The process
# then exits with status 1; this sub never returns.
sub fatal {
    my @message = @_;
    print STDERR join('', "$0: ", @message, ".\n");
    exit 1;
}

# Command-line state, filled in by GetOptions below.  $old stays
# undefined unless --old=DIR is given.
my $help = 0;
my $version = 0;
my $old;

my $rc = GetOptions ('help' => \$help, 'version' => \$version,
                     'old=s' => \$old);

# --version and --help are answered before the remaining arguments
# are validated, so that `--help' on its own succeeds instead of being
# reported as a usage error for lacking --old.
if ($version) {
    print "0.1\n";
    exit 0;
} elsif ($help) {
    print $USAGE;
    exit 0;
} elsif (!$rc || !defined $old || $old eq '' || @ARGV) {
    # Unknown option, missing --old directory, or stray arguments.
    # The directory is tested for being defined and non-empty rather
    # than for truth, so that a directory named "0" is accepted.
    print $USAGE;
    exit 1;
}

# Fill the hash %tindex with associations VAR -> COUNT where the keys
# VAR are identifiers mentioned in @tindex lines in the older files
# and COUNT is the number of times they are seen in those files.
# %removed counts, per identifier, the @tindex lines dropped from the
# current files later on.

my %tindex;
my %removed;
my @old_files = glob "$old/*.texi";
my @new_files = glob "*.texi";
fatal ("No Texinfo files found in `$old'") unless @old_files;
fatal ("No Texinfo files found in current directory") unless @new_files;

print "Scanning old files for \@tindex lines\n";
foreach my $file (@old_files) {
    # Three-argument open with a lexical handle: the file name is
    # never parsed for a mode prefix or surrounding whitespace, and
    # neither a global handle nor a global loop variable is left
    # behind.
    open (my $in, '<', $file) or fatal "Cannot open $file: $!";
    while (my $line = <$in>) {
        ++$tindex{$1} if $line =~ /^\s*\@tindex\s+(\S+)/;
    }
    close $in;
}

# Process current files and remove those @tindex lines which we
# know were already present in the files scanned above.  A file is
# only rewritten when at least one line was dropped, and its previous
# contents are first copied to FILE.orig.

print "Removing old \@tindex lines\n";
foreach my $file (@new_files) {
    my $modified = 0;
    my $contents = "";

    open (my $in, '<', $file) or fatal "Cannot open $file for reading: $!";
    while (my $line = <$in>) {
        if ($line =~ /^\s*\@tindex\s+(\S+)/ && $tindex{$1}) {
            # Identifier already indexed in the old files: drop the
            # line and remember it for the final report.
            ++$removed{$1};
            $modified = 1;
        } else {
            $contents .= $line;
        }
    }
    close $in;

    next unless $modified;

    print "  $file\n";

    # Run cp without a shell so that unusual characters in the file
    # name cannot be interpreted as shell syntax.  $! is meaningful
    # only when cp could not be started at all (system returns -1);
    # otherwise the failure is described by the wait status.
    my $status = system ('cp', $file, "$file.orig");
    if ($status == -1) {
        fatal ("Cannot backup $file: $!");
    } elsif ($status != 0) {
        fatal ("Cannot backup $file: cp failed (wait status $status)");
    }

    # Check print and close as well: a buffered write error (a full
    # disk, for instance) may only be reported when the handle is
    # closed, and the file has already been truncated by then.
    open (my $out, '>', $file) or fatal "Cannot open $file for writing: $!";
    print {$out} $contents or fatal "Cannot write $file: $!";
    close $out or fatal "Cannot write $file: $!";
}

# Print a list of identifiers removed.  The keys are sorted so that
# the report comes out in the same order on every run; plain hash
# order is not stable.

print "Removed \@tindex commands for:\n";
foreach my $key (sort keys %removed) {
    print "  $key\n";
}