Back to home page
Get LXR Cross Referencer at SourceForge.net. Fast, secure and Free Open Source software downloads

LXR self cross-referenced (under SQLite)

 
 

    


File indexing completed on 2017-05-02 17:45:25

0001 #!/usr/bin/perl
0002 # -*- tab-width: 4 -*-"
0003 ###############################################
0004 #
0005 #   Runs source-tree indexation
0006 #
0007 # This program is free software; you can redistribute it and/or modify
0008 # it under the terms of the GNU General Public License as published by
0009 # the Free Software Foundation; either version 2 of the License, or
0010 # (at your option) any later version.
0011 #
0012 # This program is distributed in the hope that it will be useful,
0013 # but WITHOUT ANY WARRANTY; without even the implied warranty of
0014 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0015 # GNU General Public License for more details.
0016 #
0017 # You should have received a copy of the GNU General Public License
0018 # along with this program; if not, write to the Free Software
0019 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0020 #
0021 ###############################################
0022 
0023 # ajl 2016-10-26
0024 # NOTE: this file contains lines commented out with a #MT prefix.
0025 #   These lines are part of an experiment to perform simultaneously
0026 #   free-text indexing and definition/usage collection.
0027 #   Results showed that the processes where competing for the I/O bus
0028 #   (file reads, provate file writes and DB I/O) and finally were
0029 #   slowed down waiting for data.
0030 #   Code is left here in case some new idea would remove the culprit.
0031 
0032 use strict;
0033 use lib 'lib', 'scripts';
0034 use Fcntl;
0035 use Getopt::Long qw(:config gnu_getopt);
0036 use IO::Handle;
0037 use File::Path qw(make_path remove_tree);
0038 use File::MMagic;
0039 
0040 use LXR::Files;
0041 use LXR::Index;
0042 use LXR::Config;
0043 use LXR::Common;
0044 
0045 use VTescape;
0046 use Tagger;
0047 
0048 my $lxrconf = $LXR::Config::confname;   # TODO: allow override through an option
0049 
0050 my %option;
0051 GetOptions  (\%option
0052             , 'help!'
0053             , 'url=s'
0054             , 'tree=s'
0055             , 'version=s'
0056             , 'allurls'
0057             , 'allversions:s'
0058             , 'reindexall'
0059             , 'checkonly'
0060             , 'accept'
0061 #MT             , 'S'
0062             , 'T'
0063             , 'novacuum'
0064             , 'U'
0065             );
0066 
0067 if ($option{'help'}) {
0068 
0069     # this may not be the best way to implement this, but at least it's something
0070     print <<END_HELP;
0071 Usage: genxref [option ...]
0072 
0073 The genxref program automatically generates LXR database cross-reference
0074 tokens for a set of URL configuration blocks and source code versions.  These
0075 are both defined in the $lxrconf configuration file.  Each "URL" is a separate
0076 source tree; LXR separates and identifies these by their URL.  Each "version" is
0077 a different version of the source tree being indexed.  See file $lxrconf or
0078 script configure-lxr.pl for configuring URLs and versions.
0079 
0080 Valid options are:
0081   --help             Print a summary of the options.
0082   --url=URL          Generate tokens for the given URL configuration block.
0083   --tree=TREE_NAME   To be used in addition to --url in multiple-trees context
0084                      if LXR configured to identify trees through 'argument'.
0085   --allurls          Generate tokens for all URL configuration blocks.
0086   --version=VERSION  Generate tokens for the given version of the code.
0087   --allversions      Generate tokens for all versions of the code (default).
0088   --reindexall       Purges existing index data
0089   --checkonly        Verify tools version and stop
0090   --accept           Accept parameter suggestions to continue with indexing
0091   -T                 Do not trace activity
0092                      Note: with "monster" trees like Linux kernel, this means
0093                            nothing will be printed for hours; check the disk
0094                            activity LED before deciding that your computer
0095                            is frozen. Don't foolishly kill the task or reboot
0096                            the computer!
0097   --novacuum         Do not VACUUM database tables after generating the
0098                      cross-references (mainly for PostgreSQL because it sollicits
0099                      password from the user preventing unattended operation;
0100                      VACUUM can always be launched manually)
0101   -U                 Same as --vacuum, but only for PostgreSQL databases
0102   
0103 Report bugs at http://sourceforge.net/projects/lxr/.
0104 END_HELP
0105     exit 0;
0106 }
0107 
0108 my $failure = 0;    # Cumulative failure indicator
0109 
0110 if  (   !$option{'checkonly'}
0111     &&  !$option{'url'}
0112     &&  !$option{'allurls'}
0113     ) {
0114     die("${VTred}ERROR:${VTnorm} in real mode (not checking mode set by ${VTbold}--checkonly${VTnorm}),\n"
0115         . "an URL must be specified through ${VTyellow}--url${VTnorm} or ${VTyellow}--allurls${VTnorm}.\n"
0116         . "Try \"genxref --help\".\n");
0117 }
0118 if  (   exists($option{'url'})
0119     &&  exists($option{'allurls'})
0120     ) {
0121     die("${VTred}ERROR:${VTnorm} options ${VTyellow}--url${VTnorm} and ${VTyellow}--allurls${VTnorm}"
0122         . "cannot be specified simultaneously.\n");
0123 }
0124 if  (   !exists($option{'allversions'})
0125     &&  !exists($option{'version'})
0126     ) {
0127 # Create a courtesy --allversions option to simplify further coding, so that we do not
0128 # repeatedly code exists($options{'allversions'} || !exists($options('version'). Checking
0129 # for --allurls will be enough.
0130     $option{'allversions'} = ' ';
0131 }
0132 
0133 
0134 my $trace = !$option{'T'};
0135 
0136 
0137 ##############################################################
0138 #
0139 #               Read lxr.conf global section
0140 #
0141 ##############################################################
0142 
0143 # Dummy sub to disable 'range' file reads
0144 sub readfile {}
0145 
0146 my @config;
0147 my $global;     # Global section in configuration file
0148 if (open(CONFIG, $lxrconf)) {
0149     my $oldsep = $/;
0150     $/ = undef;
0151     my $config_contents = <CONFIG>;
0152     $/ = $oldsep;
0153     close(CONFIG);
0154     $config_contents =~ m/(.*)/s;
0155     $config_contents = $1;    #untaint it
0156     @config = eval("\n#line 1 \"configuration file\"\n" . $config_contents);
0157     die($@) if $@;
0158 
0159     $global = shift(@config);   # Global parameters
0160 } else {
0161     print "${VTred}ERROR:${VTnorm} could not open configuration file ${VTred}$lxrconf${VTnorm}\n";
0162     exit(1) unless $option{'checkonly'};
0163     $failure |= 1;
0164 }
0165 
0166 
0167 #######################################
0168 ###
0169 ###               Part 1
0170 ###
0171 ###  Check environment
0172 ###
0173 ###
0174 
0175 printf "Checking     Perl     version ... %vd\n", $^V;
0176 if ($^V ge v5.10.0) {
0177     print VTCUU(1), "${VTgreen}[  OK  ]${VTnorm}\n";
0178 } else {
0179     print VTCUU(1), "${VTred}[${VTslow}FAILED${VTnorm}${VTred}]${VTnorm}\n";
0180     $failure |= 1;
0181 }
0182 
0183 # Check tools version
0184 #   Return value:
0185 #   .   -2. version too low for forced parm (see 1)
0186 #   .   -1  version too low
0187 #   .   0   software tool not found
0188 #   .   1   OK but conf parm set
0189 #   .   2   everything fine
0190 
0191 sub check_tool {
0192     my ($tl_param, $tl_name, $tl_option, $tl_version, $name_constraint) = @_;
0193     my $tool;
0194     my $toolloc;
0195     my $toolforced = 0;
0196     my $version;
0197 
0198     if ($global && $global->{$tl_param}) {
0199         $tool = $global->{$tl_param};
0200             # Make further tests on designated tool
0201         $toolloc = `command -v $tool 2>/dev/null`;
0202         if ($toolloc !~ s/\n$//s) {
0203             print "${VTred}'$tl_param' does not name an existing $tl_name utility${VTnorm}\n";
0204             delete $global->{$tl_param};
0205         } else {
0206             my $systoolloc = `command -v $tl_name 2>/dev/null`;
0207             if ($systoolloc =~ s/\n$//s) {
0208                 if ($systoolloc ne $toolloc) {
0209                     print "${VTyellow}'$tl_param' not equal to `command -v $tl_name`${VTnorm}\n";
0210                     print "If this is a non-system copy, ignore this warning\n";
0211                 }
0212             }
0213         }
0214     } else {
0215         print("${VTyellow}Parameter '$tl_param' not defined - trying to find $tl_name${VTnorm}\n");
0216     }
0217     if (!$toolloc) {
0218         $toolloc = `command -v $tl_name 2>/dev/null`;
0219         $toolloc =~ s/\n$//s;
0220     }
0221     if (!$toolloc) {
0222         print "${VTred}$tl_name not found,${VTnorm} `command -v $tl_name` returned a null string\n";
0223         if ($global) {
0224             delete $global->{$tl_param};
0225         }
0226         return 0;
0227     }
0228     if ($global) {
0229         if (!$global->{$tl_param}) {
0230             $global->{$tl_param} = $toolloc;
0231             $toolforced = 1;
0232             $tool = $toolloc;
0233             print "$tl_name found at ${VTyellow}$toolloc${VTnorm}\n";
0234             print "Manually update $lxrconf for permanent setting if needed\n";
0235         }
0236     } else {
0237         print "$tl_name found at ${VTyellow}$toolloc${VTnorm}\n";
0238         $tool = $toolloc;
0239     }
0240 
0241     my $nmwidth = 14;
0242     print "Checking", ' 'x(($nmwidth-length($tl_name)+1)/2);
0243     print $tl_name, ' 'x(($nmwidth-length($tl_name))/2);
0244     $version = `$tool $tl_option`;
0245     if  (   defined($name_constraint)
0246         &&  $version !~ $name_constraint
0247         ) {
0248         print "name constraint  ... $name_constraint\n";
0249         print VTCUU(1), "${VTred}[${VTslow}FAILED${VTnorm}${VTred}]${VTnorm}\n";
0250         return -1 - $toolforced;
0251     }
0252     print "version ... ";
0253     if ($version =~ m/.*$tl_name .*?((\d+\.)*\d+)/i) {
0254         $version = $1;
0255     } else {
0256         $version = undef;
0257     }
0258     print "$version\n";
0259     if (defined($tl_version) && $tl_version ne '0') {
0260         if  (  !defined($version)
0261             || eval( 'v' . $version) lt eval( 'v' . $tl_version)
0262             ) {
0263             print VTCUU(1), "${VTred}[${VTslow}FAILED${VTnorm}${VTred}]${VTnorm}\n";
0264             print "$tl_name version $tl_version or above required, found $version\n";
0265             return -1 - $toolforced; 
0266         } else {
0267             print VTCUU(1), "${VTgreen}[  OK  ]${VTnorm}\n";
0268         }
0269     } else {
0270             print VTCUU(1), "${VTyellow}Checked:${VTnorm}\n";
0271     }
0272     return 2 -$toolforced;
0273 }
0274 
0275 my $foundglimpse = 0;
0276 my $foundswishe = 0;
0277 
0278 my $ct = check_tool ( 'ectagsbin'
0279                     , 'ctags'
0280                     , '--version', '5'
0281                     , qr/exuberant/i
0282                     );
0283 if ($ct == 0) {
0284     print "genxref can't index source-tree without ctags\n";
0285     print "Find its location or install it and fix 'ectagsbin'\n";
0286     $failure |= 1;
0287 } elsif ($ct == 1) {
0288     print "ctags fixed for genxref, but LXR browsing will not work\n";
0289     $failure |= 2;
0290 } elsif ($ct < 0) {
0291     $failure |= 1;
0292 }
0293 
0294 $foundglimpse = check_tool('glimpsebin', 'glimpse', '-V', '0');
0295 check_tool('glimpseindex', 'glimpseindex', '-V', '0');
0296 $foundswishe = check_tool('swishbin', 'swish-e', '-V', '2.1');
0297 
0298 #   The following verifications are a bit "over-kill"
0299 #   because there is presently no check on version level
0300 #   on glimpse. But that could change in the future.
0301 if ($foundglimpse == 0 && $foundswishe == 0) {
0302     # No engine defined - propose to go on without
0303     print "${VTyellow}Neither 'glimpsebin' nor 'swishbin' defined${VTnorm}\n";
0304     print "${VTyellow}Disabling free-text search${VTnorm}\n";
0305     if ($global) {
0306         $global->{'glimpsebin'} = '/usr/bin/true';
0307     }
0308     $failure |= 2;
0309 } elsif ($foundglimpse == 2 && $foundswishe == 2) {
0310     # Both engines defined - do not know what to do
0311     print "${VTred}Both 'glimpsebin' and 'swishbin' defined${VTnorm}\n";
0312     print "Choose one of them by commenting the other in $lxrconf\n";
0313     $failure |= 1;
0314 } elsif ($foundglimpse <= 0 && $foundswishe <= 0) {
0315     # Can't use any search engine
0316     # (either not found or version too low)
0317     # But check if user disabled free-text search with suggested tip
0318     if  (   $foundglimpse < 0   # true has no version
0319             && $global->{'glimpsebin'} =~ m:(^|/)true$:
0320             && $foundswishe == 0
0321         ||  $foundswishe < 0
0322             && $global->{'swishbin'} =~ m:(^|/)true$:
0323             && $foundglimpse == 0
0324         ) {
0325     #   Leave $failure "as is" when user disables search
0326     } else {
0327         $failure |= 1
0328     }
0329 } elsif ($foundglimpse == 1 && $foundswishe <= 1) {
0330     # glimpse has been forced, but glimpse is prefered if both
0331     print "${VTyellow}Warning:${VTnorm} using existing ${VTbold}glimpse${VTnorm}\n";
0332     delete $global->{'swishbin'} if ($global);
0333     $failure |= 2;
0334 } elsif ($foundswishe == 1 && $foundglimpse <= 0) {
0335     # swish-e has been forced, but glimpse is prefered if both
0336     print "${VTyellow}Warning:${VTnorm} using existing ${VTbold}swish-e${VTnorm}\n";
0337     delete $global->{'glimpsebin'} if ($global);
0338     $failure |= 2;
0339 } elsif ($foundglimpse == 2) {
0340     # Standard glimpse selection, but for the case of both
0341     # engines present and swish-e has too low a version
0342     if ($foundswishe == -1) {
0343         print "${VTyellow}Warning:${VTnorm} forcing use of ${VTbold}glimpse${VTnorm}\n";
0344         delete $global->{'swishbin'} if ($global);
0345         $failure |= 2;
0346     }
0347 } elsif ($foundswishe == 2) {
0348     # Standard swish-e selection, but for the case of both
0349     # engines present and glimpse has too low a version
0350     if ($foundglimpse == -1) {
0351         print "${VTyellow}Warning:${VTnorm} forcing use of ${VTbold}swish-e${VTnorm}\n";
0352     delete $global->{'glimpsebin'} if ($global);
0353         $failure |= 2;
0354     }
0355 }
0356 
0357 # End of general checks
0358 if ($option{'checkonly'}) {
0359     print "${VTyellow}genxref stopped without indexing by --checkonly option${VTnorm}\n";
0360     exit;
0361 }
0362 if ($failure > 1) {
0363     if ($option{'accept'}) {
0364         print "\nParameter changes accepted by option --accept without writing them in $lxrconf\n";
0365         print "Indexing may eventually complete but expect malfunction in LXR browsing\n";
0366         print "till you fix configuration file $lxrconf\n\n";
0367         $failure = 0;
0368     } else {
0369         print "\ngenxref suggested changes to your configuration\n";
0370         print "You can test their effect with option ${VTyellow}--accept${VTnorm}\n";
0371     }
0372 }
0373 if ($failure > 0) {
0374     die "${VTred}Aborting due to previous errors${VTnorm}";
0375 }
0376 
0377 
0378 #######################################
0379 ###
0380 ###               Part 2
0381 ###
0382 ###  Index source tree(s)
0383 ###
0384 ###
0385 
0386 my %versionset;     # cumulative version set
0387 my $autoversionflag;
0388 my $versioninprogress;  # for exception handler
0389 my $reindexmode;    # --reindexall state equivalent and exception handler use
0390 my $milestone = 0;  # genxref's milestones during version indexation:
0391                     #  0 : in progress
0392                     # 'P': purge
0393                     # 'T': free-text search indexing
0394                     # 'D': definition collection
0395                     # 'U': usage collection
0396 my $starttime;  # milestone dates as wall time
0397 my $stepstart;  # (more meaningful to casual user than system user time)
0398 my ($purgestart, $purgeend);
0399 my ($textstart, $textend);
0400 my ($dclstart, $dclend);
0401 my ($refstart, $refend);
0402 #MT my $childpid;
0403 
0404 my %binaryfiles;
0405 
0406 my $printdirbanner; # Flag to print again directory banner
0407 # Frequency of directory name repetition
0408 my $repeatbannerevery = 25;
0409 my $repeatbannercountdown = $repeatbannerevery;
0410 
0411 my $hostname = $global->{'host_names'}[0];  # Global host name
0412 if ($option{'url'}) {   # Single 'url'
0413     @config = (1);      # Fake list to prevent looping
0414 }
0415 
0416 #   Loop on tree sections (global section already removed)
0417 foreach my $treedescr (@config) {
0418     my $url;
0419     my $host;
0420     my $virtroot;
0421     if ($url = $option{'url'}) {
0422         ($host, $virtroot) = $url =~ m!^(.*//[^/]+)(/.*)?!;
0423         $config = LXR::Config->new  ( $host
0424                                     , $virtroot
0425                                     , $option{'tree'}
0426                                     );
0427     } else {
0428         if (defined($hostname)) {
0429             $host = $hostname;
0430             $virtroot = $treedescr->{'virtroot'} // $global->{'virtroot'};
0431         } else {
0432             ($host, $virtroot)
0433                 = $treedescr->{'baseurl'} =~ m!^(.*//[^/]+)(/.*)?!;
0434         }
0435         $config = LXR::Config->new  ( $host
0436                                     , $virtroot
0437                                     , $treedescr->{'treename'}
0438                                     );
0439         $url = $host . $virtroot;
0440         print STDERR "\n${CSI}44m${VTwhite}Processing $url${VTnorm}";
0441         print STDERR " == ${CSI}41m${VTwhite}tree $treedescr->{'treename'}${VTnorm}\n"
0442             if $treedescr->{'treename'};
0443         print STDERR "\n";
0444     }
0445 
0446     die("${VTred}No matching configuration${VTnorm}") unless exists($config->{'sourceroot'});
0447 
0448     if (!exists($config->{'sourceroot'})) {
0449         die "${VTred}No 'sourceroot' for "
0450             . $config->{'virtroot'}
0451             . " - Can't run${VTnorm}\n";
0452     }
0453 
0454     if (!exists($config->{'variables'}{'v'})) {
0455         die "${VTred}Variable 'v' needed to define versions "
0456             . $config->{'virtroot'}
0457             . " - Can't run${VTnorm}\n";
0458     }
0459 
0460     $files = LXR::Files->new($config);
0461     die "${VTred}Can't create file access object ${VTnorm}" . $config->{'sourceroot'}
0462         if !defined($files);
0463     $LXR::Index::database_id++;     # Changing database
0464     $index = LXR::Index->new($config);
0465     die "${VTred}Can't create Index ${VTnorm}" . $config->{'dbname'}
0466         if !defined($index);
0467     $index->write_open();   # enable write transactions
0468 
0469     %binaryfiles = ();
0470 
0471     my @versions;
0472     $autoversionflag = 0;
0473     $repeatbannercountdown = $repeatbannerevery;
0474 
0475     if (exists($option{'allversions'})) {
0476         if  (  $files->isa('LXR::Files::CVS')
0477             && ref($config->{'variables'}{'v'}{'range'}) eq 'CODE'
0478             ) {
0479             print STDERR "Using automatic CVS version enumeration\n";
0480             $autoversionflag = 1;
0481         } else {
0482             @versions = $config->varrange('v');
0483             die "${VTred}Option --allversions cannot be used because no versions found automatically.${VTnorm}"
0484                 . "\nUse --version=VERSION or fix $lxrconf.\n"
0485                 if scalar @versions <= 0;
0486         }
0487     } else {
0488         @versions = $option{'version'};
0489         my $version_OK = 0;
0490         for ($config->varrange('v')) {
0491             if ($versions[0] eq $_) {
0492                 $version_OK = 1;
0493                 last;
0494             }
0495         }
0496         my $version_exists = 1;
0497         if  ($files->isa('LXR::Files::Plain')) {
0498             $version_exists = $files->isdir('/', $versions[0]); 
0499         }
0500         if (!$version_OK || !$version_exists) {
0501             print STDERR "${VTred}ERROR:${VTnorm} ";
0502             if ($version_OK) {
0503                 print STDERR "no corresponding directory ${VTwhite}", $versions[0], "${VTnorm} in source root\n";
0504             } elsif ($version_exists) {
0505                 print STDERR "version ${VTwhite}", $versions[0], "${VTnorm} not listed in ${VTwhite}lxr.conf${VTnorm}, ";
0506                 print STDERR "but a directory exists in source root\n";
0507             } else {
0508                 print STDERR "unknown version ${VTwhite}", $versions[0], "${VTnorm}\n";
0509             }
0510             die "${VTred}No version to index.${VTnorm}\n"
0511         }
0512     }
0513 
0514     %versionset = ();
0515     $SIG{__DIE__} = \&abortcleanup;
0516     $SIG{'INT'} = \&abortcleanup;
0517     $starttime = time();
0518     $index->saveperformance('', 0 , '', $starttime, 0);
0519     $milestone = 'P';
0520 
0521     if ($files->isa('LXR::Files::CVS') && scalar(@versions)<=0) {
0522         $reindexmode = 1;
0523         $purgestart = $starttime;
0524         $stepstart = $purgestart;
0525         $versioninprogress = 'CVS all versions';
0526         $index->saveperformance($versioninprogress, 1, $milestone, $purgestart, 0);
0527         print STDERR "\nFull database purge ... ${VTyellow}${VTslow}in progress${VTnorm}\n";
0528         $index->purgeall();
0529         $index->saveperformance('', 0 , '', $starttime, 0); # rewrite erased record
0530         $purgeend = time();
0531         print STDERR &VTCUU(1), &VTCHA(25), &VTEL(0), "${VTgreen}Done${VTnorm}\n";
0532         $index->saveperformance($versioninprogress, 1, $milestone, $purgestart, $purgeend);
0533         $$LXR::Common::HTTP{'param'}{'_showattic'} = 1;
0534         $printdirbanner = 1;
0535         $milestone = 'D';
0536         $dclstart = time();
0537         $stepstart = $dclstart;
0538         $index->saveperformance($versioninprogress, 1, $milestone, $dclstart, 0);
0539         directorytreetraversal  ( \&Tagger::processfile
0540                                 , 'head'
0541                                 , ''
0542                                 , '/'
0543                                 );
0544         $dclend = time();
0545         $index->saveperformance($versioninprogress, 1, $milestone, $dclstart, $dclend);
0546         $printdirbanner = 1;
0547         $milestone = 'U';
0548         $refstart = time();
0549         $stepstart = $refstart;
0550         $index->saveperformance($versioninprogress, 1 ,$milestone, $refstart, 0);
0551         $autoversionflag++ if $autoversionflag; # Don't do it again
0552         directorytreetraversal  ( \&Tagger::processrefs
0553                                 , 'head'
0554                                 , ''
0555                                 , '/'
0556                                 );
0557         $refend = time();
0558         $index->saveperformance($versioninprogress, 1 ,$milestone, $refstart, $refend);
0559     # Indexing summary for this version
0560         STDERR->autoflush(1);
0561         print "\n";
0562         print "\n${CSI}44m${VTwhite}Summary for $url ${VTnorm}";
0563         print " == ${CSI}41m${VTwhite}tree $config->{'treename'}${VTnorm}"
0564             if $config->{'treename'};
0565         print " == ${CSI}41m${VTwhite}all versions in CVS ${VTnorm}\n" ;
0566         print '    No free-text indexing', "\n";
0567         print '    Purge              : ', elapsed($purgeend-$purgestart), "\n";
0568         print '    Definitions parse  : ', elapsed($dclend-$dclstart), "\n";
0569         print '    References parse   : ', elapsed($refend-$refstart), "\n";
0570         print '    ...Total duration..: ', elapsed(time()-$starttime), "\n";
0571         STDERR->autoflush(0);
0572 
0573         if  (   exists($option{'allversions'})
0574             &&  $option{'allversions'} ne 'noauto'
0575             &&  $option{'allversions'} ne ' '   # don't consider courtesy value
0576             ) {
0577             dump_versionset('CVS', \%versionset);
0578         }
0579 
0580     } else {
0581 
0582 #   purgeall() is much faster than a sequence of purge($version);
0583 #   if there is only one version in the range, use it instead of
0584 #   an individual purge($version).
0585 #   In incremental indexing (no --reindexall), every file must
0586 #   later be carefully examined to determined if a very
0587 #   selective DB cleaning is needed.
0588         my $dopurge = 0;
0589         my $docareful = 0;
0590         $reindexmode = 0;
0591         if (exists($option{'reindexall'})) {
0592             $reindexmode = 1;
0593             if  (exists($option{'allversions'})) {
0594                 $purgestart = $starttime;
0595                 $index->saveperformance('', 1 , $milestone, $purgestart, 0);
0596                 print STDERR "\nFull database purge ... ${VTyellow}${VTslow}in progress${VTnorm}\n";
0597                 $index->purgeall();
0598                 $index->saveperformance('', 0 , '', $starttime, 0); # rewrite erased record
0599                 $purgeend = time();
0600                 $index->saveperformance('', 1 , $milestone, $purgestart, $purgeend);
0601                 print STDERR &VTCUU(1), &VTCHA(25), &VTEL(0), "${VTgreen}Done${VTnorm}\n";
0602             } else {
0603                 $dopurge = 1
0604             }
0605         } else {
0606             my $vc;
0607             if  (exists($option{'allversions'})) {
0608                 $index->{'version_count'} =
0609                     $index->{dbh}->prepare
0610                         ( 'select count(reindex) from '.$config->{'dbprefix'}.'times'
0611                         . ' where reindex = 1'
0612                         . ' and stepname = \'D\''
0613                         );
0614                 $index->{'version_count'}->execute();
0615             } else {
0616                 $index->{'version_count'} =
0617                     $index->{dbh}->prepare
0618                         ( 'select count(reindex) from '.$config->{'dbprefix'}.'times'
0619                         . ' where reindex = 1'
0620                         . ' and releaseid = ?'
0621                         . ' and stepname = \'D\''
0622                         );
0623                 $index->{'version_count'}->execute($option{'version'});
0624             }
0625             ($vc) = $index->{'version_count'}->fetchrow_array();
0626             $index->{'version_count'} = undef;
0627             if (0 == $vc) { # initial indexing
0628                 $reindexmode = 1;
0629             } else {
0630                 $docareful = 1;
0631             }
0632         }
0633 
0634         foreach my $version (@versions) {
0635 #MT         $childpid = undef;
0636             $versioninprogress = $version;
0637             print STDERR "\n${CSI}44m${VTwhite}Processing $url ${VTnorm}";
0638             print STDERR " == ${CSI}41m${VTwhite}tree ".$config->{'treename'}.${VTnorm}
0639                 if $config->{'treename'};
0640             print STDERR " == ${CSI}41m${VTwhite}Version $version ${VTnorm}\n" ;
0641             $milestone = 'P';
0642             $purgestart = time();
0643             $stepstart = $purgestart;
0644             $index->saveperformance($version, $reindexmode, $milestone, $purgestart, 0);
0645             if ($dopurge) {
0646                 print STDERR "\nSelective database purge ... ${VTyellow}${VTslow}in progress${VTnorm}\n";
0647                 $index->purge($version);
0648                 print STDERR &VTCUU(1), &VTCHA(30), &VTEL(0), "${VTgreen}Done${VTnorm}\n";
0649             }
0650             if ($docareful) {
0651                 cleanindex($version);
0652             }
0653             $purgeend = time();
0654             $index->saveperformance($version, $reindexmode, $milestone, $purgestart, $purgeend);
0655             $textend = 0;
0656             if ($files->isa('LXR::Files::Plain')) {
0657                 if ($foundglimpse > 0 || $foundswishe > 0) {
0658                     $milestone = 'T';
0659                     $textstart = time();
0660                     $stepstart = $textstart;
0661                     $index->saveperformance($version, $reindexmode, $milestone, $textstart, 0);
0662 #MT                 $childpid =
0663                         gensearch   ( $version
0664 #MT                                 , \&childexit
0665                                     );
0666                     $textend = time();
0667             # ATTENTION: if multi-threading code is reenabled, following line must
0668             #       be commented out
0669                 $index->saveperformance($version, $reindexmode, 'T', $textstart, $textend);
0670                 } else {
0671                     print STDERR "${VTyellow}Free-text search disabled${VTnorm}\n";
0672                 }
0673             } else {
0674                 print STDERR "${VTyellow}Free-text search setup suppressed for VCS storage${VTnorm}\n";
0675             }
0676             $printdirbanner = 1;
0677             $milestone = 'D';
0678             $dclstart = time();
0679             $stepstart = $dclstart;
0680             $index->saveperformance($version, $reindexmode, $milestone, $dclstart, 0);
0681             directorytreetraversal  ( \&Tagger::processfile
0682                                     , $version
0683                                     , ''
0684                                     , '/'
0685                                     );
0686             $dclend = time();
0687             $index->saveperformance($version, $reindexmode, $milestone, $dclstart, $dclend);
0688             $printdirbanner = 1;
0689             $milestone = 'U';
0690             $refstart = time();
0691             $stepstart = $refstart;
0692             $index->saveperformance($version, $reindexmode, $milestone, $refstart, 0);
0693             directorytreetraversal  ( \&Tagger::processrefs
0694                                     , $version
0695                                     , ''
0696                                     , '/'
0697                                     );
0698             $refend = time();
0699             $index->saveperformance($version, $reindexmode, $milestone, $refstart, $refend);
0700 
0701 #MT             if ($childpid > 0) {
0702 #MT                 waitpid($childpid, 0);
0703 #MT                 delete $SIG{'USR1'};
0704 #MT             }
0705 #MT         if (0 != $textend) {
0706 #MT             $index->saveperformance($version, $reindexmode, 'T', $textstart, $textend);
0707 #MT         }
0708     # Indexing summary for this version
0709             print "\n${CSI}44m${VTwhite}Summary for $url ${VTnorm}";
0710             print " == ${CSI}41m${VTwhite}tree $config->{'treename'}${VTnorm}"
0711                 if $config->{'treename'};
0712             print " == ${CSI}41m${VTwhite}Version $version ${VTnorm}\n" ;
0713             print '    Purge              : ', elapsed($purgeend-$purgestart), "\n";
0714             print '    Free-text indexing : ';
0715             if (0 == $textend) {
0716                 print $VTyellow, 'skipped', $VTnorm;
0717             } else {
0718                 print elapsed($textend-$textstart);
0719             }
0720             print "\n";
0721             print '    Definitions parsing: ', elapsed($dclend-$dclstart), "\n";
0722             print '    References parsing : ', elapsed($refend-$refstart), "\n";
0723             print '    ...Total duration..: ', elapsed(time()-$purgestart), "\n";
0724         }
0725     }
0726     delete $SIG{__DIE__};
0727     $SIG{'INT'} = 'DEFAULT';
0728     $index->{dbh}->do
0729             ( 'delete from '.$config->{'dbprefix'}.'times'
0730             . ' where releaseid = \'\' and reindex = 0'
0731             );
0732     $index->write_close();
0733     $index->final_cleanup();
0734 
0735     # IMPORTANT REMINDER:
0736     #   DB has been disconnected, consequently post_processing must not reference the DB
0737     #   through the DBI/DBM interface.
0738     if  (   !$option{'novacuum'}
0739         &&  !($option{'U'} && $config->{'dbname'} =~ m/^dbi:Pg:/)
0740         ) {
0741         print STDERR "\nDatabase optimisation ... ${VTyellow}${VTslow}in progress${VTnorm}\n";
0742         $index->post_processing();
0743         print STDERR &VTCUU(1), &VTCHA(27), &VTEL(0), "${VTgreen}Done${VTnorm}\n";
0744     }
0745 }
0746 
0747 
0748 #######################################
0749 ###
0750 ###               Annex
0751 ###
0752 ###  Support routines
0753 ###
0754 ###
0755 
0756 sub abortcleanup {
0757     my $abrtend;
0758     delete $SIG{__DIE__};   # avoid recursive trap
0759     $index->saveperformance($versioninprogress, $reindexmode, $milestone, $stepstart, -time());
0760     $index->saveperformance('', 0, '', $starttime, -time());
0761 #MT     ($textstart, $abrtend) = $index->getperformance($versioninprogress, $reindexmode, 'T');
0762 #MT     if  (   defined $textstart
0763 #MT         &&  0 == $abrtend
0764 #MT         ) {
0765 #MT         $index->saveperformance ( $versioninprogress
0766 #MT                                 , $reindexmode
0767 #MT                                 , 'T'
0768 #MT                                 , $textstart
0769 #MT                                 , $textend ? $textend : -time()
0770 #MT                                 );
0771 #MT     }
0772     $index->write_close();
0773     $index->final_cleanup();
0774     die $!;
0775 }
0776 
0777 #MT sub childexit {
0778 #MT     $textend = time();
0779 #MT     delete $SIG{'USR1'};
0780 #MT }
0781 
0782 sub dirbannerprint {
0783     my ($head, $releaseid, $dirname, $filename) = @_;
0784 
0785 
0786     if ($printdirbanner) {
0787         print(STDERR "${VTmagenta}$head $releaseid $dirname");
0788         $printdirbanner = undef;
0789     } else {
0790         print(STDERR
0791                   &VTCUU(1)
0792                 , &VTCHA(3 + length($head) + length($releaseid) + length($dirname))
0793                 , &VTEL(0)
0794                 , ${VTmagenta}
0795                 )
0796     }
0797     $repeatbannercountdown = $repeatbannerevery;
0798     print(STDERR "$filename${VTnorm}\n");
0799 }
0800 
0801 sub directorytreetraversal {
0802     my ($process_sub, $releaseid, $dirname, $filename) = @_;
0803     my $pathname = $dirname . $filename;
0804 
0805     if (substr($filename, -1) eq '/') {
0806         dirbannerprint('***', $releaseid, $dirname, $filename) if $trace;
0807         my $needbanner;
0808         map {   my $node = $_;
0809                 my $type = substr($node, -1);
0810                 $needbanner //= $type eq '/';
0811                 if  (   $type ne '/'
0812                     &&  $needbanner
0813                     ) {
0814                     $needbanner = undef;
0815                     dirbannerprint('***', $releaseid, $dirname, $filename) if $trace;
0816                 }
0817                 directorytreetraversal  ( $process_sub
0818                                         , $releaseid
0819                                         , $pathname
0820                                         , $_
0821                                         );
0822             } $files->getdir($pathname, $releaseid);
0823         $index->forcecommit();
0824     } elsif (!exists $binaryfiles{$releaseid}{$pathname}) {
0825         my $didprocess;
0826         if  ($autoversionflag) {
0827         # Some 'Files' objects need this variable properly set to
0828         # determine the possible versions for a file,
0829         # in particular to return correct values for varrange sub.
0830             $LXR::Common::pathname = $pathname;
0831             my @versions = $config->varrange('v');
0832             foreach my $releaseid (@versions) {
0833                 if ($repeatbannercountdown <=0) {
0834                     $printdirbanner = 1;
0835                     dirbannerprint('**=', $releaseid, $dirname, '') if $trace;
0836                 }
0837                 if (1 == $autoversionflag) {        # Only in first pass
0838                     $versionset{$releaseid} = '';   # remember this version
0839                 }
0840                 my $didoneprocess = &$process_sub($pathname, $releaseid, $config, $files, $index, $trace);
0841                 if ($didoneprocess) {
0842                     $repeatbannercountdown--;
0843                 } else {
0844                     $binaryfiles{releaseid}{$pathname} = 1;
0845                 }
0846                 $didprocess //= $didoneprocess;
0847             }
0848         } else {
0849             if ($repeatbannercountdown <=0) {
0850                 $printdirbanner = 1;
0851                 dirbannerprint('**=', $releaseid, $dirname, '') if $trace;
0852             }
0853             $didprocess = &$process_sub($pathname, $releaseid, $config, $files, $index, $trace);
0854             $repeatbannercountdown-- if $didprocess;
0855         }
0856         $printdirbanner //= $didprocess;
0857     }
0858 }
0859 
0860 sub feedswish {
0861     my ($pathname, $releaseid, $swish, $filelist) = @_;
0862 
0863     if (substr($pathname, -1) eq '/') {
0864         print STDERR "&&& $pathname $releaseid \n" if $trace;
0865         map { feedswish($pathname . $_, $releaseid, $swish, $filelist) }
0866           $files->getdir($pathname, $releaseid);
0867     } else {
0868         if (my $fh = $files->getfilehandle($pathname, $releaseid)) {
0869             if  (   $files->getfilesize($pathname, $releaseid) > 0
0870                 &&  LXR::Lang::parseable($pathname, $releaseid)
0871                 ) {
0872                 print STDERR "&&> $pathname $releaseid \n" if $trace;
0873                 print $filelist "$pathname\n";
0874                 my $contents = $files->getfile($pathname, $releaseid);
0875                 $swish->print
0876                     ( "Path-Name: $pathname\n"
0877                     , 'Content-Length: ' . length($contents) . "\n"
0878                     , "Document-Type: TXT\n"
0879                     , "\n", $contents
0880                     );
0881             } else {
0882                 $binaryfiles{$releaseid}{$pathname} = 1;
0883             }
0884             close($fh);
0885         }
0886     }
0887 }
0888 
0889 sub gensearch {
0890     my $releaseid = shift;
0891 #MT my $cldhandler = shift;
0892 
0893 #MT # Since using DBI in multi-threading is very hard to get it right,
0894 #MT # it is easier to temporarily close the connection to the DB and
0895 #MT # reopen it later when we really need it.
0896 #MT $index->write_close();
0897 #MT $index->final_cleanup();
0898 #MT my $pid;
0899 #MT my $parentpid = $$;
0900 #MT $pid = fork() if !$option{'S'};
0901 #MT if (defined $pid) {
0902 #MT     if (0 != $pid) {
0903 #MT         $SIG{'USR1'} = $cldhandler;
0904 #MT     # This is the parent: restore DB connection
0905 #MT         $index = LXR::Index->new($config);
0906 #MT         $index->write_open();
0907 #MT         return $pid ;
0908 #MT     }
0909 #MT     # Remove the signal handlers because DB connection is down
0910 #MT     delete $SIG{__DIE__};
0911 #MT     $SIG{'INT'} = 'DEFAULT';
0912 #MT } else {
0913 #MT     # We did not fork, reopen DB connection
0914 #MT     $index = LXR::Index->new($config);
0915 #MT     $index->write_open();
0916 #MT }
0917 
0918 
0919 # In case we did not fork, continue with traditional sequential processing
0920 
0921     if ($config->{'glimpsedir'} && $config->{'glimpseindex'}) {
0922 
0923         # Canonize options
0924         my $glixopts;
0925         if (exists($config->{'glimpseindexopts'})) {
0926             $glixopts = $config->{'glimpseindexopts'};
0927         } else {
0928             $glixopts = '-o -n -B'; # small index, numbers, large hashing
0929         };
0930         $glixopts =~ s/-+/-/g;      # make sure a single - at beginning
0931         $glixopts =~ s/-[RIFfa]//g; # remove unwanted or offending options
0932         $glixopts =~ s/-( |$)//g;   # remove orphan dashes
0933         $glixopts =~ s/ +/ /g;      # merge spaces
0934         if ($glixopts =~ m/^\s*$/) {
0935             $glixopts = '-o -n -B';
0936         }
0937         $glixopts =~ s/^-*/-/;
0938 
0939         # Determine indexation mode: full or incremental
0940 
0941         my $indexdir = $config->{'glimpsedir'} . '/' . $releaseid;
0942         my $indextmp = $config->{'glimpsedir'} . '/%@%glix%@%/';
0943         remove_tree($indextmp);
0944         if (! exists($option{'reindexall'})) {
0945             $glixopts .= ' -f';
0946             system('cp', '-r', $indexdir, $indextmp);
0947         } else {
0948             make_path($indextmp, {mode => 0755});
0949             if (-e $indexdir.'.glimpse_exclude') {
0950                 system('cp', $indexdir.'.glimpse_exclude', $indextmp);
0951             }
0952         }
0953 
0954         # Create the exclusion file only if it does not exist
0955         my $exclude = $indextmp . '.glimpse_exclude';
0956         if  (   exists($config->{'ignoredirs'})
0957             &&  !-e $exclude
0958             ) {
0959             if (open (EXCLUDE, '>', $exclude)) {
0960                 foreach (@{$config->{'ignoredirs'}}) {
0961                     print EXCLUDE '/', $_, "/\n"
0962                 }
0963                 close(EXCLUDE);
0964             } else {
0965                 print STDERR $VTred , "Can't create "
0966                     , $VTnorm, $VTbold, $exclude
0967                     , $VTnorm, "\n"
0968                     , $VTyellow, 'No automatic directory exclusion from parameter'
0969                     , $VTnorm, $VTbold, "'ignoredirs'"
0970                     , $VTnorm, "\n";
0971             }
0972         }
0973 
0974         # Do it now
0975         system  ( $config->{'glimpseindex'}
0976                 , split(' ', $glixopts)
0977                 , '-H', $indextmp
0978                 , $config->{'sourceroot'} . '/' . $releaseid
0979                 );
0980 
0981         # Need to chmod the glimpse files so everybody can read them.
0982         system('chmod 644 '. $indextmp . '.glimpse*');
0983         print 'Permissions changed to rw-r--r--', "\n";
0984 
0985         # Permute indexes
0986         my $del_locn = $config->{'glimpsedir'}.'/%@%delete%@%me%@%/';
0987         system('mv', $indexdir, $del_locn);
0988         system('mv', $indextmp, $indexdir);
0989         remove_tree($del_locn);
0990     }
0991 
0992     if ($config->{'swishdir'} && $config->{'swishbin'}) {
0993         my $swish = IO::Handle->new();
0994         die ${VTred} . $config->{'swishdir'} . " does not exist${VTnorm}"
0995             unless -d $config->{'swishdir'};
0996         my $filelist = IO::File->new($config->{'swishdir'} . "/$releaseid.filenames", 'w')
0997           or die "${VTred}can't open $releaseid.filenames for writing${VTnorm}";
0998 
0999         # execute swish, as a pipe we can write to
1000 
1001         open( $swish
1002             , '|' . $config->{'swishbin'}
1003               . ' -S prog -i stdin -v 1 -c '.$config->{'swishconf'}
1004               . ' -f '.$config->{'swishdir'}.'/'.$releaseid.'.index'
1005             )
1006           or die ${VTred} . "Couldn't exec " . $config->{'swishbin'} . ":${VTnorm $!}\n";
1007 
1008         feedswish('/', $releaseid, $swish, $filelist);
1009 
1010         $swish->close();
1011         $filelist->close();
1012     }
1013 
1014     print STDERR "\n";
1015 #MT if (defined $pid) {
1016 #MT     kill(10, $parentpid);   # 10 = POSIX::SIGUSR1
1017 #MT     exit(0) ;
1018 #MT }
1019 #MT return $pid;
1020 }
1021 
1022 sub dump_versionset {
1023     my ($prefix, $versionset) = @_;
1024     my $vfh;
1025 
1026     my $treeid = $config->{'virtroot'} . '_' . $config->{'treename'};
1027     $treeid =~ s|([^-a-zA-Z0-9.\@_])|sprintf('%%%02X', ord($1))|ge;
1028     my $versionfile = 'custom.d/'.$prefix.$treeid;
1029     if (!open($vfh, '>', $versionfile)) {
1030         print "${VTyellow}Can't open : version set not saved${VTnorm}\n";
1031         return;
1032     }
1033     my $count = 0;
1034     foreach my $version (sort keys %$versionset) {
1035         $count++;
1036         print $vfh $version, "\n";
1037     }
1038     close($vfh);
1039     print "=== $count versions written to ${VTyellow}$versionfile${VTnorm}\n";
1040 }
1041 
1042 sub elapsed {
1043     my $time = shift;
1044 
1045     my $seconds = $time % 60;
1046     $time = ($time - $seconds) / 60;
1047     my $minutes = $time %60;
1048     return ($time-$minutes)/60 . ':' . sprintf('%2.2d:%2.2d', $minutes, $seconds);
1049 }
1050 
1051 #   Look at each file recorded in the DB for this version
1052 #   and check if it has changed.
1053 #   A filename+release is translated into a revision id.
1054 #   Note that deleted/moved files will return undef.
1055 #   The revision id is compared to the recorded revision.
1056 #   If they do not match, all information pertaining to
1057 #   the recorded revision is erased.
1058 #
1059 #   Since the candidate files are more frequently replaced
1060 #   than deleted, symbols are not erased when their reference
1061 #   counts decrement to zero because the new file version will
1062 #   very likely reuse these symbols. Definitions and references
1063 #   are always erased.
1064 #   There is a small penalty for non-reused symbols, but this is
1065 #   fixed on next reindexall.
1066 sub cleanindex {
1067     my $releaseid = shift;
1068     my @files;      # Files from the DB for this release
1069     my $dirname;
1070     my $filename;
1071 
1072     $index->getallfilesinit($releaseid);
1073     while (my ($fid, $pathname, $revision, $relcount) = $index->nextfile()) {
1074         next if $files->filerev($pathname, $releaseid) eq $revision;
1075         $pathname =~ m!(.*/)(.+)$!;
1076         $filename = $2;
1077         if ($dirname ne $1) {
1078             $dirname = $1;
1079             print STDERR "${VTmagenta}%%% $releaseid $dirname${VTnorm}\n";
1080         }
1081         print STDERR "--- $releaseid $filename $revision";
1082         print STDERR " ... ${VTyellow}${VTslow}cleaning${VTnorm}";
1083         print STDERR VTCUB(12);
1084 #       $index->emptycache();
1085         if ($relcount > 1) {
1086             print STDERR VTEL(0), "${VTred}not purgeable yet${VTnorm}\n";
1087         } else {
1088             $index->purgefile($fid, $releaseid);
1089             print STDERR VTEL(0), "${VTyellow}purged${VTnorm}\n";
1090         }
1091         $index->removerelease($fid, $releaseid);
1092         $index->forcecommit();
1093     }
1094 }