Back to home page
Get LXR Cross Referencer at SourceForge.net. Fast, secure and Free Open Source software downloads

LXR self cross-referenced (under SQLite)

 
 

    


File indexing completed on 2017-01-01 14:49:53

0001 #!/usr/bin/perl -T
0002 ######################################################################
0003 #
0004 # search -- Freetext search
0005 #
0006 #   Arne Georg Gleditsch <argggh@ifi.uio.no>
0007 #   Per Kristian Gjermshus <pergj@ifi.uio.no>
0008 #
0009 #
0010 # This program is free software; you can redistribute it and/or modify
0011 # it under the terms of the GNU General Public License as published by
0012 # the Free Software Foundation; either version 2 of the License, or
0013 # (at your option) any later version.
0014 #
0015 # This program is distributed in the hope that it will be useful,
0016 # but WITHOUT ANY WARRANTY; without even the implied warranty of
0017 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0018 # GNU General Public License for more details.
0019 #
0020 # You should have received a copy of the GNU General Public License
0021 # along with this program; if not, write to the Free Software
0022 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
0023 #
0024 ######################################################################
0025 
0026 use strict;
0027 use lib do { $0 =~ m{(.*)/} ? "$1/lib" : 'lib' }; # if LXR modules are in ./lib
0028 
0029 =head1 search script
0030 
0031 This script manages the HTTP requests for free-text search.
0032 
0033 =cut
0034 
0035 use LXR::Common;
0036 use LXR::Config;
0037 use LXR::Template;
0038 
0039 #
0040 # Global variables
0041 #
0042 my $maxhits = 1000;
0043 
0044 
0045 =head2 C<filename_matches ($filetext, $advanced, $casesensitive, $file)>
0046 
0047 Function C<filename_matches> tells if search results for $file should
0048 be kept for display.
0049 
0050 =over
0051 
0052 =item 1
0053 
0054 C<$filetext>
0055 
0056 a I<string> containing the desired name fragment or pattern
0057 
0058 =item 2
0059 
0060 C<$advanced>
0061 
0062 an I<integer> equal to 1 if C<$filetext> is a pattern, 0 otherwise
0063 
0064 =item 3
0065 
0066 C<$casesensitive>
0067 
0068 an I<integer> equal to 1 if comparison is case-sensitive, 0 otherwise
0069 
0070 =item 4
0071 
0072 
0073 C<$file>
0074 
0075 a I<string> containing the filename to check
0076 
0077 =back
0078 
0079 Result is 1 if fragment C<$filetext> is present (either as a substring
0080 or a matching pattern) inside filename C<$file>.
0081 
0082 =cut
0083 
0084 sub filename_matches {
0085     my ($filetext, $advanced, $casesensitive, $file) = @_;
0086     if ($advanced) {
0087         if ($casesensitive) {
0088             if ($file =~ m/$filetext/) {
0089                 return 1;
0090             }
0091         } elsif ($file =~ m/$filetext/i) {
0092             return 1;
0093         }
0094     } else {
0095         if ($casesensitive) {
0096             if (index($file, $filetext) != -1) {
0097                 return 1;
0098             }
0099         } elsif (index(lc($file), lc($filetext)) != -1) {
0100             return 1;
0101         }
0102     }
0103     return 0;
0104 }
0105 
0106 
0107 =head2 C<glimpsesearch ($searchtext, $filetext, $advanced, $casesensitive)>
0108 
0109 Function C<glimpsesearch> searches the tree with I<glimpse>.
0110 
0111 =over
0112 
0113 =item 1
0114 
0115 C<$searchtext>
0116 
0117 a I<string> containing the text to look for
0118 
0119 =item 2
0120 
0121 C<$filetext>
0122 
0123 a I<string> containing the desired name fragment or pattern
0124 
0125 =item 3
0126 
0127 C<$advanced>
0128 
0129 an I<integer> equal to 1 if C<$filetext> is a pattern, 0 otherwise
0130 
0131 =item 4
0132 
0133 C<$casesensitive>
0134 
0135 an I<integer> equal to 1 if comparison is case-sensitive, 0 otherwise
0136 
0137 =back
0138 
0139 I<Glimpse> is launched to search the source-tree for the string given.
0140 Results are then filtered on the optional filename.
0141 The remaining hits are returned in an array.
0142 
0143 =cut
0144 
0145 sub glimpsesearch {
0146     my ($searchtext, $filetext, $advanced, $casesensitive) = @_;
0147     my $sourceroot = $config->{'sourceroot'} . '/' . $releaseid . '/';
0148 
0149     $ENV{'PATH'} = '/bin:/usr/local/bin:/usr/bin:/usr/sbin';
0150     $! = '';
0151     open    ( GLIMPSE
0152             , $config->{'glimpsebin'}
0153         # Don't scan files ending in ,v or ~
0154                 ." -F '-v (\\,v\|\\~)\$' "
0155         # Should we match casesensitive ?
0156                 . ($casesensitive ? '' : '-i')
0157         # Location of index datadbase
0158                 . ' -H '
0159                 . $config->{'glimpsedir'}.'/'.$releaseid
0160         # The pattern to search for
0161                 ." -y -n '$searchtext' 2>&1 |"
0162             )
0163     or die "Glimpse subprocess died unexpextedly: $!\n";
0164 
0165     my $numlines     = 0;
0166     my @glimpselines = ();
0167     my @glimpsemsgs  = ();
0168     while (<GLIMPSE>) {
0169         if (s/^$sourceroot//) {
0170         my ($file) = $_ =~ m/(.*?):\s*/;
0171     # Keep only occurrences matching file name if given
0172         next if $filetext && !filename_matches($filetext, $advanced, $casesensitive, $file);
0173         $numlines++;
0174         push(@glimpselines, $_);
0175         if ($numlines >= $maxhits) {
0176             last;
0177         } }
0178         else {
0179     # Ignore information message
0180             next if m/^using working-directory/;
0181     # Strip off eventual command name
0182             s/^.*?glimpse:\s*//;
0183             push (@glimpsemsgs, $_);
0184         }
0185     }
0186 
0187     close(GLIMPSE);
0188 
0189     my $retval = $? >> 8;
0190 
0191     # The manpage for glimpse says that it returns 2 on syntax errors or
0192     # inaccessible files. It seems this is not the case.
0193     # We will have to work around it for the time being.
0194 
0195     if ($retval == 0) {
0196         my @ret;
0197         my $i = 0;
0198         foreach my $glimpseline (@glimpselines) {
0199             last if ($i > $maxhits);
0200 
0201             my ($file, $line, $text) = $glimpseline =~ m/(.*?):\s*(\d+)\s*:(.*)/;
0202 
0203             $text =~ s/&/&amp;/g;
0204             $text =~ s/</&lt;/g;
0205             $text =~ s/>/&gt;/g;
0206 
0207             push @ret, [ $file, $line, $text ];
0208         } continue {
0209             $i++;
0210         }
0211 # Sort the results ascending per file name
0212         return sort {$$a[0] cmp $$b[0]} @ret;
0213     } elsif ($retval == 1) {
0214         my $glimpsebin = $config->{'glimpsebin'};
0215         my $glimpseresponse = join('<br>', @glimpselines);
0216         my $glimpseresponse =~ s/$glimpsebin/Reason/;
0217         my $glimpseresponse =~ s/glimpse: error in searching index//;
0218         print("<p class='error'>Search failed</p>\n<p>$glimpseresponse</p>\n");
0219         foreach (@glimpsemsgs) {
0220             print("<p class='error'>$_</p>\n");
0221         }
0222         return;
0223     } else {
0224         print("<p class='error'>Unexpected return value $retval from Glimpse (usually means <i>syntax error</i>)</p>\n");
0225         foreach (@glimpsemsgs) {
0226             print("<p class='error'>$_</p>\n");
0227         }
0228         return;
0229     }
0230 }
0231 
0232 
0233 =head2 C<swishsearch ($searchtext, $filetext, $advanced, $casesensitive)>
0234 
0235 Function C<swishsearch> searches the tree with I<Swish-e>.
0236 
0237 =over
0238 
0239 =item 1
0240 
0241 C<$searchtext>
0242 
0243 a I<string> containing the text to look for
0244 
0245 =item 2
0246 
0247 C<$filetext>
0248 
0249 a I<string> containing the desired name fragment or pattern
0250 
0251 =item 3
0252 
0253 C<$advanced>
0254 
0255 an I<integer> equal to 1 if C<$filetext> is a pattern, 0 otherwise
0256 
0257 =item 4
0258 
0259 C<$casesensitive>
0260 
0261 an I<integer> equal to 1 if comparison is case-sensitive, 0 otherwise
0262 
0263 =back
0264 
0265 I<Swish-e> is launched to search the source-tree for the string given.
0266 Results are then filtered on the optional filename.
0267 The remaining hits are returned in an array.
0268 
0269 =cut
0270 
0271 sub swishsearch {
0272     my ($searchtext, $filetext, $advanced, $casesensitive) = @_;
0273 
0274   my $swishIndex = $config->{'swishdir'} . '/' . $releaseid . '.index';
0275     if (!-e $swishIndex) {
0276         print '<p class="error">';
0277         print "Version '$releaseid' has not been indexed and is unavailable for searching.";
0278         print '</p>';
0279         return;
0280     }
0281     
0282     $ENV{'PATH'} = '/bin:/usr/local/bin:/usr/bin:/usr/sbin';
0283   my $swishCommand =  $config->{'swishbin'}
0284                     . ' -f ' . $swishIndex
0285                     . ' -s swishdocpath'
0286                     . ' -w \'(' . $searchtext . ')\''
0287                     ;
0288   my $ret = `$swishCommand`;
0289   my @result = grep { not /^[\#\.]/ } split(/\n/, $ret);
0290 
0291     my $retval = $?;
0292     my @ret;
0293     if ($retval == 0) {
0294         my $numlines = 0;
0295         foreach my $hit (@result) {
0296             print $hit, "<br>\n" if $hit =~ /No such file or directory/;    # feeble attempt to print possible errors (e.g. incomplete LD_LIBRARY_PATH causes linking errors)
0297             next if substr($hit, 0, 4) eq 'err:';   # skip; only 'no results' errors happen with return value 0
0298 
0299             my ($score, $file) = $hit =~ m/^(\d+) \/(.+) "(.+)" \d+/;
0300             next if $filetext && !filename_matches($filetext, $advanced, $casesensitive, $file);
0301             push @ret, [ $file, $score ];
0302             $numlines++;
0303 
0304             last if ($numlines >= $maxhits);
0305         }
0306         return @ret;
0307     } else {
0308         print( "<p class='error'>Search failed: internal error</p><br>\n<p>"
0309                 . join('<br>', @result)
0310                 . "<\p>\n");
0311         return;
0312     }
0313 }
0314 
0315 
0316 =head2 C<checkvalidref ($file)>
0317 
0318 Function C<checkvalidref> is a "$variable" substitution function.
0319 It returns CSS class name C<searchinvalid>
0320 if the file timestamp is incorrect,
0321 meaning the file has been modified after I<genxref> indexing.
0322 Otherwise, it returns an empty string.
0323 
0324 =over
0325 
0326 =item 1
0327 
0328 C<$file>
0329 
0330 a I<string> containing the filename to check
0331 
0332 =back
0333 
0334 Since the search engines return OS-relative path, the filename
0335 must be prefixed with a / to observe LXR file designation rule.
0336 
0337 =cut
0338 
0339 sub checkvalidref {
0340     my ($file) = @_;
0341     $file = '/' . $file;
0342 
0343     if  (   !$index->filetimestamp
0344                 ( $file
0345                 , $files->filerev($file, $releaseid)
0346                 )
0347         &&  LXR::Lang::parseable($file, $releaseid)
0348         ) {
0349         'searchinvalid'
0350     } else {
0351         ''
0352     }
0353 }
0354 
0355 
0356 =head2 C<printresults ($templ, $searchtext, @results)>
0357 
0358 Function C<printresults> is a "$function" substitution function.
0359 It returns an HTML string which is the concatenation of its
0360 expanded argument applied to every search result.
0361 
0362 =over
0363 
0364 =item 1
0365 
0366 C<$templ>
0367 
0368 a I<string> containing the template (i.e. argument)
0369 
0370 =item 2
0371 
0372 C<$searchtext>
0373 
0374 a I<string> containing the looked-for text
0375 
0376 =item 3
0377 
0378 C<@results>
0379 
0380 an I<array> containing the search results
0381 
0382 =back
0383 
0384 Because of the different nature of the results,
0385 processing is split depending on the search engines.
0386 With I<Glimpse>, line for the hit was grabbed,
0387 while with I<Swish-e>, only a relevance score is available.
0388 
0389 B<Note:>
0390 
0391 =over
0392 
0393 =item
0394 
0395 I<Both search engines start by looking into their private "index"
0396 files before accessing the source-tree files.
0397 A first consequence is any file added after genxref indexing cannot
0398 be seen and searched; there may exist unreported hits in these
0399 files. Since it looks like words are looked up in some "index" or
0400 "cache", a second consequence is new words are likely not to be
0401 reported in modified files.>
0402 
0403 For these reasons, background is highlighted for modified files.
0404 
0405 =back
0406 
0407 =cut
0408 
0409 sub printresults {
0410     my $templ   = shift;
0411     my $searchtext = shift;
0412     my @results = @_;
0413     my $ret;
0414     my ($file, $lastfile);
0415 
0416     foreach (@results) {
0417 
0418             # glimpse and swish-e searches provide different data for each result
0419         if ($config->{'glimpsebin'}) {
0420             my (@params) = @$_;
0421             $file     = $params[0];
0422             my $fileonce = $file if $lastfile ne $file;
0423             my $line     = $params[1];
0424             my $text     = $params[2];
0425             my $searchtextq = quotemeta ($searchtext);
0426 # May not work always because glimpse pattern are different form Perl's
0427 # but when it works highlights the occurence of searchtext
0428             $text =~ s!($searchtextq)!<span class="searchtext">$1</span>!g;
0429 
0430             $ret .= expandtemplate
0431                 ( $templ
0432                 ,   ( 'text'    => sub { return "<pre class='searchtext'>$text</pre>" }
0433                     , 'file'    => sub { fileref($file, 'searchfile', "/$file") }
0434                     , 'fileonce'=> sub { if ($fileonce) {
0435                                             return fileref($fileonce, 'searchfile', "/$file")
0436                                         } else {
0437                                             return '&nbsp;'
0438                                         }
0439                                     }
0440                     , 'line'    => sub { fileref($line, 'searchline', "/$file", $line) }
0441                     , 'fileref' => sub { fileref("$file, line $line", 'searchfile', "/$file", $line) }
0442                     , 'tdfile'  => sub { if ($fileonce) {
0443                                             return 'searchfile'
0444                                         } else {
0445                                             return 'searchfilevoid'
0446                                         }
0447                                     }
0448                     , 'searchinvalid'   => sub { checkvalidref($file) }
0449                     )
0450                 );
0451         } else {
0452             my (@params) = @$_;
0453             $file     = $params[0];
0454             my $fileonce = $file if $lastfile ne $file;
0455             my $score    = $params[1];
0456             $ret .= expandtemplate
0457                 ( $templ
0458                 ,   ( 'text'    => sub { return $score }
0459                     , 'file'    => sub { fileref($file, 'searchfile', "/$file") }
0460                     , 'fileonce'=> sub { if ($fileonce) {
0461                                             return fileref($fileonce, 'searchfile', "/$file")
0462                                         } else {
0463                                             return '&nbsp;'
0464                                         }
0465                                     }
0466                     , 'line'    => sub { return '' }
0467                     , 'fileref' => sub { fileref($file, 'searchfile', "/$file") }
0468                     , 'tdfile'  => sub { if ($fileonce) {
0469                                             return 'searchfile'
0470                                         } else {
0471                                             return 'searchfilevoid'
0472                                         }
0473                                     }
0474                     , 'searchinvalid'   => sub { checkvalidref($file) }
0475                     )
0476                 );
0477         }
0478         $lastfile = $file;
0479     }
0480     return $ret;
0481 }
0482 
0483 
0484 =head2 C<search ()>
0485 
0486 Sub C<search> is the main driver for free-text search.
0487 
0488 It dispatches search to the correct search engine, then calls
0489 the result editor.
0490 
0491 B<Note:>
0492 
0493 =over
0494 
0495 =item
0496 
0497 I<Filename search may give inaccurate results if source-tree has
0498 been modified since last F<genxref> indexation because search is
0499 done against an internal list captured at indexing time.>
0500 
0501 =back
0502 
0503 =cut
0504 
0505 sub search {
0506     my $templ;
0507     my $errorsig = "<!-- ! -->";
0508 
0509     $templ = gettemplate    ( 'htmlsearch'
0510                             , $errorsig
0511                             );
0512     if ($templ =~ m/^$errorsig/) {
0513         die "Free-text search not available without 'htmlsearch' template\n";
0514     }
0515 
0516     my $searchtext    = $HTTP->{'param'}{'_string'};
0517     my $filetext      = $HTTP->{'param'}{'_filestring'};
0518     my $advanced      = $HTTP->{'param'}{'_advanced'};
0519     my $casesensitive = $HTTP->{'param'}{'_casesensitive'};
0520 
0521     $searchtext =~ s/\+/ /g;    # Reverse <form> space encoding
0522 
0523     my @results;
0524     if ($searchtext ne '') {
0525         if ($config->{'glimpsebin'}) {
0526             @results = glimpsesearch($searchtext, $filetext, $advanced, $casesensitive);
0527         } elsif ($config->{'swishbin'} && $config->{'swishdir'}) {
0528             @results = swishsearch($searchtext, $filetext, $advanced, $casesensitive);
0529         } else {
0530             warn "No freetext search engine configured.\n";
0531         }
0532     } elsif ($filetext ne '') {
0533         my $FILELISTING;
0534         if ($config->{'swishdir'} && $config->{'swishbin'}) {
0535             unless ($FILELISTING = IO::File->new($config->{'swishdir'} . "/$releaseid.filenames")) {
0536                 warn    "Version '$releaseid' has not been indexed and is unavailable for searching<br>Could not open "
0537                         . $config->{'swishdir'}
0538                         . "/$releaseid.filenames\n";
0539                 return;
0540             }
0541         } elsif ($config->{'glimpsedir'} && $config->{'glimpsebin'}) {
0542             unless ($FILELISTING =
0543                 IO::File->new($config->{'glimpsedir'} . '/' . $releaseid . "/.glimpse_filenames"))
0544             {
0545                 warn    "Version '$releaseid' has not been indexed and is unavailable for searching\n"
0546                         . 'Could not open '
0547                         . $config->{'glimpsedir'}
0548                         . "/$releaseid/.glimpse_filenames\n";
0549                 return;
0550             }
0551         } else {
0552             warn "Freetext search engine required for file search, and no freetext search engine is configured\n";
0553             return;
0554         }
0555         my $sourceroot = $config->{'sourceroot'} . '/' . $releaseid . '/';
0556         while (<$FILELISTING>) {
0557             chomp;
0558             s/^$sourceroot//;
0559             push @results, [ $_ ] if filename_matches($filetext, $advanced, $casesensitive, $_);
0560         }
0561         close($FILELISTING);
0562     }
0563 
0564     print expandtemplate
0565         ( $templ
0566         ,   ( 'variables'           => \&varexpand
0567             , 'searchtext'          => sub { $_ = $searchtext; s/&/&amp;/g; s/</&lt;/g; s/>/&gt;/g; return $_; }
0568             , 'searchtext_escaped'  => sub { $_ = $searchtext; s/&/&amp;/g; s/\"/&quot;/g; s/</&lt;/g; s/>/&gt;/g; return $_; }
0569             , 'filetext_escaped'    => sub { $_ = $filetext; s/\"/&quot;/g; return $_; }
0570             , 'advancedchecked'     => sub { $advanced ? 'checked' : '' }
0571             , 'casesensitivechecked'=> sub { $casesensitive ? 'checked' : '' }
0572             , 'varbtnaction'        => sub { varbtnaction(@_, 'search') }
0573             , 'urlargs'             => sub { urlexpand('-', 'search') }
0574             , 'noquery'             => sub { return 'hidden' if $searchtext eq '' && $filetext eq '' }
0575             , 'maxhits_message'     => sub {
0576                 return @results == $maxhits
0577                   ? "<b>Too many hits, displaying first $maxhits</b><br>"
0578                   : '';
0579                 }
0580 
0581             , 'results'     => sub { printresults(@_, $searchtext, @results) }
0582             , 'resultcount' => sub { return scalar @results }
0583             , 'indexstate'  => sub { displayindexstate('search') }
0584             )
0585         );
0586 }
0587 
0588 
0589 =head2 Script entry point
0590 
0591 Builds the header and footer and launches C<search>
0592 for the real job.
0593 
0594 =cut
0595 
0596 httpinit();
0597 std_http_headers('search');
0598 makeheader('search');
0599 if ($files->isa('LXR::Files::Plain')) {
0600     if  (   $config->{'glimpsebin'}
0601             && $config->{'glimpsebin'} =~ m!^(.*/)?true$!
0602         ||  $config->{'swishbin'}
0603             && $config->{'swishbin'} =~ m!^(.*/)?true$!
0604         ) {
0605         warn "Free-text search disabled by configuration file!\n";
0606     } else {
0607         &search;
0608     }
0609 } else {
0610     warn "Free-text search not available with VCSs!\n";
0611 }
0612 makefooter('search');
0613 
0614 httpclean;
0615