#!/usr/bin/perl
#
# aglimpse - CGI gateway that runs a Glimpse search over an indexed archive
# and prints the matches as a text/html response.
#
# Request interface:
#   PATH_INFO     "/<length><path>".  The first <length> characters of
#                 <path> name the index directory; the remainder is
#                 reported as the subdirectory.  The whole <path> is handed
#                 to glimpse as its -F pattern.
#   QUERY_STRING  fields query, case, whole, errors, age, maxlines,
#                 maxfiles and lines.  When the script is started with a
#                 command-line argument (ISINDEX style request) the query
#                 string is taken as the pattern itself.
#
# Acknowledgements
#
# Thanks to Guy Brooker (guy@jw.estec.esa.nl) for his AA interface,
# which was the starting point for this program.
#
# Paul Clark
# paul@cs.arizona.edu
#
# Michael Smith
# msmith@cs.arizona.edu
#
# Modifications
#
# 2/22/94  Version 1.0, shell script version            Paul Clark
# 4/21/94  Version 1.1, multiple archives support       Paul Clark
# 4/22/94  Version 1.2, perl script                     Paul Clark
# 8/05/94  Version 1.3, verbosity&security              Paul Clark
#10/05/94  Version 1.4, more security, improved
#          output                                       Paul Clark
# 2/15/95  Version 2.0, made layout of archives distributed,
#          cleaned up interface,                        msmith
#
# NOTE(review): the response is declared text/html, yet the strings printed
# by this script carry no page markup (only text, blank lines and a literal
# bullet), and $GLIMPSEHTTP_HOME, $FSSERV and the archive URL are never
# used.  The text is reproduced as found; confirm against the upstream
# GlimpseHTTP distribution whether markup, links and the ISINDEX prompt
# were lost before deploying.

use strict;
use warnings;

# Declared separately from the assignments so that any assignment below can
# be commented out (as the configuration notes suggest) without breaking
# compilation under "use strict".
our ($GLIMPSEHTTP_HOME, $GLIMPSE_LOC, $CGIBIN, $GLIMPSE_OPT,
     $FSSERV, $SUPPRESS_HTML_TAGS);

# **** **** **** ****    CONFIGURABLE VARIABLES     **** **** **** ****
$GLIMPSEHTTP_HOME = "/home/ftp/pub/earthops_html";
$GLIMPSE_LOC      = "/usr/local/bin/glimpse";

# Path to your scripts
$CGIBIN = "cgi-bin";

# Glimpse options
# use -j ONLY for Glimpse v3.5 or later
$GLIMPSE_OPT = "-j -y";
# for Glimpse v3.0 or earlier, uncomment the following line
# $GLIMPSE_OPT = "-y";

# **** **** **** **** NO CONFIGURATION NEEDED BELOW **** **** **** ****

# If you want per-line access
$FSSERV = "/$CGIBIN/mfs";

# Set file name pattern where to suppress HTML tags
# Comment out to cancel suppression
$SUPPRESS_HTML_TAGS = "\\.s?html?\$";

# **** **** **** **** Done settings **** **** **** ****

# Unbuffered output, so nothing pending is duplicated into the forked child.
$| = 1;

my $path_info = defined $ENV{'PATH_INFO'} ? $ENV{'PATH_INFO'} : '';

# might as well start the message now
print "Content-type: text/html\n\n";
print "\n\n";

# /<length><indexdir><relpath> is the format of the PATH_INFO
my ($length, $path);
if ($path_info =~ m|^/([0-9]*)(.*)$|) {
    ($length, $path) = ($1, $2);
    $path =~ s|"||g;
}
else {
    err_badargs();
}
$length = 0 unless length $length;
$length = length $path if $length > length $path;    # keep substr in range

my $indexdir = substr($path, 0, $length);
my $relpath  = substr($path, $length);

# Reject a missing index directory before touching the file system.
err_badargs() unless $indexdir;

# archive.cfg holds tab-separated fields: title, then URL path.  When the
# file has several lines the last one wins.
my ($title, $urlpath);
open(my $conf, '<', "$indexdir/archive.cfg") or err_conf();
while (my $cfg_line = <$conf>) {
    chomp $cfg_line;
    ($title, $urlpath) = split /\t/, $cfg_line;
}
close($conf);
$title = '' unless defined $title;

$ENV{'HOME'} = $indexdir;    # some versions of Glimpse need it

# Ensure that Glimpse is available on this machine
-x $GLIMPSE_LOC or err_noglimpse();

# Ensure that index is available
-r "$indexdir/.glimpse_index" or err_noindex($indexdir);

# To support an ISINDEX type search, set query string if given
# an argument on the command line
my $prefix = @ARGV ? "whole=on&case=off&query=" : '';

# Check that a query has been made
my $query = $ENV{'QUERY_STRING'};
err_noquery() unless $query;

# Form fields are kept in a hash.  They are never turned into Perl source,
# so a crafted value cannot be executed.
my %qs = parse_query_string($prefix . $query);
for my $name (qw(query case whole errors age maxlines maxfiles lines)) {
    $qs{$name} = '' unless defined $qs{$name};
}

my $pquery      = $qs{query};
my $with_lines  = $qs{lines};
my $ignore_case = $qs{case} ne 'on';     # case=on requests a case-sensitive search
my $whole_words = $qs{whole} ne 'on';    # -w is passed unless whole=on
my $maxlines    = first_number($qs{maxlines}, 20);
my $maxfiles    = first_number($qs{maxfiles}, 100);

# check if the query contains any words
my @words = grep { length } split /\W+/, $pquery;
err_badquery() unless @words;

# Pattern used to bold the matched words.  @words holds word characters
# only, so it is safe to interpolate.
my $word_pattern = join '|', @words;
$word_pattern = "\\b(?:$word_pattern)\\b" if $whole_words;
my $highlight_re = $ignore_case ? qr/($word_pattern)/i : qr/($word_pattern)/;

my $suppress_re = $SUPPRESS_HTML_TAGS ? qr/$SUPPRESS_HTML_TAGS/ : undef;

# The -F pattern is the requested path with dots made literal.
(my $filter = $path) =~ s/\./\\./g;
$filter =~ s/'//g;

# Argument vector for glimpse, in the order the options were always given.
# NOTE(review): a query starting with "-" still reaches glimpse as an
# argument of its own - confirm how glimpse treats it.
my @glimpse_args = split ' ', $GLIMPSE_OPT;
push @glimpse_args, '-n' if $with_lines;
push @glimpse_args, '-Y', $qs{age} if $qs{age} =~ /^[0-9]+\z/;
push @glimpse_args, '-i' if $ignore_case;
push @glimpse_args, '-w' if $whole_words;
if ($qs{errors} =~ /^[0-8]\z/) {
    push @glimpse_args, "-$qs{errors}";
}
elsif ($qs{errors} =~ /^Best[ +]match\z/) {
    push @glimpse_args, '-B';
}
push @glimpse_args, '-H', $indexdir;
push @glimpse_args, '-F', $filter if $filter;
push @glimpse_args, $pquery;

my $shown_query = html_escape($pquery);

print "Search Results for: \"$shown_query\"\n";
print "\n";
print "\n\n";
print "Search results for:\n\"$shown_query\"\n\n\n";
print "on archive:\n", html_escape($title), "\n";
print "\n\n";
print "\n";
print "\n\n";

if ($relpath) {
    print "\n\nsubdirectory ", html_escape($relpath), "\n\n\n";
}
print "\n\n\n";

chdir $indexdir;

# the default is *no* jump to lines. If lines=on, glimpse reports line numbers
if ($with_lines) {
    print "File name (modification date), and list of matched lines (preceded by line numbers)\n\n";
}
else {
    print "File name (modification date), and list of matched lines\n\n";
}

# Fork and exec glimpse directly: no shell ever sees request data.  The
# child sends its stderr down the same pipe, as "2>&1" used to do.
my $gpid = open(my $gout, '-|');
if (!defined $gpid) {
    print "\n\nCannot execute glimpse\n\n\n";
    exit;
}
if ($gpid == 0) {
    open(STDERR, '>&', \*STDOUT);
    exec { $GLIMPSE_LOC } $GLIMPSE_LOC, @glimpse_args or exit 127;
}

my $prevfile  = "";
my $file      = "";
my $lcount    = 0;
my $fcount    = 0;
my $linecount = 0;

LINE: while (my $hit = <$gout>) {
    # Each hit reads "file:date:text", or "file:date: lineno:text" when
    # line numbers were requested; anything else is skipped.
    my @field = $with_lines
        ? ($hit =~ /^([^:]*):([^:]*):\s*(\d+):(.*)/)
        : ($hit =~ /^([^:]*):([^:]*):(.*)/);
    next LINE unless @field;

    my ($date, $lineno, $string);
    if ($with_lines) {
        ($file, $date, $lineno, $string) = @field;
    }
    else {
        ($file, $date, $string) = @field;
    }

    # skip the file if it isn't in this index directory; the prefix is
    # compared literally, not as a regular expression
    next LINE unless index($file, $indexdir) == 0;
    $file = substr($file, length $indexdir);

    if ($file ne $prevfile) {
        $linecount = 0;
        # stop as soon as a file beyond the limit shows up
        if ($fcount >= $maxfiles) {
            print "\n\nLimit of $maxfiles files exceeded...\n\n\n";
            $file   = "";
            $fcount = "at least $fcount";
            $lcount = "at least $lcount";
            last LINE;
        }
        $prevfile = $file;
        print "\n    ", html_escape($file), ", (", html_escape($date), ")\n      \n";
        $fcount++;
    }

    $lcount++;
    $linecount++;
    # show exactly $maxlines lines per file, then one notice
    if ($linecount > $maxlines) {
        print "\n    \x{2022} Limit of $maxlines matched lines per file exceeded...\n"
            if $linecount == $maxlines + 1;
        next LINE;
    }

    if ($suppress_re && $file =~ $suppress_re) {
        $string =~ s/<[^>]*>?//g;
    }

    if ($with_lines) {
        # BOLDING
        print "\n    \x{2022} \n";
        print "line ", $lineno, ":", highlight_html($string, $highlight_re), "\n";
    }
    else {
        print "\n    \x{2022} ", html_escape($string), "\n";
    }
}

print "\n    \n" if $file;
print "\n    ";
print "\n\n    Summary for query \"", $shown_query, "\":\n\n    \n";
print "GlimpseHTTP\n";
print "search found ", $lcount, " matches in ", $fcount, " files\n    \n";
print "(Some matches may be to HTML tags which may not be shown.)\n";
print "\n    \n";
print "\n";
print "\n    \n";
print "\n";
print "\n";

close($gout);
# presumably glimpse's scratch file, named after its process id - TODO confirm
unlink "/tmp/.glimpse_tmp.$gpid";

# Split a raw query string into a hash of URL-decoded fields.  Only names
# that look like identifiers are kept; a value may itself contain "=".
sub parse_query_string {
    my ($raw) = @_;
    my %field;
    for my $pair (split /&/, $raw) {
        my ($name, $value) = split /=/, $pair, 2;
        next unless defined $name && $name =~ /^[a-z_A-Z]\w*$/;
        $field{$name} = url_decode(defined $value ? $value : '');
    }
    return %field;
}

# Decode "+" and %XX escapes (hexadecimal digits only) of a form value.
sub url_decode {
    my ($text) = @_;
    $text =~ tr/+/ /;
    $text =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;
    return $text;
}

# Return the first run of digits found in $text, or $default if none.
sub first_number {
    my ($text, $default) = @_;
    return $text =~ /(\d+)/ ? $1 : $default;
}

# Escape the characters that are special in HTML text and attributes.
sub html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Escape $text for HTML and wrap every match of $re in <B>...</B>.
# $re must contain exactly one capture group around the whole match, so
# that split() returns text and matches alternately.  Matching is done on
# the raw text, which keeps entity names such as "amp" from being bolded.
sub highlight_html {
    my ($text, $re) = @_;
    my $html     = '';
    my $is_match = 0;
    for my $piece (split /$re/, $text, -1) {
        my $safe = html_escape($piece);
        $html .= $is_match ? "<B>$safe</B>" : $safe;
        $is_match = !$is_match;
    }
    return $html;
}

# Finish the page and exit with an error status.
sub diag_exit {
    print "\n";
    exit 1;
}

# The script was called without a query.
# Provide an ISINDEX type response for browsers
# without form support.
sub err_noquery {
    print <<'EOM';
Glimpse Gateway

    Glimpse Gateway

    This is a gateway to Glimpse. Type a pattern to search in your browser's search dialog.

    What is Glimpse ?

    Glimpse (which stands for GLobal IMPicit SEarch) is an indexing and query system that allows you to search through all your files very quickly. For example, a search for Schwarzkopf allowing two misspelling errors in 5600 files occupying 77MB took 7 seconds on a SUN IPC. Glimpse supports most of agrep's options (agrep is our powerful version of grep) including approximate matching (e.g., finding misspelled words), Boolean queries, and even some limited forms of regular expressions.
    Glimpse's running time is typically slower than systems using inverted indexes, but its index is an order of magnitude smaller (typically 2-5% of the size of the files).

    Authors of Glimpse

    Udi Manber, Sun Wu, and Burra Gopal
    Department of Computer Science, University of Arizona, Tucson, AZ 85721.
    glimpse@cs.arizona.edu

    Glimpse
    glimpse@cs.arizona.edu
EOM
    diag_exit();
}

# Glimpse was not found.
# Report a useful message.
sub err_noglimpse {
    print <<'EOM';
Glimpse not found

    Glimpse not found

    This gateway relies on Glimpse search tool. If it is installed, please set the correct path in the script file. Otherwise obtain the latest version from ftp.cs.arizona.edu
EOM
    diag_exit();
}

# Glimpse index was not found in the given directory.
# Give recommendations for indexing.
sub err_noindex {
    my ($dir) = @_;
    my $shown = html_escape($dir);
    print "Glimpse Index not found\n";
    print "\n";
    print "\n";
    print "\n\n    Glimpse Index in directory '$shown' not found\n\n    \n";
    print "Glimpse cannot proceed without index.\n";
    print "Please check if the directory being searched is indexed\n";
    print "by glimpseindex.\n";
    print "\n";
    diag_exit();
}

# Glimpse archive was not found (PATH_INFO missing or malformed).
sub err_badargs {
    print "Glimpse Archive not found\n";
    print "\n";
    print "\n";
    print "\n\n    Glimpse Archive not found\n\n    \n";
    print "There was a problem with the arguments passed to aglimpse.\n";
    print "Please check your settings.\n";
    print "\n";
    diag_exit();
}

# Glimpse archive Configuration File was not found.
sub err_conf {
    my $shown = html_escape($indexdir);
    print "Glimpse Archive Configuration File not found\n";
    print "\n";
    print "\n";
    print "\n\n    Glimpse Archive Configuration File not found\n\n    \n";
    print "Cannot open configuration file $shown/archive.cfg\n";
    print "\n";
    diag_exit();
}

# The query holds no word characters at all.
sub err_badquery {
    my $shown = html_escape($pquery);
    print "Query is too broad\n";
    print "\n";
    print "\n";
    print "\n\n    Query is too broad\n\n    \n";
    print "The query \"$shown\" doesn't contain any words and "
        . "thus will take too much time. Please refine your query.\n";
    print "\n";
    diag_exit();
}