#!/usr/bin/perl
#
# NAME
#   FindTest
#
# SYNOPSIS
#   <form method=get action="/cgi-bin/abc/FindTest">
#
# DESCRIPTION
#   This is the tune-finder CGI script invoked by findtune.html,  and
#   should be installed in your web server's "/cgi/abc/" directory.
#
#   We expect GET or POST params giving a title or  pattern,  and  we
#   search  the  ABCbot's  tune index files for matching entries.  We
#   produce a HTML listing  of  the  matches,  formatted  to  deliver
#   either  the original file, or invoke the tune script to extract a
#   tune and format it.
#
# REQUIRES
#   

	push @INC, '.';
	$host = `hostname`;		# Get our host name
	$host =~ s/[\r\s]+$//;	# Strip off trailing white stuff
	$host =~ s/\..*//;		# Strip off everything after the first dot
if (-f "$host-cgilocal.pm") {
	require "$host-cgilocal.pm";
} else {
	require "cgilocal.pm";	# Where things are kept on this machine.
}
	require "sendsubs.pm";	# Routines to send messages
	require "HTMLenc.pm";	# HTML encoding routines.
	require "outtune.pm";	# Contains the AdjTitle() routine.
	require "formats.pm";	# Common routines dealing with file formats.

# AUTHOR
#   John Chambers <jc@trillian.mit.edu>
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

$| = 1;			# Unbuffered STDOUT.
umask 0002;		# Output files must be group writable.
$" = ',';		# Used in verbose messages.
($me = $0) =~ s".*/"";
$V = $ENV{"V_$me"} || $ENV{"D_$me"} || 1;			# Verbose level.
($ss,$mm,$hh,$DD,$MM,$YY) = gmtime(time);			# Current date/time.
$ymd = sprintf("%d-%02d-%02d",1900+$YY,1+$MM,$DD);
$hms = sprintf("%02d:%02d:%02d",$hh,$mm,$ss);
$RA = $ENV{REMOTE_ADDR} || '0.0.0.0';				# Who called us.
#V = 3 if $RA =~ /^207\.172\./;
$UA = $ENV{HTTP_USER_AGENT};						# What sort of client?

$exitstat  =  0;	# Exit code, set to nonzero if serious problem.
$ONeills   =  1;	# Enable O'Neill Project kludge/heuristic.
$ryancole  =  1;	# Enable Ryan/Cole Project kludge/heuristic.
$VilMusPr  =  1;	# Enable Village Music Project kludge/heuristic.
$GBmax     = 16 unless $GBmax;	# Max length of GBcode.
$JCmax     = 15 unless $JCmax;	# Max length of JCcode.
$mimetype  = 'text/html';		# Default output doc type.

$get = ($me =~ /Test/) ? 'TestGet' : 'gettune';

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
$dir = 'ndx';
&wsend("<!DOCTYPE HTML PUBLIC '-//W3C//DTD HTML 4.01//EN'>\n");
&wsend("<html>\n<head>\n\t<title>ABC tune finder</title>\n</head>\n<body>\n");

$logfile  = "$tmpdir/find$$.log";
$outfile  = "$tmpdir/lastfind.log";	# Single name for debugging.
if (!open(L,">>$logfile")) {
	&wsend("<b>Can't write \"$logfile\" ($!)\n");
	exit 1;
}
$Lopen = 1;		# Logfile is now open.

unlink($outfile);
link($logfile,$outfile);
select L; $| = 1; select STDOUT;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
$cgi = new CGI_Lite;
%data = $cgi->parse_form_data();

$lmt = $data{L} || 100;
$pat = $data{P} || $data{pattern};
$pat =~ s#^[\s/"]+##;	# Trim initial white space and quotes.
$pat =~ s#[\s/"]+$##;	# Trim trailing white space and quotes.
$pat =~ s/\s\s+/ /;		# Reduce internal white space.

&lsend("From: $RA $ymd $hms V=$V $lmt \"$pat\".\n");
&lsend("Client: \"$UA\"\n") if $V>1;

chomp($cwd = `pwd`);
$data{indexdir} = $indexdir; 
$codes = $data{codes};
if ($V>0) {
	&lsend("Form data:\n");
	for $xx (sort keys %data) {
		$yy = $data{$xx};
		&lsend("$xx = \"$yy\"\n");
	}
}
$F0 = $data{F0};
$F1 = $data{F1};
$F2 = $data{F2};
$F3 = $data{F3};
&wsend("F0=\"$F0\"<br>\n") if $V>2;
&wsend("F1=\"$F1\"<br>\n") if $V>2;
&wsend("F2=\"$F2\"<br>\n") if $V>2;
&wsend("F3=\"$F3\"<br>\n") if $V>2;
$w = $F3 ? 3 : $F2 ? 2 : $F1 ? 1 : $F0 ? 0 : 3;
$t = &abcrequest($0,$tmpdir,$RA,$ymd,$hms,$lmt,"",$pat,"");
if (($i = $tmin - $t) > 0) {	# Should we delay the request?
	&lsend("Delay $i sec.") if $V>1;
	sleep $i;	# Delay until $tmin sec since last request
}

&wsend("w=\"$w\"<br>\n") if $V>2;
@wFile = ('G', 'Get', 'Get',  'Get');
@wTXT  = ('T', 'T',   'TXT',  'TXT');
@wABC  = ('A', 'A',   'ABC',  'ABC');
@wPS   = ('P', 'P',   'PS',   'PS');
@wEPS  = ('E', 'E',   'EPS',  'EPS');
@wPDF  = ('P', 'P',   'PDF',  'PDF');
@wGIF  = ('G', 'G',   'GIF',  'GIF');
@wPNG  = ('P', 'P',   'PNG',  'PNG');
@wMIDI = ('M', 'M',   'MIDI', 'MIDI');
@wcols = (
	"G T A P E P G P M    index meter key      <a href=/~jc/doc/TFhdrs.html>Has     </a> Title\n",
	"<b>   Get T A P E P G P M    index meter key      <a href=/~jc/doc/TFhdrs.html>Has     </a> Title</b>\n",
	"<b>   Get TXT ABC PS EPS PDF GIF PNG MIDI    index meter key      <a href=/~jc/doc/TFhdrs.html>Has     </a> Title</b>\n",
	"<b>   Get TXT ABC PS EPS PDF GIF PNG MIDI    Index Meter Key      <a href=/~jc/doc/TFhdrs.html>Has     </a> Title</b>\n",
);

for $pat (@blacklist) {
	if ($RA =~ $pat) {
		print STDERR "$0: Blacklist address $RA matches '$pat'\n" if $V>1;
		&lsend("Blacklist: address $RA matches '$pat'\n") if $V>1;
		&wsend("<p>Sorry; your address $RA matches an entry on our blacklist.\n");
		&wsend("<p>This is probably due to requests from your address to resemble a search robot,\n");
		&wsend("<p>and the requests were too heavy a load on our server.\n");
		&wsend("<p>Send email to <a href=\"jc\@trillian.mit.edu\">John Chambers</a> for further information.\n");
		exit -1;
	}
}

$idlines = sprintf("P: $pat\nH: %d-%02d-%02d %02d:%02d:%02d UT\n",1900+$YY,1+$MM,$DD,$hh,$mm,$ss);

if ($V>1) {
	&lsend("From: $RA\n$idlines");
	&lsend("Data:\n");
	for $x (sort keys %data) {$y = $data{$x}; &lsend("\t$x\t$y\n")}
}
if ($V>2) {
	&lsend("Environment:\n");
	for $x (sort keys %ENV) {$y = $ENV{$x}; &lsend("\t$x\t$y\n")}
}

&wsend("Looking for \"<b>$pat</b>\".<br>\n") if ($V>1 && $pat);

&wsend("<form method=get action=\"/~jc/cgi/abc/findtune\">\n");
if ($w > 0) {
	&wsend("<center>\n");
	&wsend("<table border=0>\n");
	&wsend("\t<tr>\n");
	&wsend("\t\t<td><img src=\"/~jc/img/music.gif\" alt=[TUNES]></td>\n");
	&wsend("\t\t<td><strong>JC's ABC tune finder</strong></td>\n");
	&wsend("\t\t<td><img src=\"/~jc/img/music.gif\" alt=[TUNES]></td>\n");
	&wsend("\t</tr>\n");
	&wsend("</table>\n");
	&wsend("</center>\n");
}
&wsend("<center>\n");
&wsend("</center>\n");
&wsend("<center>\n");
&wsend("\t<input type=text name=P size=64 value=\"$pat\"><br>\n");
&wsend("\t<input type=submit name=F2 value=\"find (wide)\">\n");
&wsend("\t<input type=submit name=F1 value=\"find (thin)\">\n");
&wsend("\t<input type=submit name=F0 value=\"find (min)\">\n");
&wsend("\tLimit: <input type=text name=L size=6 value=$lmt><br>\n");
&wsend("</center>\n");
&wsend("</form>\n");

&fmtsTable() if $w > 0;
&wsend("<center>Matches for \"<b>$pat</b>\":</center>\n") if ($w>0 && $V>0);

#wsend("(Codes $codes)\n");
if ($codes eq 'on') {
	&wsend("(The big numbers are some experimental <a href=\"/~jc/music/abc/ndx/Codes.html\">musical codes</a>.)\n");
} else {
#	&wsend("\n");
}
&wsend("<pre>");

chdir $indexdir;
chomp($cwd = `pwd`);
&lsend("Changed to $cwd\n") if $V>1; 
@files = glob('??.html'); 

file:
for $file (@files) {
	if (!open(F,$file)) {
		&wsend("<b>File: \"$file\" Can't read ($!) </b>\n");
		next file;
	}
	&lsend("file: $file\n") if $V>2;
line:
	for $line (<F>) {
		unless ($re = eval {qr/$pat/i}) {
			&lsend("Invalid perl pattern \"$pat\"\n");
			&wsend("<br>Invalid perl pattern <b>$pat</b>\n");
			exit 1;
		} elsif ($pat && ($line =~ /$re/)) {
			&matched($line);
			if (defined($lmt) && ($matches >= $lmt)) {
				last file;
			}
		} else {
			&lsend("-: $line") if $V>4;
		}
	}
}
&wsend("</pre>\n");

if (@match) {
#	$matches = @match;
	&lsend("$matches matches.\n") if $V>1;
	&wsend("</pre>\n");
	if ($w > 0) {
		&wsend("<p>$matches matches.\n");
		&wsend("<br>If an index is zero, the entire file is returned;\n");
		&wsend("	if nonzero, only that tune is returned.\n") if $zeroes;
		&wsend("<br>[1] <a href=\"/~jc/music/book/oneills/\">O'Neill's</a> tunes.\n") if $gotONeills;
		&wsend("<br>[2] <a href=\"/~jc/music/book/ryan-cole\">Ryan/Cole</a> tunes.\n") if $gotRyanCole;
		&wsend("<br>[3] <a href=\"http://www.performance.salford.ac.uk/research/vmp/index.htm\">Village Music Project</a> tunes.\n") if $gotVilMusPr;
	}
} else {
	&wsend("<b>No matches.</b>\n");
	&lsend("No matches.\n") if $V>1;
}
if ($w > 0) {
	&wsend("<hr>\n");
	&wsend("Send comments, bug reports, and other suggestions to\n");
	&wsend("	<a href=\"mailto:jc\@trillian.mit.edu\">John Chambers at MIT</a>.\n");
	&wsend("<hr>\n");
}
&lsend("Exit $exitstat.\n") if $V>1;
exit $exitstat;

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given all the info for one match, this routine sends the info to the client
# as  a single line in a consistent format.  The line starts with a series of
# links that retrieve the tune in various formats.  These links are  followed
# with info about the tune, which we may vary from time to time..

sub msend {
	local($url,$ndx,$GBcode,$JCcode,$M,$key,$hdrs,$ttl,$ct) = @_;
	local($nam,$H,$K,$X);
	$nam = &AdjTitle($ttl);	# &name($url,$ttl);
	$nam =~ s/[^A-Za-z0-9]//g;
	$ttl = &abc2html($ttl);
	$url =~ s"^http:/+"http://"i;
	$hdrs =~ s/\s+//g;
	$fl  = '' unless $w;
	$ct  =~ s/^\s*\d+\s*//;
	&lsend("msend: url=\"$url\" hdrs=\"$hdrs\"\n") if $V>2;
	&wsend("$fl<a href=\"$url\">$wFile[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=TXT&U=$url&X=$ndx&T=$ct&N=$nam.abc\">$wTXT[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=ABC&U=$url&X=$ndx&T=$ct&N=$nam.abc\">$wABC[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=PS&U=$url&X=$ndx&T=$ct&N=$nam.ps\">$wPS[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=EPS&U=$url&X=$ndx&T=$ct&N=$nam.eps\">$wEPS[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=PDF&U=$url&X=$ndx&T=$ct&N=$nam.pdf\">$wPDF[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=GIF&U=$url&X=$ndx&T=$ct&N=$nam.gif\">$wGIF[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=PNG&U=$url&X=$ndx&T=$ct&N=$nam.png\">$wPNG[$w]</a> ");
	&wsend("<a href=\"$cgiurl/$get?F=MIDI&U=$url&X=$ndx&T=$ct&N=$nam.midi\">$wMIDI[$w]</a> ");
	$X = substr("            $ndx",-8,8);
	$K = substr("$key            ",0,8);
	$M = substr("$M              ",0,5);
	$H = $hdrs;
	$H .= (' ' x (8 - length($hdrs))) if length($hdrs) < 8;
	&lsend("msend: H=\"$H\"\n") if $V>2;
	if ($codes eq 'on') {
		$GBcode = substr(($GBcode . ('_' x $GBmax)),0,$GBmax) if length($GBcode) > $GBmax;
		$JCcode = substr(($JCcode . ('_' x $JCmax)),0,$JCmax) if length($JCcode) > $JCmax;
		&wsend("$X $GBcode $JCcode $M $K $H $ttl\n");
	} else {
		&wsend("$X $M $K $H $ttl\n");
	}
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This routine is called when we find an acceptable line.  We break it  apart
# into  its  fields,  and  generate a single line of HTML that is sent to the
# client.

sub matched {
	chomp(local($line) = @_);
	local($ct,$M,$hdrs,$key,$ndx,$T,$ttl,$url,$X);
	&wsend($wcols[$w]) if $matches++ < 1;
	push @match, $line;
	&lsend("$line\n") if $V>3;
	$fl = '   ';
	if ((index($line,'/music/book/oneills/') >= 0)
	||  (index($line,'/music/book/oneills/') >= 0)) {
		$fl = '[1]';
		++$gotONeills;
	} elsif (index($line,'/music/book/ryan-cole/') >= 0) {
		$fl = '[2]';
		++$gotRyanCole;
	} elsif (index($line,'/www.performance.salford.ac.uk/research/vmp/') >= 0) {
		$fl = '[3]';
		++$gotVilMusPr;
	} elsif (index($line,'/www.salford.ac.uk/media/research/') >= 0) {
		$fl = '[3]';
		++$gotVilMusPr;
	}
	$sz = $ct = $url = $ndx = $c1 = $c2 = $key = $hdrs = $ttl = '';
	if (($sz,$ct,$url,$ndx,$c1,$c2,$M,$key,$hdrs,$ttl) 
	= ($line =~ m'<!-- *(\d*) (.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s+C1:(\d*)\s+C2:([ud]*)\s+M:(.*)\s+K:(.*)\s+H:([^\s]*)\s*</TT>(.*)'i)) {
		&lsend("M9: U=\"$url\" X=\"$ndx\" M=\"$M\" K=\"$key\" H=\"$hdrs\" T=\"$ttl\" ($ct)\n") if $V>3;
		&msend($url,$ndx,$c1,$c2,$M,$key,$hdrs,$ttl,$ct);
	} elsif (($sz,$ct,$url,$ndx,$c1,$c2,$M,$key,$ttl) 
	= ($line =~ m'<!-- *(\d*) (.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s+C1:(\d*)\s+C2:([usd]*)\s+M:([-\w/+|]*)\s+K:(\w+.*)\s*</tt>(.*)'i)) {
		&lsend("M8: U=\"$url\" X=\"$ndx\" M=\"$M\" K=\"$key\" T=\"$ttl\" ($ct)\n") if $V>3;
		&msend($url,$ndx,$c1,$c2,$M,$key,$hdrs,$ttl,$ct);
	} elsif (($sz,$ct,$url,$ndx,$c1,$c2,$M,$key,$ttl) 
	= ($line =~ m'<!-- *(\d*) (.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s+(\d+)\s+([usd]+)\s+([-\w/+|]*)\s+(\w+.*)\s*</tt>(.*)'i)) {
		&lsend("M7: U=\"$url\" X=\"$ndx\" M=\"$M\" K=\"$key\" T=\"$ttl\" ($ct)\n") if $V>3;
		&msend($url,$ndx,$c1,$c2,$M,$key,$hdrs,$ttl,$ct);
	} elsif (($sz,$ct,$url,$ndx,$c1,$c2,$key,$ttl) 
	= ($line =~ m'<!-- *(\d*) (.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s+(\d+)\s+([usd]+)\s+(\w+.*)\s*</tt>(.*)'i)) {
		&lsend("M6: U=\"$url\" X=\"$ndx\" K=\"$key\" T=\"$ttl\" ($ct)\n") if $V>3;
		&msend($url,$ndx,$c1,$c2,'',$key,$hdrs,$ttl,$ct);
	} elsif (($ct,$url,$ndx,$c1,$c2,$key,$ttl) 
	= ($line =~ m'<!-- *(.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s+(\d+)\s+([usd]+)\s+(\w+.*)\s*</tt>(.*)'i)) {
		&lsend("M5: U=\"$url\" X=\"$ndx\" K=\"$key\" T=\"$ttl\" ($ct)\n") if $V>3;
		&msend($url,$ndx,$c1,$c2,'',$key,$hdrs,$ttl,$ct);
	} elsif (($ct,$url,$ndx,$c1,$c2,$key,$ttl) 
	= ($line =~ m'<!-- *(.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s(\d+)\s(\s+)\s(\w+.*)\s*</tt>(.*)'i)) {
		&lsend("M4: U=\"$url\" X=\"$ndx\" K=\"$key\" T=\"$ttl\" ($ct)\n") if $V>3;
		&msend($url,$ndx,$c1,$c2,'',$key,$hdrs,$ttl,$ct);
	} elsif (($ct,$url,$ndx,$key,$ttl) 
	= ($line =~ m'<!-- *(.*) *--><A HREF="(.*)">ABC</A>\s*_*([\d/.]+)\s+(\w+.*)\s*</TT>(.*)'i)) {
		&lsend("M3: U=\"$url\" X=\"$ndx\" K=\"$key\" T=\"$ttl\"\n") if $V>3;
		&msend($url,$ndx,'','','',$key,$hdrs,$ttl);
	} elsif (($ct,$url,$ndx,$ttl) = ($line =~ m'<!-- *(.*) *--><A HREF="(.*)">abc</A>\s*_*([\d/.]+)\s.*</tt>(.*)'i)) {
		&lsend("M2: U=\"$url\" X=\"$ndx\" T=\"$ttl\"\n") if $V>3;
		&msend($url,$ndx,'','','',$key,$hdrs,$ttl);
	} elsif (($ct,$url,$ndx,$ttl) = ($line =~ m'<!-- *(.*) *--><A HREF="(.*)">Get</A>.*\s+_*([\d/.]+)\s.*</tt>(.*)'i)) {
		&lsend("M1: U=\"$1\" X:$ndx $ttl\n") if $V>3;
		&msend($url,$ndx,'','','',$key,$hdrs,$ttl);
#	} else {
#		&wsend("<b>No match: $line</b>\n");
	}
	++$zeroes if ($ndx eq '0');
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Here we extract a symbolic name from  a  URL  (or  possibly  from  a  title
# string). It will be appended to the URL that calls the tune CGI script, but
# not used there.  This is a subtle trick to make browsers see the name  that
# we  want  them to see, so if the user does a "Save", this name will be used
# for the file name. But browsers can be rather flakey in this regard, and it
# doesn't always work quite right.

sub name {
	local($url,$ttl) = @_;
	if ($url =~ m"([^/]+)\.([^/]+)$") {
		return $1;
	}
	return 'UNKNOWN';
}
