(.*)/i)) { $doctitle = $1; $doctitle =~ s"

#!/usr/bin/perl =head1 NAME TuneList - rewrite a list of tunes as an HTML list. TuneTable - rewrite a list of tunes as an HTML table. =head1 SYNOPSIS TuneList [options] [" This restricts the depth of directory searches to . This is mostly to avoid infinite loops. The default is 2. +t Produce an html table rather than a list. +type (default: off) +key (default: off) +seq (default: off) +time (default: off) +orig (default: off) -abc (default: on) -ps (default: on) -gif (default: on) -tune (default: on) These enable or disable the production of various columns in the output. The defaults are chosen for personal, idiosyncratic reasons, and you might want to modify them to match your own personal preferences. =head1 BUGS This program is highly experimental, in alpha state, and all that. Use it at your own risk. (Not much risk, there, actually, but I thought I'd give the usual friendly warnings.) Just don't write the output back over the input, and check its output with a browser or two, and there shouldn't be many problems. Of course, there are constant problems with slight spelling variations. This program doesn't even attempt to tackle this issue. =head1 REQUIRES When given a URL, this file uses the LWP::Simple module, available at any CPAN site. We require this only when given a URL; this program should run ok for local files with just the basic perl5. =head1 AUTHOR: John Chambers "http://trillian.mit.edu/~jc/music/" =cut use LWP::Simple; use HTML::Entities; ($me = $0) =~ s'.*/''; $D = $ENV{"D_$me"} || $ENV{"T_$me"} || $ENV{"V_$me"} || 0; @URL = ('http://trillian.mit.edu/~jc/music/abc/Test/'); # Default URL. $max{orig} = 10; # We pad origin fields to this many bytes. $max{type} = 2; # We pad type fields to this many bytes. $max{depth} = 2; # Default depth limit for directories. $dirdepth = 0; # The current depth in directories. # Here are flags for the various columns of output: %want = (type => 0, orig => 0, abc => 1, ps => 1, gif => 1, tune => 1,); # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Scan the command-line arguments, processing them as we go. Input # # files are read and appended to @doc; URLs are accumulated in @URL. # # Options are processed as read, so they will only affect things to # # their right, except for URLS which we save for last. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # for $a (@ARGV) { if (($fl,$opt) = ($a =~ m'^([-+])(.*)'i)) { if ($opt =~ m'^A$'i) { $alltunes = 1} elsif ($opt =~ m'^D(\d+)$'i) { $max{depth} = $1} elsif ($opt =~ m'^L$'i) { $want{list} = ($fl eq '-') ? 0 : 1} elsif ($opt =~ m'^T$'i) { $want{table} = ($fl eq '-') ? 0 : 1} else { $want{$opt} = ($fl eq '-') ? 0 : 1} } elsif ($a =~ m'^<') { print STDERR "$me: Add \"$a\" to doc ...\n" if $D>2; push @doc, &load($a); print STDERR "$me: Added \"$a\" to doc.\n" if $D>2; ++$infiles; } elsif ($a =~ m'\|$') { print STDERR "$me: Add \"$a\" to doc ...\n" if $D>2; push @doc, &load($a); print STDERR "$me: Added \"$a\" to doc.\n" if $D>2; ++$infiles; } else { $URL[$urls++] = $a} } if (!$infiles) { print STDERR "$me: Add STDIN to doc ...\n" if $D>2; @doc = ; print STDERR "$me: Added STDIN to doc.\n" if $D>2; } if (!($want{table} || $want{list})) { if ($me =~ /tabl/i) {$want{table} = 1} elsif ($me =~ /list/i) {$want{list} = 1} } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Read the original tune list, and extract what we can from it. This # # gets a bit tricky, due to the variety of input formats that we # # expect, because the input was generally typed by humans with no # # regard for the needs of dumb programs. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # chomp @doc; line: for $line (@doc) { next line if !$line; ++$lines; print STDERR "$me: l=\"$line\"\n" if $D>2; if (!$titl && ($line =~ m/\s*(.*)/i)) { $doctitle = $1; $doctitle =~ s".*""i; print STDERR "$me: doctitle=\"$doctitle\"\n" if $D>2; next line; } if ($line =~ m"^\s*") { if ($line =~ m"^\s*.* ") { &ttline($line); } else { $ttfl = 1; $ttline = $line; } ++$matches; @footer = (); next line; } if ($ttfl) { $ttline .= " $line"; if ($line =~ " ") { &ttline($ttline); $ttfl = 0; } ++$matches; @footer = (); next line; } if (!$html && ($line =~ m"^(\S+)\s+([A-Z]\S*)\s+([A-Z].*)$")) { &ttline("$1 $2 $3 "); ++$matches; @footer = (); next line; } if ($matches < 1) { push @header, $line; # Everything before the first matched line. } else { push @footer, $line; # Everything after the last matched line. } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Run through the list of URLs, reading each one. If the data looks # # like a directory, we read it recursively. If the data is a .abc file, # # we read it, extract the title(s), and add it to our %U table for # # later use.. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # for $u (@URL) { print STDERR "$me: URL=\"$u\"\n" if $D>2; if ($u =~ m"\.abc/*$") { &scanabc($u); # Extract abc header. } else { &htmldir($u); # Extract html directory. } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Now we produce the output document. First, we print any accumulated # # header info (unmatched lines at the start of the input), and a # # Then either a list or table of tunes, followed by whatever footer # # stuff was present in the input after the last matched line. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # if ($doctitle) { print "$doctitle\n"; } else { print "Tune table\n"; } for $l (@header) { ++$ishtml if (!$ishtml && ($l =~ /<\w*>/)); $l = HTML::Entities::encode_entities($l) if (!$ishtml); print "$l\n"; ++$gothr if ($l =~ //i); } print "\n" if !$gothr; if ($want{table}) { &abctable; } elsif ($want{list}) { &abclist; } else { &abclist; } print "\n" if !$gothr; for $l (@footer) { ++$ishtml if (!$ishtml && ($l =~ /<\w*>/)); if (!$ishtml) { $l = HTML::Entities::encode_entities($l); $l =~ s#\<(\w+@[\w.]+)\>#$&#; } print "$l\n" } exit 0; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # The routines use a number of global arrays to hold data about tunes # as they are encountered. As an example of the global arrays, suppose # we find that URL/foo.abc contains the tunes: # X: 1 # T: George's Jig # T: Sam's Tune # O: Wales # K: Am # ... # X: 2 # T: Fred's Folly # T: George's Jig # O: Kerry # K: Em # ... # We will leave behind the following global data: # # The number of times each tune is encountered: # $Tcount{georgesjig} = 2 # $Tcount{fredsfolly} = 1 # $Tcount{samstune} = 1 # The URL for each tune: # $U{abc:1:fredsfolly} = 'URL/foo.abc' # $U{abc:1:georgesjig} = 'URL/foo.abc' # $U{abc:2:georgesjig} = 'URL/foo.abc' # $U{abc:1:samstune} = 'URL/foo.abc' # The list of tunes in each URL: # $Tunes{URL/foo.abc} = ('1:georgesjig', '1:samstune', '1:fredsfolly', '2:georgesjig') # Whether a tune is wanted in the output: # $Tout{georgesjig} = 1 if $alltunes # Each tune's title: # $Title{1:fredsfolly} = "Fred's Folly" # $Title{1:georgesjig} = "George's Jig" # $Title{2:georgesjig} = "George's Jig" # $Title{1:samstune} = "Sam's Tune" # Each tune's key: # $Tkey{1:fredsfolly} = 'Em' (The first occurrence of fredsfolly has K:Em) # $Tkey{1:georgesjig} = 'Am' # $Tkey{2:georgesjig} = 'Em' # $Tkey{1:samstune} = 'Am' # The X: number of each tune: # $Tseq{1:fredsfolly} = 2 (The first occurrence of fredsfolly has X:1) # $Tseq{1:georgesjig} = 1 # $Tseq{2:georgesjig} = 2 # $Tseq{1:samstune} = 1 # The O: origin of each tune: # $Torig{1:fredsfolly} = 'Kerry' # $Torig{1:georgesjig} = 'Wales' (The first occurrence of georgesjig claims it's from Wales) # $Torig{2:georgesjig} = 'Kerry' (The second occurrence of georgesjig claims it's from Kerry) # $Torig{1:samstune} = 'Wales' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Produce the new listing. At present, we just write to stdout, but # # maybe eventually we'll try pushing the result out to a web page. We # # output only those things in the %Tname list, which should have an # # entry for each line in the original input file. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # sub abclist { local($orig,$name,$tabc,$tgif,$titl,$tps,$type,$u); if (1) { # Produce a header line? $Tname{ '' } = 'title'; # Will sort to first place in list. $Title{'1:'} = 'Title'; # Set up header info ... $Torig{'1:'} = substr(('Origin' . ('_' x $max{orig})),0,$max{orig}); $Ttype{'1:'} = substr(('Type' . ('_' x $max{type})),0,$max{type}); } for $name (sort keys %Tname) { next if !$Tout{$name}; $nuna = "1:$name"; $orig = &pad($Torig{$nuna}, '_', 'orig'); $titl = &abc2html($Title{$nuna}); $tabc = &findURL('abc',1,$name,$titl); $tps = &findURL('ps',1,$name,$titl); $tgif = &findURL('gif',1,$name,$titl); $type = &pad($Ttype{$nuna},'_','type'); if (!$titl) { print STDERR "$me: No title for \"$nuna\"\n" if $D>0; } print ""; if ($want{type}) {print " $type"} if ($want{orig}) {print " $orig"} if ($want{abc} && $tabc) {print " abc"} else {print "___"} if ($want{ps} && $tps ) {print " ps"} else {print "__ "} if ($want{gif} && $tgif) {print " gif"} else {print " ___"} print ""; if ($want{tune}) {print "$titl \n"} } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Produce the new table. At present, we just write to stdout, but # maybe eventually we'll try pushing the result out to a web page. We # output only those things in the %Tune table, which should have an # entry for each line in the original input file. # sub abctable { local($orig,$n,$name,$nuna,@ps,$tabc,$tgif,$titl,$tkey,$tseq,$type,$u); print "\n\n"; if ($want{type}) {print "\t\n"; for $name (sort keys %Tname) { $n = $Tcount{$name}; next if !$Tout{$name}; print "\n"; for ($i=1; $i <= $n; $i++) { $nuna = "$i:$name"; $titl = &abc2html($Title{$nuna}); if (!$titl) {print STDERR "$me: No title for \"$nuna\"\n" if $D>0} print "\n"; if ($want{type}) { $type = $Ttype{$nuna}; print "\t" } if ($want{orig}) { $orig = $Torig{$nuna}; print "\t" } if ($want{abc}) { $tabc = &findURL('abc',$i,$name,$titl); print "\t\n"); } if ($want{ps}) { $tps = &findURL('ps',$i,$name,$titl); print "\t\n"); } if ($want{gif}) { $tgif = &findURL('gif',$i,$name,$titl); print "\t\n"); } if ($want{key}) { $tkey = $Tkey{$nuna}; print "\t\n"); } if ($want{tune}) {print "\t\n"} print "\n"; } } print "type"} if ($want{orig}) {print "\t origin"} if ($want{abc}) {print "\t abc\n"} if ($want{ps}) {print "\t ps\n"} if ($want{gif}) {print "\t gif\n"} if ($want{key}) {print "\t key\n"} if ($want{tune}) {print "\t tune\n"} print " $type $orig "; if ($tabc) { print "abc"; $tseq = $Tseq{$nuna}; print " $tseq" if $tseq; } else { print '___'; } print(" "; if ($tps) {print "ps "} else {print '__'} print(" "; if ($tgif) {print "gif "} else {print '___'} print(" "; if ($tkey) {print $tkey} else {print '___'} print(" $titl\n"; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Convert the abc escape sequences to HTML. # sub abc2html { local($s) = @_; $s =~ s#\\(o)#\&${1}slash;#ig; $s =~ s#\\a(a)#\&${1}ring;#ig; $s =~ s#\\"(\w)#\&${1}uml;#ig; $s =~ s#\\'(\w)#\&${1}acute;#ig; $s =~ s#\\`(\w)#\&${1}grave;#ig; $s =~ s#\\,(\w)#\&${1}cedille;#ig; $s =~ s#\\~(\w)#\&${1}tilde;#ig; $s; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # &cache(name,type,orig,tune,abc,ps,gif) # Remember an assortment of info about a tune. # sub cache { local($name,$type,$orig,$titl,$abc,$ps,$gif) = @_; local($n,$nuna); $n = ++$Tcount{$name}; $nuna = "$n:$name"; $Ttype{$nuna} = $type; $Torig{$nuna} = $orig; $Title{$nuna} = $titl; if ($abc) { $U{"abc:$nuna"} .= "$abc "; $U{"abc:$name"} .= "$abc "; } if ($gif) { $U{"gif:$nuna"} .= "$gif "; $U{"gif:$name"} .= "$gif "; } if ($ps) { $U{"ps:$nuna"} .= "$ps "; $U{"ps:$name"} .= "$ps "; } $max{type} = Max($max{type},length($type)); $max{orig} = Max($max{orig},length($orig)); } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Canonicalize a name. We do InterCaps, and strip out all funny chars. # sub canon { local($name) = @_; local($init,$rest); $name = lc($name); # Lower-case everything. $name =~ s"&(\w)\w*;"$1"g; # De-htmlize the name. $name =~ s/,.*//; # Discard everything after a comma. $name =~ s"\W+""g; # Delete non-alpha chars. $Tname{$name} = 1; # Note that we've seen the name. return $name; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Register a file under its name and type. For example, if we note # the file 'http://foo.bar/qux.gif', we call # &file('gif','http://foo.bar/qux','http://foo.bar/qux.gif') # This will leave behind global information: # $U{'gif:qux'] = 'http://foo.bar/qux.gif' # This tells us how to find a gif file for the name 'qux'. Note that # we canonicalize the name, so Foo_Bar.ps will be remembers under the # key 'foobar'. # sub file { local($ext,$pth,$url) = @_; (local($nam) = $pth) =~ s'.*/''; print STDERR "$me/file: $url\n" if $D>1; $nam = &canon($nam); $U{"$ext:$nam"} .= "$url "; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # This looks in several arrays for the "right" URL to produce for a # # specific tune, given the name, number, and type codes. If we can't # # find a usable URL, we return a null string. # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # sub findURL { local($typ,$num,$nam,$ttl) = @_; local($u); for $lis ($U{"$typ:$num:$nam"}, $U{"$typ:$nam"}) { @lis = split ' ', $lis; for $u (@lis) { return $u if $u; } } if ($ttl =~ /^(\w*):\s*(.*)/) { return $u if ($u = &findURL($typ,$num,&canon($2),$2)); } return ''; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Grovel through an html directory listing, and check out each of the # files listed. Directories cause recursive traversal. Files with # interesting suffixes are read. # sub htmldir { local($path) = @_; local($item,$href,$line,@data); @data = &load($path); $html = 0; # Not (yet) known to be HTML format. print STDERR "$me: path=\"$path\"\n" if $D>2; ++$dirdepth; $path =~ s"/+$""; for $line (@data) { next if !$line; if (($href,$item) = ($line =~ m#(.*)#i)) { $html = 1; # It looks like HTML format. if ($href =~ '/$') { if (!($item =~ /Parent Dir/i)) { # Ignore parent directory. &htmldir("$path/$href") # Recurse into directory. if ($dirdepth < $max{depth}); } } elsif ($href =~ m'(.*)\.abc$'i) { &scanabc("$path/$href") } elsif ($href =~ m'(.*)\.gif$'i) { &file('gif',$1,"$path/$href"); } elsif ($href =~ m'(.*)\.ps$'i ) { &file('ps',$1,"$path/$href")} } elsif (-d $line) { &htmldir("$path/$line") if ($dirdepth < $max{depth}); } elsif (-f $line) { if ($line =~ m'.*/(.*)\.abc$'i) {&scanabc("$path/$line")} } else { print STDERR "$me/htmldir: Can't parse \"$line\"\n" if $D>2; } } --$dirdepth; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Here's a routine to get either a local file, a local directory, or a # web page. If the arg is the name of a directory, we read it and # convert its contents to html, so that later code has only one format # for directories. With web pages, there's a minor kludge in that the # LWP::Simple packet's get routine doesn't do array context, so we # have to do the split ourself. This routine will also work if the arg # is a shell command, ending with '|'. # sub load { local($name) = @_; local($txt,@doc,$path); print STDERR "$me/load: $name\n" if $D>1; ($path = $name) =~ s/^[<\s]+//; if (-d $path) { if (opendir(DOC,$path)) { @doc = grep !/^\./, readdir DOC; for $d (@doc) {$d =~ s#(.*)#$1#} } else { print STDERR "$me: Can't read \"$path\" ($!)\n"; @doc = (); } } elsif (open(DOC,$name)) { @doc = ; close DOC; } else { $txt = LWP::Simple::get($path); @doc = split(/\n/, $txt); } print STDERR "$me/load: Got $#doc lines from $path\n" if $D>4; @doc; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # Here's a routine that's missing from perl We just return the max of # all our parameters, which should be numeric. sub Max { local($n) = shift; for (@_) {$n = $_ if $_ > $n; shift} return $n; } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # This pads an orig field to $max{orig} chars. The pad arg is the pad # char; if null, we don't do any padding. # sub pad { local($str,$pad,$fld) = @_; local($m); $m = 1 if !($m = $max{$fld}); return ($pad ? ($str ? ($str . ($pad x ($m - length($str)))) : ($pad x $m)) : $str); } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # This routine reads an abc source file and extracts whatever info we # are currently looking for there. # sub scanabc { local($URL) = @_; local(@a,$line,$name,$n,$K,$O,$T,$TX,$t,$h,$X); @a = &load($URL); $t = 0; # Count the tunes. line: for $line (@a) { # Read thru the abc file. if ($line =~ m/^(X):\s*(\d+)/) { $X = $2; } elsif ($line =~ m/^(T):\s*(.*)/) { $T = $2; if ($T =~ /^\d/) { if ($T =~ /^\d+x\d+\w/) { $Ttype{$nuna} = $T; $max{type} = Max($max{type},length($T)); } next line; } ++$t; $name = &canon($T); # Canonicalize the tune'sname. $n = ++$Tcount{$name}; # Number of instances of this tune. $TX[$t] = $X; # Remember the tune's X: index. $nuna = "$n:$name"; # Num+name for this tune. $U{"abc:$name"} .= "$URL "; # Add to list of possible URLs. $U{"abc:$nuna"} .= "$URL "; # Add to list of specific URLs. # $Tunes{$URL} .= "$nuna "; # Mapping from URL -> num+name. $Title{$nuna} = $T; # Remember this instance's title. $Tout{$name} = 1 # Force it into output, if ($alltunes); # if caller asked for everything. } elsif ($line =~ m/^(O):\s*(.*)/) { $O = $2; for ($i=1; $i<=$t; $i++) { # Search thru tunes in this file. if ($TX[$i] == $X) { # If this is the current tune, $Tseq{$nuna} = $X if $X; # Remember X and O fields. $Torig{$nuna} = $O if $O; $max{orig} = Max($max{orig},length($O)); } } } elsif ($line =~ m/^(K):\s*([\^=_\s\w]+)/) { $K = $2; for ($i=1; $i<=$t; $i++) { if ($TX[$i] == $X) { $Tseq{$nuna} = $X if $X; $Tkey{$nuna} = $K if $K; } } } } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # # This routine parses the "......" lines that are found in # previous TuneList output files. This routine will need more cases as # we proceed with feeding it newer (and more complex) lines from # various listing. Stay tuned ... # sub ttline { local($line) = @_; local($titl,$type,$orig,$orig,$name,$ltyp,$href,%hrefs,$n); if ($line =~ s#^\s*(.*)\s+(.*)\s*##) { $type = $1; $orig = $2; $orig =~ s"[.\s]+$""; } while ($line) { print STDERR "$me/ttline: Field \"$line\"\n" if $D>5; if ($line =~ s#^\s*([a-z]+)\s*##) { print STDERR "$me/ttline: Drop: \"$1\"\n" if $D>4; next; } if ($line =~ s#^\s*([a-z]+)##i) { $href = $1; $ltyp = $2; print STDERR "$me/ttline: Link type $ltyp \"$href\"\n" if $D>3; $hrefs{$ltyp} = $href; next; } if (!$html && ($line =~ s#\s*([A-Z].*)##)) { $titl = $1; while ($titl =~ s#(<[^<]*>)##) { print STDERR "$me/ttline: Removed \"$1\" from tune.\n" if $D>6; } $name = &canon($titl); print STDERR "$me/ttline: Tune name \"$name\" title \"$titl\"\n" if $D>3; $Tout{$name} = 1; # Include it in output. &cache($name,$type,$orig,$titl,$hrefs{abc},$hrefs{ps},$hrefs{gif}); if ($titl =~ m#^(\w+)\s*:\s*(.*)\s*#) { # "Word:" deleted. $titl = $2; $name = &canon($titl); $Tout{$name} = 1; # Include it in output. &cache($name,$type,$orig,$titl,$hrefs{abc},$hrefs{ps},$hrefs{gif}); } } } return $line; }