#!/usr/bin/perl
# NAME
# ListSplit - Split tune list into files by name.
# SYNOPSIS
# ListSplit ListBot.*
# DESCRIPTION
# This reads the HTML tune list generated by TuneList, and writes a
# series of files XX.html, where the XX is the initial two letters
# of the title.
# Note that if there are several tunes with the same canonical name
# and URL, we remember only the last one. This is consistent with
# our scheme for TuneBot, which produces a file ending with the
# date. If we combine several such files from different runs, each
# tune+URL seen last will be the one used.
# AUTHOR
# John Chambers
# Global settings. Everything here is a package global because the rest of
# the script (including the line() routine) reads these names directly.
$| = 1;             # Turn on autoflush for the currently selected output handle.
$P = 'xxyyzz';      # Trace pattern: input lines matching it are counted and echoed to STDERR.
$V = 2;             # Verbosity level; messages are gated on $V>1 and $V>2.
$l = 2;             # Number of chars to use for file name.
# A leading all-digit command-line argument replaces the default $l and is
# removed from @ARGV so that it is not treated as an input file.
$l = shift(@ARGV) if $ARGV[0] =~ /^\d+$/;
# Column widths used by line() when padding the X, Key, Code 1 and Code 2 fields.
($Xmax, $Kmax, $C1max, $C2max) = (6, 12, 16, 15);
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# First, read in all the data. We match several line formats, some of which
# are historical, so that we get all the info produced by several versions of
# TuneBot. The data will be in an essentially random order in which the tunes
# were discovered.
# Read every input line (all files named on the command line, or STDIN) and
# record each recognised tune line in %T. <> is evaluated in list context
# here, so the whole input is read before the first iteration. The loop
# variable $l is the same global that holds the file-name length; foreach
# localises it, so its earlier value is restored when the loop ends.
#
# NOTE(review): the capture numbers used in the branches below do not line up
# with the capture groups visible in the patterns (details at each branch).
# It looks as though parts of the patterns, presumably HTML markup containing
# further groups, were lost at some point -- confirm against a pristine copy
# before relying on what is stored in %T.
for $l (<>) {
# $found counts every line that contains the trace pattern $P, whether or
# not one of the formats below recognises it.
++$found if $l =~ m"$P";
# Format 1: three numeric fields follow the "abc" link text.
# NOTE(review): this pattern has 6 capture groups but $7 and $8 are used;
# they will always be empty as written.
if ($l =~ m#.*href="(.*)">abc *_*(\d+)\s(\d+)\s(\d+)\s(.*) *(.*)#i) {
$T{"$1:$3"} = "$4:$5:$6:$7:$8"; # TITLE:URL -> X:C1:C2:K:Title
# $match counts trace-pattern lines that were recognised; they are echoed
# to STDERR tagged with the branch that accepted them.
if ($l =~ m"$P") {++$match; print STDERR "Match1: $l"}
# Format 2: a single numeric field after the link text.
# NOTE(review): 4 capture groups here, but $5 and $6 are used. The trace
# label below is "Match1", the same as the first branch.
} elsif ($l =~ m#.*href="(.*)">abc *_*(\d+) (.*) *(.*)#i) {
$T{"$1:$3"} = "$4:::$5:$6"; # TITLE:URL -> X:::K:Title
if ($l =~ m"$P") {++$match; print STDERR "Match1: $l"}
# Format 3.
# NOTE(review): 3 capture groups here, but $4 and $5 are used.
} elsif ($l =~ m#.*href="(.*)">abc *_*(\d+) .* *(.*)#i) {
$T{"$1:$3"} = "$4::::$5"; # TITLE:URL -> X::::Title
if ($l =~ m"$P") {++$match; print STDERR "Match2: $l"}
# NOTE(review): this pattern is character-for-character the same as the
# previous one, so this branch can never be taken as written.
} elsif ($l =~ m#.*href="(.*)">abc *_*(\d+) .* *(.*)#i) {
$T{"$1:$3"} = "$4::::$5"; # TITLE:URL -> X::::Title
if ($l =~ m"$P") {++$match; print STDERR "Match3: $l"}
# } elsif ($l =~ m"") {
# $T{$1} = $l;
} else {
# Unrecognised lines are dropped; they are only reported when $V > 2.
print STDERR "$0: Ignore $l" if $V>2;
}
}
# Trace summary; printed only when $V > 1 and at least one trace line was recognised.
print STDERR "$0: $found found $match matched with \"$P\"\n" if $V>1 && $match>0;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# All the data has been read into the %T hash. We now run through it in
# lexical order, and output the data. The first $l chars of the TTL field
# (padded with '_' when the TTL is shorter) decide which output file a tune
# goes to. Every time that prefix changes, we finish the file being written
# and start a new one, ndx/PREFIX.html.
#
# If an output file cannot be opened, a message goes to STDERR and the tunes
# for that prefix are skipped; the remaining prefixes are still processed.
#
# NOTE(review): the header and footer strings written below consist only of
# newlines and a plain-text heading. For files named *.html it looks as
# though their markup was lost at some point -- confirm against a pristine
# copy of this script.
my $width  = ($l =~ /^\d+$/ && $l > 0) ? $l : 2;   # Prefix length; falls back to the historical 2.
my $B      = '';    # Prefix of the most recently started output file.
my $opened = 0;     # True while O refers to a successfully opened file.
my $C;              # Prefix of the tune being processed.
my $ndxfile;        # Path of the file for the current prefix.

for my $key (sort keys %T) {
    # Keys have the form "TTL:URL"; the TTL part must be all word chars.
    my ($TTL, $URL) = ($key =~ m"^(\w+):(.*)$");
    next unless defined $TTL;

    # Values have the form "X:C1:C2:K:Title", where X is numeric.
    my ($X, $C1, $C2, $K, $Title) = ($T{$key} =~ /^(\d+):(.*):(.*):(.*):(.*)$/);
    next unless defined $X;

    $C = substr($TTL . ('_' x $width), 0, $width);
    if ($C ne $B) {
        # Finish the previous file, if there is one, before starting the next.
        if ($opened) {
            print O "\n\n";
            close O;
            $opened = 0;
        }
        # Remember the prefix even if the open fails, so that a failure is
        # reported once per prefix rather than once per tune.
        $B = $C;
        $ndxfile = "ndx/$C.html";
        if (open(O, '>', $ndxfile)) {
            $opened = 1;
            print O "\nABC tunes starting with $C\n";
            # Column titles: constant fields and no URL.
            line('','','ABC','X','Key','Code 1','Code 2','Title');
            print "$ndxfile\n" if $V>1;
        } else {
            print STDERR "$0: Can't write \"$ndxfile\" [$!]\n";
        }
    }
    line($TTL,$URL,'ABC',$X,$K,$C1,$C2,$Title) if $opened;
}

# Finish the last file.
if ($opened) {
    print O "\n\n";
    close O;
}
exit 0;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Write one line to the current output file. The first time this is called #
# for a new file, the args will be constants to produce the column titles, #
# with no URL. The rest of the calls will be with variable args to generate #
# one tune reference. #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
sub line {
    # Arguments, in order: TTL, URL, F (the link label), X, K (key),
    # C1 and C2 (the two code fields), Title.
    # Writes to the global filehandle O and returns the result of print.
    # Lexical copies are used so the caller's globals of the same names are
    # never touched.
    my ($TTL, $URL, $F, $X, $K, $C1, $C2, $Title) = @_;

    # Left-justified columns: pad on the right with blanks, then cut to the
    # column width, so over-long values are truncated on the right.
    $C1 = substr($C1 . (' ' x $C1max), 0, $C1max);
    $C2 = substr($C2 . (' ' x $C2max), 0, $C2max);
    $K  = substr($K  . (' ' x $Kmax),  0, $Kmax);

    # Right-justified column: pad on the left and keep the last $Xmax chars,
    # so an over-long value loses its leading chars.
    $X  = substr((' ' x $Xmax) . $X, -$Xmax, $Xmax);

    # NOTE(review): $TTL and $URL are accepted but do not appear in the
    # output. This routine previously had separate "if ($URL)" and "else"
    # branches that printed exactly the same text; presumably the URL branch
    # once wrapped the entry in a hyperlink -- confirm against a pristine
    # copy of this script.
    print O "\n$F $X $C1 $C2 $K $Title\n";
}