#!/usr/bin/perl -dw
#
#NAME
# unpack - Unpack key:value file to single-name files.
# unpack:name
# unpack:host
# ...
#
#SYNOPSIS
# unpack [file]..
#
#REQUIRES
	$HOME = $ENV{'HOME'} || '.';
	push @INC, "$HOME/lib", "$HOME/sh";
	require "Backup.pm";	# Back up files rather than overwrite them
#
#DESCRIPTION
# This takes a file full of blocks of key:value data and splits it up into  a
# set of single-entry files. The data in the file is expected to be in blocks
# of lines, with each block containing at least one "key"  field  giving  the
# block's name.  For example:
#
#   name:	Joe Smith
#   phone:	735-271-1538
#   Adr1:	17 Prince Street
#   Adr2:	Sometown, MA 01234
#
#   name:	Susan Bjorkdal
#	Adr1:	453 South St.
#   Adr2:	Anotherburg, NH 12345
#
# If this file is fed to unpack:name, the result will be  two  output  files:
# name/JoeSmith and name/SusanBjorkdal, each containing that person's data.
#
# Blocks of data are separated by blank lines.  Each block  will  be  written
# as-is to the output file.  Each block must contain at least one key ("name"
# in the above example); blocks without a key will be  ignored.   Thus,  this
# program  may  be  used to extract blocks of data identified by "key:" lines
# from larger files with other text, such as email messages.
#
# This is a normal "perl filter", reading a list of files or STDIN  if  there
# are no files named on the command line, so you can use it in a pipeline.
#
# This program may be linked to a name of the form "unpack:foo", which  means
# to use "foo" as the default key. A "foo" directory will be created, and the
# output files will be in that directory.  The default key is  "name",  which
# will be used if there are no command-line arguments naming keys.
#
#EXAMPLE
# cd people/
# unpack:name Dancers Musicians name/*
#
# In this example, we unpack the two files Dancers Musicians  into  the  name
# directory.   We also tell unpack to read all the name/* files last, so that
# the data there will overwrite anything in the two big files.
#
#OPTIONS
#  If an option starts with '+' or '-', '+' means on or enable  the  feature;
#  '-'  means  off  or  disable  the  feature.  The case of the option letter
#  doesn't matter.
#  
#  :xyz
#    says to add "xyz" to the list of block keys. If there are no block keys,
#    the key "name" will be used.
#
#  +s.xyz
#    says to add the suffix .xyz to output file names.  The  default  is  -s,
#    which adds no suffix.
#
#  +vN
#    sets the verbose/debug level to the number N.  The default is  1,  which
#    produces error and serious warning messages.  Higher levels are used for
#    debugging.
#
#BUGS
#
#AUTHOR
# John Chamber <jc@trillian.mit.edu> Dec 2000
# You may use this program as you like, unless you try to  claim  it's  yours
# and  prevent  me  from using it myself.  If you make interesting additions,
# please send me a copy.

$| = 1;
$exitstatus = 0;		# Set to nonzero for serious errors
($P = $0) =~ s".*/"";	# Program name minus any directories
$V = $ENV{"V_$P"} || 3;	# Verbosity

$keys = 0;		# Count the block keys
$suff = '';		# Output file suffix

if ($P =~ /^(\w+):(.*$)/) {	# Does our name contain any keys?
	print "Progname '$P' contains '$1', '$2'\n" if $V>1;
	@keylist = split('\s*:\s*',$2);	# List of keys
	for $k (@keylist) {$keytbl{$k} = ++$keys}
	$mainkey = $keylist[0];				# Main key
	print "Keys: [" . join(',',@keylist) . "]\n" if $V>1;
}

for $arg (@ARGV) {
	if (($flg,$opt) = ($arg =~ /^([-+])(.*)$/)) {
		print "$P: Opt '$arg'\n" if $V>1;
		if ($opt =~ /^S(.*)/i) {
			$suff = $1;
			print "$P: File suffix '$suff'\n" if $V>1;
		} else {
			print "$P: Unknown option '$arg' ignored.\n" if $V>0;
		}
	} elsif ($arg =~ /^:(.+)$/) {
		print "$P: Key '$1'\n" if $V>1;
		push @keylist, $1;
		$keytbl{$1} = ++$keys;
	} else {
		print "$P: Arg '$arg' treated as file name.\n" if $V>1;
		push @files, $arg;
	}
}
@ARGV = @files;		# Strip out option and key args.

if ($keys == 0) {
	push @keylist, 'name';
	$keytbl{'name'} = ++$keys;
}
print "$P: We have $keys keys.\n" if $V>1;
$mainkey = $keylist[0];
print "$P: Main key: '$mainkey'\n" if $V>1;
print "$P: All keys: " . join(', ',@keylist) . "\n" if $V>1;

# First, read in all the data for all the names, and extract all  the
# data fields into the %data table:
#
if (-d $mainkey) {
	print "Key directory '$mainkey' exists.\n" if $V>0;
} else {
	if (mkdir($mainkey,0755)) {
		print "Key directory '$mainkey' created.\n" if $V>0;
	} else {
		print "### Can't create directory '$mainkey' [$!]\n" if $V>0;
	}
}

$name = '';		# Last-seen name from a key line
@klist = ();	# Keys found in current block
@block = ();	# Current data block
line:
for $line (<>) {		# Read the input files one line at a time
	$line =~ s/[\r\s]+$//;
	unless ($line) {	# Blank line terminates block
		print "$P: Blank line terminates block of data.\n" if $V>1;
		unless (@block) {
			print "$P: Empty block ignored.\n" if $V>2;
			next line;
		}
		if (($k = int(@blockkeys)) < 1) {
			print "$P: Block has no keys; ignored.\n" if $V>1;
			@block = ();	# Discard block
			next line;
		}
		print "$P: $k keys found in block.\n" if $V>2;
		&saveblock();	# Write this block to its file
		@block = ();	# Discard block
		next line;
	}
	push @block, $line;

 	if (($flag,$datum) = ($line =~ /^\s*(\S+)\s*:\s*(.*)\s*$/)) {
 		print "Line: flag='$flag' datum='$datum'\n" if $V>1;
 		if ($keytbl{$flag}) {
 			print "Key: '$flag'='$datum'\n" if $V>1;
 			$name = &cname($datum);		# Only use alfanums from name
			push @blockkeys, "$flag:$name";	# Save canonicalized key
 	#	} else {
 	#		print "Data '$flag'='$datum'\n" if $V>1;
 	#		$info{"$name:$flag"} = $datum;
 	#		print "Data: $name:$flag\t= $datum\n" if $V>2;
 		}
# 	} else {
# 		print "Ignore $line\n" if $V>2;
 	}
}

print "$P: Exit with status $exitstatus.\n" if $V>1;
exit $exitstatus;

sub cname {my $F='cname';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Canonicalize a key's datum.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($d) = @_;
	$d =~ s/\s+/_/g;
#	$d =~ s/\W+//g;
	return $d;
}

sub dumpblock {my $F='dumpblock';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($l);
	print "+++++++\n";
	for $l (@block) {print "\t$l\n"}
	print "-------\n";
}

sub saveblock {my $F='saveblock';
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
	local($f,$k,$kval,$l,$lines,$writing);
	unless (@blockkeys) {
		print "$P/$F: This block has no keys:\n" if $V>1;
		&dumpblock();
		@block = ();	# Discard the block
		return 0;
	}
filekey:
	for $k (@blockkeys) {	# Look for keys to use as file names
		if ($k =~ /$mainkey:(.*)/) {
			$kval = $1;
			print "$P/$F: Key '$k' value '$kval'\n" if $V>1;
			$f = "$mainkey/$kval$suff";
			print "$P/$F: File '$f'\n" if $V>1;
			if (open(F,">$f")) {
				print "$P/$F: Writing $f ...\n" if $V>1;
				++$writing;
				last filekey;
			} else {
				print "$P: Can't write $f [$!]\n" if $V>0;
			}
		}
	}
	unless ($writing) {
		print "$P: Unable to find writable key name for this block:\n";
		&dumpblock(@block);
		return 0;
	}
	for $l (@block) {
		print F "$l\n";
		++$lines;
		print "$P/$F: LINE $lines: $l\n" if $V>2;
	}
	close F;
	@block = ();		# Discard the block
	return $lines;
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Old code:
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# # Next, run through the names in alphabetical order, and write a  new
# # file for each name:
# #
# entry:
# for $entry (sort keys %info) {
# 	$datum = $info{$entry};
# 	if ($entry =~ /^(\w+):$/) {
# 		close OUT;		# Close previous output file
# 		$file = "$mainkey/$1";	# Path for this name
# 		$datum = $names{$1};	# Get the full name
# 		$writing = 0;	# Note that we can't write now
# 		if (-f $file) {
# 			print STDERR "File \"$file\" replaced.\n" if $V>1;
# 		}
# 		unless (open(OUT,">$file")) {
# 			print STDERR "File \"$file\" unwritable ($!)\n" if $V>0;
# 			next entry;
# 		}
# 		$writing = 1;	# We can write now
# 		print OUT "N	$datum\n";
# 	} elsif (($nm,$flag) = ($entry =~ /^(\w+):(.*)/)) {
# 		print OUT "$flag	$datum\n" if $writing;
# 		
# 	}
# }
