#!/bin/perl
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Copyright (c) 1992 by John Chambers.   Permission  to  use  is  granted  to #
# everyone,  as  long  as  you give me credit for my work (and mistakes), and #
# take credit for what you add or delete.  No warrantee of any kind is stated #
# or  implied.   Whether this code works is dependent, among other things, on #
# your operating system and which version of perl you may have.               #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
#	sortmail [file]...
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# This optionally uses unpackmail to unpack the user's mail, and  scans  each #
# message  for  evidence that it belongs in an assortment of local newsgroups #
# or directories or whatever.  This is rather idiosyncratic, of  course,  and #
# depends  on  what  the  user needs partitioned and/or classified.  The main #
# thing we do, actually, is to look for  various  kinds  of  archives  (shar, #
# uuencoded  tar,  etc)  and  rename them to indicate what they are and which #
# piece each file contains.  Any such file that is identified is deleted from #
# the  mail  directory.   Unrecognized  messages  are left in the user's Mail #
# directory, and their names are output.                                      #
#                                                                             #
# BUG:                                                                        #
#   No file locking is done; incoming mail may be lost.                       #
# BUG:                                                                        #
#   We destroy the input file; if new mail arrived during unpacking,  we  may #
#   have destroyed it, too.                                                   #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

chop($d = `date`);
print("$d $0 @ARGV\n");
$V  = $ENV{"V_sortmail"} || $ENV{"D_sortmail"} || 0;
$HM = $ENV{"HOME"} || "/";
$ME = $ENV{"USER"} || $ENV{"LOGNAME"} || "unknown";
$MB = $ENV{"MAIL"} || "/usr/mail/$ME";
$NL = "/u/news/lib";
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#
# We first process the command-line args.  Usually there won't  be  any, #
# but  we  will accept a list of files to be examined, or a mailbox name #
# preceded by "-m".                                                      #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#
for ($a = 0; $a <= $#ARGV; $a++) {
	$arg = $ARGV[$a];
	if ($arg =~ /[-+%][Dd](.*)/) {	# Debug level.
		$V = $1;
	} elsif ($arg =~ /[-+][Mm](.*)/) {	# Mailbox name.
		$MB = $1 || $ARGV[++$a];
		print "Mailbox file: \"$MB\"\n" if $V;
		next;
	} else {	# Anything else is taken as a filename.
		@files[++$#files] = $arg;
		print "File: \"$MB\"\n" if $V > 1;
	}
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# If there were no files named on the command line (or only a mailbox #
# was  named),  we  look  into  the mailbox and try to unpack it into #
# individual mail files.                                              #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
if ($#files < 0) {
	print "The mailbox is \"$MB\"\n" if $V > 1;
	if (! -s $MB) {
		print("No mail for $ME\n");
		sleep 10;
		exit 0;
	}
	$MD = (-d "$HM/Mail") ? "$HM/Mail" : "$HM/M";
	system "mkdir $MD" if (! -d $MD);
	$filelist = `unpackmail $MD/ <$MB`;
	if ($? || $V > 1) {
		print "==> Mailbox $MB not emptied.\n";
		system "ls -l $MB";
	} else {
		truncate($MB,0);
	}
	@files = split(' ',$filelist);
}
@FL = ();	# List of files to show the user.
for $f (@files) {
	print("File: $f\n"); # if $V;
	if (!open(F,"<$f")) {
		print("Can't read \"",$f,"\"\n");
		next;
	}
	$hdrs =		# Count the header lines we like.
	$lines = 0;	# Count the lines in the file.
	$C = $F = $S = $T = $U = '';
	for $l (<F>) {	# Read thru the header lines.
		chop $l;
#		print "H: $l\n"; if $V > 1;
#		if ($l =~ /^\s*$/) 
		if (($hdrs > 5) || +(+$lines > 50)) {
			close(F);
			last;
		}
		if ($l =~ /^Subj[a-z]*:\s*(.+)/i) {
			$S = $1;
			++$hdrs;
		}
		if ($l =~ /^From:*\s(.*)/i) {
			$F = $1;
			++$hdrs;
		}
		if ($l =~ /^To:\s(.*)/i) {
			$T = $1;
			++$hdrs;
		}
		if ($l =~ /^Cc:\s(.+)/i) {
			$C = $1;
			++$hdrs;
		}
		if ($l =~ /^From\s+([^\s]*)\s/i) {
			$U = $1;	# UUCP From path.
			++$hdrs;
		}
	}
#	print "C: $C\n"; if $V > 1;
#	print "F: $F\n"; if $V > 1;
#	print "S: $S\n"; if $V > 1;
#	print "T: $T\n"; if $V > 1;
#	print "U: $U\n"; if $V > 1;
#
# Looking for stuff from mailing lists is incredibly ad-hoc:
#
	if (("$T" =~ /.*\btaylor-uucp\b/) 
	||  ("$C" =~ /.*\btaylor-uucp\b/)
	||  ("$F" =~ /.*\btaylor-uucp\b/)
	||  ("$U" =~ /.*\btaylor-uucp\b/)
	) {
		print("Taylor uucp S:\t$S\n");
		print("Taylor uucp U:\t$U\n");
		print("Taylor uucp T:\t$T\n");
		print("Taylor uucp C:\t$C\n");
		$mf = &mknam("$MD/tuucp/");
		system "Mv $f $mf";
		next;
	}
	if (("$T" =~ /.*\besixlist\b/) 
	||  ("$C" =~ /.*\besixlist\b/)
	||  ("$F" =~ /.*\besixlist\b/)
	||  ("$U" =~ /.*\besixlist\b/)
	) {
		print("Esix list S:\t$S\n");
		print("Esix list U:\t$U\n");
		print("Esix list T:\t$T\n");
		print("Esix list C:\t$C\n");
		$mf = &mknam("$MD/esix/");
		system "Mv $f $mf";
		next;
	}
	if (("$T" =~ /.*\bsmail3-users\b/) 
	||  ("$C" =~ /.*\bsmail3-users\b/)
	||  ("$F" =~ /.*\bsmail3-users\b/)
	||  ("$U" =~ /.*\bsmail3-users\b/)
	) {
		print("Smail list S:\t$S\n");
		print("Smail list U:\t$U\n");
		print("Smail list T:\t$T\n");
		print("Smail list F:\t$F\n");
		print("Smail list C:\t$C\n");
		$mf = &mknam("$MD/smail/");
		system "Mv $f $mf";
		next;
	}
	if (("$T" =~ /.*\bCPSR\b/) 
	||  ("$C" =~ /.*\bCPSR\b/)
	||  ("$F" =~ /.*\bCPSR\b/)
	||  ("$U" =~ /.*\bCPSR\b/)
	) {
		print("CPSR list S:\t$S\n");
		print("CPSR list U:\t$U\n");
		print("CPSR list T:\t$T\n");
		print("CPSR list F:\t$F\n");
		print("CPSR list C:\t$C\n");
		$mf = &mknam("$MD/CPSR/");
		system "Mv $f $mf";
		next;
	}
# The Subject: line is the most useful discriminant.
#	$S = `egrep '^Subject:' $f`;
#
# Shar kits arrive with all sorts of bizarre subjects, and
# we will have to add to this list as new ones show up:
	if ("$S" =~ /\s*Re:/i) {	# Re: real message.
		@FL = (@FL , $f);	# Add it to list to show to user.
		next;
	}
	if ("$S" =~ /^\s*Connection Report .*/) {
		print "UUCP $S\n";
		system "rm $f";
		next;
	}
	if ("$S" =~ /([A-Za-z0-9._-]+)(..) \[tar\+compress\+btoa\]/) {
		print "Compressed tar file.\n";
		$mf = "$1$2";
		system "Mv $f $mf";
		print "File: $f\t==> $mf\n";
		next;
	}
	if ("$S" =~ /([A-Za-z0-9_-]+)\.sh(..) of /) {
		$mf = "$1.sh$2";
		system "Mv $f $mf";
		print("File: $f\t==> $mf\n");
		next;
	}
	if ("$S" =~ /Subject:(.+) shar[, ]*part.(.*) *$/) {
		$kit = $1;
		$part = $2;
		$kit =~ s/^\s+// ;	# Strip initial spaces from kit name.
		$kit =~ s/\s+$// ;	# Strip off trailing spaces.
		$kit =~ s/\s+/_/g;	# Convert internal spaces to underscores.
		$part =~ s/^\s//  ;	# Strip initial spaces from part number.
		$part =~ s/\s.*// ;	# Strip trailing junk from part number.
		$mf = "$kit.sh$part";	# This should give us a usable kit name.
		system "Mv $f $mf";	# Rename it, being paranoid about collisions.
		print("File: $f\t==> $mf\n");
		next;
	}
	if ("$S" =~ /([A-Za-z0-9_-]+)\.uu(..) of /) {	# GNU shar does this.
		$mf = "$1.uu$2";
		system "Mv $f $mf";
		print("File: $f\t==> $mf\n");
		next;
	}
	if ("$S" =~ /([A-Za-z0-9_-]+)\.kit([0-9]*)/) {
		$mf = "$1.kit$2";
		system "Mv $f $mf";
		print("File: $f\t==> $mf\n");
		next;
	}
	if ("$S" =~ /GERDA *(.*)$/io) {	# La Gerda rakonto el la Esperanta bando.
		printf "GERDA  1='$1'\n";
		$mf = "esp/gerda/$1";
		printf "GERDA mf='$mf'\n";
		$mf =~ s/\s+/_/g;
		printf "GERDA mf='$mf'\n";
		system "Cp $f $mf";
		print("File: $f\t--> $mf\n");
		next;
	}
	if ("$S" =~ /.*Newsgroup.*created/) {
			system "$NL/inews <$f -t '$S' -n mail.news";
			system "rm $f";
			next;
	}
	@FL = (@FL , $f);	# Note any files not processed above.
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# If there were any files that we couldn't identify, we show them to the  #
# user by invoking the editor with the file names on its command line.    #
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
if (@FL) {
	$ED = @ENV{"EDITOR"} || "vi";
	if ($V) {print("Left: @FL\n"); sleep 5}
	system "$ED @FL";
} else {
	print("No unprocessed mail.\n");
	sleep 10;
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #
# Given a prefix, add chars to it to give a non-existent filename.
sub mknam
{
	($prefix) = @_;
	$n = ++$index{$prefix};
	if ($V) {print "mknam: prefix=\"$prefix\" n=$n.\n";}
	$file = $prefix . $n;
	if ($V) {print "mknam: file=\"$file\".\n";}
	while ( -f $file) {
		if ($V) {print "mknam: file=\"$file\" exists.\n";}
		$n = ++$index{$prefix};
		if ($V) {print "mknam: prefix=\"$prefix\" n=$n.\n";}
		$file = $prefix . $n;
		if ($V) {print "mknam: file=\"$file\".\n";}
	}
	return $file;
}
