/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
NAME
   align - vertically align data fields.
SYNOPSIS
   align [-options] [lcr][+-][n]...

DESCRIPTION
   Align the input in vertical columns.  The column  indicators  start
   with  'l',  'c',  or  'r',  depending  on  whether  they  are to be
   left-aligned, centered, or right-aligned.  If there is a number, it
   is a field width.  A '-' means n or fewer characters; a '+' means n
   chars or more; neither means exactly n. Without a number, the field
   will be the width of the longest value.

   A  line  may  have  "junk"  after the last field; this junk will be
   preserved as a final left-aligned field.  (But see the BUGS section
   below for a possible exception.)

OPTIONS
   The options fields may start with a '-'  or  '+'  flag.   For  some
   options, '+' means "enable" and '-' is "disable".  For others, this
   isn't meaningful, and you can use either flag.  The options are:

   -b[n]
      This means to buffer n lines of input before generating  output;
      the  default is -b100.  If the width of a column's longest value
      changes after the nth line, then the output won't be  completely
      aligned.   In  pipes,  you may want this value to be smaller, as
      nothing will be output until the pipe is full.

   -d[n][file]
      This sets the  debug  counter  and  output  file.   The  D_align
      environment variable does the same thing.  The default is "-d1",
      with output going to stderr (file 2).

   +i<n><c>
      Indent the output by <n> columns using the character  <c>.   For
      ease of typing, <c> may be 's' to get spaces or 't' to get tabs.
      The default is "-i", no  indenting  at  all;  "+i"  alone  means
      "+i1t".

   -s"chars"
   +s"chars"
      This sets the list of separator characters to whatever is in the
      rest  of  the option string.  The default includes a space and a
      tab.  The '+' flag means  to  preserve  the  separators  in  the
      output;  the  default -s just aligns the fields with only spaces
      as separators.  Note that the quotes used above are just a hint;
      you don't need to use them unless you need to protect the string
      from your shell.

   -t
   +t
      These control trimming of white space around fields. The default
      +t  means  that if the -s option is used, align should trim away
      spaces and tabs around separators before aligning fields. If the
      -t  option  is  used, they will be considered part of the field.
      Without the -s option, spaces and tabs  act  as  separators,  so
      they  are always considered to be not part of the field, and the
      't' option is pointless.

   Note  that the default, if neither 's' nor 't' options are used, is
   that any string  of  white  space  is  considered  a  single  field
   separator.  The 's' option defines one or more separators, and each
   is considered a field separator.  Thus if you use "+s|",  then  any
   '|' is a new field, and "|||" contains two empty fields.

   Note  also that the output always has white space trimmed away from
   the right edge of all lines.

FILES
   At present, align only reads from stdin.  Perhaps it could use  the
   usual  loop  to  open files named on the command line.  But you can
   always use cat to do that, and pipe the catenated data to align.

BUGS
   There is no mechanism at present to include  newlines  as  possible
   separators, since a newline is always a line terminator.

   There is no way to specify an output column separator.

AUTHOR
   John Chambers (jc@trillian.mit.edu)

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include "V.h"	/* JC's debug package */

/*
* Constants.  FALSE, TRUE and CHARS are only defined here when an earlier
* header has not already supplied them.
*/
#ifndef FALSE
#define FALSE 0		/* C's false value (usually only zero will work) */
#endif
#ifndef TRUE
#define TRUE 1		/* C's true value (usually any nonzero will work) */
#endif
#ifndef CHARS
#define CHARS 256	/* Size of character set on this machine; also the size of sep[] */
#endif
#define COLS 100	/* Size of the per-column tables below.  main() also uses slot [cols] for the trailing "junk" field. */
#define INBUF 500	/* Size handed to fgets() for one input line; each line buffer is INBUF+1 bytes */

/*
* Per-column tables, indexed by column number and filled in by main()
* from the command-line column arguments.
*/
char align[COLS];	/* 'l', 'c', or 'r': left, centered or right alignment */
char bound[COLS];	/* '-', '*', or '+'; '*' is the default.  Recorded by main() but not read by any code in this file section. */
int  width[COLS];	/* Column width (so far): starts at the requested width and grows to the longest value seen */

int  indent    = 0;		/* Number of indentch characters written at the start of every output line */
char indentch  = '\t';	/* Character used for indenting */

Flag sep[CHARS];		/* Separators: sep[ch] is TRUE when character code ch separates fields */
char*spaces = " \t";	/* Default separator list, installed by main() before the options are parsed */
Flag sepfl = 0;	/* Save separators: when set, the separator after each field is kept and written back out */
Flag trimfl = 1;	/* Trim spaces around separators */
char * trim();		/* Forward declaration; trim() is defined further down and called from outchar() */

/*
* Line bookkeeping.  Lines are held in a ring of blines slots; line number
* k lives in slot k % blines.
*/
int  bline  =  0;	/* Buffer line number: slot that receives the next input line */
int  iline  =  0;	/* Input  line number (not referenced by the code in this file section) */
int  oline  =  0;	/* Output line number (not referenced by the code in this file section) */
int  blines =100;	/* Buffer line count: number of slots in the ring */
int  ilines =  0;	/* Input  line count: lines read so far */
int  olines =  0;	/* Output line count: lines written so far */
int  cols = 0;		/* Number of declared columns; fields are numbered 0..cols, with [cols] the trailing field */
char **inbuf;		/* List of input lines: inbuf[slot] is the raw text buffer of a line */
char***field;		/* List of field pointers: field[slot][f] points into inbuf[slot] (or at "") */
char **insep;		/* List of field separators: insep[slot][f] is the separator seen after field f; only allocated when sepfl is set */

Str obuf = {0};		/* Output line under construction; see outchar() */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Append the character c to the pending output line in obuf, growing the
* buffer first when it is full.  When the character just stored is a newline
* or a carriage return, the line is complete: trailing white space is
* trimmed off, the terminator is put back, the line is written to file 1
* (stdout) and the buffer is emptied.
*
* Returns the number of characters left pending in obuf (0 right after a
* line has been written).  If the buffer cannot be grown, the character is
* dropped and the current pending length is returned.
*/
FCT outchar(c)
{
	int stored;		/* The character as it actually landed in the buffer */

	Fenter("outchar");
	V6 "c: '%c'",Dsp(c) D;
	if (obuf.l >= obuf.m) {
		/* Buffer is full; ask for roughly twice the room. */
		if (!MinStr(&obuf,obuf.l * 2 + 1)) {
			Fail;
		}
	}
	stored = (obuf.v[obuf.l++] = c);
	if (stored == '\n' || stored == '\r') {
		/*
		* trim() nulls out the terminator along with any white space
		* before it, so the string length is the trimmed line length.
		*/
		trim(obuf.v,obuf.l);
		obuf.l = Strlen(obuf.v);
		obuf.v[obuf.l++] = c;	/* Put the terminator back */
		Write(1,obuf.v,obuf.l);
		obuf.l = 0;
	}
fail:
	Fexit;
	return obuf.l;
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Output one line.
*/
outline(n)
{	int b, f, i;
	int lpad, size, rpad;

	Fenter("outline");
	b = n % blines;
	V7 "Line %d is buffer %d.",n,b D;
	for (i=0; i<indent; i++)
		outchar(indentch);
	for (f=0; f<=cols; f++) {
		size = strlen(field[b][f]);
		switch (align[f]) {
		  default:
			V2 "### Undefined alignment %02X='%c'",B8(align[f]),dsp(align[f]) D;
		case 'l':
			lpad = 0;
			rpad = size < width[f] ? width[f] - size : 0;
			break;
		case 'c':
			lpad = size < width[f] ? (width[f]-size)/2 : 0;
			rpad = width[f] - size - lpad;
			break;
		case 'r':
			lpad = size < width[f] ? width[f] - size : 0;
			rpad = 0;
			break;
		}
		V5 "Field%2d width=%d lpad=%d size=%d rpad=%d.",f,width[f],lpad,size,rpad D;
		if (f && !sepfl) outchar(' ');
		for (i=0; i<lpad; i++) outchar(' ');
		for (i=0; i<size; i++) outchar(field[b][f][i]);
		if (f < cols)	/* Don't right-pad the last field */
		for (i=0; i<rpad; i++) outchar(' ');
		if (sepfl && insep[b][f])
			outchar(insep[b][f]);
	}
	outchar('\n');
	++olines;
	FExit;
	return(0);
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Replace the separator set.  Every entry of sep[] is cleared first, then
* the entry for each character of the null-terminated list p is set to
* TRUE.  An empty list leaves no separators at all.
*/
delimiters(p)
	byte*p;
{	int  ch, k;
	Fenter("delimiters");
	k = CHARS;
	while (k > 0) {
		sep[--k] = FALSE;
	}
	for ( ; (ch = *p) != 0; p++) {
		V2 "Separator: %02X='%c'",B8(ch),Dsp(ch) D;
		sep[ch] = TRUE;
	}
	FExit;
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Trim white space from both ends of the string p.  n is the length of the
* string; it is calculated with strlen() if it is negative.
*
* Trailing white space is trimmed in place, by replacing the white chars
* with nulls.  Leading white space is not modified; it is simply skipped,
* and the return value points at the first non-white character of p (or at
* the terminating null if nothing else is left).  Returning the pointer
* makes it easy to pass the result to a printf call, but the new length
* isn't returned in any way.
*
* Characters are converted to unsigned char before being handed to
* isspace(), which is only defined for EOF and unsigned char values; a
* plain char may be negative for bytes above 0x7F.
*/
char * trim(p,n)
	char *p;
	int   n;
{	int   c;
	Fenter("trim");
	V5 "Called n=%d \"%s\"",n,p D;
	if (n < 0)
		n = strlen(p);
	while (n>0 && isspace((unsigned char)p[n-1]))
		p[--n] = 0;		/* Trim away white space on right*/
	V7 "\"%s\"",p D;
	while ((c = (unsigned char)*p) && isspace(c))
		++p;			/* Trim on left, too */
	V7 "\"%s\"",p D;
	FExit;
	return p;
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
*/
int main(ac,av)
	char**av;
{	int   a, c, f, i, n;
	int   c0, c1, c2, cx;
	char *ip, *p;

	ac = Vinit(ac,av);
	delimiters(spaces);
	/*
	* Note that in this loop, a is the argument counter and c is the column
	* counter.
	*/
	for (a=1, c=0; a < ac; a++) {
		V5 "Arg%3d=\"%s\"",a,av[a] D;
		align[c] = 'l';	/* Column defaults */
		bound[c] = '*';
		width[c] =  0 ;
		switch (c0 = av[a][0]) {
		case '-':
		case '+':
			V5 "Option \"%s\"",av[a] D;
			switch (c1 = av[a][1]) {
			case 'b':		/* Buffer length (lines) */
			case 'B':
				if (sscanf(av[a]+2,"%d",&blines) < 1)
					blines *= 2;
					break;
			case 'd':		/* Debug option */
			case 'D':
				V5 "Vopt(\"%s\")",av[a] D;
				Vopt(av[a]+2);
				V2 "Debug level %d." D;
				break;
			case 'i':
			case 'I':
				if (indent = (c0 == '+')) {
					for (i=0, p=av[a]+2; (cx = *p) && isdigit(cx); p++) {
						i = (i * 10) + (cx - '0');
					}
					indent = i ? i : 1;
					switch (cx) {
					case 0:
					case 't': indentch = '\t'; break;
					case 's': indentch = ' ' ; break;
					default : indentch = c;
					}
				}
				break;
			case 's':		/* Separator list */
			case 'S':
				sepfl = (c0 == '+');	/* Preserve separators? */
				delimiters(av[a]+2);
				break;
			case 't':
			case 'T':
				trimfl = (c0 == '+');	/* Trim fields? */
				break;
			  default:
				V1 "Unknown option \"%s\" ignored.",av[a] D;
				break;
			}
			break;
		case 'l':
		case 'c':
		case 'r':
			p = av[a];
			align[c] = c0;
			bound[c] = '*';			/* Unbounded column */
			switch (c1 = *++p) {
			case  0 :
				width[c] = 0;		/* Min col width */
				break;
			case '+':
			case '-':
				bound[c] = c1;
				++p;
			case '9': case '8': case '7':
			case '6': case '5': case '4':
			case '3': case '2': case '1':
			case '0':
				width[c] = atoi(p);
				break;
			default:
				V1 "Unknown width \"%s\" treated as %c.",av[a],align[c] D;
				break;
			}
			V4 "Col%3d align=%c bound=%c width=%d."
				,c,align[c],bound[c],width[c] D;
			++c;
			break;
		default:
			V1 "Unknown column \"%s\" ignored.",av[a] D;
			break;
		}
	}
	if (c <= 0) {		/* We oughta have at least one column */
		align[c] = 'l';
		bound[c] = '*';
		width[c] = 0;
		++c;
	}
	cols = c;
	align[cols] = 'l';
	width[cols] =  0 ;
	V4 "There are %d buffers, %d columns.",blines,cols D;
	if (!field || !*field) {
		field = (char***)MallocM(blines*sizeof(char**),"field[]");
		for (i=0; i<blines; i++)
			field[i] = (char**)MallocM((cols+1)*sizeof(char*),"field");
	}
	if (sepfl && (!insep || !*insep)) {
		insep = (char**)MallocM(blines*sizeof(char**),"insep[]");
		for (i=0; i<blines; i++)
			insep[i] = (char*)MallocM((cols+1)*sizeof(char),"insep");
	}
	inbuf = (char**)MallocM(blines*sizeof(char**),"inbuf[]");
	for (i=0; i<blines; i++)
		inbuf[i] = (char*)MallocM(INBUF+1,"inbuf");
/*
* Now it's time to start reading the input, line at a time.  Each line
* is  parsed into fields, and stuck in the buffer.  When the buffer is
* full, or we hit EOF, we start  writing  the  oldest  lines,  aligned
* appropriately.  Note that we first trim away initial white stuff, to
* handle things like output from ps that  would  otherwise  appear  to
* have an initial "null" field on some lines and not on others.
*/
	while (fgets(inbuf[bline],INBUF,stdin)) {
		ip = inbuf[bline];
		if (trimfl)
			ip = trim(ip,-1);
#if 0
		n = strlen(ip);
		while (n>0 && ((c=ip[n-1]) == '\n' || sep[c]))
			ip[--n] = 0;	/* Trim away newlines and final separators */
#endif
		V5 "Line%5d: \"%s\"",ilines,ip D;
		field[bline][0] = ip;	/* First field starts with first char */
		c1 = '+';				/* Pretend that data precedes it */
		for (f=c=c1=0; (c0=ip[c]) && (f<=cols); c++) {


			if (sep[c0]) {
				V5 "Col %d is separator.",c D;
				if (sepfl)		/* Are we preserving separators? */
					insep[bline][f] = c0;
				ip[c] = 0;	/* Wipe out all separators */
				V5 "Field[%2d][%2d]=\"%s\"",bline,f,field[bline][f] D;
				if (trimfl)
					field[bline][f] = trim(field[bline][f],-1);
			} else
			if (sep[c1]) {		/* First char of field? */
				field[bline][++f] = &ip[c];
				V5 "Col %d starts field %d.",c,f D;
				if (f >= cols)
					break;
			}
			c1 = c0;	/* Remember preceding char */
		}
		if (trimfl) 	/* Remember to trim the last field */
			field[bline][f] = trim(field[bline][f],-1);
		while (f<cols) {
			field[bline][++f] = "";
			if (sepfl)
				insep[bline][f] = 0;
		}
		for (f=0; f<=cols; f++) {
			V6 "Field[%2d][%2d]=\"%s\"",bline,f,field[bline][f] D;
			if (width[f] < (n=strlen(field[bline][f]))) {
				width[f] = n;
				V6 "width[%d]=%d.",f,n D;
			}
		}
		++ilines;		/* Count of input lines processed */
		bline = ilines % blines;
		V6 "bline=%d ilines=%d olines=%d.",bline,ilines,olines D;
		if (ilines - olines >= blines )
			outline(olines);
	}
	V5 "EOF" D;
	while (ilines > olines)
		outline(olines);
done:
	exit(0);
}
