#!/usr/bin/env perl
# -*- perl -*-
# 'di';
# 'ig00';
#-----------------------------------------------------------------------
# Module Name: $Source: /nfs/acis/a1/database/perl/RCS/dbnormal,v $
# Purpose: Turn input data deck into canonical database form
# Language: Perl 5
# Assumptions: Data base is RFG-standard *.cdb format
# Part Number:
# Author: Robert F. Goeke (goeke@space.mit.edu
# References:
# Copyright: Massachusetts Institute of Technology 2004
#-----------------------------------------------------------------------
########################################################################
# Initial Constants
########################################################################
$Archive = "./history"; # The default history directory
$Base = "parts"; # The default input/output/format basename
$BIN = "/nfs/acis/a1/database/bin"; # The default place for other programs
$IDB = ".idb"; # The default input suffix
$FDB = ".fdb"; # The default format suffix
$CDB = ".cdb"; # The default output suffix
$SDB = ".sdb"; # The default syntax suffix
$XDB = ".xdb"; # The default calculation rule suffix
$Dbprint = "$BIN" . "/" . "dbprint"; # Required executable
# $Dbprint = "/nfs/acis/a1/database/bin/dbprint"; # Required executable
$Field = "Field_names:"; # The default string to identify field names
# The RCS version string
$SUM = "SUM"; # A keyword in the XDB file
$VERSION = '$Id: dbnormal,v 1.14 2015/12/22 23:39:14 goeke Exp goeke $';
########################################################################
########################################################################
# Main Program executes here
########################################################################
########################################################################
&Setup;
&Check;
&Gather if (!$Retval);
&Typos if (!$Retval);
&Build if (!$Retval);
&PostProcess if (!$Retval && $Execute);
&History if (!$Retval);
&Cleanup;
##########################That's all, folks! ###########################
########################################################################
########################################################################
# Build the compressed data base of single line per record entries
# by squeezing out white space and sorting
# and rebuild the input data base according to a user-oriented format
# We leave $OUTPUT write-protected (idea is that only this program
# may write to it!)
########################################################################
sub Build {
my ($jj,$thisdwg);
my $lastdwg = "0";
my $sortorder = "order";
# NB "sort" demands comparison routine specified without "&"
# and no comma twixt args !!!
@Sorted = sort($sortorder @Records);
# We pass through and check for duplicate entries
# /before/ we open the output file
# The whitespace is squeezed here, however
foreach $jj (@Sorted) {
$thisdwg = $jj;
$thisdwg =~ s/(^$Sep$Single\s+\S+)\s+$Sep.*/$1/;
$thisdwg =~ s/\s+/ /g; # normalize whitespace
next if ($thisdwg =~ "xxxx"); # AMC special case
if ($thisdwg ne $lastdwg ) {
$lastdwg = $thisdwg;
} else {
printf "Duplicate $thisdwg entries\n";
$Retval++;
}
}
return if ($Retval);
if (-r $Output) {
chmod (0755, $Output) ||
die ("Could not change write access for \'$Output\' ");
}
open(OUTPUT,">$Output") ||
die "Could not open output pipe to $Output: $!";
foreach $jj (@Sorted) {
$jj =~ s/\s+/ /g; # normalize whitespace
print OUTPUT "$jj\n";
}
close(OUTPUT) || die "$!";
chmod 0444, $Output ||
die "Failed to write-protect $Output";
close(INPUT) || die "$!";
system("$Dbprint $Print $Format <$Output >$Input") &&
die "System error encountered executing $Dbprint";
}
########################################################################
# Check for spurious field identifiers
# Truth is defined in a $FIELD specification buried in the data base,
# typically in =dwg 00000 =gen $FIELD stuff,morestuff,and,so,forth
# By definition, the field separator is the first character read
# Single field descriptor saved in $Single for run-on checking
#
# Exits with input pointer at top of file
#
# Returns non-zero if errors found
########################################################################
sub Check {
my ($jj);
while() {
next if ( ! /$Field/ );
/$Field\s+(\S+)\s+/;
@Items = split(/,/,$1);
$Single = $Items[0]; # for run-on record checking later
last; # no need to do more
}
if ($Verbose) {
printf "Field names found:\n";
foreach $jj (@Items) {
printf "$jj ";
}
printf "\n";
}
foreach $jj (@Items) { # make a associative array for lookups
$Items{$jj}++;
}
seek(INPUT,0,0) || die "Problem respooling; $!";
$_ = ;
$Sep = substr($_,0,1);
printf "Field character found: $Sep\n" if ($Verbose);
do {
if ($_ =~ /$Sep/) { # skip if this is just be a continuation line
chop;
@qfield = split(/$Sep/);
foreach $jj (@qfield) {
$jj =~ s/^(\S*).*/$1/;
next if (!$jj); # before the first $Sep is naught
next if ($Items{$jj}); # existance proof
printf "*** Bad field descriptor in $Input: $Sep$jj\n $_\n";
$Retval++;
}
}
} while ();
seek(INPUT,0,0) || die "Problem respooling; $!";
if ($Execute) {
while () {
chop;
next if (/(^\s*#)|(^\s*$)/); # skip comments and blank lines
@qfield = split(/=/);
foreach $jj (@qfield) {
$jj =~ s/^(\S*).*/$1/;
next if (!$jj); # before the first $Sep is naught
next if ($Items{$jj}); # existance proof
printf "*** Bad field descriptor in $Execute: $Sep$jj\n";
$Retval++;
}
}
seek(EXECUTE,0,0) || die "Problem respooling; $!";
}
return;
}
########################################################################
# Cleanup rituals
########################################################################
sub Cleanup {
# OUTPUT and INPUT were closed in &Build
# FORMAT and SYNTAX will be closed on exit
printf "$0 exits with error message\n" if ($Retval);
exit $Retval;
}
########################################################################
# Gather the data from the input deck
# By definition: blank lines separate records
# Returns with entire unsorted mess in @Records
########################################################################
sub Gather {
my $newline = 0;
my $index = 0;
my $content;
while () {
chop;
if (/\S/) {
$newline++;
if (! $Records[$index]) { # if this is first entry
s/^\s*//; # strip leading white space
s/\s+/ /g; # then reduce remaining white space
$Records[$index] = $_;
} else { # else append
s/\s+/ /g; # reduce white space
$Records[$index] = $Records[$index] . " " . $_;
}
next;
} else {
# printf "%s\n",$Records[$index] if ($Verbose>1);
$index++ if ($newline);
$newline = 0;
}
}
printf "%d input records processed\n", $#Records + 1 if ($Verbose);
}
########################################################################
# The general HELP! message
# Short if $Verbose = 0, else not
# NB this subroutine does not return
########################################################################
sub Help {
print "usage: $0 [-a[rchive] archive_directory\n";
print " [-b[ase] base_filename]\n";
print " [-h[elp] \n";
print " [-f[ormat] format_file\n";
print " [-i[nput] input_file]\n";
print " [-n[ame] \"Field_name_string\"]\n";
print " [-o[utput] output_file]\n";
print " [-p[rint] \"options passed to dbprint]\"\n";
print " [-s[yntax] syntax_file]\n";
print " [-x[ecute] calc_file]\n";
print " [-v[erbose]\n";
exit if (!$Verbose);
print "flags: -a specifies an archive directory (auto-saves $CDB files\n";
print " -b specifies a basename for the input, output, format, and syntax files\n";
print " NB that <$IDB>, <$CDB>, <$FDB>, <$SDB> will be attached to this basename\n";
print " -h displays full help message\n";
print " -f specifies format file used to regenerate the input file; default $Base$FDB\n";
print " -i specifies free-form data input file; default $Base$IDB\n";
print " -n specifies a string which preceding valid field names; default $Fielde\n";
print " -o specifies canonical data output file; default $Base$CDB\n";
print " -p specifies an option string passed to dbprint when $Base$IDB is regenerated\n";
print " -s specifies syntax data file for field checks; default $Base$SDB\n";
print " -x specifies calculation rule file for post-processing: default $Base$XDB\n";
print " NB post-processing does happens /after/ input file is regenerated\n";
print " -v specifies verbose processing mode; multiple -v allowed\n";
print "We currently require that 'dbprint' resides in $BIN\n";
exit;
}
########################################################################
# Archive the latest (validated) file using RCS
# NB we only (really) save the last version worked on this day!
########################################################################
sub History {
my ($sec,$min,$hour,$mday,$mon,$year) = gmtime(time);
my $date = sprintf("%02d-%02d-%04d", $mday, $mon+1, $year+2000-100);
my $version = sprintf("%02d.%02d%02d", $year-100, $mon+1, $mday);
# If a version of today's date already exists, we have to delete it
system("rcs -u$version $Output 2>/dev/null");
if ( ! $? ) { # if file exists, above completed normally so unlock
system("rcs -q -o$version $Output") && die; # delete
system("rcs -q -l $Output") && die; # lock
}
system("ci -f$version -l -m\"Revision date: $date\" $Output") &&
die; # update
}
########################################################################
# Return the detail level of the record
# Example: =dwg abbccdd.ee would return level 4
########################################################################
sub Level {
my $ret;
$_ = $_[0];
if (/^[0-9]0000$/) { $ret = 0; }
elsif (/^[0-9]{3}00$/) { $ret = 1; }
elsif (/^[0-9]{5}$/) { $ret = 2; }
else { $ret = length($_)/2 - 1; }
return($ret);
}
########################################################################
# This is used in &Build as the sort criteria for records
# Must return negative integer, 0, positive integer for <,=,>
# Two input values are passed by reference as $a and $b
# We insist that the strings being passed in are not empty
# (see last &Gather test)
########################################################################
sub order {
$a =~ /$Sep$Single\s+(\S*)\s*($Sep|$)/ ;
defined($1) || die "Sort failure for $Sep$Single: $!";
my $first = $1;
$b =~ /$Sep$Single\s+(\S*)\s*($Sep|$)/ ;
defined($1) || die "Sort failure for $Sep$Single: $!";
my $second = $1;
if ($Reverse) {
return ($second cmp $first);
} else {
return ($first cmp $second);
}
}
########################################################################
# Follow the post-processing rules
# If quantities are A/R or some such, we use "1" for calculations
#
# The rules are of form "=target << =item1 * =item2 + =item3"
# where the database values for the items are substituted and the
# resulting equation evaluated, the answer being inserted as the
# field value for the target.
# If the keyword SUM appears as the second token, the items from the
# next greatest depth of detail are summed -- after all other rules.
# Then, upon the depth decreasing, the sums are recalled /before/
# any other rules are applied.
#
# &gofigure and &goupdate are tightly coupled subroutines only called
# by this subroutine (and use the local variables extensively).
########################################################################
sub PostProcess {
my ($index,$hindex,@dump,$hold,%totals);
local ($jj,$kk,@target,@def,@rule,$ans); # used in &gofigure
print "Post-processing rules\n" if ($Verbose);
# cut up the rules and put them into $target[],$def[],$rule[]
# in the process, make a list in $hold[] of SUM rules
$index = $hindex = 0;
while() {
# The last rule in an XDB file /must/ have a \n at the end
chop;
next if (/(^\s*#)|(^\s*$)/); # skip comments and blank lines
($target[$index],$def[$index],@dump) = split(/\s+/);
$rule[$index] = join(" ",@dump);
if ($def[$index] eq $SUM) { # special note of $SUM rules
$hold[$hindex] = $index;
$hindex++;
}
print "$target[$index] : $def[$index] : $rule[$index]\n" if ($Verbose);
$index++;
}
# Now, for each record (in reverse order) process each rule
my $priordepth = -1;
for ($jj=$#Records; $jj>0; $priordepth=$depth,$jj--) {
my $touch = 0;
$depth = &Level(&Value($Single,$Records[$jj]));
if ($depth<$priordepth) { # just moved up one step
foreach $kk (@hold) {
$ans = $totals{$target[$kk],$depth+1};
&goupdate;
$totals{$target[$kk],$depth+1} = 0; # clear total
}
}
for ($kk=0; $kk<$index; $kk++) {
if ($def[$kk] ne $SUM) {
&gofigure;
&goupdate;
$touch++ if ($ans);
}
}
foreach $kk (@hold) {
&gofigure;
$totals{$target[$kk],$depth} += $ans;
}
print "$Records[$jj]\n" if ($Verbose>1 && $touch);
$priordepth = $depth;
}
}
########################################################################
# A private function of &PostProcess calculates a value give a rule
########################################################################
sub gofigure {
my $mm;
my (@calc) = split(/\s+/,$rule[$kk]);
foreach $mm (@calc) {
next if ($mm !~ /^$Sep/);
if ($Records[$jj] !~ /$mm/) {
$mm = 0;
} else {
$mm = &Value($mm,$Records[$jj]);
$mm = 1 if ($mm =~ /[^0-9.]/); # math only wants numbers
}
}
eval "\$ans = @calc";
die "Error in evaluating rule\n \"@rule\"\n as\n \"@calc\"\n$@" if ($@);
}
########################################################################
# A private function of &PostProcess inserts a field and value
# Returns with no action if $ans = 0
########################################################################
sub goupdate {
return if (!$ans);
if (&Value($target[$kk],$Records[$jj])) {
print "*** Attempt to overwrite field contents of $target[$kk] in record\n*** $Records[$jj]\n";
print "*** Rule being used at this time\n*** $target[$kk] $def[$kk] $rule[$kk]\n";
print "$ans\n";
exit 1;
} else {
$Records[$jj] =~ s/$target[$kk]\s+($Sep|$)/$1/; # scrub empty field
}
$Records[$jj] .= " $target[$kk] " . $ans if ($ans); # add to end
}
########################################################################
# Process all the command line arguments
# Open input and output files
# NB that output must be closed and write protected before progam exit!
########################################################################
sub Setup {
$Verbose = $Retval = $Reverse = 0;
$Print = "";
my $foo;
while ( $foo = shift(@ARGV) ) {
if ( $foo =~ /^-[aA]/ ) {
$Archive = shift(@ARGV);
die "no argument to -a flag; use -h flag for help" if (!$Archive);
} elsif ( $foo =~ /^-[bB]/ ) {
$Base = shift(@ARGV);
die "no argument to -b flag; use -h flag for help" if (!$Base);
} elsif ( $foo =~ /^-[fF]/ ) {
$Format = shift(@ARGV);
die "no argument to -f flag; use -h flag for help" if (!$Format);
} elsif ( $foo =~ /^-[hH]/ ) {
$Verbose++; &Help;
} elsif ( $foo =~ /^-[iI]/ ) {
$Input = shift(@ARGV);
die "no argument to -i flag; use -h flag for help" if (!$Input);
} elsif ( $foo =~ /^-[nN]/ ) {
$Field = shift(@ARGV);
die "no argument to -n flag; use -h flag for help" if (!$Field);
} elsif ( $foo =~ /^-[oO]/ ) {
$Output = shift(@ARGV);
die "no argument to -o flag; use -h flag for help" if (!$Output);
} elsif ( $foo =~ /^-[pP]/ ) {
$Print = shift(@ARGV);
die "no argument to -p flag; use -h flag for help" if (!$Print);
} elsif ( $foo =~ /^-[rR]/ ) {
$Reverse++;
} elsif ( $foo =~ /^-[tT]/ ) {
$Syntax = shift(@ARGV);
die "no argument to -t flag; use -h flag for help" if (!$Syntax);
} elsif ( $foo =~ /^-[xX]/ ) {
$Execute = shift(@ARGV);
die "no argument to -x flag; use -h flag for help" if (!$Execute);
} elsif ( $foo =~ /^-[vV]/ ) {
$Verbose++;
} else {
print STDERR "Unknown argument: $foo\n";
&Help;
}
}
$Input = $Base . $IDB if (!$Input);
$Output = $Base . $CDB if (!$Output);
$Format = $Base . $FDB if (!$Format);
$Syntax = $Base . $SDB if (!$Syntax);
if ($Execute && ! -r $Execute) {
print "*** Could not find file $Execute for calculation rules\n";
$Retval++;
} else {
$Execute = $Base . $XDB if (!$Execute);
$Execute = 0 if (! -r $Execute);
}
if (! -x $Dbprint) {
print "*** Could not find program $Dbprint to regenerate $Input\n";
$Retval++;
}
open(INPUT,"$Input") || die ("Input file \'$Input\' $!");
open(SYNTAX,"$Syntax") || die ("Syntax file \"$Syntax\": $!");
if ($Execute) {
open(EXECUTE,"$Execute") || die "Error opening post-processing rules: $!";
}
print "$VERSION\n" if ($Verbose);
}
###########################################################################
# Take rules from syntax file and run typo check lest database be corrupted
# Syntax file form: key rule; Comments have # at start of line
#
# We find the key, assume arbitrary whitespace, then check to see if
# there is /any/ field contents, and if so, apply the rule with
# a postpend of arbitrary whitespace and/or end-of-line.
###########################################################################
sub Typos {
my (@error,$content);
print "Doing consistency checking:\n" if ($Verbose);
# We use the first Field entry for testing for run-together records
@error = grep(/$Sep$Single.*$Sep$Single/,@Records);
if ($#error>=0) { # for no grep return, $#error = -1
print "*** Following records seem to be run together:\n";
foreach $index (@error) {
print "$index\n";
}
$Retval++;
}
# The first Field specified /must/ by convention have an entry
foreach $content (@Records) {
$content =~ /$Sep$Single\s+([^$Sep\s]+)/;
if (!defined($1)) {
printf "*** No content in $Sep$Single field in record\n$content\n";
$Retval++;
}
}
return if ($Retval);
my ($key,$rule,$which);
print "Doing syntax checking:\n" if ($Verbose);
while () {
next if (!/\S+/ || /^\s*#/);
if ( /;;/ ) {
s/(.*);;\s+(.*)$/$1/;
$comment = $2;
} else {
$comment = ""; # Lest old comment still retained
}
/($Sep\S+)\s+(.*)/;
$key = $1; $rule = $2;
$rule =~ s/\s+$//; # Trim trailing white space out of rule
# $comment = defined($comment) ? $comment : "";
print "Key> $key ::: Rule> $rule\n" if ($Verbose);
print " Comment> $comment\n" if ($Verbose);
# We check first to see if field is defined, then
# check to see if the contents satisfy the rule
# The final "whitespace, NL, or $Sep" is generic to all rules
# If the field contents starts with "#", this represents an
# embedded comment and the test is skipped
@error = grep(!/$key\s+($rule)?\s*($Sep|$)/,
grep(/$key\s+[^#]/,@Records));
# If there were any failures, publish and set the perish flag
if ($#error>=0) { # for no grep return, $#error = -1
print "*******************************************************\n";
print "*** Error in $key fields -->> $comment\n";
print "*******************************************************\n";
foreach $index (@error) {
print " $index\n";
}
$Retval++;
}
}
}
########################################################################
# Return the field contents if available, else return 0
# Call as &Value(field_descriptor,single_record_string)
# NB field_descriptor /must/ exist in record string
########################################################################
sub Value {
$_[1] =~ /$_[0]\s+([^$Sep\s]+)/;
return( defined($1) ? $1 : 0 );
}
########################################################################
# Pod follows
########################################################################
=for html
DB -- dbnormal
=head2 NAME
dbnormal -- Processes input data file into a canonical form
=head2 USAGE
dbnormal [-a[rchive] archive_directory
[-b[ase] base_filename]
[-h[elp]
[-f[ormat] format_file
[-i[nput] input_file]
[-n[ame] "Field_name_string"]
[-o[utput] output_file]
[-p[rint] "options passed to dbprint]"
[-s[yntax] syntax_file]
[-x[ecute] calc_file]
[-v[erbose]
=head2 FLAGS
-a specifies an archive directory (auto-saves .cdb files
-b specifies a basename for the input, output, format, and syntax files
NB that <.idb>, <.cdb>, <.fdb>, <.sdb> will be attached to this basename
-h displays full help message
-f specifies format file used to regenerate the input file; default parts.fdb
-i specifies free-form data input file; default parts.idb
-n specifies a string which lists valid field names; default Field_names:
-o specifies canonical data output file; default parts.cdb
-p specifies an option string passed to dbprint when parts.idb is regenerated
-s specifies syntax data file for field checks; default parts.sdb
-x specifies calculation rule file for post-processing: default parts.xdb
NB post-processing does happens /after/ input file is regenerated
-v specifies verbose processing mode; multiple -v allowed
=head2 DESCRIPTION
This program will accept an input data file -- formatted as desribed below --
and compress the data into a form consisting of a single line per record,
excess white space removed, and sorted using the contents of the first two
fields and the primary and secondary sort keys respectively. The
compressed output is
written to a file and archived locally using rcs; the revision number
is derived from the current date, viz: YY.MMDD .
Input records are composed of an arbitrary number of fields in arbitary
order. Each field is
defined by a name followed by an arbitrary character string which forms
its value. Field names
are arbirary non-space character strings preceeded by a unique, common to
the entire file, printable character. The first character encountered when
reading the input file is defined as the separation character. The field
value string ends at the next encountered separation character, the end of
the current record, or the end-of-file. The separation character may not
be contained in the value string; no escape mechanism is provided. Finally,
the record may contain arbitrary new lines and is terminated either by an
empty line or end-of-file.
After compression, but before other steps, the input data is subject to
a variety of internal consistency checks. The primary sort key, for
instance, must be unique. The first record read must contain a list of
valid field names against which all subsequent records are checked.
The field values are checked against rules given (as Perl regular expressions)
in the syntax file.
Upon successful completion of all of the above, the program
overwrites the input file with the newly sorted
records using the template given by the format file. See
B,
the program which is invoked here, for a description of that process
=head2 BUGS
None reported yet.
=head2 SEE ALSO
=head4 High Level programs
=head4 Low Level programs
dbprint
=head2 AUTHOR
Bob Goeke
=head2 RCS Information
$Id: dbnormal,v 1.14 2015/12/22 23:39:14 goeke Exp goeke $
=cut
########################################################################
# History follows
########################################################################
# $Log: dbnormal,v $
# Revision 1.14 2015/12/22 23:39:14 goeke
# Change explicit paths for ACIS use
#
# Revision 1.13 2013/07/19 12:25:57 goeke
# added pod
#
# Revision 1.12 2013/07/09 17:23:27 goeke
# Fixed bug so that rcs uses $Output, not "parts.cdb"
#
# Revision 1.10 2006/08/16 13:09:51 goeke
# Add test for duplicate primary key entries
#
# Revision 1.9 2006/01/18 13:33:38 goeke
# Chop whitespace in @Records down to a single space, necessary for ...
# Add provision for comment in syntax checker -->> leading #
#
# Revision 1.8 2005/11/14 01:50:49 goeke
# Added -print option to pass options to dbprint
#
# Revision 1.7 2005/11/04 14:31:32 goeke
# Fix bug in typo handling comments in *.sdb file; change to ";;" separator
#
# Revision 1.6 2005/11/03 22:04:06 goeke
# Fixed bug in processing continuation lines when building records
# Moved two tests from &Gather to &Typo
# Added ability to add comments to syntax checks
# Minor edits to help text
#
# Revision 1.5 2005/09/22 17:57:28 goeke
# Archive to an RCS file system (saves 4x over gz)
#
# Revision 1.4 2004/12/13 18:53:29 goeke
# Fixed logic errors in &PostProcess and simplified same.
#
# Revision 1.3 2004/12/13 14:13:50 goeke
# Can now handle multiple SUMs in XDB files
#
# Revision 1.2 2004/12/10 15:19:00 goeke
# Implementation of XDB files; only one SUM keyword allowed
#
# Revision 1.1 2004/12/08 13:30:13 goeke
# Added hardwired add up of costs; this is a proof-of-concept release only
#
# Revision 1.0 2004/12/06 20:29:04 goeke
# First production release with same functionality as prior /bin/sh version.
#
# Revision 0.5 2004/12/06 18:43:06 goeke
# Working though cdb build and idb regeneration (using dbprint)
#
# Revision 0.4 2004/12/03 22:00:01 goeke
# Working on Build, calling external programs like sort and dbprint
#
# Revision 0.3 2004/12/03 21:05:49 goeke
# Got typo checking going with separate syntax file
#
# Revision 0.2 2004/12/03 15:19:17 goeke
# Got the first field descriptor checks working
#
# Revision 0.1 2004/12/02 14:27:32 goeke
# This is an in-process version of conversion from sh to perl
#
#-----------------------------------------------------------------------
# from RFG/PCT DOS version of 4/88 updated in sh to 10/15/04
#-----------------------------------------------------------------------