#!/usr/bin/env perl
# -*- Perl -*-
#
######################################################################
# Provide a facility for batch editing from a TSV input file
#
# Reads <basename>.cdb (one record per line), applies the edits found
# in a tab-separated edit file (or STDIN) and writes <basename>.idb
# (records separated by blank lines).
######################################################################

use strict;
use warnings;

my $Version  = '$Id: dbbatch,v 1.9 2013/07/19 12:25:43 goeke Exp goeke $';
my $Basename = "parts";           # Default basename for *.cdb, *.idb
my $Field    = "Field_names:";    # Default field name designator
my $CMD      = "*EDIT*";          # Field name for editor commands
my $TmpTail  = "--";              # Play name until done
my $Add      = 0;                 # Default is not to blindly add

# Command-line state, filled in by Setup()
my $Verbose     = 0;              # -v, may be repeated
my $ForceAdd    = 0;              # -A
my $ForceDelete = 0;              # -D
my $EditFile;                     # -e argument; undef means STDIN
my ($PartsSource, $EditOut, $WorkOut);

# File handles opened by Setup(), and the in-memory copy of the edit file
my ($CDB, $IDB, $EDIT);
my $EditText;

# State shared between Edit(), Edit_Append() and Edit_Change()
my @Fields;      # field names present in the edit file ('' for blank columns)
my @Contents;    # field contents present in the current edit line
my $Sep;         # field separator: first character of the database
my $key;         # separator + name of the primary sort field
my $KeyValueRe;  # compiled pattern that captures a record's sort-key value
my $who;         # cleaned-up sort-key value of the current edit line
my $instr = -1;  # column index of the *EDIT* command, -1 if absent
my $Sort;        # column index of the sort key
my $errornote;   # edit-file line number, for error reports

######################################################################
# Primary control
######################################################################
Setup();
Edit();
Cleanup();
exit 0;

######################################################################
# Subroutines in alphabetic order (private helpers follow Setup)
######################################################################

######################################################################
# Close the files and move the finished work file into place.
# The output handle is closed (and checked) before the rename so that
# a buffered write error is reported instead of being lost.
######################################################################
sub Cleanup {
    close($IDB) or die "Error closing $WorkOut: $!";
    close($CDB);
    close($EDIT) if defined $EditFile;    # leave STDIN alone

    print "*** Moving $WorkOut to $EditOut\n" if $Verbose;
    rename($WorkOut, $EditOut) or die "Error renaming file; $!";
    print "*** Done\n" if $Verbose;
    print "*** Time to run dbnormal to rebuild *.cdb file\n" if $Verbose;
    return;
}

######################################################################
# Apply the edits to the data file
# NB Since we are taking a *.cdb file for input
#    we need to add blank lines between records to get a valid *.idb
######################################################################
sub Edit {
    my $header = <$CDB>;
    die "Database $PartsSource is empty" unless defined $header;
    print {$IDB} "$header\n";            # Squirt out first line
    $Sep = substr($header, 0, 1);        # Sep is first char of database

    if ($header =~ /\Q$Field\E\s+([^,]+),/) {
        $key = $Sep . $1;
    }
    else {
        die "Could not extract primary sort key from database";
    }
    print "Sort field is $key\n" if $Verbose > 1;

    # The separator and key come from the data, so quote them before
    # building the pattern that pulls the key value out of a record.
    my $sep_q = quotemeta $Sep;
    my $key_q = quotemeta $key;
    $KeyValueRe = qr/${key_q} ([^${sep_q}]+) ${sep_q}/;

    # First non-comment, non-blank edit line holds the field names
    while (my $line = <$EDIT>) {
        next if _is_skippable($line);
        @Fields = split(/\t/, $line);
        last;
    }

    my $have_key = 0;
    for my $name (@Fields) {             # normalize field names in place
        if ($name !~ /\S/) {             # empty column: mark it so that
            $name = '';                  # later loops can skip it
            next;
        }
        # Clean first (quotes, trailing newline), then add the separator;
        # with the separator in front, Fix() could never strip the quotes.
        $name = $Sep . Fix(lc $name);
        $have_key++ if $name eq $key;    # must be a sort key in here somewhere
        print "$name " if $Verbose > 1;
    }
    print "\n" if $Verbose > 1;
    if (!$have_key) {
        # print, not printf: $key starts with $Sep, which may be a '%'
        print STDERR "Mandatory \"$key\" field is missing in edit file\n";
        exit 2;
    }

    # Locate the sort-key and command columns.  This is done in both
    # modes so that Edit_Append() never compares against an undefined
    # $instr (which used to make -a mode drop column 0).
    # The command name is matched as a literal substring: as a regex its
    # '*' characters are quantifiers and any name containing "edi" matched.
    $instr = -1;
    my $cmd_name = lc $CMD;
    for my $i (0 .. $#Fields) {
        $Sort  = $i if $Fields[$i] eq $key;
        $instr = $i if index($Fields[$i], $cmd_name) >= 0;
    }

    if ($Add) {                          # Append mode
        _copy_rest_of_cdb();             # blind copy the current data
        while (my $line = <$EDIT>) {
            next if _is_skippable($line);
            @Contents = split(/\t/, $line);    # gather input data
            $who = Fix($Contents[$Sort]);
            Edit_Append();
        }
        return;
    }

    # Edit mode
    if ($instr < 0) {
        print "Mandatory \"$CMD\" field is missing in edit file\n";
        exit 2;
    }

  EDIT_LINE:
    while (my $line = <$EDIT>) {
        next EDIT_LINE if _is_skippable($line);
        @Contents  = split(/\t/, $line);       # gather edit data
        $errornote = $.;                       # line number for error report

        my $raw_cmd = $Contents[$instr];
        my $raw_key = $Contents[$Sort];
        $raw_cmd = '' unless defined $raw_cmd;
        $raw_key = '' unless defined $raw_key;
        my $cmd = Fix($raw_cmd);               # this is the editing command
        $who = Fix($raw_key);                  # this is the sort key contents

        if ($cmd =~ /^[Aa]/) {                 # if an "append", just do it
            Edit_Append();
            next EDIT_LINE;
        }
        next EDIT_LINE if $cmd =~ /^[Nn]/;     # "no change": move on
        if ($cmd !~ /^[CcDd]/) {
            print "Edit line $errornote contains illegal entry $raw_cmd in $CMD column\n";
            print "Must contain one of A, C, D, or N (for add, change, delete, or no-change)\n";
            print "Contents: $line";
            exit 2;
        }

        my $deleting = ($cmd =~ /^[Dd]/) ? 1 : 0;
        my $record   = _seek_record($who);     # copies skipped records on a hit
        if (!defined $record) {
            # Nothing was written and CDB has been rewound, so a forced
            # delete of a missing record simply carries on.
            next EDIT_LINE if $deleting && $ForceDelete;
            print "--- Failed to find existing database entry for \"$key $raw_key\"\n";
            print "--- This will happen if the input edit file is not sorted on the $key field\n";
            exit 2;
        }

        print "Original: $record" if $Verbose > 1;
        if ($deleting) {                       # delete: just don't copy it
            print "DELETED\n" if $Verbose > 1;
            next EDIT_LINE;
        }
        Edit_Change($record);                  # Since it wasn't A, D or N
    }
    _copy_rest_of_cdb();                       # finish with rest of *.cdb
    return;
}

######################################################################
# Append the current edit line to the output as a new record.
# Requires: @Fields, @Contents, $who, $instr, $Sort
# The rest of the database (from the current read position) is scanned
# for a record with the same key.  With -A an existing record is
# skipped quietly, otherwise the program exits with status 3.
# The database read position is always restored before returning.
######################################################################
sub Edit_Append {
    my $where = tell($CDB);                    # save the current file pointer
    my $shown = defined $Contents[$Sort] ? $Contents[$Sort] : '';

    while (my $line = <$CDB>) {                # scroll input data
        next unless _record_key($line) eq $who;

        # Record already exists.  Rewind first: returning with the file
        # pointer past the duplicate would lose every record in between.
        seek($CDB, $where, 0);
        if ($ForceAdd) {
            print "*** Record $shown exists; skipping\n" if $Verbose > 1;
            return;
        }
        print "--- Record \"$key $shown\" already exists in data base\n";
        exit 3;
    }
    seek($CDB, $where, 0);                     # go back to where we were

    for my $i (0 .. $#Fields) {
        next if $i == $instr;                  # This is the *edit* command
        next if $Fields[$i] eq '';             # blank header column
        my $value = Fix($Contents[$i], $Fields[$i]);
        printf {$IDB} "%s %s ", $Fields[$i], $value;
        printf "%s %s ", $Fields[$i], $value if $Verbose > 1;
    }
    print {$IDB} "\n\n";
    print "\n" if $Verbose > 1;
    return;
}

######################################################################
# Apply the changes in the current edit line to one database record.
# Requires: @Fields and @Contents and $Sep
# To-be-edited CDB line comes in as argument (with its newline)
# Exits with status 1 if the edit line has fewer entries than fields.
######################################################################
sub Edit_Change {
    my ($work) = @_;
    my $sep_q = quotemeta $Sep;

    for my $i (0 .. $#Fields) {
        next if $i == $instr;                  # This is the *edit* command
        next if $Fields[$i] eq '';             # blank header column
        if (!defined $Contents[$i]) {
            print STDERR "Incomplete edit definition; cannot continue\n";
            print STDERR "Error was in line $errornote of input edit file\n";
            print STDERR "Each field defined in line 1 must have an entry, even if null\n";
            exit 1;
        }
        # NOTE(review): unlike Edit_Append, the field name is not passed
        # to Fix() here, so "rev" zero-padding is not applied to changes.
        $Contents[$i] = Fix($Contents[$i]) if length $Contents[$i];

        # The name must be followed by a blank, exactly as the
        # substitution requires; otherwise a field whose name is a prefix
        # of another one would be "found" and then silently not updated.
        my $field_q = quotemeta $Fields[$i];
        if ($work =~ /${field_q} /) {
            # If the original line had this field called out, replace contents
            $work =~ s/(${field_q} )([^${sep_q}]*)(${sep_q}|\s+$)/$1$Contents[$i] $3/;
        }
        else {
            # but if not, add the field name and contents (this lands
            # after the record's newline, i.e. on a continuation line)
            $work .= " " . $Fields[$i] . " " . $Contents[$i];
        }
    }
    print {$IDB} "$work\n\n";                  # and pump out the corrected line
    print "Edited: $work\n" if $Verbose > 1;
    return;
}

###################################################################
# Get rid of end-fluff on the passed argument and return the result
# First arg is contents, second (optional) is field name
# An undefined first argument yields the empty string.
###################################################################
sub Fix {
    my ($foo, $name) = @_;
    return '' unless defined $foo;

    # Drop a spurious newline / CR first so the quote test below sees
    # the closing quote at the very end of the string
    $foo =~ s/\s+\z//;
    # Excel sometimes quotes field contents on the way to TSV
    $foo =~ s/^\"(.*)\"$/$1/;
    # Excel sometimes puts in an 8-bit character for ...
    $foo =~ s/\205/.../g;
    # Excel also puts in a different 8-bit character for a single .
    $foo =~ s/\311/./g;
    # Whitespace that was inside the quotes
    $foo =~ s/\s+\z//;
    # Or rev fields without a leading 0
    $foo =~ s/^([1-9])$/0$1/ if defined $name && $name =~ /rev/;
    # And then there are the drawing numbers with trailing zeros missing
    $foo =~ s/([0-9]{5}\.)([0-9]{1}$|[0-9]{3}$|[0-9]{5}$|[0-9]{7}$)/${1}${2}0/;
    return $foo;
}

##################################################################
# The universal help message (written to STDERR); exits with 0
##################################################################
sub Help {
    print STDERR <<"END_HELP";

Usage: $0 [-a] [-A] [-b basename] [-D] [-e filename] [-h] [-v] [-V]

    -a[dd]      incorporates (blindly) all inputs as additions to database
    -A[dd]      do not complain that a record marked for addition already
                exists (but do not update or duplicate the existing entry)
    -b[ase]     following is basename:
                basename.cdb is used as the compressed database to be edited
                basename.idb is used as the edited database input file
                default basename is "parts".
    -D[elete]   do not complain if record marked for deletion does not exist
    -e[dit]     following filename contains the edit instructions
                default is to use STDIN.
    -h[elp]     produces this help message.
    -v[erbose]  produces diagnostics on STDOUT.
    -V          prints the version string and exits.

This command takes a Tab-Separated-Value file, in format identical
to that produced by the custom report generator (dbreport), and produces
a new *.idb file based upon applying those edits to the existing *.cdb file.

In the "change" mode, which is the default, an "$CMD" field must be
included in each line, whose valid contents may only be
    "A" to indicate this line is an addition to the database
    "C" to indicate that only those fields appearing in the input line
        are changed in the output file
    "D" to indicate that this line is deleted from the database
    "N" to indicate that no change is occurring for this line

Lines starting with a "#" are considered comments and ignored.

END_HELP
    exit 0;
}

###################################################################
# Initial processing of arguments and opening of all files
# Flags are matched on their prefix (-a, -add, ... are equivalent).
# Sets: $EditFile $Basename $Add $ForceAdd $ForceDelete $Verbose
#       $PartsSource $EditOut $WorkOut and the handles $CDB $IDB $EDIT
###################################################################
sub Setup {
    $Verbose = 0;
    while (defined(my $arg = shift @ARGV)) {
        if ($arg =~ /^-e/) {
            $EditFile = shift @ARGV;
            if (!defined $EditFile || $EditFile eq '') {
                print STDERR "-e flag must be followed by a file name\n";
                Help();
            }
            if (!-r $EditFile) {
                print STDERR "Cannot read the input edit file $EditFile\n";
                Help();
            }
            next;
        }
        if ($arg =~ /^-b/) {
            my $base = shift @ARGV;
            if (!defined $base || $base eq '') {
                print STDERR "-b flag must be followed by a basename\n";
                Help();
            }
            $Basename = $base;
            next;
        }
        if ($arg =~ /^-a/) { $Add++;         next; }
        if ($arg =~ /^-A/) { $ForceAdd++;    next; }
        if ($arg =~ /^-D/) { $ForceDelete++; next; }
        if ($arg =~ /^-h/) { Help(); }
        if ($arg =~ /^-v/) { $Verbose++;     next; }
        if ($arg =~ /^-V/) { print "$Version\n"; exit 0; }
        if ($arg =~ /^-/) {
            print STDERR "Unknown flag: $arg \n";
            Help();
        }
        print STDERR "Unknown argument: $arg\n";
        Help();
    }

    $PartsSource = $Basename . ".cdb";
    $EditOut     = $Basename . ".idb";
    $WorkOut     = $EditOut . $TmpTail;

    print "$Version\n" if $Verbose;
    print "*** Open database: $PartsSource\n" if $Verbose;
    open($CDB, '<', $PartsSource)
        or die "Could not read $PartsSource: $!";
    print "*** Open temp output file: $WorkOut\n" if $Verbose;
    open($IDB, '>', $WorkOut)
        or die "Could not open $WorkOut for write: $!";

    if (defined $EditFile) {
        print "*** Open edit command file: $EditFile\n" if $Verbose;
        open(my $raw, '<', $EditFile)
            or die "Could not open $EditFile for read: $!";
        $EditText = do { local $/; <$raw> };   # slurp
        close($raw);
        $EditText = '' unless defined $EditText;

        # DOS (CR LF) and old Mac (CR) line ends become a single newline.
        # Treating CR LF as one line end keeps $. equal to the real line
        # number of the edit file.  The text is then read through an
        # in-memory handle, so no scratch file in /tmp is needed.
        print "*** Normalizing DOS newlines in edit commands\n" if $Verbose;
        $EditText =~ s/\r\n?/\n/g;
        open($EDIT, '<', \$EditText)
            or die "Could not reopen edit commands from memory: $!";
    }
    else {
        print "*** Taking edit commands from STDIN\n" if $Verbose;
        $EDIT = \*STDIN;
    }
    return;
}

###################################################################
# Private helpers
###################################################################

# Copy every remaining database record to the output, adding the
# blank line that the *.idb format wants between records.
sub _copy_rest_of_cdb {
    while (my $line = <$CDB>) {
        print {$IDB} "$line\n";
    }
    return;
}

# True for edit-file lines that carry no data: comments and blank lines.
sub _is_skippable {
    my ($line) = @_;
    return 1 if $line =~ /^\s*\#/;
    return 1 if $line =~ /^\s*$/;
    return 0;
}

# Return the sort-key value of one database record; dies if the record
# does not contain "<key> <value> <separator>".
sub _record_key {
    my ($line) = @_;
    return $1 if $line =~ $KeyValueRe;
    die "Failed to find sort key value in cdb input";
}

# Scan forward in the database for the record whose key is $wanted.
# On a hit the records passed over are copied to the output and the
# matching line is returned (the caller decides what to do with it).
# On a miss nothing is written, the database is rewound to where the
# scan started, and undef is returned.  Holding the passed records back
# until the hit is what keeps a forced delete of a missing record from
# writing the tail of the database twice.
sub _seek_record {
    my ($wanted) = @_;
    my $start = tell($CDB);
    my @passed;
    while (my $line = <$CDB>) {
        if (_record_key($line) eq $wanted) {
            print {$IDB} "$_\n" for @passed;
            return $line;
        }
        push @passed, $line;
    }
    seek($CDB, $start, 0);
    return;
}

########################################################################
# Pod follows
# NB the CSS addition makes possible the desired indent with
#    =over/=item...=item/=back construct when used in an HTML context
# NOTE(review): the markup that followed "=for html" appears to have
#    been lost from this copy -- restore it from RCS if it is needed.
########################################################################

=for html DB -- dbbatch

=head2 NAME

dbbatch -- Batch input of data into canonical data base

=head2 USAGE

dbbatch [-a] [-b basename] [-D] [-h] [-e filename]

=head2 FLAGS

    -a[dd]      incorporates (blindly) all inputs as additions to database
    -A[dd]      do not complain that a record marked for addition already
                exists (but do not update or duplicate the existing entry)
    -b[ase]     following is basename:
                basename.cdb is used as the compressed database to be edited
                basename.idb is used as the edited database input file
                default basename is "parts".
    -D[elete]   do not complain if record marked for deletion does not exist
    -e[dit]     following filename contains the edit instructions
                default is to use STDIN.
    -h[elp]     produces this help message.
    -v[erbose]  produces diagnostics on STDOUT.

=head2 DESCRIPTION

This command takes a Tab-Separated-Value file, in format identical
to that produced by the custom report generator (dbreport), and produces
a new *.idb file based upon applying those edits to the existing *.cdb file.

In the "change" mode, which is the default, an "*EDIT*" field must be
included in each line, whose valid contents may only be

=over

=item "A" to indicate this line is an addition to the database

=item "C" to indicate that only those fields appearing in the input line are changed in the output file

=item "D" to indicate that this line is deleted from the database

=item "N" to indicate that no change is occurring for this line

=back

Lines starting with a "#" are considered comments and ignored.

=head2 BUGS

None reported yet.

=head2 SEE ALSO

=head4 High Level programs

dbreport

=head4 Low Level programs

=head2 AUTHOR

Bob Goeke

=head2 RCS Information

$Id: dbbatch,v 1.9 2013/07/19 12:25:43 goeke Exp goeke $

=cut

######################################################################
# History Follows
######################################################################
#
# $Log: dbbatch,v $
# Revision 1.9  2013/07/19 12:25:43  goeke
# Added pod
#
# Revision 1.8  2008/04/15 20:08:48  goeke
# Added fix for \r instead of \n at end of line
# Added fix for 8-bit character \311
#
# Revision 1.7  2005/10/28 14:52:42  goeke
# Fixed bug in that last entry in a line wasn't being inserted when
# program was invoked with a "-a" flag and no *EDIT* entry.
# But then the *EDIT* entry /was/ being inserted for cases where it
# was being used, so added a test to skip in edit_append and edit_change.
#
# Revision 1.6  2005/03/29 21:53:58  goeke
# Adding rerun provisions: -A and -D flags
# along with checks to see if new records would actually be duplicates
#
# Revision 1.5  2005/03/29 21:04:44  goeke
# Clean up the edit fields with a Fixit subroutine for several cases.
# Now complain if change or delete record cannot be found.
#
# Revision 1.4  2005/03/29 17:24:42  goeke
# Fixed bunches of bugs in Change mode
# In the process separated out Edit_Append and Edit_Change
#
# Revision 1.3  2005/03/28 18:38:04  goeke
# Fixed bugs in Append mode.
# Needed "print" rather than "printf" to avoid characters in variables
# from being interpreted as formatting characters
# Added newlines to translate input *.cdb files to *.idb format
#
# Revision 1.2  2005/03/25 18:54:25  goeke
# Added add, change, delete on a per-line basis in edit command file
#
# Revision 1.1  2005/03/25 17:05:54  goeke
# Initial revision
#
#
######################################################################