#!/usr/bin/perl -w # universal database load routine # Author: Thomas Pfau # Created: October 23, 1999 # Version: $Id: dbload.pl,v 1.1 1999/10/23 22:19:59 pfau Exp pfau $ # Written for the Linux Professional Institute # Copyright # # This program is Copyright 1999 by Thomas Pfau # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # If you do not have a copy of the GNU General Public License write to # the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, # MA 02139, USA. use strict; use DBI; use Getopt::Long; # get command line options my %opts; $opts{dbconnect} = "dbi:Pg:"; $opts{delimiter} = "\t"; my $result = GetOptions(\%opts, "control=s", #control file "delimiter=s", #field delimiter char "table=s", #table to load "dbconnect=s", #database connect string "dbuser=s", #database user name "dbpass=s" #database password ); # if a control file was specified, parse it my %fieldmap; my $errs=0; if ($opts{control}) { open(CONTROL,"<$opts{control}") || die "Can't open control file $opts{control}\n"; while () { next if /^(\#.*)?$/; # ignore comments and blank lines $opts{dbconnect} = $1,next if /^\s*dbconnect\s*=\s*(.*)/i; $opts{dbuser} = $1,next if /^\s*dbuser\s*=\s*([^\s])\s*/i; $opts{dbpass} = $1,next if /^\s*dbpass\s*=\s*([^\s])\s*/i; $opts{delimiter} = $1,next if /^\s*delimiter\s*=\s*"(.)"\s*/i; $opts{table} = $1,next if /^\s*table\s*=\s*([^\s]*)\s*$/i; $fieldmap{$1} = $2,next if /^\s*(\w*)\s*->\s*(\w*)\s*$/; print "Unrecognized input in control file at line $.\n"; $errs++; } close CONTROL; } # exit if there were errors in the control file if ($errs > 0) { printf STDERR "Errors encountered in control file, exiting\n"; exit(2); } # if no table specified, abort if (! $opts{table}) { printf STDERR "Table to load must be specified either in a control file\n"; printf STDERR "or on the command line with --table=\n"; exit(4); } # connect to database my $dbh = DBI->connect($opts{dbconnect}, $opts{dbuser}, $opts{dbpass}, { PrintError => 0 }) || die "Failed to connect to database\n", $DBI::errstr, "\n"; my $file; foreach $file (@ARGV) { # open each input file, read the line containing field names, and break # the field names out into an array open(DATAFILE, "<$file") || die "Can't open data file: $!\n"; my $line=; chop $line; my @fields = split(/$opts{delimiter}/,$line); # translate field names my $f; foreach $f (@fields) { $f = $fieldmap{$f} if $fieldmap{$f}; } # build an array of question marks to be inserted as the parameters # into the SQL statement my $cnt = @fields; my @parms = ("?") x $cnt; # build an insert statement and parse it $" = ","; my $stmt = qq{ INSERT INTO $opts{table}(@fields) VALUES (@parms) }; my $sth = $dbh->prepare($stmt) || die "Statement parse error\n", $dbh->{errstr}, "\n"; my $rowcount = 0; my $badcount = 0; my $badfile = "${file}.BAD"; # process data records # break the line apart into an array of fields and pass it as # parameters to the insert statement while ($line = ) { chop $line; @fields = split(/$opts{delimiter}/, $line); while (scalar @fields < $cnt) { push @fields,'\n'; } if ($sth->execute(@fields)) { $rowcount++; } else { print STDERR "Insert error; record $.\n",$sth->errstr,"\n"; open(BADFILE,">$badfile") unless $badcount > 0; print BADFILE $line,"\n"; $badcount++; } } # close the file close DATAFILE; close BADFILE if $badcount; print "File $file: $rowcount records loaded"; print ", $badcount rejected to $badfile\n" if $badcount > 0; } # disconnect from the database $dbh->disconnect; # Documentation =head1 NAME dbload.pl - Perl routine to load delimited records into a database =head1 SYNOPSIS $ dbload.pl --control=file.name \ --table=db_table \ --dbconnect="dbi:Pg:dbname=my_db" \ --delimiter=":" \ data.file... =head1 DESCRIPTION dbload.pl loads a database table from delimited fields in the input files. The first line of each input data file should contain a delimited list of field names. Options may be specified on the command line or in a control file. The control file can also contain field name mappings if the field names in the data file do not match the field names in the database. The program first parses any command line options. A control file is then parsed if one was specified. Any remaining arguments on the command line are then read as data files. The first line of each data file is translated as a delimited list of field names. Each name is translated through the field mappings specified in the control file. The resulting list of field names is used to build an SQL INSERT statement. The remaining lines of the data file are then read, split into fields and passed to the database by executing the statement. =head1 COMMAND LINE OPTIONS =over 8 =item --control= Reads the specified file as a control file. =item --dbconnect= Uses the specified string to connect to the database. For more information on the format of this string, see the help for DBI module and the DBD backend for your database. The default connect string is "dbi:Pg:". =item --dbuser= =item --dbpass= Specifies authorization information for the database connection. =item --delimiter=char Uses the specified character as the field delimiter. If not specified, fields are delimited by tab characters. =item --table= Loads data into the specified table. The table name must be specified either on the command line or in a control file. =back =head1 CONTROL FILE OPTIONS =over 8 =item dbconnect = string Uses the specified string to connect to the database. Whitespace following the equals sign is removed and any remaining text on the line is used as the connect string. =item delimiter = "c" Specifies 'c' as the field delimiter character. The default is a tab. =item dbuser = =item dbpass = Specifies authorization information for the database connection. =item table = table_name Specifies the name of the database table to load. =item file_field -> table_field Enters a field name mapping. The column named 'file_field' will be loaded into the database column named 'table_field'. =back =head1 AUTHOR Thomas Pfau =head1 COPYRIGHT AND DISCLAIMER This program is Copyright 1999 by Thomas Pfau This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. If you do not have a copy of the GNU General Public License write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. =cut