292 lines
5.6 KiB
Perl
292 lines
5.6 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
# classes for DelimParser::Reader and DelimParser::Writer
|
|
|
|
package DelimParser;
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
|
|
####
|
|
sub new {
|
|
my ($packagename, $fh, $delimiter) = @_;
|
|
|
|
unless ($fh && $delimiter) {
|
|
confess "Error, need filehandle and delimiter params";
|
|
}
|
|
|
|
|
|
my $self = { _delim => $delimiter,
|
|
_fh => $fh,
|
|
|
|
# set below in _init()
|
|
_column_headers => [],
|
|
};
|
|
|
|
|
|
bless ($self, $packagename);
|
|
|
|
return($self);
|
|
}
|
|
|
|
|
|
####
|
|
sub get_fh {
|
|
my $self = shift;
|
|
return($self->{_fh});
|
|
}
|
|
|
|
####
|
|
sub get_delim {
|
|
my $self = shift;
|
|
return($self->{_delim});
|
|
}
|
|
|
|
####
|
|
sub get_column_headers {
|
|
my $self = shift;
|
|
return(@{$self->{_column_headers}});
|
|
}
|
|
|
|
####
|
|
sub set_column_headers {
|
|
my $self = shift;
|
|
my (@columns) = @_;
|
|
|
|
$self->{_column_headers} = \@columns;
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_num_columns {
|
|
my $self = shift;
|
|
return(length($self->get_column_headers()));
|
|
}
|
|
|
|
|
|
###
|
|
sub reconstruct_header_line {
|
|
my $self = shift;
|
|
my @column_headers = $self->get_column_headers();
|
|
|
|
my $header_line = join("\t", @column_headers);
|
|
return($header_line);
|
|
}
|
|
|
|
###
|
|
sub reconstruct_line_from_row {
|
|
my $self = shift;
|
|
my $row_href = shift;
|
|
unless ($row_href && ref $row_href) {
|
|
confess "Error, must set row_href as param";
|
|
}
|
|
|
|
my @column_headers = $self->get_column_headers();
|
|
|
|
my @vals;
|
|
foreach my $col_header (@column_headers) {
|
|
my $val = $row_href->{$col_header};
|
|
push (@vals, $val);
|
|
}
|
|
|
|
my $row_text = join("\t", @vals);
|
|
|
|
return($row_text);
|
|
|
|
}
|
|
|
|
|
|
##################################################
|
|
package DelimParser::Reader;
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
use Data::Dumper;
|
|
|
|
our @ISA;
|
|
push (@ISA, 'DelimParser');
|
|
|
|
sub new {
|
|
my ($packagename) = shift;
|
|
my $self = $packagename->DelimParser::new(@_);
|
|
|
|
$self->_init();
|
|
|
|
return($self);
|
|
}
|
|
|
|
|
|
####
|
|
sub _init {
|
|
my $self = shift;
|
|
|
|
my $fh = $self->get_fh();
|
|
my $delim = $self->get_delim();
|
|
|
|
my $header_row = <$fh>;
|
|
chomp $header_row;
|
|
|
|
unless ($header_row) {
|
|
confess "Error, no header row read.";
|
|
}
|
|
|
|
my @fields = split(/$delim/, $header_row);
|
|
|
|
$self->set_column_headers(@fields);
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_row {
|
|
my $self = shift;
|
|
|
|
my $fh = $self->get_fh();
|
|
my $line = <$fh>;
|
|
unless ($line) {
|
|
return(undef); # eof
|
|
}
|
|
|
|
my $delim = $self->get_delim();
|
|
my @fields = split(/$delim/, $line);
|
|
chomp $fields[$#fields]; ## it's important that this is done after the delimiter splitting in case the last field is actually empty.
|
|
|
|
|
|
my @column_headers = $self->get_column_headers();
|
|
|
|
my $num_col = scalar (@column_headers);
|
|
my $num_fields = scalar(@fields);
|
|
|
|
if ($num_col != $num_fields) {
|
|
confess "Error, line: [$line] " . Dumper(\@fields) . " is lacking $num_col fields: " . Dumper(\@column_headers);
|
|
}
|
|
|
|
my %dict;
|
|
foreach my $colname (@column_headers) {
|
|
my $field = shift @fields;
|
|
$dict{$colname} = $field;
|
|
}
|
|
|
|
return(\%dict);
|
|
}
|
|
|
|
|
|
####
|
|
sub get_row_val {
|
|
my ($self, $row_href, $key) = @_;
|
|
|
|
if (! exists $row_href->{$key}) {
|
|
confess "Error, row: " . Dumper($row_href) . " doesn't include key: [$key]";
|
|
}
|
|
|
|
return($row_href->{$key});
|
|
}
|
|
|
|
|
|
|
|
##################################################
|
|
|
|
package DelimParser::Writer;
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
|
|
our @ISA;
|
|
push (@ISA, 'DelimParser');
|
|
|
|
sub new {
|
|
my ($packagename) = shift;
|
|
my ($ofh, $delim, $column_fields_aref, $FLAGS) = @_;
|
|
|
|
## FLAGS can be:
|
|
# NO_WRITE_HEADER|...
|
|
|
|
unless (ref $column_fields_aref eq 'ARRAY') {
|
|
confess "Error, need constructor params: ofh, delim, column_fields_aref";
|
|
}
|
|
|
|
my $self = $packagename->DelimParser::new($ofh, $delim);
|
|
|
|
$self->_initialize($column_fields_aref, $FLAGS);
|
|
|
|
return($self);
|
|
}
|
|
|
|
|
|
####
|
|
sub _initialize {
|
|
my $self = shift;
|
|
my $column_fields_aref = shift;
|
|
my $FLAGS = shift;
|
|
|
|
unless (ref $column_fields_aref eq 'ARRAY') {
|
|
confess "Error, require column_fields_aref as param";
|
|
}
|
|
|
|
|
|
my $ofh = $self->get_fh();
|
|
my $delim = $self->get_delim();
|
|
|
|
|
|
|
|
$self->set_column_headers(@$column_fields_aref);
|
|
|
|
unless (defined($FLAGS) && $FLAGS =~ /NO_WRITE_HEADER/) {
|
|
my $output_line = join($delim, @$column_fields_aref);
|
|
print $ofh "$output_line\n";
|
|
}
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub write_row {
|
|
my $self = shift;
|
|
my $dict_href = shift;
|
|
|
|
unless (ref $dict_href eq "HASH") {
|
|
confess "Error, need dict_href as param";
|
|
}
|
|
|
|
my $num_dict_fields = scalar(keys %$dict_href);
|
|
|
|
my @column_headers = $self->get_column_headers();
|
|
|
|
|
|
my $delim = $self->get_delim();
|
|
|
|
my @out_fields;
|
|
for my $column_header (@column_headers) {
|
|
my $field = $dict_href->{$column_header};
|
|
unless (defined $field) {
|
|
confess "Error, missing value for required column field: $column_header";
|
|
}
|
|
if ($field =~ /$delim/) {
|
|
# don't allow any delimiters to contaminate the field value, otherwise it'll introduce offsets.
|
|
$field =~ s/$delim/ /g;
|
|
}
|
|
# also avoid newlines, which will also break the output formatting.
|
|
if ($field =~ /\n/) {
|
|
$field =~ s/\n/ /g;
|
|
}
|
|
|
|
push (@out_fields, $field);
|
|
}
|
|
|
|
my $outline = join("\t", @out_fields);
|
|
|
|
my $ofh = $self->get_fh();
|
|
|
|
print $ofh "$outline\n";
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
1; #EOM
|
|
|