biyelunwen/99.scripts/trinity_utils/PerlLib/Fastq_reader.pm

174 lines
3.5 KiB
Perl

package Fastq_reader;
use strict;
use warnings;
use Carp;
sub new {
my ($packagename, $fastqFile) = @_;
## note: fastqFile can be a filename or an IO::Handle
my $self = { fastqFile => undef,
fileHandle => undef };
bless ($self, $packagename);
## create filehandle
my $filehandle = undef;
if (ref $fastqFile eq 'IO::Handle') {
$filehandle = $fastqFile;
}
else {
if ( $fastqFile =~ /\.gz$/ ) {
## TODO: need to handle the failure case, since not picked up here as I thought it would!!
open ($filehandle, "gunzip -c $fastqFile | ") or die "Error: Couldn't open compressed $fastqFile\n";
}
elsif ($fastqFile =~ /\.bz2$/) {
open ($filehandle, "bunzip2 -c $fastqFile | ") or die "Error, couldn't open compressed $fastqFile $!";
} else {
open ($filehandle, $fastqFile) or die "Error: Couldn't open $fastqFile\n";
}
$self->{fastqFile} = $fastqFile;
}
$self->{fileHandle} = $filehandle;
return ($self);
}
#### next() fetches next Sequence object.
sub next {
my $self = shift;
my $filehandle = $self->{fileHandle};
my $next_text_input = "";
if (! eof($filehandle)) {
for (1..4) {
$next_text_input .= <$filehandle>;
}
}
my $read_obj = undef;
if ($next_text_input) {
eval {
$read_obj = Fastq_record->new($next_text_input);
};
if ($@) {
confess "Error, $@ :: fastq file: " . $self->{fastqFile};
}
}
return ($read_obj); #returns null if not instantiated.
}
#### finish() closes the open filehandle to the query database.
sub finish {
my $self = shift;
my $filehandle = $self->{fileHandle};
close $filehandle;
$self->{fileHandle} = undef;
}
##############################################
package Fastq_record;
use strict;
use warnings;
use Carp;
sub new {
my $packagename = shift;
my ($text_lines) = @_;
my @split_text = split(/\n/, $text_lines);
unless (scalar @split_text == 4) {
confess "Error, fastQ entry doesn't have 4 lines: " . $text_lines;
}
my ($name_line, $seq_line, $plus, $qual_line) = @split_text;
unless ($name_line =~ /^\@/) {
confess "Error, cannot identify first line as read name line: " . $text_lines;
}
my ($read_name, $rest) = split(/\s+/, $name_line);
$read_name =~ s/^\@//;
my $pair_dir = 0; # assume single
if ($read_name =~ /^(\S+)\/([12])$/) {
$read_name = $1;
$pair_dir = $2;
}
elsif (defined($rest) && $rest =~ /^([12]):/) {
$pair_dir = $1;
}
my $self = { core_read_name => $read_name,
pair_dir => $pair_dir, # (0, 1, or 2), with 0 = unpaired.
sequence => $seq_line,
quals => $qual_line,
record => $text_lines,
};
bless ($self, $packagename);
return ($self);
}
####
sub get_core_read_name {
my $self = shift;
return ($self->{core_read_name});
}
####
sub get_full_read_name {
my $self = shift;
my $read_name = $self->{core_read_name};
if ($self->{pair_dir}) {
return(join("/", $read_name, $self->{pair_dir}));
}
}
####
sub get_sequence {
my $self = shift;
return($self->{sequence});
}
####
sub get_quals {
my $self = shift;
return($self->{quals});
}
####
sub get_fastq_record {
my $self = shift;
return($self->{record});
}
1; #EOM