78 lines
1.5 KiB
Perl
78 lines
1.5 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
my $usage = "usage: $0 inputFile out.cleanReads out.malformedReads\n\n";
|
|
|
|
my $inputFile = $ARGV[0] or die $usage;
|
|
my $cleanReads = $ARGV[1] or die $usage;
|
|
my $malformedReads = $ARGV[2] or die $usage;
|
|
|
|
|
|
open (my $ofh_clean, ">$cleanReads") or die "Error, can't write to $cleanReads";
|
|
open (my $ofh_malformed, ">$malformedReads") or die "Error, cannot write to $malformedReads";
|
|
|
|
open (my $fh, $inputFile) or die "Error, cannot open $inputFile";
|
|
|
|
my $counter = 0;
|
|
my $num_clean = 0;
|
|
my $num_dirty = 0;
|
|
|
|
my @rec;
|
|
|
|
my $line = <$fh>;
|
|
|
|
while ($line) {
|
|
|
|
if ($line =~ /^\@/) {
|
|
$counter++;
|
|
|
|
print STDERR "\r[$counter] [$num_clean clean] [$num_dirty dirty] " if ($counter % 10000 == 0);
|
|
|
|
push (@rec, $line);
|
|
|
|
$line = <$fh>;
|
|
for (1..3) {
|
|
push (@rec, $line);
|
|
$line = <$fh>;
|
|
}
|
|
|
|
my $record_text = join("", @rec);
|
|
|
|
my $header = shift @rec;
|
|
my $seq = shift @rec;
|
|
my $qual_header = shift @rec;
|
|
my $qual_line = shift @rec;
|
|
|
|
chomp $header;
|
|
chomp $seq if $seq;
|
|
chomp $qual_header if $qual_header;
|
|
chomp $qual_line if $qual_line;
|
|
|
|
if ($header && $seq && $qual_header && $qual_line &&
|
|
$qual_header =~ /^\+/ && length($seq) == length($qual_line)) {
|
|
|
|
# can do some more checks here if needed to be sure that the lines are formatted as expected.
|
|
print $ofh_clean join("\n", $header, $seq, $qual_header, $qual_line) . "\n";
|
|
$num_clean++;
|
|
}
|
|
else {
|
|
print $ofh_malformed $record_text;
|
|
$num_dirty++;
|
|
}
|
|
@rec = ();
|
|
} else {
|
|
$line = <$fh>;
|
|
}
|
|
|
|
}
|
|
|
|
exit(0);
|
|
|
|
|
|
|
|
|
|
|
|
|