69 lines
1.3 KiB
Perl
69 lines
1.3 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
# Help text shown when the script is invoked without an input file.
my $usage = join('',
    "\n\tusage: $0 cd-hit.clstr\n\n",
    "try running cd-hit first like so:\n",
    "\tcd-hit-est -o cdhit -c 0.98 -i Trinity.fasta -p 1 -d 0 -b 3 -T 10\n\n",
);

# The first command-line argument is the cd-hit .clstr file to check;
# print the usage text and abort when it is absent.
my $cdhit_file = $ARGV[0];
unless ($cdhit_file) {
    die $usage;
}
|
|
|
|
# Reads the cd-hit .clstr file named by $cdhit_file, groups the lines that
# follow each ">" header line into one cluster, and passes every non-empty
# cluster to examine_cluster().  Prints the number of clusters flagged as
# bad and exits with that number (capped at 255) as the process status.
main: {

    my $num_bad_clusters = 0;

    my $cluster;    # header line (">...") of the cluster being collected
    my @trans;      # member lines collected for that cluster so far

    # Three-argument open: the file name can never be interpreted as a
    # mode or pipe specification, and the error names the offending file.
    open(my $fh, '<', $cdhit_file)
        or die "Error, cannot open file $cdhit_file: $!";

    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /^>/) {
            # A new header closes the previous cluster; check it before
            # starting to collect the next one.
            if (@trans) {
                $num_bad_clusters += examine_cluster($cluster, \@trans);
            }
            $cluster = $line;
            @trans = ();
        }
        else {
            push (@trans, $line);
        }
    }
    close $fh;

    # The last cluster has no following header to trigger its check.
    if (@trans) {
        $num_bad_clusters += examine_cluster($cluster, \@trans);
    }

    print "Num bad clusters: $num_bad_clusters\n";

    # Only the low 8 bits of an exit status reach the parent process, so a
    # raw count of 256 would be reported as 0 (success).  Cap the value so
    # any failure stays non-zero; counts up to 255 are passed unchanged.
    my $exit_code = $num_bad_clusters > 255 ? 255 : $num_bad_clusters;

    exit($exit_code);
}
|
|
|
|
####
|
|
# examine_cluster($cluster, $trans_aref)
#
# Checks whether every member line of one cluster carries the same id,
# where the id is the "DN<digits>" token found between "TRINITY_" and the
# next underscore.
#
#   $cluster    - the cluster header line; used only in the error report
#   $trans_aref - array reference holding the cluster's member lines
#
# Returns 0 when exactly one distinct id is found.  Otherwise prints the
# header and all member lines to STDERR and returns 1.  A member line with
# no recognizable id is warned about and counted under a reserved key, so
# it can never be silently attributed to another entry's id.
sub examine_cluster {
    my ($cluster, $trans_aref) = @_;

    my %cluster_ids;
    foreach my $tran (@$trans_aref) {
        # Only read $1 after a successful match: a failed match leaves the
        # capture variables untouched, so $1 would be stale or undefined.
        if ($tran =~ /TRINITY_(DN\d+)_/) {
            $cluster_ids{$1}++;
        }
        else {
            warn "Warning, no TRINITY_DN identifier found in entry: $tran\n";
            # The empty string cannot collide with a real "DN<digits>" id.
            $cluster_ids{''}++;
        }
    }

    my $num_clusters = scalar (keys %cluster_ids);
    if ($num_clusters != 1) {
        print STDERR "ERROR, got multiple clusters represented:\n"
            . "$cluster\n" . join("\n", @$trans_aref) . "\n\n";
        return(1);
    }
    else {
        return(0);
    }
}
|
|
|
|
|
|
|