91 lines
2.2 KiB
Perl
91 lines
2.2 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
use POSIX qw (ceil);
|
|
use FindBin;
|
|
use lib ("$FindBin::RealBin/../../PerlLib");
|
|
use Fasta_reader;
|
|
|
|
my $usage = "usage: $0 Trinity.fasta [length_bin_size=100] [out_prefix='dist']\n\n";
|
|
|
|
my $trinity_fasta = $ARGV[0] or die $usage;
|
|
my $length_bin_size = $ARGV[1] || 100;
|
|
my $out_prefix = $ARGV[2] || "dist";
|
|
|
|
|
|
main: {
|
|
|
|
|
|
my %comp_counter;
|
|
my %length_bin_counter;
|
|
|
|
my $fasta_reader = new Fasta_reader($trinity_fasta);
|
|
|
|
while (my $seq_obj = $fasta_reader->next()) {
|
|
|
|
my $acc = $seq_obj->get_accession();
|
|
|
|
my $sequence = $seq_obj->get_sequence();
|
|
my $sequence_length = length($sequence);
|
|
|
|
my $bin = ceil($sequence_length/$length_bin_size);
|
|
|
|
$length_bin_counter{$bin}++;
|
|
|
|
my $comp_name;
|
|
|
|
if ($acc =~ /^(.*comp\d+_c\d+)/) {
|
|
$comp_name = $1;
|
|
}
|
|
elsif ($acc =~ /^(.*c\d+_g\d+)/) {
|
|
$comp_name = $1;
|
|
}
|
|
else {
|
|
die "Error, cannot get component/gene id from acc: $acc";
|
|
}
|
|
|
|
$comp_counter{$comp_name}++;
|
|
}
|
|
|
|
## get component dist
|
|
my %comp_size_counter;
|
|
foreach my $count (values %comp_counter) {
|
|
$comp_size_counter{$count}++;
|
|
}
|
|
|
|
## write component dist
|
|
open (my $ofh, ">$out_prefix.comp_sizes.txt") or die $!;
|
|
print $ofh join("\t", "transPerComp", "num_components") . "\n";
|
|
foreach my $comp_size (sort {$a<=>$b} keys %comp_size_counter) {
|
|
|
|
my $comp_size_count = $comp_size_counter{$comp_size};
|
|
print $ofh join("\t", $comp_size, $comp_size_count) . "\n";
|
|
}
|
|
close $ofh;
|
|
|
|
## write length distribution
|
|
open ($ofh, ">$out_prefix.trans_lengths.txt") or die $!;
|
|
print $ofh join("\t", "#length_bin", "count_trans") . "\n";
|
|
foreach my $length_bin (sort {$a<=>$b} keys %length_bin_counter) {
|
|
|
|
my $length = $length_bin * $length_bin_size;
|
|
|
|
my $count = $length_bin_counter{$length_bin};
|
|
|
|
print $ofh join("\t", $length, $count) . "\n";
|
|
}
|
|
close $ofh;
|
|
|
|
|
|
print STDERR "\n\nDone. See files $out_prefix.comp_sizes.txt and $out_prefix.trans_lengths.txt\n\n";
|
|
|
|
|
|
exit(0);
|
|
}
|
|
|
|
|
|
|
|
|
|
|