66 lines
1.5 KiB
Perl
66 lines
1.5 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use FindBin;
|
|
use lib ("$FindBin::Bin/../../PerlLib");
|
|
use Fasta_reader;
|
|
use Process_cmd;
|
|
|
|
|
|
my $usage = "\n\n\tusage: $0 Trinity.fasta [out_prefix='GC_content']\n\n";
|
|
|
|
my $fasta_file = $ARGV[0] or die $usage;
|
|
my $out_prefix = $ARGV[1] || "GC_content";
|
|
|
|
main: {
|
|
|
|
my $dat_out_file = "$out_prefix.dat";
|
|
|
|
open(my $ofh, ">$dat_out_file") or die "Error, cannot write to $dat_out_file";
|
|
|
|
my $fasta_reader = new Fasta_reader($fasta_file);
|
|
while (my $seq_obj = $fasta_reader->next()) {
|
|
|
|
my $acc = $seq_obj->get_accession();
|
|
my $sequence = $seq_obj->get_sequence();
|
|
|
|
my $seq_len = length($sequence);
|
|
|
|
my $gc_count = 0;
|
|
while ($sequence =~ /[gc]/ig) {
|
|
$gc_count++;
|
|
}
|
|
my $pct_gc = sprintf("%.2f", $gc_count / $seq_len * 100);
|
|
|
|
print $ofh join("\t" ,$acc, $pct_gc) . "\n";
|
|
}
|
|
|
|
# generate histogram
|
|
my $R_code = <<__eoR__;
|
|
|
|
data = read.table("$dat_out_file", header=F, row.names=1)
|
|
pdf("$dat_out_file.hist.pdf")
|
|
hist(data[,1], br=100)
|
|
message("\n\nmean: ", sprintf("%.2f", mean(data[,1])), ", median: ", median(data[,1]), "\n\n")
|
|
dev.off()
|
|
|
|
__eoR__
|
|
|
|
;
|
|
|
|
{
|
|
my $Rscript_file = "$out_prefix.R";
|
|
open (my $ofh, ">$Rscript_file") or die "Error, cannot write to file: $Rscript_file";
|
|
print $ofh $R_code;
|
|
close $ofh;
|
|
|
|
# run it
|
|
my $cmd = "Rscript $Rscript_file";
|
|
&process_cmd($cmd);
|
|
}
|
|
|
|
exit(0);
|
|
}
|