114 lines
2.1 KiB
Perl
114 lines
2.1 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use FindBin;
|
|
use lib("$FindBin::RealBin/../../PerlLib");
|
|
use Fasta_reader;
|
|
use List::Util qw(min max);
|
|
use Data::Dumper;
|
|
|
|
my $usage = "usage: $0 transcriptome.fasta polymorphRatePercentage\n\n";
|
|
|
|
my $transcriptome = $ARGV[0] or die $usage;
|
|
my $poly_rate = $ARGV[1] or die $usage;
|
|
|
|
if ($poly_rate < 1) {
|
|
print STDERR "\n\n** WARNING: polymorphRatePercentage expects a percentage, and your input value is quite small: $poly_rate ** \n\n";
|
|
}
|
|
|
|
my %mutations;
|
|
|
|
main: {
|
|
|
|
my $fasta_reader = new Fasta_reader($transcriptome);
|
|
|
|
my $counter = 0;
|
|
|
|
while (my $seq_obj = $fasta_reader->next()) {
|
|
|
|
my $acc = $seq_obj->get_accession();
|
|
my $sequence = $seq_obj->get_sequence();
|
|
|
|
my $num_snps = int($poly_rate/100 * length($sequence) + 0.5);
|
|
|
|
if ($num_snps < 1) { next; } # exclude since not helpful here.
|
|
|
|
&print_fasta("aleA;$acc", $sequence);
|
|
|
|
$sequence = &mutate($sequence, $num_snps);
|
|
|
|
&print_fasta("aleB;$acc", $sequence);
|
|
|
|
$counter++;
|
|
print STDERR "\r[$counter] ";
|
|
|
|
}
|
|
print STDERR "\n\n";
|
|
|
|
#print STDERR Dumper(\%mutations);
|
|
|
|
exit(0);
|
|
}
|
|
|
|
|
|
####
|
|
sub print_fasta {
|
|
my ($acc, $sequence) = @_;
|
|
|
|
$sequence =~ s/(\S{60})/$1\n/g;
|
|
|
|
chomp $sequence;
|
|
|
|
print ">$acc\n$sequence\n";
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub mutate {
|
|
my ($sequence, $num_snps) = @_;
|
|
|
|
my %seen;
|
|
|
|
my @chars = qw(G A T C);
|
|
|
|
|
|
my @seq = split(//, uc $sequence);
|
|
|
|
for (1..$num_snps) {
|
|
|
|
my $pos; # select unique sites (sampling w/o replacement)
|
|
|
|
do {
|
|
$pos = int(rand(length($sequence)));
|
|
|
|
} while ($seen{$pos});
|
|
|
|
$seen{$pos} = 1;
|
|
|
|
my $nuc = uc $seq[$pos];
|
|
|
|
my @others = grep { $_ ne $nuc } @chars;
|
|
|
|
my $substitution = $others[ int(rand(scalar(@others))) ];
|
|
|
|
#print STDERR "$nuc -> $substitution\n";
|
|
|
|
#$mutations{"$nuc,$substitution"}++;
|
|
|
|
$seq[$pos] = lc $substitution;
|
|
|
|
}
|
|
|
|
$sequence = join("", @seq);
|
|
|
|
return($sequence);
|
|
}
|
|
|
|
|
|
|
|
|