99 lines
1.8 KiB
Perl
99 lines
1.8 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use FindBin;
|
|
use lib ("$FindBin::RealBin/../../PerlLib");
|
|
use Fasta_reader;
|
|
|
|
# Command-line interface: the FASTA file is required, the k-mer length is optional.
my $usage = "usage: $0 file.fasta [kmer_length=25]\n\n";

my $fasta_file = $ARGV[0] or die $usage;

# An omitted, empty, or zero k-mer length falls back to the default of 25.
my $kmer_size = $ARGV[1] || 25;

# Reject negative or non-integer lengths up front: substr() treats a negative
# length as "leave that many characters off the end", so such a value would
# silently yield strings that are not k-mers at all.
unless ($kmer_size =~ /^[1-9][0-9]*$/) {
    die "Error, kmer_length must be a positive integer (got '$kmer_size')\n\n" . $usage;
}
|
|
|
|
|
|
# Reports every pair of FASTA entries that share at least one k-mer.
# Output: one tab-separated "acc_i<TAB>acc_j" line on STDOUT per sharing pair.
main: {

    # Load all sequences; the result is used as a hash whose keys are the
    # accessions and whose values are handed to get_kmers() as sequences.
    my $fasta_reader = Fasta_reader->new($fasta_file);
    my %trans_seqs = $fasta_reader->retrieve_all_seqs_hash();

    my %acc_to_kmers = get_kmers(\%trans_seqs);

    # Sort the accessions so the report order is reproducible between runs
    # instead of following Perl's unspecified hash ordering.
    my @accs = sort keys %acc_to_kmers;

    # Visit each unordered pair exactly once: $i stops one short of the last
    # index and $j always starts just after $i.
    for my $i (0 .. $#accs - 1) {

        my $acc_i = $accs[$i];
        my $kmers_href_i = $acc_to_kmers{$acc_i};

        for my $j ($i + 1 .. $#accs) {

            my $acc_j = $accs[$j];
            my $kmers_href_j = $acc_to_kmers{$acc_j};

            if (have_kmer_in_common($kmers_href_i, $kmers_href_j)) {
                print join("\t", $acc_i, $acc_j) . "\n";
            }
        }
    }

    exit(0);
}
|
|
|
|
####
|
|
# Build the k-mer set of every sequence.
#
# Parameters:
#   $trans_seqs_href - hash reference mapping accession => sequence string.
#
# Returns a flat hash (list) mapping each accession to a reference to the hash
# of k-mers produced by extract_kmers() for that sequence.
#
# Side effect: writes one progress line per sequence to STDERR.
sub get_kmers {
    my ($trans_seqs_href) = @_;

    my %acc_to_kmers;

    my $counter = 0;
    foreach my $acc (keys %$trans_seqs_href) {
        $counter++;
        print STDERR "-getting kmers for $counter\n";

        my $trans_seq = $trans_seqs_href->{$acc};

        # Copy the returned key/value list into a fresh anonymous hash so each
        # accession owns its own k-mer set.
        $acc_to_kmers{$acc} = { extract_kmers($trans_seq) };
    }

    return(%acc_to_kmers);
}
|
|
|
|
####
|
|
# Collect the distinct k-mers (substrings of length k) of a sequence.
#
# Parameters:
#   $seq - the sequence string to scan.
#   $k   - optional k-mer length; when omitted, the file-level $kmer_size is used.
#
# Returns a flat hash (list) mapping each distinct k-mer to 1. The result is
# empty when the sequence is shorter than k.
sub extract_kmers {
    my ($seq, $k) = @_;

    $k = $kmer_size unless defined $k;

    my %kmers;

    # The last window that still holds k characters starts at length - k;
    # a negative value makes the range empty, so short sequences yield nothing.
    my $last_start = length($seq) - $k;

    for my $i (0 .. $last_start) {
        $kmers{ substr($seq, $i, $k) } = 1;
    }

    return(%kmers);
}
|
|
|
|
####
|
|
# Test whether two k-mer sets overlap.
#
# Parameters:
#   $kmers_i, $kmers_j - hash references whose keys are k-mers.
#
# Returns 1 if at least one key is present in both hashes, otherwise 0.
sub have_kmer_in_common {
    my ($kmers_i, $kmers_j) = @_;

    # The answer is symmetric, so walk the smaller set and probe the larger
    # one; this bounds the loop by the smaller of the two sizes.
    if (scalar(keys %$kmers_j) < scalar(keys %$kmers_i)) {
        ($kmers_i, $kmers_j) = ($kmers_j, $kmers_i);
    }

    foreach my $kmer (keys %$kmers_i) {
        return(1) if exists $kmers_j->{$kmer};
    }

    return(0);
}
|
|
|