321 lines
6.5 KiB
Perl
321 lines
6.5 KiB
Perl
#!/usr/bin/env perl
|
|
|
|
package Overlap_info;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use List::Util qw (min max);
|
|
use Carp;
|
|
use Data::Dumper;
|
|
|
|
|
|
## works on coordinate pairs: [a1, a2], [b1, b2]
|
|
sub overlap {
|
|
my ($coordsA_aref, $coordsB_aref) = @_;
|
|
|
|
my ($lendA, $rendA) = sort {$a<=>$b} @$coordsA_aref;
|
|
|
|
my ($lendB, $rendB) = sort {$a<=>$b} @$coordsB_aref;
|
|
|
|
if ($lendA <= $rendB && $rendA >= $lendB) {
|
|
return(1);
|
|
}
|
|
else {
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
|
|
sub contains {
|
|
my ($larger, $smaller) = @_;
|
|
|
|
my ($smaller_lend, $smaller_rend) = sort {$a<=>$b} @$smaller;
|
|
my ($larger_lend, $larger_rend) = sort {$a<=>$b} @$larger;
|
|
|
|
if ($smaller_lend >= $larger_lend && $smaller_rend <= $larger_rend) {
|
|
return(1);
|
|
}
|
|
|
|
else {
|
|
#print "no containment: [$larger_lend-$larger_rend] no containment of [$smaller_lend-$smaller_rend]\n";
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
|
|
## works on coordinate pairs: [a1, a2], [b1, b2]
|
|
sub overlap_length {
|
|
my ($coordsA_aref, $coordsB_aref) = @_;
|
|
|
|
if (&overlap($coordsA_aref, $coordsB_aref)) {
|
|
|
|
my ($lendA, $rendA) = sort {$a<=>$b} @$coordsA_aref;
|
|
|
|
my ($lendB, $rendB) = sort {$a<=>$b} @$coordsB_aref;
|
|
|
|
if ($lendA > $lendB) {
|
|
# swap em
|
|
($lendA, $rendA, $lendB, $rendB) = ($lendB, $rendB, $lendA, $rendA);
|
|
}
|
|
|
|
my $overlap_lend = max($lendA, $lendB);
|
|
my $overlap_rend = min($rendA, $rendB);
|
|
|
|
my $overlap_len = $overlap_rend - $overlap_lend + 1;
|
|
return($overlap_len);
|
|
|
|
}
|
|
else {
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
## works on sets of coordinates: ([ [a1,a2], [a3,a4], ...]) , ([ [b1,b2], [b3,b4], ... ])
|
|
sub sum_overlaps {
|
|
my ($coordsets_A_aref, $coordsets_B_aref) = @_;
|
|
|
|
|
|
## Be sure that no intra-set coordinates overlap each other. TODO: force this sanity check
|
|
|
|
my $sum = 0;
|
|
|
|
foreach my $coordset_A_aref (@$coordsets_A_aref) {
|
|
|
|
foreach my $coordset_B_aref (@$coordsets_B_aref) {
|
|
|
|
$sum += &overlap_length($coordset_A_aref, $coordset_B_aref);
|
|
}
|
|
}
|
|
|
|
return($sum);
|
|
}
|
|
|
|
|
|
## overlap, compatible, and A contains B
|
|
sub compatible_overlap_A_contains_B {
|
|
my ($coordsets_A_aref, $coordsets_B_aref) = @_;
|
|
|
|
my ($A_lend, $A_rend) = &get_coordset_span(@$coordsets_A_aref);
|
|
my ($B_lend, $B_rend) = &get_coordset_span(@$coordsets_B_aref);
|
|
|
|
|
|
if (&contains([$A_lend, $A_rend], [$B_lend, $B_rend])
|
|
&&
|
|
&compatible_overlap($coordsets_A_aref, $coordsets_B_aref) ) {
|
|
|
|
return(1);
|
|
}
|
|
else {
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
## overlap and encode identical internal boundaries.
|
|
sub compatible_overlap {
|
|
my ($coordsets_A_aref, $coordsets_B_aref) = @_;
|
|
|
|
my $verbose_check = 0;
|
|
|
|
print "* Compatibility check between: " . Dumper($coordsets_A_aref) . " and " . Dumper($coordsets_B_aref) if $verbose_check;
|
|
|
|
|
|
my @A_coordsets = &_order_coordsets(@$coordsets_A_aref);
|
|
my @B_coordsets = &_order_coordsets(@$coordsets_B_aref);
|
|
|
|
my @A_span = &get_coordset_span(@A_coordsets);
|
|
my @B_span = &get_coordset_span(@B_coordsets);
|
|
|
|
unless (&overlap(\@A_span, \@B_span)) {
|
|
print "-no overlap of spans: @A_span, @B_span\n" if $verbose_check;
|
|
return(0);
|
|
}
|
|
|
|
|
|
my ($i, $j);
|
|
my $found_overlap_flag = 0;
|
|
|
|
overlap_search:
|
|
for ($i = 0; $i <= $#A_coordsets; $i++) {
|
|
|
|
for ($j = 0; $j <= $#B_coordsets; $j++) {
|
|
|
|
if (&overlap($A_coordsets[$i], $B_coordsets[$j])) {
|
|
|
|
|
|
$found_overlap_flag = 1;
|
|
|
|
|
|
|
|
last overlap_search;
|
|
}
|
|
}
|
|
}
|
|
|
|
unless ($found_overlap_flag) {
|
|
print "-no overlap of segments.\n" if $verbose_check;
|
|
return(0);
|
|
}
|
|
|
|
## if its not the first segment of A or B, then they're incompatible.
|
|
if (! ($i == 0 || $j == 0) ) {
|
|
print "-overlap doesn't anchor at first segment of either entry. $i, $j\n" if $verbose_check;
|
|
return(0);
|
|
}
|
|
|
|
|
|
while ($i <= $#A_coordsets && $j <= $#B_coordsets) {
|
|
|
|
my @a_coords = @{$A_coordsets[$i]};
|
|
my @b_coords = @{$B_coordsets[$j]};
|
|
|
|
|
|
## left junction check.
|
|
if ($i != 0 && $j != 0) {
|
|
## left bounds should have matching edges.
|
|
if ($a_coords[0] != $b_coords[0]) {
|
|
print "-(internal seg) left bounds fail to match: " . Dumper(\@A_coordsets) . Dumper(\@B_coordsets) if $verbose_check;
|
|
return(0);
|
|
}
|
|
}
|
|
## right junction check:
|
|
if ($i != $#A_coordsets && $j != $#B_coordsets) {
|
|
if ($a_coords[1] != $b_coords[1]) {
|
|
print "-(internal seg) right bounds fail to match: " . Dumper(\@A_coordsets) . Dumper(\@B_coordsets) if $verbose_check;
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
## left bound check
|
|
if ($i == 0 && $j != 0) {
|
|
if ($a_coords[0] < $b_coords[0]) {
|
|
print "-(i first seg), fail left: " . Dumper(\@A_coordsets) . Dumper(\@B_coordsets) if $verbose_check;
|
|
return(0);
|
|
}
|
|
}
|
|
if ($i != 0 && $j == 0) {
|
|
if ($b_coords[0] < $a_coords[0]) {
|
|
print "-(j first seg), fail left: " . Dumper(\@A_coordsets) . Dumper(\@B_coordsets) if $verbose_check;
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
## right bound check.
|
|
if ($i == $#A_coordsets && $j != $#B_coordsets) {
|
|
if ($a_coords[1] > $b_coords[1]) {
|
|
print "-(i last seg), fail right: " . Dumper(\@A_coordsets) . Dumper(\@B_coordsets) if $verbose_check;
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
if ($i != $#A_coordsets && $j == $#B_coordsets) {
|
|
if ($b_coords[1] > $a_coords[1]) {
|
|
print "-(j laset seg), fail right: " . Dumper(\@A_coordsets) . Dumper(\@B_coordsets) if $verbose_check;
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
$i++;
|
|
$j++;
|
|
|
|
}
|
|
|
|
## must be compatible
|
|
print "-made it. Must be compatible.\n" if $verbose_check;
|
|
return(1);
|
|
|
|
|
|
|
|
}
|
|
|
|
####
|
|
sub _order_coordsets {
|
|
my (@coordsets) = @_;
|
|
|
|
my @ret_coords;
|
|
|
|
foreach my $coordpair (@coordsets) {
|
|
my ($lend, $rend) = sort {$a<=>$b} @$coordpair;
|
|
push (@ret_coords, [$lend, $rend]);
|
|
}
|
|
|
|
|
|
@ret_coords = sort {$a->[0]<=>$b->[0]} @ret_coords;
|
|
|
|
return(@ret_coords);
|
|
}
|
|
|
|
sub get_gap_coords {
|
|
my ($coordsets_aref) = @_;
|
|
|
|
my @coordsets = &_order_coordsets(@$coordsets_aref);
|
|
|
|
my @gaps;
|
|
|
|
for (my $i = 1; $i <= $#coordsets; $i++) {
|
|
|
|
my $prev_rend = $coordsets[$i-1]->[1];
|
|
my $curr_lend = $coordsets[$i]->[0];
|
|
|
|
push (@gaps, [$prev_rend + 1, $curr_lend - 1]);
|
|
}
|
|
|
|
return(@gaps);
|
|
}
|
|
|
|
|
|
|
|
sub get_coordset_span {
|
|
my @coordsets = @_;
|
|
|
|
my @coords;
|
|
foreach my $coordset (@coordsets) {
|
|
push (@coords, @$coordset);
|
|
}
|
|
|
|
my $min_coord = min(@coords);
|
|
my $max_coord = max(@coords);
|
|
|
|
return($min_coord, $max_coord);
|
|
}
|
|
|
|
|
|
sub coordsets_to_string {
|
|
my @coordsets = @_;
|
|
|
|
my @text;
|
|
foreach my $coordset (@coordsets) {
|
|
my ($lend, $rend) = @$coordset;
|
|
push (@text, "($lend,$rend)");
|
|
}
|
|
|
|
my $text_line = join("--", @text);
|
|
return($text_line);
|
|
}
|
|
|
|
|
|
|
|
|
|
sub _coords_are_identical {
|
|
my ($coordpair_A_aref, $coordpair_B_aref) = @_;
|
|
|
|
if ($coordpair_A_aref->[0] == $coordpair_B_aref->[0]
|
|
&&
|
|
$coordpair_A_aref->[1] == $coordpair_B_aref->[1]) {
|
|
|
|
return(1);
|
|
}
|
|
|
|
else {
|
|
return(0);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
1; #EOM
|
|
|