biyelunwen/99.scripts/trinity_utils/util/misc/PerlLib/SegmentGraph.pm

477 lines
11 KiB
Perl

package SegmentGraph;
use strict;
use warnings;
use Carp;
use Data::Dumper;
sub new {
my $packagename = shift;
my $self = { id_to_node => {}, # primary container for all graph elements
_next_edges => {}, # id => { id => 1 }
_prev_edges => {},
};
bless ($self, $packagename);
return($self);
}
####
sub delete_node {
my $self = shift;
my ($node) = @_;
my $node_ID = $node->get_ID();
my @next_nodes = $self->get_next_nodes($node);
foreach my $next_node (@next_nodes) {
$self->_delete_prev_edge_to($next_node, $node_ID);
}
my @prev_nodes = $self->get_prev_nodes($node);
foreach my $prev_node (@prev_nodes) {
$self->_delete_next_edge_to($prev_node, $node_ID);
}
delete( $self->{id_to_node}->{$node_ID} );
return;
}
sub get_next_nodes {
my $self = shift;
my ($node) = @_;
my $node_ID = $node->get_ID();
my @next_nodes;
if (my $href = $self->{_next_edges}->{$node_ID}) {
my @ids = keys %$href;
foreach my $id (@ids) {
my $next_node = $self->{id_to_node}->{$id} or confess "Error, no node found for ID: $id";
push (@next_nodes, $next_node);
}
}
return(@next_nodes);
}
sub get_prev_nodes {
my $self = shift;
my ($node) = @_;
my $node_ID = $node->get_ID();
my @prev_nodes;
if (my $href = $self->{_prev_edges}->{$node_ID}) {
my @ids = keys %$href;
foreach my $id (@ids) {
my $next_node = $self->{id_to_node}->{$id} or confess "Error, no node found for ID: $id";
push (@prev_nodes, $next_node);
}
}
return(@prev_nodes);
}
sub add_segment {
my $self = shift;
my ($lend, $rend, $parent_feature_name) = @_;
unless ($lend && $rend && $parent_feature_name) {
confess "Error, need params (lend, rend, parent_feature_name)";
}
#print "ADDING SEGMENT: $lend,$rend\n";
my $overlapping_node = $self->find_overlapping_segment($lend, $rend);
if ($overlapping_node) {
my ($overlapping_node_lend, $overlapping_node_rend) = $overlapping_node->get_coords();
#print "Found overlapping nodes: ($lend,$rend) to ($overlapping_node_lend, $overlapping_node_rend)\n";
## number of things could happen here...
if ($overlapping_node->has_same_coordinates($lend, $rend)) {
$overlapping_node->add_owners($parent_feature_name);
}
elsif ($overlapping_node->overlaps_coordinates($lend, $rend)) {
## fracture into segments based on overlaps
my @coords = sort {$a<=>$b} ($lend, $rend, $overlapping_node->get_coords());
## bounds stay the same, but internal set needs to be adjusted.
my @fractured_segments = ( [$coords[0], $coords[1]-1],
[$coords[1], $coords[2]],
[$coords[2]+1, $coords[3]] );
my @remaining_input_segments;
my @contained_by_both;
my @contained_by_overlapping_segment_only;
foreach my $fractured_segment (@fractured_segments) {
my ($seg_lend, $seg_rend) = @$fractured_segment;
if ($seg_lend > $seg_rend) { next; }
#print "Searching fragment: $seg_lend,$seg_rend\n";
# corresponds to the input and the overlapping segments
if ($overlapping_node->envelops_coordinates($seg_lend, $seg_rend)
&&
($seg_lend >= $lend && $seg_rend <= $rend)
) {
#print "fragment contained by segment: $seg_lend, $seg_rend\n";
push (@contained_by_both, $fractured_segment);
}
elsif ($overlapping_node->envelops_coordinates($seg_lend, $seg_rend)) {
## a part of the original overlapping segment
push (@contained_by_overlapping_segment_only, $fractured_segment);
}
# just the input segment
elsif ($seg_lend >= $lend && $seg_rend <= $rend) {
#print "adding to remaining segment: $seg_lend, $seg_rend\n";
push (@remaining_input_segments, $fractured_segment);
}
else {
confess "Error, ended up with coordinate segment that isnt placed: $seg_lend,$seg_rend ";
}
}
my @owners = $overlapping_node->get_owners();
$self->delete_node($overlapping_node);
#print "Contained by both: " . Dumper(\@contained_by_both) . "\n";
#print "Contained by overlapping segment only: " . Dumper(\@contained_by_overlapping_segment_only). "\n";
#print "Remaining input segments: " . Dumper(\@remaining_input_segments) . "\n";
foreach my $seg_coords_aref (@contained_by_both) {
my ($seg_lend, $seg_rend) = @$seg_coords_aref;
$self->_add_segment_node($seg_lend, $seg_rend, [@owners, $parent_feature_name]);
}
foreach my $seg_coords_aref (@contained_by_overlapping_segment_only) {
my ($seg_lend, $seg_rend)= @$seg_coords_aref;
$self->_add_segment_node($seg_lend, $seg_rend, \@owners);
}
foreach my $remaining_input_seg_aref (@remaining_input_segments) {
my ($seg_lend, $seg_rend) = @$remaining_input_seg_aref;
$self->add_segment($seg_lend, $seg_rend, $parent_feature_name); # recursive call
}
}
}
else {
## add it
$self->_add_segment_node($lend, $rend, $parent_feature_name);
}
return;
}
####
sub get_all_nodes {
my $self = shift;
my $nodes_href = $self->{id_to_node};
my @nodes = values %$nodes_href;
@nodes = sort {$a->{lend} <=> $b->{lend}
||
$a->{rend} <=> $b->{rend} } @nodes;;
return(@nodes);
}
####
sub toString {
my $self = shift;
my $text = "";
my @nodes = $self->get_all_nodes(); # already nicely sorted
foreach my $node (@nodes) {
my ($seg_lend, $seg_rend) = $node->get_coords();
my @owners = sort $node->get_owners();
$text .= "$seg_lend-$seg_rend\t@owners\n";
}
return($text);
}
####
sub find_overlapping_segment {
my $self = shift;
my ($lend, $rend) = @_;
my @nodes = $self->get_all_nodes();
foreach my $node (@nodes) {
unless (ref $node) {
confess "Error, got node thats not a ref" . Dumper($node);
}
if ($node->overlaps_coordinates($lend, $rend)) {
return($node);
}
}
}
####
sub identify_all_owners {
my $self = shift;
my %all_owners;
my @nodes = $self->get_all_nodes();
foreach my $node (@nodes) {
my @owners = $node->get_owners();
foreach my $owner (@owners) {
$all_owners{$owner} = 1;
}
}
return(keys %all_owners);
}
###################
## Private methods
##################
####
sub _add_segment_node {
my $self = shift;
my ($lend, $rend, $parent_feature_name) = @_;
my $segment_node = SegmentNode->new($lend, $rend, $parent_feature_name);
my $segment_node_ID = $segment_node->get_ID();
$self->{id_to_node}->{$segment_node_ID} = $segment_node;
return;
}
####
sub _delete_prev_edge_to {
my $self = shift;
my ($node_obj, $prev_node_ID) = @_;
unless (ref $node_obj) {
confess "Error, need node_obj as parameter";
}
my $node_ID = $node_obj->get_ID();
my $prev_edges_href = $self->{_prev_edges};
delete($prev_edges_href->{$node_ID}->{$prev_node_ID});
unless (%{$prev_edges_href->{$node_id}}) {
delete($prev_edges_href->{$node_id});
}
return;
}
####
sub _delete_next_edge_to {
my $self = shift;
my ($node_obj, $next_node_ID) = @_;
unless (ref $node_obj) {
confess "Error, need node_obj as parameter";
}
my $node_ID = $node_obj->get_ID();
my $next_edges_href = $self->{_next_edges};
delete($next_edges_href->{$node_ID}->{$next_node_ID});
unless (%{$next_edges_href->{$node_id}}) {
delete($next_edges_href->{$node_id});
}
return;
}
#####################################################################################################
package SegmentNode;
use strict;
use warnings;
use Carp;
my $NODE_COUNTER = 0;
sub new {
my $packagename = shift;
my ($lend, $rend, $parent_feature_name) = @_;
unless ($lend && $rend && $parent_feature_name) {
die "Error, need params(lend, rend, parent_feature_name)";
}
my $self = { lend => $lend,
rend => $rend,
owners => {},
ID => ++$NODE_COUNTER,
};
bless ($self, $packagename);
my @owners;
if (ref $parent_feature_name eq "ARRAY") {
@owners = @$parent_feature_name;
}
else {
@owners = ($parent_feature_name);
}
$self->add_owners(@owners);
return($self);
}
####
sub get_ID {
my $self = shift;
return($self->{ID});
}
####
sub has_same_coordinates {
my $self = shift;
my ($lend, $rend) = @_;
if ($self->{lend} == $lend && $self->{rend} == $rend) {
return(1);
}
else {
return(0);
}
}
####
sub add_owners {
my $self = shift;
my (@parent_feature_names) = @_;
foreach my $parent_feature_name (@parent_feature_names) {
$self->{owners}->{$parent_feature_name} = 1;
}
return;
}
####
sub get_owners {
my $self = shift;
return(keys %{$self->{owners}});
}
####
sub is_contained_by_coordinates {
my $self = shift;
my ($lend, $rend) = @_;
if ($self->{lend} >= $lend && $self->{rend} <= $rend) {
return(1);
}
else {
return(0);
}
}
####
sub envelops_coordinates {
my $self = shift;
my ($lend, $rend) = @_;
if ($self->{lend} <= $lend && $self->{rend} >= $rend) {
return(1);
}
else {
return(0);
}
}
####
sub overlaps_coordinates {
my $self = shift;
my ($lend, $rend) = @_;
if ($lend <= $self->{rend} && $rend >= $self->{lend}) {
return(1);
}
else {
return(0);
}
}
####
sub get_coords {
my $self = shift;
return($self->{lend}, $self->{rend});
}
####
sub has_owners {
my $self = shift;
my @accs = @_;
foreach my $acc (@accs) {
if (! exists $self->{owners}->{$acc}) {
return(0);
}
}
return(1); # must have had them all
}
1; #EOM