2426 lines
87 KiB
Perl
2426 lines
87 KiB
Perl
#!/usr/local/bin/perl
|
|
|
|
package main;
|
|
our $SEE;
|
|
|
|
package CDNA::Splice_graph_assembler;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
use Overlap_piler;
|
|
use CDNA::CDNA_alignment;
|
|
use CDNA::Alignment_segment;
|
|
use Data::Dumper;
|
|
no warnings "recursion";
|
|
|
|
our $FUZZ_DIST = 20; # bp's not to trust at non-splice termini; untrustworthy alignment extensions.
|
|
|
|
## allowable connections between gene structure components:
|
|
my %ACCEPTABLE_CONNECTIONS = ( "terminal_left_exon" => {"intron" => 1},
|
|
"internal_exon" => {"intron" => 1},
|
|
|
|
"intron" => {"internal_exon" => 1,
|
|
"terminal_right_exon" => 1,
|
|
},
|
|
);
|
|
|
|
|
|
####
|
|
sub new {
|
|
my $packagename = shift;
|
|
|
|
my $self = {
|
|
_graph_nodes => [], # each and every graph node (main repository)
|
|
_incoming_alignments => [], # alignments to assemble
|
|
_assemblies => [], # resulting alignment assemblies
|
|
_unincorporated_alignments => [],
|
|
|
|
_valid_splice_paths => [], # Splice_graph_path objects
|
|
_assembled_splice_paths => [], # compatible splice paths chained into splice path assemblies.
|
|
|
|
## other helpers
|
|
_graph_node_hashkey_lookup => {}, # key is lend,rend,type,orient
|
|
_graph_node_via_nodeID => {}, # node access by nodeID
|
|
|
|
## various node types: (all subsets of _graph_nodes and accessible via helpers above.
|
|
_internal_exons => [],
|
|
_introns => [],
|
|
_terminal_left_exons => [],
|
|
_terminal_right_exons => [],
|
|
_singleton_exons => [],
|
|
|
|
## misc
|
|
_terminal_exon_nodeID_to_nonsplice_position_list => {}, # track coords of non-splice site ends in terminal exon supports
|
|
};
|
|
|
|
bless ($self, $packagename);
|
|
|
|
return ($self);
|
|
}
|
|
|
|
####
|
|
sub assemble_alignments {
|
|
my $self = shift;
|
|
my @alignments = @_;
|
|
|
|
## build the splicing graph.
|
|
$self->build_splicing_graph(@alignments);
|
|
|
|
## Chain together splice paths that are compatible:
|
|
print "-Chaining compatible splice paths\n" if $SEE;
|
|
$self->_chain_compatible_splice_paths();
|
|
|
|
## Extend splice paths into maximally scoring complete paths with terminal exons
|
|
print "-Extending splice paths into maximal structures.\n" if $SEE;
|
|
$self->_extend_splice_paths_to_termini(); ## products are included in the assemblies list.
|
|
|
|
## Add the singleton assemblies
|
|
$self->_append_singletons_to_assembly_list(); ## convert the singletons to cdna alignments and add to assembly list
|
|
|
|
## assign the incoming alignments to the assemblies that contain them.
|
|
print "-Assigning transcript alignments to assemblies\n" if $SEE;
|
|
my $have_unincorporated_alignments_flag = $self->_correlate_assemblies_with_incoming_alignments();
|
|
if ($have_unincorporated_alignments_flag) {
|
|
## find paths from unincorporated terminal segments.
|
|
print "-Not all alignments were included. Exploring assemblies from uninorporated alignments\n" if $SEE;
|
|
$self->_explore_assemblies_from_unincorporated_alignments();
|
|
print "-Assigning transcript alignments to assemblies, again...\n" if $SEE;
|
|
$have_unincorporated_alignments_flag = $self->_correlate_assemblies_with_incoming_alignments();
|
|
if ($have_unincorporated_alignments_flag) {
|
|
## something horribly has gone wrong.
|
|
confess "Error, not all incoming alignments are accounted for!\n" . $self->toString();
|
|
}
|
|
}
|
|
|
|
print $self->toString() if $SEE;
|
|
}
|
|
|
|
|
|
####
|
|
sub build_splicing_graph {
|
|
my $self = shift;
|
|
my @alignments = @_;
|
|
|
|
$self->set_incoming_alignments(@alignments);
|
|
|
|
## going to process the alignments in several phases:
|
|
# -decompose alignments into structural components
|
|
# -identify valid paths to connect components
|
|
# -extract assemblies using valid paths
|
|
# -associate transcripts with assemblies
|
|
|
|
## first, examine the alignments with the most segments first:
|
|
@alignments = reverse sort {$a->get_num_segments() <=> $b->get_num_segments()} @alignments;
|
|
|
|
|
|
## build components of the splice graph
|
|
## add internal exons and all introns to splice graph: (unambiguous structures)
|
|
## terminal structures are not as well defined. Apply these later.
|
|
|
|
print "-Decomposing alignments into gene structure components.\n" if $SEE;
|
|
foreach my $alignment (@alignments) {
|
|
if ($alignment->get_num_segments() > 1) {
|
|
$self->_decompose_unambiguous_alignment_structures_add_nodes($alignment);
|
|
}
|
|
}
|
|
|
|
## Add terminal exons:
|
|
## Only add terminal exons where it's clear that it's not just part of an existing internal exon
|
|
|
|
print "-Adding terminal exon segments\n" if $SEE;
|
|
my @single_segments; # capture those alignments w/o introns
|
|
|
|
foreach my $alignment (@alignments) {
|
|
my $segment_orient = $alignment->get_spliced_orientation();
|
|
if ($alignment->get_num_segments() > 1) {
|
|
foreach my $segment ($alignment->get_alignment_segments()) {
|
|
if ($segment->is_first() || $segment->is_last()) {
|
|
|
|
my $exon_type = ($segment->is_first()) ? "terminal_left_exon" : "terminal_right_exon";
|
|
|
|
$self->_try_terminal_exon_addition($exon_type, $segment, $segment_orient);
|
|
|
|
## at this point, introns and internal exons are scored only by perfect support (exact boundaries).
|
|
## terminal exons, on the other hand, have evidence scored based on boundary match.
|
|
## Now, need to augment scores of internal exons that encompass terminal exons
|
|
|
|
|
|
my ($segment_lend, $segment_rend) = $segment->get_coords();
|
|
|
|
$self->_augment_internal_exon_scores_using_terminal_alignment_segment($exon_type, $segment_lend,
|
|
$segment_rend, $segment_orient);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
push (@single_segments, $alignment);
|
|
}
|
|
}
|
|
|
|
## reconstruct some internal exons based on overlapping right/left terminal exons
|
|
print "-Merging overlapping right-to-left terminal exons\n" if $SEE;
|
|
$self->_merge_overlapping_right_to_left_terminal_exons();
|
|
|
|
## apply intron-less segments as evidence to internal and terminal segments
|
|
## and instantiate single exons where they're not simply supporting existing structures.
|
|
print "-Analyzing intronless alignments\n" if $SEE;
|
|
$self->_analyze_intronless_alignments(@single_segments);
|
|
|
|
print "-Building the splice graph\n" if $SEE;
|
|
$self->_build_splice_graph(); # could/should have done this earlier during alignment parse for efficiency.
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
####
|
|
sub _find_internal_exons_encompassing_coords_and_share_boundary {
|
|
my $self = shift;
|
|
my ($exon_type, $segment_lend, $segment_rend, $segment_orient) = @_;
|
|
|
|
unless ($exon_type =~ /left|right/) {
|
|
confess "invalide terminal exon type: $exon_type\n";
|
|
}
|
|
|
|
my @internal_exons_found;
|
|
|
|
foreach my $internal_exon (@{$self->{_internal_exons}}) {
|
|
my $internal_exon_orient = $internal_exon->get_orient();
|
|
my ($internal_exon_lend, $internal_exon_rend) = $internal_exon->get_coords();
|
|
|
|
## if internal exon encompasses the terminal exon, add it to its evidence collection
|
|
if ( ($segment_orient eq $internal_exon_orient)
|
|
&&
|
|
($internal_exon_lend <= ($segment_lend + $FUZZ_DIST) && ($segment_rend - $FUZZ_DIST) <= $internal_exon_rend) # internal encompasses it
|
|
&&
|
|
( ($exon_type eq "terminal_right_exon" && $internal_exon_lend == $segment_lend)
|
|
||
|
|
($exon_type eq "terminal_left_exon" && $internal_exon_rend == $segment_rend) ) ## a splice boundary in common
|
|
)
|
|
{
|
|
push (@internal_exons_found, $internal_exon);
|
|
}
|
|
}
|
|
return (@internal_exons_found);
|
|
}
|
|
|
|
sub _find_terminal_exons_encompassing_segment {
|
|
my $self = shift;
|
|
my ($lend, $rend, $orient) = @_;
|
|
|
|
my @nodes_encompassing_segment;
|
|
foreach my $node_obj (@{$self->{_terminal_left_exons}}, @{$self->{_terminal_right_exons}}) {
|
|
my ($node_lend, $node_rend) = $node_obj->get_coords();
|
|
my $node_orient = $node_obj->get_orient();
|
|
if ( ($node_orient eq $orient || $orient eq '?') &&
|
|
($lend + $FUZZ_DIST >= $node_lend) &&
|
|
($rend - $FUZZ_DIST <= $node_rend) ) {
|
|
push (@nodes_encompassing_segment, $node_obj);
|
|
}
|
|
}
|
|
|
|
return (@nodes_encompassing_segment);
|
|
}
|
|
|
|
|
|
|
|
|
|
####
|
|
sub get_assemblies {
|
|
my $self = shift;
|
|
return (@{$self->{_assemblies}});
|
|
}
|
|
|
|
####
|
|
sub get_incoming_alignments {
|
|
my $self = shift;
|
|
return (@{$self->{_incoming_alignments}});
|
|
}
|
|
|
|
####
|
|
sub get_unincorporated_alignments {
|
|
my $self = shift;
|
|
return (@{$self->{_unincorporated_alignments}});
|
|
}
|
|
|
|
|
|
####
|
|
sub _add_alignment_assembly {
|
|
my $self = shift;
|
|
my (@cdna_alignments) = @_;
|
|
|
|
push (@{$self->{_assemblies}}, @cdna_alignments);
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
####
|
|
sub set_incoming_alignments {
|
|
my $self = shift;
|
|
my @alignments = @_;
|
|
@{$self->{_incoming_alignments}} = @alignments;
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub _decompose_unambiguous_alignment_structures_add_nodes {
|
|
my $self = shift;
|
|
my ($alignment) = @_;
|
|
|
|
my $spliced_orient = $alignment->get_spliced_orientation();
|
|
|
|
my @path_nodes;
|
|
|
|
## Add internal exons
|
|
my @segments = $alignment->get_alignment_segments();
|
|
foreach my $segment (@segments) {
|
|
#print "seg: " . $segment->toString() . "\n";
|
|
if ($segment->is_internal()) {
|
|
my ($exon_lend, $exon_rend) = $segment->get_coords();
|
|
my $node = $self->_add_internal_exon($exon_lend, $exon_rend, $spliced_orient);
|
|
push (@path_nodes, $node);
|
|
}
|
|
}
|
|
|
|
## Add introns
|
|
my @intron_coords = $alignment->get_intron_coords();
|
|
|
|
foreach my $intron_coordset (@intron_coords) {
|
|
my ($intron_lend, $intron_rend) = @$intron_coordset; #already sorted
|
|
my $node = $self->_add_intron($intron_lend, $intron_rend, $spliced_orient);
|
|
push (@path_nodes, $node);
|
|
}
|
|
|
|
## add a path
|
|
$self->_extract_and_add_path_from_node_list(@path_nodes);
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub _extract_and_add_path_from_node_list {
|
|
my $self = shift;
|
|
my @path_nodes = @_;
|
|
|
|
## sort by genome order:
|
|
@path_nodes = sort {$a->{lend} <=> $b->{lend}} @path_nodes;
|
|
|
|
my @ordered_nodeID_list;
|
|
my @coords;
|
|
foreach my $path_node (@path_nodes) {
|
|
my $nodeID = $path_node->get_nodeID();
|
|
push (@ordered_nodeID_list, $nodeID);
|
|
push (@coords, $path_node->get_coords());
|
|
|
|
}
|
|
my $orient = $path_nodes[0]->get_orient();
|
|
@coords = sort {$a<=>$b} @coords;
|
|
my $lend = shift @coords;
|
|
my $rend = pop @coords;
|
|
my $splice_graph_path = Splice_graph_path->new($lend, $rend, $orient, \@ordered_nodeID_list);
|
|
|
|
$self->_add_splice_graph_path($splice_graph_path);
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _try_terminal_exon_addition {
|
|
my $self = shift;
|
|
my ($exon_type, $segment, $orient) = @_;
|
|
|
|
my ($segment_lend, $segment_rend) = $segment->get_coords();
|
|
|
|
print "Examining terminal exon: $exon_type, $segment_lend, $segment_rend\n" if $SEE;
|
|
|
|
## only consider this a genuine terminal exon if it doesn't appear to be part of an
|
|
## existing internal exon from a more complete alignment
|
|
if (my @internal_segments = $self->_find_internal_exons_encompassing_coords_and_share_boundary($exon_type, $segment_lend, $segment_rend, $orient)) {
|
|
if ($SEE) {
|
|
print "$exon_type, $segment_lend-$segment_rend, $orient, found already represented by internal exons:\n";
|
|
foreach my $internal_segment (@internal_segments) {
|
|
print "\t" . $internal_segment->toString() . "\n";
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
my $exon = $self->_find_existing_terminal_exon ($exon_type, $segment_lend, $segment_rend, $orient);
|
|
if ($exon) {
|
|
print "-found existing terminal exon with shared boundary: " . $exon->toString() . "\n" if $SEE;
|
|
my $nodeID = $exon->get_nodeID();
|
|
## check for boundary adjustment:
|
|
my ($exon_lend, $exon_rend) = $exon->get_coords();
|
|
my $nonsplice_coord = undef;
|
|
if ($exon_type eq "terminal_left_exon") {
|
|
$nonsplice_coord = $segment_lend;
|
|
if ($segment_lend < $exon_lend) {
|
|
print "-extending left boundary to $segment_lend\n" if $SEE;
|
|
$exon->set_coords($segment_lend, $exon_rend); # extend left boundary
|
|
}
|
|
}
|
|
elsif ($exon_type eq "terminal_right_exon") {
|
|
$nonsplice_coord = $segment_rend;
|
|
if ($segment_rend > $exon_rend) {
|
|
print "-extending right boundary to $segment_rend\n" if $SEE;
|
|
$exon->set_coords($exon_lend, $segment_rend);
|
|
}
|
|
}
|
|
else {
|
|
confess "Error, exon type $exon_type not accounted for. "; # should never get here anyway
|
|
}
|
|
$exon->increment_evidence_support(); ## account for extra evidence
|
|
$self->_add_to_terminal_exon_nonsplice_position_list($nodeID, $nonsplice_coord);
|
|
|
|
}
|
|
else {
|
|
## add new terminal exon:
|
|
$self->_add_graph_node($exon_type, $segment_lend, $segment_rend, $orient);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _add_to_terminal_exon_nonsplice_position_list {
|
|
my $self = shift;
|
|
my ($nodeID, $nonsplice_coord) = @_;
|
|
|
|
my $nodeID_to_pos_list_href = $self->{_terminal_exon_nodeID_to_nonsplice_position_list};
|
|
|
|
my $list_aref = $nodeID_to_pos_list_href->{$nodeID};
|
|
unless (ref $list_aref) {
|
|
$list_aref = $nodeID_to_pos_list_href->{$nodeID} = [];
|
|
}
|
|
push (@$list_aref, $nonsplice_coord);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub _find_existing_terminal_exon {
|
|
my $self = shift;
|
|
my ($exon_type, $segment_lend, $segment_rend, $orient) = @_;
|
|
my $exon_list_aref = undef;
|
|
if ($exon_type eq "terminal_left_exon") {
|
|
$exon_list_aref = $self->{_terminal_left_exons};
|
|
}
|
|
elsif ($exon_type eq "terminal_right_exon") {
|
|
$exon_list_aref = $self->{_terminal_right_exons};
|
|
}
|
|
else {
|
|
confess "exon_type $exon_type not accepted for terminal exons";
|
|
}
|
|
|
|
foreach my $exon (@$exon_list_aref) {
|
|
my ($lend, $rend) = $exon->get_coords();
|
|
if (
|
|
($exon->get_orient() eq $orient) &&
|
|
(
|
|
($exon_type eq "terminal_left_exon" && $rend == $segment_rend)
|
|
||
|
|
($exon_type eq "terminal_right_exon" && $lend == $segment_lend)
|
|
)
|
|
)
|
|
{
|
|
|
|
return ($exon); # found it!
|
|
}
|
|
}
|
|
|
|
return (undef); # didn't find one.
|
|
}
|
|
|
|
####
|
|
sub _augment_internal_exon_scores_using_terminal_alignment_segment {
|
|
my $self = shift;
|
|
my ($exon_type, $segment_lend, $segment_rend, $segment_orient) = @_;
|
|
|
|
my @relevant_internal_exons = $self->_find_internal_exons_encompassing_coords_and_share_boundary($exon_type, $segment_lend, $segment_rend, $segment_orient);
|
|
foreach my $internal_exon (@relevant_internal_exons) {
|
|
|
|
$internal_exon->increment_evidence_support();
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _merge_overlapping_right_to_left_terminal_exons {
|
|
my $self = shift;
|
|
|
|
## looking for this situation:
|
|
# <----------- right terminal exon
|
|
# ---------> left terminal exon
|
|
# that can be merged into:
|
|
# <--------------> an internal exon
|
|
#
|
|
|
|
my %nodeIDs_targeted_for_removal; # if they fully overlap, construct a nice internal exon w/o extensions beyond splice boundary
|
|
|
|
foreach my $right_terminal_exon (@{$self->{_terminal_right_exons}}) {
|
|
my $right_orient = $right_terminal_exon->get_orient();
|
|
my ($right_lend, $right_rend) = $right_terminal_exon->get_coords();
|
|
my $right_nodeID = $right_terminal_exon->get_nodeID();
|
|
|
|
foreach my $left_terminal_exon (@{$self->{_terminal_left_exons}}) {
|
|
my $left_orient = $left_terminal_exon->get_orient();
|
|
my ($left_lend, $left_rend) = $left_terminal_exon->get_coords();
|
|
my $left_nodeID = $left_terminal_exon->get_nodeID();
|
|
|
|
unless ($right_orient eq $left_orient) { next; } # must be transcribed on same strand!
|
|
|
|
## check for overlap:
|
|
unless ($left_lend <= $right_rend && $left_rend >= $right_lend) { next;}
|
|
|
|
## make sure right's left splice is before left's right splice (doh! should have better names).
|
|
unless ($right_lend < $left_rend) { next; }
|
|
|
|
## check for extensions:
|
|
my $left_overhang = $right_lend - $left_lend;
|
|
my $right_overhang = $right_rend - $left_rend;
|
|
|
|
my $merge_flag = 0;
|
|
if ($left_overhang <= $FUZZ_DIST && $right_overhang <= $FUZZ_DIST) {
|
|
## nice merge, as in illustration
|
|
$merge_flag = 1;
|
|
## also, target these for deletion now.
|
|
$nodeIDs_targeted_for_removal{$right_nodeID} = 1;
|
|
$nodeIDs_targeted_for_removal{$left_nodeID} = 1;
|
|
}
|
|
else {
|
|
## must check position lists to see if transcripts are contained that have
|
|
## nicely overlapping boundaries
|
|
if ($self->_right_left_terminal_exons_overlap_via_position_lists($right_nodeID, $left_nodeID, $right_lend, $left_rend)) {
|
|
$merge_flag = 1;
|
|
if ($left_overhang <= $FUZZ_DIST) { ## check for insufficient overhang
|
|
$nodeIDs_targeted_for_removal{$left_nodeID} = 1;
|
|
}
|
|
if ($right_overhang <= $FUZZ_DIST) {
|
|
$nodeIDs_targeted_for_removal{$right_nodeID} = 1;
|
|
}
|
|
}
|
|
}
|
|
if ($merge_flag) {
|
|
$self->_merge_terminal_exons($right_terminal_exon, $left_terminal_exon);
|
|
}
|
|
}
|
|
}
|
|
|
|
## process deletions:
|
|
if (%nodeIDs_targeted_for_removal) {
|
|
$self->_purge_nodes(%nodeIDs_targeted_for_removal);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _merge_terminal_exons {
|
|
my $self = shift;
|
|
my ($right_terminal_exon, $left_terminal_exon) = @_;
|
|
|
|
my ($right_lend, $right_rend) = $right_terminal_exon->get_coords();
|
|
my ($left_lend, $left_rend) = $left_terminal_exon->get_coords();
|
|
|
|
my ($exon_lend, $exon_rend) = ($right_lend, $left_rend); ## splice junctions for new internal exon
|
|
my $orient = $right_terminal_exon->get_orient();
|
|
if ($orient ne $left_terminal_exon->get_orient()) {
|
|
confess "Error, trying to merge two terminal exons with opposite transcriptional orientations!";
|
|
}
|
|
|
|
if ($self->_graph_node_exists("internal_exon", $exon_lend, $exon_rend, $orient)) {
|
|
confess "Error, trying to merge two terminal exons into an internal exon that already exists!";
|
|
}
|
|
|
|
my $node = $self->_add_graph_node("internal_exon", $exon_lend, $exon_rend, $orient);
|
|
## add to it the evidence from the other nodes being merged:
|
|
my $evidence_support_to_add = $left_terminal_exon->get_num_evidence_support() + $right_terminal_exon->get_num_evidence_support();
|
|
$evidence_support_to_add -= 1; # node already has value of one.
|
|
if ($evidence_support_to_add) {
|
|
$node->increment_evidence_support($evidence_support_to_add);
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub _add_splice_graph_path {
|
|
my $self = shift;
|
|
my ($splice_graph_path) = @_;
|
|
|
|
## add it as long as:
|
|
# -it's not a subpath of an already existing path
|
|
# -if an existing path is a subpath of this, remove it and replace it with this path
|
|
|
|
my @current_valid_splice_paths = $self->_get_valid_splice_paths();
|
|
|
|
## check to see if splice_graph_path is already represented in the current path list:
|
|
foreach my $current_valid_splice_path (@current_valid_splice_paths) {
|
|
if ($splice_graph_path->is_subpath_of($current_valid_splice_path)) {
|
|
return; # nothing to do; path is already included as a subset of the current path set
|
|
}
|
|
}
|
|
|
|
# if got this far, our new path is not already fully represented.
|
|
# add it, and any other existing splice paths that are not a subpath of it.
|
|
my @new_valid_splice_paths = ($splice_graph_path);
|
|
foreach my $current_valid_splice_path (@current_valid_splice_paths) {
|
|
if (! $current_valid_splice_path->is_subpath_of($splice_graph_path)) {
|
|
push (@new_valid_splice_paths, $current_valid_splice_path);
|
|
}
|
|
}
|
|
|
|
$self->_set_valid_splice_paths(@new_valid_splice_paths);
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _get_valid_splice_paths {
|
|
my $self = shift;
|
|
return (@{$self->{_valid_splice_paths}});
|
|
}
|
|
|
|
####
|
|
sub _set_valid_splice_paths {
|
|
my $self = shift;
|
|
my @valid_splice_paths = @_;
|
|
## completely stomps the existing contents!!!
|
|
|
|
@{$self->{_valid_splice_paths}} = @valid_splice_paths;
|
|
return;
|
|
}
|
|
|
|
sub _graph_node_exists {
|
|
my $self = shift;
|
|
my ($type, $lend, $rend, $orient) = @_;
|
|
my $hashkey = $self->_get_hash_key($type, $lend, $rend, $orient);
|
|
return (exists $self->{_graph_node_hashkey_lookup}->{$hashkey});
|
|
}
|
|
|
|
sub _get_graph_node_via_coords_n_type {
|
|
my $self = shift;
|
|
my ($type, $lend, $rend, $orient) = @_;
|
|
my $hashkey = $self->_get_hash_key($type, $lend, $rend, $orient);
|
|
my $node = $self->{_graph_node_hashkey_lookup}->{$hashkey};
|
|
unless ($node) {
|
|
confess "Error, no graph node retrieved based on data ($type, $lend, $rend, $orient)";
|
|
}
|
|
return ($node);
|
|
}
|
|
|
|
|
|
|
|
####
|
|
sub _add_graph_node {
|
|
my $self = shift;
|
|
my ($type, $lend, $rend, $orient) = @_;
|
|
my $hash_key = $self->_get_hash_key($type, $lend, $rend, $orient);
|
|
|
|
## only add the node if it doesn't already exist!
|
|
my $graph_node_hashkey_lookup_href = $self->{_graph_node_hashkey_lookup};
|
|
if (my $existing_node = $graph_node_hashkey_lookup_href->{$hash_key}) {
|
|
## increment evidence for existing node:
|
|
$existing_node->increment_evidence_support();
|
|
return ($existing_node);
|
|
}
|
|
else {
|
|
# add it
|
|
my $node = Splice_graph_node->new($type, $lend, $rend, $orient);
|
|
push (@{$self->{_graph_nodes}}, $node); # add to complete node list
|
|
## helpers for node access:
|
|
$graph_node_hashkey_lookup_href->{$hash_key} = $node; # store in lookup table
|
|
my $nodeID = $node->get_nodeID();
|
|
$self->{_graph_node_via_nodeID}->{$nodeID} = $node;
|
|
|
|
## splay based on type:
|
|
my $type = $node->get_type();
|
|
|
|
if ($type eq "internal_exon") {
|
|
push (@{$self->{_internal_exons}}, $node);
|
|
}
|
|
elsif ($type eq "intron") {
|
|
push (@{$self->{_introns}}, $node);
|
|
}
|
|
elsif ($type eq "terminal_left_exon") {
|
|
push (@{$self->{_terminal_left_exons}}, $node);
|
|
}
|
|
elsif ($type eq "terminal_right_exon") {
|
|
push (@{$self->{_terminal_right_exons}}, $node);
|
|
}
|
|
elsif ($type eq "singleton_exon") {
|
|
push (@{$self->{_singleton_exons}}, $node);
|
|
}
|
|
else {
|
|
confess "Error, do not recognize type: $type for node";
|
|
}
|
|
return ($node);
|
|
}
|
|
|
|
}
|
|
|
|
####
|
|
sub _add_intron {
|
|
my $self = shift;
|
|
my ($intron_lend, $intron_rend, $spliced_orient) = @_;
|
|
|
|
my $node = $self->_add_graph_node("intron", $intron_lend, $intron_rend, $spliced_orient);
|
|
return ($node);
|
|
}
|
|
|
|
####
|
|
sub _add_internal_exon {
|
|
my $self = shift;
|
|
my ($exon_lend, $exon_rend, $spliced_orient) = @_;
|
|
my $node = $self->_add_graph_node("internal_exon", $exon_lend, $exon_rend, $spliced_orient);
|
|
return ($node);
|
|
}
|
|
|
|
|
|
####
|
|
sub get_graph_nodes {
|
|
my $self = shift;
|
|
return (sort {$a->{lend}<=>$b->{lend}} @{$self->{_graph_nodes}});
|
|
}
|
|
|
|
|
|
|
|
|
|
####
|
|
sub get_graph_node_via_nodeID {
|
|
my $self = shift;
|
|
my $nodeID = shift;
|
|
my $node_obj = $self->{_graph_node_via_nodeID}->{$nodeID} or confess "Error, no node found based on nodeID: $nodeID\n" . $self->toString();
|
|
return ($node_obj);
|
|
}
|
|
|
|
####
|
|
sub toString {
|
|
my $self = shift;
|
|
my @graph_nodes = $self->get_graph_nodes();
|
|
my $num_graph_nodes = scalar (@graph_nodes);
|
|
my $text = "Splice_graph_assembler instance with $num_graph_nodes graph nodes:\n";
|
|
foreach my $graph_node (@graph_nodes) {
|
|
$text .= $graph_node->toString() . "\n";
|
|
}
|
|
|
|
$text .= "\tvalid paths thru nodes:\n";
|
|
foreach my $splice_path ($self->_get_valid_splice_paths()) {
|
|
$text .= "\t" . $splice_path->toString() . "\n";
|
|
}
|
|
|
|
$text .= "\tassembled splice paths:\n";
|
|
foreach my $splice_path (@{$self->{_assembled_splice_paths}}) {
|
|
$text .= "\t" . $splice_path->toString() . "\n";
|
|
}
|
|
|
|
$text .= "\tFinal assemblies with termini\n";
|
|
foreach my $assembly ($self->get_assemblies()) {
|
|
my @node_list = @{$assembly->{__Splice_graph_assembler_nodeID_list}};
|
|
$text .= "\t" . join (",", @node_list) . "\n";
|
|
}
|
|
|
|
$text .= $self->toAlignIllustration(60);
|
|
|
|
return ($text);
|
|
|
|
}
|
|
|
|
=item toAlignIllustration()
|
|
|
|
=over 4
|
|
|
|
B<Description:> illustrates the individual cDNAs to be assembled along with the final products.
|
|
|
|
B<Parameters:> $max_line_chars(optional)
|
|
|
|
$max_line_chars is an integer representing the maximum number of characters in a single line of output to the terminal. The default is 100.
|
|
|
|
B<Returns:> $alignment_illustration_text
|
|
|
|
$alignment_illustration_text is a string containing a paragraph of text which illustrates the alignments and assemblies. An example is below:
|
|
|
|
---> <--> <-----> <---> <---------------- (+)gi|1199466
|
|
|
|
---> <--> <-----> <---> <------------ (+)gi|1209702
|
|
|
|
----> <--> <---- (+)AV827070
|
|
|
|
----> <--> <--- (+)AV828861
|
|
|
|
----> <--> <--- (+)AV830936
|
|
|
|
---> <--> <- (+)H36350
|
|
|
|
ASSEMBLIES: (1)
|
|
|
|
----> <--> <-----> <---> <---------------- (+) gi|1199466, gi|1209702, AV827070, AV828861, AV830936, H36350
|
|
|
|
|
|
|
|
|
|
=back
|
|
|
|
=cut
|
|
|
|
;
|
|
|
|
sub toAlignIllustration () {
|
|
my $self = shift;
|
|
my $max_line_chars = shift;
|
|
$max_line_chars = ($max_line_chars) ? $max_line_chars : 100; #if not specified, 100 chars / line is default.
|
|
|
|
## Get minimum coord for relative positioning.
|
|
my @coords;
|
|
my @alignments = @{$self->{_incoming_alignments}};
|
|
foreach my $alignment (@alignments) {
|
|
my @c = $alignment->get_coords();
|
|
push (@coords, @c);
|
|
}
|
|
@coords = sort {$a<=>$b} @coords;
|
|
print "coords: @coords\n" if $::SEE;
|
|
my $min_coord = shift @coords;
|
|
my $max_coord = pop @coords;
|
|
my $rel_max = $max_coord - $min_coord;
|
|
my $alignment_text = "";
|
|
## print each alignment followed by assemblies:
|
|
my $num_alignments = $#alignments + 1;
|
|
$alignment_text .= "Individual Alignments: ($num_alignments)\n";
|
|
my $i = 0;
|
|
foreach my $alignment (@alignments) {
|
|
$alignment_text .= (sprintf ("%3d ", $i)) . $alignment->toAlignIllustration($min_coord, $rel_max, $max_line_chars) . "\n";
|
|
$i++;
|
|
}
|
|
|
|
my @assemblies = @{$self->{_assemblies}};
|
|
my $num_assemblies = $#assemblies + 1;
|
|
$alignment_text .= "\n\nASSEMBLIES: ($num_assemblies)\n";
|
|
foreach my $assembly (@assemblies) {
|
|
$alignment_text .= " " . $assembly->toAlignIllustration($min_coord, $rel_max, $max_line_chars) . "\n";
|
|
}
|
|
|
|
if (my @unincorporated_alignments = $self->get_unincorporated_alignments()) {
|
|
my $num_unincorporated = scalar @unincorporated_alignments;
|
|
$alignment_text .= "\n\nUNINCORPORATED_ALIGNMENTS($num_unincorporated)\n";
|
|
foreach my $alignment (@unincorporated_alignments) {
|
|
$alignment_text .= " " . $alignment->toAlignIllustration($min_coord, $rel_max, $max_line_chars) . "\n";
|
|
}
|
|
}
|
|
|
|
return ($alignment_text);
|
|
}
|
|
|
|
####
|
|
sub _get_hash_key {
|
|
my $self = shift;
|
|
my ($type, $lend, $rend, $orient) = @_;
|
|
return ("$type,$lend,$rend,$orient");
|
|
}
|
|
|
|
####
|
|
sub _purge_nodes {
|
|
my $self = shift;
|
|
my (%nodeIDs) = @_;
|
|
|
|
## perform deletions based on hashkeys
|
|
foreach my $nodeID (keys %nodeIDs) {
|
|
print "PURGING node: $nodeID\n" if $SEE;
|
|
my $node = $self->{_graph_node_via_nodeID}->{$nodeID};
|
|
my $type = $node->get_type();
|
|
my ($lend, $rend) = $node->get_coords();
|
|
my $orient = $node->get_orient();
|
|
|
|
my $hashkey = $self->_get_hash_key($type, $lend, $rend, $orient);
|
|
delete $self->{_graph_node_hashkey_lookup}->{$hashkey};
|
|
delete $self->{_graph_node_via_nodeID}->{$nodeID};
|
|
}
|
|
|
|
## now, do array replacements:
|
|
foreach my $node_list_aref ( $self->{_graph_nodes},
|
|
## only deleting the terminal exons, even though this could be more generic
|
|
$self->{_terminal_left_exons},
|
|
$self->{_terminal_right_exons},
|
|
) {
|
|
my @replacments;
|
|
my $need_replacement_flag = 0;
|
|
foreach my $node (@$node_list_aref) {
|
|
my $nodeID = $node->get_nodeID();
|
|
if ($nodeIDs{$nodeID}) {
|
|
## must delete!
|
|
$need_replacement_flag = 1;
|
|
}
|
|
else {
|
|
push (@replacments, $node);
|
|
}
|
|
}
|
|
if ($need_replacement_flag) {
|
|
@$node_list_aref = @replacments; ## Doing Replacment
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _right_left_terminal_exons_overlap_via_position_lists {
|
|
my $self = shift;
|
|
my ($right_nodeID, $left_nodeID, $left_splice_coord, $right_splice_coord) = @_;
|
|
|
|
## looking for the following:
|
|
#
|
|
# ## termini of transcripts in right terminal
|
|
# <---------------------X---------------------- ## right terminal exon
|
|
# -------------------X---------------------------> ## left terminal exon
|
|
# ## termini of transcripts in left terminal
|
|
#
|
|
#
|
|
# The X termini show the endpoints of other transcripts incorporated that would define a proper merging situation.
|
|
|
|
# look for boundary pair such that both are included within the splice junctions
|
|
# and right boundary >= left boundary
|
|
|
|
my $right_pos_list_aref = $self->{_terminal_exon_nodeID_to_nonsplice_position_list}->{$right_nodeID};
|
|
my $left_pos_list_aref = $self->{_terminal_exon_nodeID_to_nonsplice_position_list}->{$left_nodeID};
|
|
|
|
foreach my $right_pos (@$right_pos_list_aref) {
|
|
unless ($right_pos > $left_splice_coord && $right_pos < $right_splice_coord) { next; }
|
|
|
|
foreach my $left_pos (@$left_pos_list_aref) {
|
|
unless ($left_pos > $left_splice_coord && $left_pos < $right_splice_coord) { next; }
|
|
|
|
if ($right_pos >= $left_pos) {
|
|
return (1); # found suitable case
|
|
}
|
|
}
|
|
}
|
|
|
|
return (0); # no such example found.
|
|
}
|
|
|
|
####
|
|
sub _analyze_intronless_alignments {
|
|
my $self = shift;
|
|
my @intronless_segments = @_; ## actually alignment objects.
|
|
|
|
|
|
print "method: _analyze_intronless_alignments()\n" if $SEE;
|
|
|
|
my %applied_segment_indices;
|
|
|
|
{
|
|
## hack in a hidden attribute that uniquely identifies each of these segments.
|
|
## perl allows this, but I'm not so happy with doing it. for now, it'll be fine.
|
|
## This __intronless_segment_index will be used to identify those segments that are
|
|
## applied to existing structures (internal/terminal exons), and those left over
|
|
## and still need to be accounted for.
|
|
|
|
my $id = 0;
|
|
foreach my $intronless_segment (@intronless_segments) {
|
|
$id++;
|
|
$intronless_segment->{__intronless_segment_ID} = $id;
|
|
}
|
|
}
|
|
|
|
## increment evidence for internal exons containing intronless segment
|
|
## and extend terminal exons overlapping intronless segments
|
|
|
|
## first, examine the internal_segments:
|
|
foreach my $internal_exon (@{$self->{_internal_exons}}) {
|
|
my $internal_exon_orient = $internal_exon->get_orient();
|
|
my ($internal_exon_lend, $internal_exon_rend) = $internal_exon->get_coords();
|
|
|
|
|
|
|
|
foreach my $intronless_segment (@intronless_segments) {
|
|
my $intronless_segment_orient = $intronless_segment->get_spliced_orientation();
|
|
my ($intronless_lend, $intronless_rend) = $intronless_segment->get_coords();
|
|
|
|
if ($intronless_segment_orient eq '?' || $intronless_segment_orient eq $internal_exon_orient) {
|
|
|
|
## look for encapsulation
|
|
if ( ($intronless_lend + $FUZZ_DIST) >= $internal_exon_lend &&
|
|
($intronless_rend - $FUZZ_DIST) <= $internal_exon_rend) {
|
|
|
|
$internal_exon->increment_evidence_support();
|
|
$applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} } = 1; # incorporated already
|
|
|
|
print "-intronless segment $intronless_lend, $intronless_rend, $intronless_segment_orient found incorporated in internal exon: " . $internal_exon->toString() . "\n" if $SEE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
## examine left terminal exons
|
|
## first sort so we examine the right boundaries in order from right to left to faciliate extensions
|
|
@intronless_segments = reverse sort {$a->{rend}<=>$b->{rend}} @intronless_segments;
|
|
|
|
foreach my $left_terminal_exon (@{$self->{_terminal_left_exons}}) {
|
|
my $left_terminal_exon_orient = $left_terminal_exon->get_orient();
|
|
|
|
# -----------------> # left terminal exon
|
|
|
|
foreach my $intronless_segment (@intronless_segments) {
|
|
|
|
my ($left_terminal_lend, $left_terminal_rend) = $left_terminal_exon->get_coords(); # do it here because it might change below
|
|
my $intronless_segment_orient = $intronless_segment->get_spliced_orientation();
|
|
my ($intronless_lend, $intronless_rend) = $intronless_segment->get_coords();
|
|
|
|
unless ($intronless_segment_orient eq '?' || $intronless_segment_orient eq $left_terminal_exon_orient) { next; }
|
|
|
|
## must overlap
|
|
unless ($left_terminal_lend <= $intronless_rend && $left_terminal_rend >= $intronless_lend) { next; }
|
|
|
|
unless ($intronless_rend - $FUZZ_DIST <= $left_terminal_rend) { next; }
|
|
## must be :
|
|
# -----------------------> # left terminal exon
|
|
# ---------------- # intronless segment, overlaps but not passed the splice boundary
|
|
|
|
# extend lend if intronless segment passes it
|
|
if ($intronless_lend < $left_terminal_lend) {
|
|
# update coords:
|
|
$left_terminal_exon->set_coords($intronless_lend, $left_terminal_rend);
|
|
}
|
|
|
|
## if got this far, evidence is incorporated.
|
|
$left_terminal_exon->increment_evidence_support();
|
|
$applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} } = 1; # incorporated
|
|
print "-intronless segment $intronless_lend, $intronless_rend, $intronless_segment_orient incorporated in left terminal exon: " . $left_terminal_exon->toString() . "\n" if $SEE;
|
|
}
|
|
}
|
|
|
|
## examine right terminal exons
|
|
## first sort so we examine the left boundaries in order from left to right to faciliate extensions
|
|
@intronless_segments = sort {$a->{lend}<=>$b->{lend}} @intronless_segments;
|
|
|
|
foreach my $right_terminal_exon (@{$self->{_terminal_right_exons}}) {
|
|
my $right_terminal_exon_orient = $right_terminal_exon->get_orient();
|
|
|
|
# <----------------- # right terminal exon
|
|
|
|
foreach my $intronless_segment (@intronless_segments) {
|
|
|
|
my ($right_terminal_lend, $right_terminal_rend) = $right_terminal_exon->get_coords(); # do it here because it might change below
|
|
my $intronless_segment_orient = $intronless_segment->get_spliced_orientation();
|
|
my ($intronless_lend, $intronless_rend) = $intronless_segment->get_coords();
|
|
|
|
unless ($intronless_segment_orient eq '?' || $intronless_segment_orient eq $right_terminal_exon_orient) { next; }
|
|
|
|
## must overlap
|
|
unless ($right_terminal_lend <= $intronless_rend && $right_terminal_rend >= $intronless_lend) { next; }
|
|
|
|
## must be :
|
|
# <---------------- # right terminal exon
|
|
# ---------------- # intronless segment, overlaps but not passed the splice boundary
|
|
|
|
unless ($intronless_lend + $FUZZ_DIST >= $right_terminal_lend) { next; }
|
|
|
|
# extend lend if intronless segment passes it
|
|
if ($intronless_rend > $right_terminal_rend) {
|
|
# update coords:
|
|
$right_terminal_exon->set_coords($right_terminal_lend, $intronless_rend);
|
|
}
|
|
|
|
## if got this far, evidence is incorporated.
|
|
$right_terminal_exon->increment_evidence_support();
|
|
$applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} } = 1; # incorporated
|
|
print "-intronless segment $intronless_lend, $intronless_rend, $intronless_segment_orient incorporated in right terminal exon: " . $right_terminal_exon->toString() . "\n" if $SEE;
|
|
}
|
|
}
|
|
|
|
## check to see if there are any unincorporated single segments, and if so, instantiate maximal single segments that contain them.
|
|
my $have_leftover_single_segments_flag = 0;
|
|
foreach my $intronless_segment (@intronless_segments) {
|
|
unless ($applied_segment_indices{ $intronless_segment->{__intronless_segment_ID} }) {
|
|
$have_leftover_single_segments_flag = 1;
|
|
last;
|
|
}
|
|
}
|
|
unless ($have_leftover_single_segments_flag) {
|
|
## all done; they've all been accounted for.
|
|
print "-All intronless segments are accounted for.\n" if $SEE;
|
|
return;
|
|
}
|
|
|
|
## if got here, we have some single segments that haven't been accounted for.
|
|
print "-some intronless segments are as of yet unincorporated. Need to instantiate singleton exons\n" if $SEE;
|
|
$self->_instantiate_maximal_single_exons(\@intronless_segments, \%applied_segment_indices);
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _instantiate_maximal_single_exons {
|
|
my $self = shift;
|
|
my ($intronless_alignments_aref, $applied_segment_indices_href) = @_;
|
|
|
|
print "\nmethod: _instantiate_maximal_single_exons\n" if $SEE;
|
|
|
|
## get all coords for segments:
|
|
my %segment_id_to_coords;
|
|
my %segment_id_to_alignment;
|
|
my $plus_strand_overlap_assembler = new Overlap_piler();
|
|
my $minus_strand_overlap_assembler = new Overlap_piler();
|
|
my $got_plus_strand_flag = 0;
|
|
my $got_minus_strand_flag = 0;
|
|
|
|
foreach my $alignment (@$intronless_alignments_aref) {
|
|
print "Got intronless alignment: " . $alignment->toToken() . "\n" if $SEE;
|
|
my ($align_lend, $align_rend) = $alignment->get_coords();
|
|
my $align_id = $alignment->{__intronless_segment_ID};
|
|
$segment_id_to_coords{$align_id} = [$align_lend, $align_rend];
|
|
$segment_id_to_alignment{$align_id} = $alignment;
|
|
|
|
my $spliced_orient = $alignment->get_spliced_orientation();
|
|
if ($spliced_orient eq '+' || $spliced_orient eq '?') {
|
|
$plus_strand_overlap_assembler->add_coordSet($align_id, $align_lend, $align_rend);
|
|
print "Adding $align_lend-$align_rend [s$spliced_orient] to plus strand assembler\n" if $SEE;
|
|
if ($spliced_orient eq '+') {
|
|
$got_plus_strand_flag = 1;
|
|
}
|
|
}
|
|
if ($spliced_orient eq '-' || $spliced_orient eq '?') {
|
|
$minus_strand_overlap_assembler->add_coordSet($align_id, $align_lend, $align_rend);
|
|
print "Adding $align_lend-$align_rend [s$spliced_orient] to minus strand assembler\n" if $SEE;
|
|
if ($spliced_orient eq '-') {
|
|
$got_minus_strand_flag = 1;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
## unless we have some known spliced orientation, then just assemble everything using the plus strand assembler; either could be used w/ no difference.
|
|
unless ($got_plus_strand_flag || $got_minus_strand_flag) {
|
|
$got_plus_strand_flag = 1;
|
|
}
|
|
|
|
|
|
my @singleton_clusters;
|
|
if ($got_plus_strand_flag) {
|
|
print "\nBuilding Plus strand clusters of singletons:\n" if $SEE;
|
|
my @plus_strand_clusters = $plus_strand_overlap_assembler->build_clusters();
|
|
if ($SEE) {
|
|
print "\nSingleton assemblies, PLUS strand assembler:\n";
|
|
@plus_strand_clusters = sort {$a->[0]<=>$b->[0]} @plus_strand_clusters;
|
|
foreach my $cluster (@plus_strand_clusters) {
|
|
print "clustered IDs [+]: " . join ("-", @$cluster) . "\n";
|
|
}
|
|
}
|
|
push (@singleton_clusters, @plus_strand_clusters);
|
|
}
|
|
if ($got_minus_strand_flag) {
|
|
print "\nBuilding Minus strand clusters of singletons:\n" if $SEE;
|
|
my @minus_strand_clusters = $minus_strand_overlap_assembler->build_clusters();
|
|
if ($SEE) {
|
|
print "\nSingleton assemblies, MINUS strand assembler:\n";
|
|
@minus_strand_clusters = sort {$a->[0]<=>$b->[0]} @minus_strand_clusters;
|
|
foreach my $cluster (@minus_strand_clusters) {
|
|
print "clustered IDs [-]: " . join ("-", @$cluster) . "\n";
|
|
}
|
|
}
|
|
push (@singleton_clusters, @minus_strand_clusters);
|
|
}
|
|
|
|
## convert to singleton assemblies:
|
|
my @singleton_assemblies;
|
|
foreach my $singleton_cluster (@singleton_clusters) {
|
|
my @ids = @$singleton_cluster;
|
|
my ($min_lend, $max_rend);
|
|
my %aligned_orient_counts;
|
|
my %spliced_orient_counts;
|
|
foreach my $id (@ids) {
|
|
my ($lend, $rend) = @{$segment_id_to_coords{$id}};
|
|
if (! defined ($min_lend)) {
|
|
($min_lend, $max_rend) = ($lend, $rend);
|
|
}
|
|
else {
|
|
if ($lend < $min_lend) { $min_lend = $lend; }
|
|
if ($rend > $max_rend) { $max_rend = $rend; }
|
|
}
|
|
my $alignment = $segment_id_to_alignment{$id};
|
|
my $spliced_orient = $alignment->get_spliced_orientation();
|
|
my $aligned_orient = $alignment->get_aligned_orientation();
|
|
if ($spliced_orient ne '?') {
|
|
$spliced_orient_counts{$spliced_orient}++;
|
|
}
|
|
$aligned_orient_counts{$aligned_orient}++;
|
|
}
|
|
|
|
## get orientation value with most support:
|
|
my @spliced_orients = reverse sort {$spliced_orient_counts{$a}<=>$spliced_orient_counts{$b}} keys %spliced_orient_counts;
|
|
if (scalar @spliced_orients > 1) {
|
|
# should only be one spliced orient, or no spliced orient if everything was '?'
|
|
confess "Error, captured a singleton assembly with multiple spliced orientations.";
|
|
}
|
|
my $assembly_spliced_orient = shift @spliced_orients;
|
|
|
|
unless ($assembly_spliced_orient) {
|
|
## no evidence for assembly spliced orientation.
|
|
## take the aligned orientation that has the greatest support.
|
|
my @aligned_orients = reverse sort {$aligned_orient_counts{$a}<=>$aligned_orient_counts{$b}} keys %aligned_orient_counts;
|
|
$assembly_spliced_orient = shift @aligned_orients;
|
|
}
|
|
|
|
unless ($assembly_spliced_orient =~ /^[\+\-]$/) {
|
|
confess "Error, couldn't decide upon assembly orientation for singleton assembly";
|
|
}
|
|
|
|
push (@singleton_assemblies, { lend => $min_lend,
|
|
rend => $max_rend,
|
|
ids => [@ids],
|
|
length => $max_rend - $min_lend + 1,
|
|
num_alignments_included => scalar @ids,
|
|
orient => $assembly_spliced_orient,
|
|
}
|
|
);
|
|
}
|
|
|
|
## sort so that we maximize for number of alignments included and alignment span
|
|
@singleton_assemblies = reverse sort {$a->{num_alignments_included}<=>$b->{num_alignments_included}
|
|
||
|
|
$a->{length}<=>$b->{length}} @singleton_assemblies;
|
|
|
|
my %ids_accounted_for;
|
|
|
|
SINGLETON_ASSEMBLY_ANALYSIS:
|
|
foreach my $singleton_assembly (@singleton_assemblies) {
|
|
my ($lend, $rend, $orient, $ids_aref) = ($singleton_assembly->{lend},
|
|
$singleton_assembly->{rend},
|
|
$singleton_assembly->{orient},
|
|
$singleton_assembly->{ids});
|
|
|
|
|
|
## make sure singleton contains an alignment that wasn't already applied elsewhere:
|
|
my $has_alignment_not_applied_elsewhere_flag = 0;
|
|
foreach my $id (@$ids_aref) {
|
|
unless ($applied_segment_indices_href->{$id}) {
|
|
$has_alignment_not_applied_elsewhere_flag = 1;
|
|
last;
|
|
}
|
|
}
|
|
unless ($has_alignment_not_applied_elsewhere_flag) {
|
|
## no reason to bother pursuing it. Everythings accounted for in other already instantiated exons.
|
|
next SINGLETON_ASSEMBLY_ANALYSIS;
|
|
}
|
|
|
|
|
|
my $found_id_not_accounted_for_flag = 0; ## track those IDs that are built into other already instantiated singleton assemblies
|
|
foreach my $id (@$ids_aref) {
|
|
unless ($ids_accounted_for{$id}) {
|
|
$found_id_not_accounted_for_flag = 1;
|
|
last;
|
|
}
|
|
}
|
|
if ($found_id_not_accounted_for_flag) {
|
|
## add a singleton assembly:
|
|
$self->_add_graph_node("singleton_exon", $lend, $rend, $orient);
|
|
foreach my $id (@$ids_aref) {
|
|
$ids_accounted_for{$id} = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
## Ensure that all are accounted for now:
|
|
foreach my $intronless_alignment (@$intronless_alignments_aref) {
|
|
my $id = $intronless_alignment->{__intronless_segment_ID};
|
|
if (! $applied_segment_indices_href) {
|
|
## if not applied before entering this method, then should be applied now
|
|
if (! $ids_accounted_for{$id}) {
|
|
my $unaccounted_for_intronless_alignment = $segment_id_to_alignment{$id};
|
|
confess "Error, intronless alignment " . $unaccounted_for_intronless_alignment->toToken() . "\n"
|
|
. "is not accounted for after instantiating maximal single exons.\n";
|
|
}
|
|
}
|
|
}
|
|
print "-all intronless alignments should now be accounted for.\n" if $SEE;
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub _chain_compatible_splice_paths {
|
|
my $self = shift;
|
|
|
|
## wrap splice path objs into structs for scoring and dynamic programming to build chains:
|
|
my @path_structs;
|
|
foreach my $splice_path ($self->_get_valid_splice_paths()) {
|
|
my $pathID = $splice_path->get_pathID();
|
|
my @nodeIDs = $splice_path->get_ordered_nodeIDs();
|
|
my $evidence_support_count = $self->_get_evidence_support_from_nodeIDs(@nodeIDs);
|
|
my $orient = $splice_path->get_orient();
|
|
my ($lend, $rend) = $splice_path->get_coords();
|
|
|
|
my $struct = { splice_path_obj => $splice_path,
|
|
path_score => $evidence_support_count,
|
|
struct_score => $evidence_support_count,
|
|
nodeIDs => [@nodeIDs],
|
|
lend => $lend,
|
|
rend => $rend,
|
|
orient => $orient,
|
|
pathID => $pathID,
|
|
prev => undef, # previous link in a chain of compatible structs
|
|
};
|
|
push (@path_structs, $struct);
|
|
}
|
|
|
|
@path_structs = sort {$a->{lend}<=>$b->{lend}} @path_structs;
|
|
|
|
## score them:
|
|
for (my $i = 0; $i < $#path_structs; $i++) {
|
|
|
|
my $i_path_struct = $path_structs[$i];
|
|
my ($i_lend, $i_rend, $i_orient, $i_splice_path, $i_nodeIDs_aref) = ($i_path_struct->{lend},
|
|
$i_path_struct->{rend},
|
|
$i_path_struct->{orient},
|
|
$i_path_struct->{splice_path_obj},
|
|
$i_path_struct->{nodeIDs});
|
|
|
|
|
|
for (my $j = $i+1; $j <= $#path_structs; $j++) {
|
|
|
|
## No self comparisons:
|
|
if ($i == $j) { next; }
|
|
|
|
## struct j comes after struct i
|
|
my $j_path_struct = $path_structs[$j];
|
|
my ($j_lend, $j_rend, $j_orient, $j_splice_path, $j_nodeIDs_aref) = ($j_path_struct->{lend},
|
|
$j_path_struct->{rend},
|
|
$j_path_struct->{orient},
|
|
$j_path_struct->{splice_path_obj},
|
|
$j_path_struct->{nodeIDs});
|
|
|
|
## must have same orient:
|
|
unless ($i_orient eq $j_orient) { next; }
|
|
|
|
## check for overlap:
|
|
unless ($i_lend <= $j_rend && $i_rend >= $j_lend) { next; }
|
|
|
|
## must be compatible:
|
|
unless ($i_splice_path->is_compatible($j_splice_path)) { next; }
|
|
|
|
## double check that neither is a subset of the other:
|
|
if ($i_splice_path->is_subpath_of($j_splice_path) || $j_splice_path->is_subpath_of($i_splice_path)) {
|
|
confess "Error, trying to chain two splice paths where one is a subpath of the other:\n" . $self->toString()
|
|
. "\nOffending paths:\n"
|
|
. $i_splice_path->toString() . "\n"
|
|
. $j_splice_path->toString() . "\n";
|
|
}
|
|
|
|
## calculate path score ending at struct j
|
|
## don't count the same nodes twice, though:
|
|
my @nodes_found_in_both_paths = $self->_intersection_of_lists($i_nodeIDs_aref, $j_nodeIDs_aref);
|
|
my $score_decrement = $self->_get_evidence_support_from_nodeIDs(@nodes_found_in_both_paths);
|
|
my $candidate_path_score_j = $i_path_struct->{path_score} + $j_path_struct->{struct_score} - $score_decrement;
|
|
if ($candidate_path_score_j > $j_path_struct->{path_score}) {
|
|
## found best path:
|
|
$j_path_struct->{path_score} = $candidate_path_score_j;
|
|
$j_path_struct->{prev} = $i_path_struct->{pathID};
|
|
}
|
|
}
|
|
}
|
|
|
|
## get the highest scoring paths of compatible subpaths (structs)
|
|
my %pathIDs_left;
|
|
my %pathID_to_struct;
|
|
foreach my $struct (@path_structs) {
|
|
my $pathID = $struct->{pathID};
|
|
$pathIDs_left{$pathID} = 1;
|
|
$pathID_to_struct{$pathID} = $struct;
|
|
}
|
|
|
|
my @final_assembled_path_node_lists;
|
|
while (%pathIDs_left) {
|
|
## find the highest scoring chain of paths:
|
|
my $highest_path_score = 0;
|
|
my $highest_scoring_pathID = undef;
|
|
|
|
foreach my $pathID (keys %pathIDs_left) {
|
|
my $struct = $pathID_to_struct{$pathID};
|
|
my $path_score = $struct->{path_score};
|
|
if ($path_score > $highest_path_score) {
|
|
$highest_path_score = $path_score;
|
|
$highest_scoring_pathID = $pathID;
|
|
}
|
|
}
|
|
|
|
my %nodes_along_chain;
|
|
## walk along chain links and collect the nodes:
|
|
my $pathID = $highest_scoring_pathID;
|
|
while (defined $pathID) {
|
|
delete $pathIDs_left{$pathID}; # remove it so we know it's accounted for as a traversed chain link.
|
|
my $struct = $pathID_to_struct{$pathID};
|
|
my $nodes_aref = $struct->{nodeIDs};
|
|
foreach my $node (@$nodes_aref) {
|
|
$nodes_along_chain{$node} = 1;
|
|
}
|
|
$pathID = $struct->{prev};
|
|
}
|
|
|
|
my @nodes_along_chain_list = keys %nodes_along_chain;
|
|
push (@final_assembled_path_node_lists, [@nodes_along_chain_list]);
|
|
}
|
|
|
|
## Create new path objects for these chains of subpaths:
|
|
foreach my $node_list (@final_assembled_path_node_lists) {
|
|
my @nodes = @$node_list;
|
|
## sort them according to position:
|
|
@nodes = sort { $self->get_graph_node_via_nodeID($a)->{lend}
|
|
<=>
|
|
$self->get_graph_node_via_nodeID($b)->{lend}} @nodes;
|
|
my $path_lend = $self->get_graph_node_via_nodeID($nodes[0])->{lend};
|
|
my $path_rend = $self->get_graph_node_via_nodeID($nodes[$#nodes])->{rend};
|
|
my $orient = $self->get_graph_node_via_nodeID($nodes[0])->get_orient();
|
|
|
|
my $splice_path = Splice_graph_path->new($path_lend, $path_rend, $orient, [@nodes]);
|
|
push (@{$self->{_assembled_splice_paths}}, $splice_path);
|
|
}
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _intersection_of_lists {
|
|
my $self = shift;
|
|
my ($list_A_aref, $list_B_aref) = @_;
|
|
|
|
my %eles_in_A;
|
|
foreach my $ele (@$list_A_aref) {
|
|
$eles_in_A{$ele} = 1;
|
|
}
|
|
my @intersection;
|
|
foreach my $ele (@$list_B_aref) {
|
|
if ($eles_in_A{$ele}) {
|
|
push (@intersection, $ele);
|
|
}
|
|
}
|
|
|
|
return (@intersection);
|
|
}
|
|
|
|
|
|
####
|
|
sub _get_evidence_support_from_nodeIDs {
|
|
my $self = shift;
|
|
my @nodeIDs = @_;
|
|
my $evidence_sum = 0;
|
|
foreach my $nodeID (@nodeIDs) {
|
|
my $node = $self->get_graph_node_via_nodeID($nodeID);
|
|
my $ev_support = $node->get_num_evidence_support();
|
|
$evidence_sum += $ev_support;
|
|
}
|
|
return ($evidence_sum);
|
|
}
|
|
|
|
####
|
|
sub _build_splice_graph {
|
|
my $self = shift;
|
|
|
|
print "## building splice graph:\n" if $SEE;
|
|
|
|
## do n^2 comparison of nodes:
|
|
## number of nodes is relatively small, so this isn't too much of a bottleneck
|
|
my @nodes = $self->get_graph_nodes(); # already sorted by lend
|
|
|
|
## init base scores for nodes
|
|
foreach my $node (@nodes) {
|
|
my $ev_support = $node->get_num_evidence_support();
|
|
# init base score
|
|
$node->{forward_base_score} = $ev_support;
|
|
$node->{reverse_base_score} = $ev_support;
|
|
# init path score
|
|
$node->{forward_path_score} = $ev_support;
|
|
$node->{reverse_path_score} = $ev_support;
|
|
}
|
|
|
|
## build the splice graph
|
|
## at the same time, do the forward dynamic programming calculation so we can backtrack to the best scoring path from any node
|
|
## nodeB must come after nodeA, be of same orientation, adjacent, and an acceptable linkage (intron to exon)
|
|
## perform calculation from left to right with backtracking from right to left.
|
|
for (my $i = 1; $i <= $#nodes; $i++) {
|
|
my $nodeB = $nodes[$i];
|
|
my ($nodeB_lend, $nodeB_rend) = $nodeB->get_coords();
|
|
my $nodeB_orient = $nodeB->get_orient();
|
|
my $nodeB_type = $nodeB->get_type();
|
|
my $nodeB_ID = $nodeB->get_nodeID();
|
|
|
|
for (my $j = $i - 1; $j >= 0; $j--) {
|
|
my $nodeA = $nodes[$j];
|
|
my ($nodeA_lend, $nodeA_rend) = $nodeA->get_coords();
|
|
my $nodeA_orient = $nodeA->get_orient();
|
|
my $nodeA_type = $nodeA->get_type();
|
|
my $nodeA_ID = $nodeA->get_nodeID();
|
|
|
|
print "-comparing " . $nodeA->toString() . " <=> " . $nodeB->toString() if $SEE;
|
|
|
|
unless ($nodeA_orient eq $nodeB_orient) {
|
|
print "-opposite orients, cannot link.\n" if $SEE;
|
|
next;
|
|
}
|
|
|
|
unless ($nodeB_lend == $nodeA_rend + 1) { ## next base coordinate for next feature
|
|
print "-not adjacent, cannot link.\n" if $SEE;
|
|
next;
|
|
}
|
|
|
|
## ensure proper connection:
|
|
unless ($ACCEPTABLE_CONNECTIONS{$nodeA_type}->{$nodeB_type}) {
|
|
print "-unacceptable linkage types, cannot link ($nodeA_type,$nodeB_type).\n" if $SEE;
|
|
next;
|
|
}
|
|
|
|
print "* linking nodes.\n" if $SEE;
|
|
|
|
## if got this far, connections are perfect!
|
|
$nodeA->connect_this_next_nodeID($nodeB_ID);
|
|
$nodeB->connect_this_prev_nodeID($nodeA_ID);
|
|
|
|
## check scores for best scoring path solution
|
|
my $path_score = $nodeA->{forward_path_score} + $nodeB->{forward_base_score};
|
|
if ($path_score > $nodeB->{forward_path_score}) {
|
|
## link the nodes as current best path linkage
|
|
$nodeB->{forward_path_score} = $path_score;
|
|
$nodeB->{forward_backtrack_nodeID} = $nodeA_ID;
|
|
}
|
|
}
|
|
}
|
|
|
|
## Do the reverse dynamic programming calculation so we can find the best scoring path in a left to right traversal from any node
|
|
## calculation from right to left with backtracking from left to right
|
|
for (my $i = ($#nodes - 1); $i >= 0; $i--) {
|
|
my $nodeA = $nodes[$i];
|
|
my $nodeA_ID = $nodeA->get_nodeID();
|
|
|
|
for (my $j = $i+1; $j <= $#nodes; $j++) {
|
|
my $nodeB = $nodes[$j];
|
|
my $nodeB_ID = $nodeB->get_nodeID();
|
|
print "reverse comparison " . $nodeA->toString() . " <=> " . $nodeB->toString() . "\t" if $SEE;
|
|
## already stored the prev/next links, so rely on that to know if an acceptable linkage exists:
|
|
if ($nodeA->has_next_nodeID($nodeB_ID)) {
|
|
print "connectable.\n" if $SEE;
|
|
## check scores for DP
|
|
my $base_score = $nodeA->{reverse_base_score};
|
|
my $path_score = $nodeB->{reverse_path_score} + $base_score;
|
|
if ($path_score > $nodeA->{reverse_path_score}) {
|
|
## make it the best path and link the node
|
|
$nodeA->{reverse_path_score} = $path_score;
|
|
$nodeA->{reverse_backtrack_nodeID} = $nodeB_ID;
|
|
}
|
|
}
|
|
else {
|
|
print "non-connectable nodes.\n" if $SEE;
|
|
}
|
|
}
|
|
}
|
|
|
|
## defensive programming:
|
|
foreach my $node (@nodes) {
|
|
|
|
## ignore singletons
|
|
if ($node->get_type() eq "singleton_exon") { next; }
|
|
|
|
## ensure that only terminal left exons have forward_backtrack_nodeID's as undef
|
|
if ( (! defined $node->{forward_backtrack_nodeID}) && $node->get_type() ne "terminal_left_exon") {
|
|
print $self->toString();
|
|
print Dumper ($node);
|
|
confess "Error, node has undefined forward backtrack ID but is not a terinal left exon: " . $node->toString() . "\n";
|
|
}
|
|
## ensure that only terminal right exons have the reverse_backtrack_nodeID's as undef
|
|
if ( (! defined $node->{reverse_backtrack_nodeID}) && $node->get_type() ne "terminal_right_exon") {
|
|
print $self->toString();
|
|
print Dumper ($node);
|
|
confess "Error, node has undefined reverse backtrack ID but is not a terminal right exon: " . $node->toString() . "\n";
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
####
|
|
sub _extend_splice_paths_to_termini {
|
|
my $self = shift;
|
|
|
|
my @splice_paths = $self->_get_valid_splice_paths();
|
|
foreach my $splice_path (@splice_paths) {
|
|
my @ordered_nodeIDs = $splice_path->get_ordered_nodeIDs();
|
|
|
|
my $left_nodeID = $ordered_nodeIDs[0];
|
|
my $right_nodeID = $ordered_nodeIDs[$#ordered_nodeIDs];
|
|
|
|
print "-extending splice_path [" . join (",", @ordered_nodeIDs) . "] maximally to the left and right:\n" if $SEE;
|
|
my @left_extension_nodes = $self->_extend_node_maximally_left($left_nodeID);
|
|
@left_extension_nodes = grep { $_ != $left_nodeID } @left_extension_nodes; # remove the nucleating node
|
|
|
|
my @right_extension_nodes = $self->_extend_node_maximally_right($right_nodeID);
|
|
@right_extension_nodes = grep { $_ != $right_nodeID } @right_extension_nodes;
|
|
|
|
unless (@left_extension_nodes && @right_extension_nodes) {
|
|
confess "Error, couldn't extend splice path to the left or right to include termini\n"
|
|
. "left: @left_extension_nodes\n"
|
|
. "right: @right_extension_nodes\n "
|
|
. $splice_path->toString();
|
|
}
|
|
|
|
## create alignment assembly:
|
|
my @all_nodes_in_maximal_path = (@left_extension_nodes, @ordered_nodeIDs, @right_extension_nodes);
|
|
my $cdna_alignment = $self->_instantiate_cdna_alignment_from_nodeIDs(@all_nodes_in_maximal_path);
|
|
|
|
$self->_add_alignment_assembly($cdna_alignment);
|
|
}
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _extend_node_maximally_left {
|
|
my $self = shift;
|
|
my (@nodeIDs) = @_;
|
|
my $highest_scoring_path = $self->_perform_traceback("forward_backtrack", \@nodeIDs);
|
|
my $highest_scoring_nodelist_aref = $highest_scoring_path->{path_nodeIDs_aref};
|
|
return (@$highest_scoring_nodelist_aref);
|
|
}
|
|
|
|
|
|
####
|
|
sub _extend_node_maximally_right {
|
|
my $self = shift;
|
|
my (@nodeIDs) = @_;
|
|
my $highest_scoring_path = $self->_perform_traceback("reverse_backtrack", \@nodeIDs);
|
|
my $highest_scoring_nodelist_aref = $highest_scoring_path->{path_nodeIDs_aref};
|
|
return (@$highest_scoring_nodelist_aref);
|
|
}
|
|
|
|
|
|
####
|
|
sub _perform_traceback {
|
|
my $self = shift;
|
|
my ($traceback_direction, $nodeIDs_aref) = @_;
|
|
|
|
my @paths_and_score_structs;
|
|
|
|
foreach my $nodeID (@$nodeIDs_aref) {
|
|
## do backtracking from forward DP calculation:
|
|
my @nodes_in_path;
|
|
my $next_path_node = $nodeID;
|
|
my $path_score = 0;
|
|
while (defined $next_path_node) {
|
|
my $node_obj = $self->get_graph_node_via_nodeID($next_path_node);
|
|
my $ev_support = $node_obj->get_num_evidence_support();
|
|
$path_score += $ev_support;
|
|
push (@nodes_in_path, $next_path_node);
|
|
if ($traceback_direction eq "forward_backtrack") {
|
|
$next_path_node = $node_obj->{forward_backtrack_nodeID};
|
|
}
|
|
elsif ($traceback_direction eq "reverse_backtrack") {
|
|
$next_path_node = $node_obj->{reverse_backtrack_nodeID};
|
|
}
|
|
else {
|
|
confess "Don't understand traceback direction: $traceback_direction ";
|
|
}
|
|
}
|
|
my $path_and_score_struct = { score => $path_score,
|
|
path_nodeIDs_aref => [@nodes_in_path],
|
|
};
|
|
push (@paths_and_score_structs, $path_and_score_struct);
|
|
|
|
}
|
|
|
|
@paths_and_score_structs = reverse sort {$a->{score}<=>$b->{score}} @paths_and_score_structs;
|
|
|
|
my $highest_scoring_path = shift @paths_and_score_structs;
|
|
|
|
return ($highest_scoring_path);
|
|
|
|
}
|
|
|
|
=tree_traversal_replaced_by_DP_method
|
|
|
|
|
|
####
|
|
my $path_no = 0;
|
|
sub _tree_traversal {
|
|
my $self = shift;
|
|
my ($node_obj, $traversal_direction, $current_path_score, $current_path_list_aref,
|
|
$max_score_sref, $max_path_href) = @_;
|
|
|
|
#print $self->toString();
|
|
#print $self->toAlignIllustration(60);
|
|
|
|
# print "_tree_traversal() current_path: ". join (",", @$current_path_list_aref) . "\n" if $SEE;
|
|
|
|
$self->_ensure_unique_path_nodes(@$current_path_list_aref); # defensive programming
|
|
|
|
my $edge_key = "_" . $traversal_direction . "_nodeID_href";
|
|
|
|
my %edges = %{$node_obj->{$edge_key}};
|
|
|
|
## add current node evidence score to the sum score
|
|
|
|
|
|
if (%edges) {
|
|
## not a terminal node
|
|
## explore each of the linked nodes:
|
|
my @linked_nodes = keys %edges;
|
|
foreach my $linked_node (@linked_nodes) {
|
|
my $linked_node_obj = $self->get_graph_node_via_nodeID($linked_node);
|
|
## add linked node to path list:
|
|
my @current_path_list = @$current_path_list_aref;
|
|
push (@current_path_list, $linked_node);
|
|
my $num_evidence_support = $linked_node_obj->get_num_evidence_support();
|
|
my $linked_node_path_score = $current_path_score + $num_evidence_support;
|
|
|
|
$self->_tree_traversal($linked_node_obj, $traversal_direction, $linked_node_path_score, [@current_path_list],
|
|
$max_score_sref, $max_path_href);
|
|
}
|
|
|
|
}
|
|
else {
|
|
## a terminal node
|
|
## base case!
|
|
if ($node_obj->get_type() !~ /terminal/) {
|
|
## supposed to be a terminal node.
|
|
confess "Reached base case but not a terminal node! " . $node_obj->toString() . "\n" . $self->toString();
|
|
}
|
|
|
|
$path_no++;
|
|
print "$path_no _tree_traversal($traversal_direction) terminal_reached. PATH: " . join (",", @$current_path_list_aref) . "\n" if $SEE;
|
|
if ($current_path_score >= $$max_score_sref) { #includes case where incoming (starting) node is in fact a terminal node.
|
|
## make this maximum scoring path:
|
|
$max_path_href->{score} = $current_path_score;
|
|
$max_path_href->{path_nodeIDs_aref} = [@$current_path_list_aref];
|
|
$$max_score_sref = $current_path_score;
|
|
}
|
|
}
|
|
return;
|
|
|
|
}
|
|
|
|
=cut
|
|
|
|
####
|
|
sub _ensure_unique_path_nodes {
|
|
my $self = shift;
|
|
my @nodes = @_;
|
|
my %check;
|
|
foreach my $node (@nodes) {
|
|
if ($check{$node}) {
|
|
confess $self->toString() . "Error, path list: " . join (",", @nodes) . " has redundant node: $node\n";
|
|
}
|
|
$check{$node} = 1; # log it!
|
|
}
|
|
return; # all good!
|
|
}
|
|
|
|
|
|
####
|
|
sub _append_singletons_to_assembly_list {
|
|
my $self = shift;
|
|
foreach my $node_obj (@{$self->{_singleton_exons}}) {
|
|
my $nodeID = $node_obj->get_nodeID();
|
|
my $cdna_alignment = $self->_instantiate_cdna_alignment_from_nodeIDs($nodeID);
|
|
$self->_add_alignment_assembly($cdna_alignment);
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
####
|
|
sub _instantiate_cdna_alignment_from_nodeIDs {
|
|
my $self = shift;
|
|
my @nodeIDs = @_;
|
|
|
|
my $spliced_orientation = undef;
|
|
|
|
## add alignment segments for all non-intron nodes:
|
|
my @coordsets;
|
|
foreach my $nodeID (@nodeIDs) {
|
|
my $node_obj = $self->get_graph_node_via_nodeID($nodeID);
|
|
my $node_type = $node_obj->get_type();
|
|
if ($node_type eq "intron") { next; } ## no introns!
|
|
|
|
my ($lend, $rend) = $node_obj->get_coords();
|
|
my $orient = $node_obj->get_orient();
|
|
my ($end5, $end3) = ($orient eq '+') ? ($lend, $rend) : ($rend, $lend);
|
|
push (@coordsets, [$end5, $end3]);
|
|
|
|
unless (defined $spliced_orientation) {
|
|
$spliced_orientation = $orient;
|
|
}
|
|
|
|
if ($spliced_orientation ne $orient) {
|
|
confess "Error, alignment segments of opposite orientation are in the same extended splice path! @nodeIDs\n" . $self->toString();
|
|
}
|
|
}
|
|
|
|
@coordsets = sort {$a->[0]<=>$b->[0]} @coordsets;
|
|
my $match_coord_sum = 0;
|
|
my @alignment_segments;
|
|
foreach my $coordset (@coordsets) {
|
|
my ($end5, $end3) = @$coordset;
|
|
my $seg_length = abs ($end3 - $end5) + 1;
|
|
my $match_lend = $match_coord_sum + 1;
|
|
my $match_rend = $match_coord_sum + $seg_length;
|
|
|
|
my $alignment_segment = new CDNA::Alignment_segment($end5, $end3, $match_lend, $match_rend, 100); # 100% identity
|
|
|
|
push (@alignment_segments, $alignment_segment);
|
|
|
|
$match_coord_sum += $seg_length;
|
|
}
|
|
|
|
my $cdna_alignment = new CDNA::CDNA_alignment($match_coord_sum, \@alignment_segments);
|
|
|
|
$cdna_alignment->set_spliced_orientation($spliced_orientation);
|
|
$cdna_alignment->force_spliced_validation($spliced_orientation);
|
|
|
|
## hack in the node list as a hidden object attribute.
|
|
$cdna_alignment->{__Splice_graph_assembler_nodeID_list} = [@nodeIDs];
|
|
|
|
return ($cdna_alignment);
|
|
}
|
|
|
|
####
|
|
sub _correlate_assemblies_with_incoming_alignments {
|
|
my $self = shift;
|
|
my @assemblies = $self->get_assemblies();
|
|
|
|
my %alignment_accs; # keys to alignment objects
|
|
my %incorporated_alignments;
|
|
foreach my $assembly (@assemblies) {
|
|
|
|
my $num_segments_in_assembly = $assembly->get_num_segments();
|
|
my @incorporated_alignments;
|
|
|
|
foreach my $alignment (@{$self->{_incoming_alignments}}) {
|
|
my $alignment_acc = $alignment->get_acc();
|
|
|
|
## store alignment object in lookup table for later retrieval.
|
|
unless (exists $alignment_accs{$alignment_acc}) {
|
|
$alignment_accs{$alignment_acc} = $alignment;
|
|
}
|
|
|
|
## check for alignment incorporation:
|
|
if ($assembly->encapsulates($alignment, $FUZZ_DIST) &&
|
|
(
|
|
## since assemblies have fixed spliced orient, compat will fail when encapsulation
|
|
## of single exon alignments of ambiguous orientation exists
|
|
($num_segments_in_assembly == 1 && $alignment->get_spliced_orientation() eq '?')
|
|
||
|
|
## do rigorous compatibility check:
|
|
($assembly->is_compatible($alignment, $FUZZ_DIST))
|
|
)
|
|
)
|
|
{
|
|
push (@incorporated_alignments, $alignment_acc);
|
|
$incorporated_alignments{$alignment_acc} = 1;
|
|
}
|
|
}
|
|
$assembly->set_acc( join ("/", @incorporated_alignments)); # use / as DELIMETER for cdna accessions
|
|
}
|
|
|
|
my @unincorporated_alignments;
|
|
foreach my $alignment_acc (keys %alignment_accs) {
|
|
unless ($incorporated_alignments{$alignment_acc}) {
|
|
push (@unincorporated_alignments, $alignment_accs{$alignment_acc});
|
|
}
|
|
}
|
|
|
|
|
|
@{$self->{_unincorporated_alignments}} = (); #clear
|
|
|
|
if (@unincorporated_alignments) {
|
|
@{$self->{_unincorporated_alignments}} = @unincorporated_alignments;
|
|
return (1); # still have unincorporated alignments!
|
|
}
|
|
|
|
return(0); ## all alignments are accounted for.
|
|
}
|
|
|
|
####
|
|
sub _explore_assemblies_from_unincorporated_alignments {
|
|
my $self = shift;
|
|
print "method _explore_assemblies_from_unincorporated_alignments()\n" if $SEE;
|
|
## decompose unincorporated alignments and create maximal paths that contain them.
|
|
|
|
my @added_alignment_assemblies;
|
|
|
|
|
|
my @unincorporated_alignments = $self->get_unincorporated_alignments();
|
|
|
|
## examine in order of descending numbers of segments and length:
|
|
@unincorporated_alignments = reverse sort { $a->{num_segments} <=> $b->{num_segments}
|
|
||
|
|
$a->{cdna_length} <=> $b->{cdna_length} } @unincorporated_alignments;
|
|
|
|
|
|
|
|
foreach my $unincorporated_alignment (@unincorporated_alignments) {
|
|
print "\nUnincorporated alignment under scrutiny: " . $unincorporated_alignment->toToken() . "\n" if $SEE;
|
|
|
|
my $orient = $unincorporated_alignment->get_spliced_orientation();
|
|
my ($alignment_lend, $alignment_rend) = $unincorporated_alignment->get_coords();
|
|
|
|
my $found_in_new_assembly_flag = 0;
|
|
foreach my $new_assembly (@added_alignment_assemblies) {
|
|
if ($new_assembly->is_compatible($unincorporated_alignment, $FUZZ_DIST) && $new_assembly->encapsulates($unincorporated_alignment, $FUZZ_DIST)) {
|
|
$found_in_new_assembly_flag = 1;
|
|
last;
|
|
}
|
|
}
|
|
if ($found_in_new_assembly_flag) {
|
|
print "-found in a newly created assembly based on an earlier unincorporated alignment. Now accounted for.\n" if $SEE;
|
|
next;
|
|
}
|
|
|
|
my $num_segments = $unincorporated_alignment->get_num_segments();
|
|
|
|
my @all_nodes;
|
|
|
|
if ($num_segments == 1) {
|
|
## must be included in some terminal exon not already represented by maximal splice paths (only explanation)
|
|
## find a maximal path that includes this segment stemming from a terminal exon
|
|
|
|
my @terminal_nodes = $self->_find_terminal_exons_encompassing_segment($alignment_lend, $alignment_rend, $orient);
|
|
unless (@terminal_nodes) {
|
|
confess "Error, searching for terminal nodes that encompass unincorproated singleton segment and none found.\n"
|
|
. $self->toString();
|
|
}
|
|
## find maximal path:
|
|
my @paths_from_terminal_node;
|
|
foreach my $terminal_node (@terminal_nodes) {
|
|
my $nodeID = $terminal_node->get_nodeID();
|
|
my $type = $terminal_node->get_type();
|
|
my $max_path_href;
|
|
if ($type eq "terminal_left_exon") {
|
|
## must extend to the right
|
|
$max_path_href = $self->_perform_traceback("forward_backtrack", [$nodeID]); #_extend_node_maximally_right($nodeID);
|
|
}
|
|
elsif ($type eq "terminal_right_exon") {
|
|
$max_path_href = $self->_perform_traceback("reverse_backtrack", [$nodeID]); #_extend_node_maximally_left($nodeID);
|
|
}
|
|
else {
|
|
confess "Error, terminal node is supposed to be left or right, but type($type) is not recognized.";
|
|
}
|
|
|
|
push (@paths_from_terminal_node, $max_path_href);
|
|
}
|
|
## get highest scoring path:
|
|
@paths_from_terminal_node = reverse sort { $a->{score}<=>$b->{score} } @paths_from_terminal_node;
|
|
my $highest_scoring_path = shift @paths_from_terminal_node;
|
|
my $node_IDs_in_path_aref = $highest_scoring_path->{path_nodeIDs_aref};
|
|
@all_nodes = @$node_IDs_in_path_aref;
|
|
}
|
|
|
|
else {
|
|
## has an intron! Some alternate termini needed that were not found in a maximal splice path
|
|
|
|
my @segments = $unincorporated_alignment->get_alignment_segments();
|
|
my $first_segment = shift @segments;
|
|
my $last_segment = pop @segments;
|
|
|
|
## create node list for internal segments and introns:
|
|
my @nodes_in_central_path;
|
|
my @introns = $unincorporated_alignment->get_intron_coords();
|
|
foreach my $intron (@introns) {
|
|
my ($lend, $rend) = @$intron;
|
|
my $node_obj = $self->_get_graph_node_via_coords_n_type("intron", $lend, $rend, $orient);
|
|
my $nodeID = $node_obj->get_nodeID();
|
|
push (@nodes_in_central_path, $nodeID);
|
|
}
|
|
foreach my $internal_segment (@segments) {
|
|
my ($lend, $rend) = $internal_segment->get_coords();
|
|
my $node_obj = $self->_get_graph_node_via_coords_n_type("internal_exon", $lend, $rend, $orient);
|
|
my $nodeID = $node_obj->get_nodeID();
|
|
push (@nodes_in_central_path, $nodeID);
|
|
}
|
|
my ($first_segment_lend, $first_segment_rend) = $first_segment->get_coords();
|
|
my @candidate_preceding_nodes = $self->_find_candidate_preceding_exons($first_segment_lend, $first_segment_rend, $orient);
|
|
unless (@candidate_preceding_nodes) {
|
|
confess "Error, no candidate preceding nodes for ($first_segment_lend, $first_segment_rend, $orient)";
|
|
}
|
|
|
|
my ($last_segment_lend, $last_segment_rend) = $last_segment->get_coords();
|
|
my @candidate_following_nodes = $self->_find_candidate_following_exons($last_segment_lend, $last_segment_rend, $orient);
|
|
unless (@candidate_following_nodes) {
|
|
confess "Error, no candidate following nodes for ($last_segment_lend, $last_segment_rend, $orient)";
|
|
}
|
|
|
|
## find maximal paths left and right:
|
|
my @nodeIDs_left;
|
|
foreach my $candidate_preceding_node (@candidate_preceding_nodes) {
|
|
print "Candidate preceding node: " . $candidate_preceding_node->toString() . "\n" if $SEE;
|
|
my $nodeID = $candidate_preceding_node->get_nodeID();
|
|
push (@nodeIDs_left, $nodeID);
|
|
}
|
|
my @path_left = $self->_extend_node_maximally_left(@nodeIDs_left);
|
|
|
|
my @nodeIDs_right;
|
|
foreach my $candidate_following_node (@candidate_following_nodes) {
|
|
print "Candidate following node: " . $candidate_following_node->toString() . "\n" if $SEE;
|
|
my $nodeID = $candidate_following_node->get_nodeID();
|
|
push (@nodeIDs_right, $nodeID);
|
|
}
|
|
my @path_right = $self->_extend_node_maximally_right(@nodeIDs_right);
|
|
|
|
@all_nodes = (@path_left, @nodes_in_central_path, @path_right);
|
|
}
|
|
|
|
|
|
my $assembly = $self->_instantiate_cdna_alignment_from_nodeIDs(@all_nodes);
|
|
|
|
## verify that this assembly accounts for the unincorporated alignment:
|
|
unless ( (my $compatible_result = $assembly->is_compatible($unincorporated_alignment, $FUZZ_DIST))
|
|
&&
|
|
(my $encapsulated_result = $assembly->encapsulates($unincorporated_alignment, $FUZZ_DIST)) ) {
|
|
confess "Error, created new assembly to house missing alignment, but it doesn't afterall!\n"
|
|
. "unincorp: " . $unincorporated_alignment->toToken() . "\n"
|
|
. "newAsmb: " . $assembly->toToken() . "\n"
|
|
. "compatible: $compatible_result\n"
|
|
. "encapsulated: $encapsulated_result\n";
|
|
}
|
|
print "-verified incorporation in new assembly.\n" if $SEE;
|
|
push (@added_alignment_assemblies, $assembly);
|
|
}
|
|
|
|
## add to assembly list
|
|
$self->_add_alignment_assembly(@added_alignment_assemblies);
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub _find_candidate_preceding_exons {
|
|
my $self = shift;
|
|
my ($lend, $rend, $orient) = @_;
|
|
|
|
## must have same orient, share rend boundary, and be internal or terminal exons that contain it
|
|
my @candidate_exons;
|
|
foreach my $exon (@{$self->{_internal_exons}}, @{$self->{_terminal_left_exons}}) {
|
|
my ($exon_lend, $exon_rend) = $exon->get_coords();
|
|
my $exon_orient = $exon->get_orient();
|
|
if ($exon_orient eq $orient && $lend + $FUZZ_DIST >= $exon_lend && $exon_rend == $rend) {
|
|
push (@candidate_exons, $exon);
|
|
}
|
|
}
|
|
|
|
return (@candidate_exons);
|
|
}
|
|
|
|
####
|
|
####
|
|
sub _find_candidate_following_exons {
|
|
my $self = shift;
|
|
my ($lend, $rend, $orient) = @_;
|
|
|
|
## must have same orient, share rend boundary, and be internal or terminal exons that contain it
|
|
my @candidate_exons;
|
|
foreach my $exon (@{$self->{_internal_exons}}, @{$self->{_terminal_right_exons}}) {
|
|
my ($exon_lend, $exon_rend) = $exon->get_coords();
|
|
my $exon_orient = $exon->get_orient();
|
|
if ($exon_orient eq $orient && $rend - $FUZZ_DIST <= $exon_rend && $exon_lend == $lend) {
|
|
push (@candidate_exons, $exon);
|
|
}
|
|
}
|
|
|
|
return (@candidate_exons);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
######################################################
|
|
######################################################
|
|
|
|
package Splice_graph_node;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
|
|
my $nodeID = 0; ## class attribute
|
|
|
|
####
|
|
sub new {
|
|
my $packagename = shift;
|
|
|
|
my ($type, $lend, $rend, $orient) = @_;
|
|
|
|
## type can be: intron | internal_exon | terminal_left_exon | terminal_right_exon | singleton_exon
|
|
$nodeID++;
|
|
|
|
# object atts:
|
|
my $self = {
|
|
nodeID => $nodeID,
|
|
type => undef,
|
|
lend => undef,
|
|
rend => undef,
|
|
orient => undef,
|
|
length => undef,
|
|
num_evidence_support => 1, # indicate number of transcripts supporting this feature
|
|
|
|
## graph connections:
|
|
_prev_nodeID_href => {}, ## keys nodeIDs connected to before this node
|
|
_next_nodeID_href => {}, ## keys nodeIDs connected to after this node
|
|
|
|
## attributes for dynamic programming calculations to find longest path
|
|
forward_base_score => 0, ## calculation performed from left to right, with right to left backtracking
|
|
forward_path_score => 0,
|
|
forward_backtrack_nodeID => undef,
|
|
|
|
reverse_base_score => 0, ## calculation performed from right to left with backtracking from left to right
|
|
reverse_path_score => 0,
|
|
reverse_backtrack_nodeID => undef,
|
|
|
|
};
|
|
|
|
bless ($self, $packagename);
|
|
|
|
## use set methods to set attribute values and validate settings:
|
|
$self->set_coords($lend, $rend);
|
|
$self->set_type($type);
|
|
$self->set_orient($orient);
|
|
|
|
return ($self);
|
|
}
|
|
|
|
####
|
|
sub set_num_evidence_support {
|
|
my $self = shift;
|
|
my ($evidence_support) = @_;
|
|
|
|
$self->{num_evidence_support} = $evidence_support;
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub increment_evidence_support {
|
|
my $self = shift;
|
|
my ($inc_val) = @_;
|
|
if (defined $inc_val) {
|
|
$self->{num_evidence_support} += $inc_val;
|
|
}
|
|
else {
|
|
## just add one.
|
|
$self->{num_evidence_support}++;
|
|
}
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_num_evidence_support {
|
|
my $self = shift;
|
|
return ($self->{num_evidence_support});
|
|
}
|
|
|
|
####
|
|
sub toString {
|
|
my $self = shift;
|
|
my $text = "node: " . $self->get_nodeID()
|
|
. "\t" . join ("-", $self->get_coords())
|
|
. "\torient(" . $self->get_orient() . ")"
|
|
. "\t" . $self->get_type()
|
|
. "\tEvSupport: " . $self->get_num_evidence_support();
|
|
|
|
return ($text);
|
|
}
|
|
|
|
####
|
|
sub get_nodeID {
|
|
my $self = shift;
|
|
return ($self->{nodeID});
|
|
}
|
|
|
|
|
|
####
|
|
sub set_coords {
|
|
my $self = shift;
|
|
my ($lend, $rend) = @_;
|
|
|
|
unless ($lend =~ /^\d+$/ && $rend =~ /^\d+$/) {
|
|
confess "Error, coordinates [$lend] or [$rend] are not integers";
|
|
}
|
|
|
|
unless ($lend <= $rend) {
|
|
confess "Error, coordinates out of order";
|
|
}
|
|
|
|
$self->{lend} = $lend;
|
|
$self->{rend} = $rend;
|
|
|
|
$self->{length} = $rend - $lend + 1;
|
|
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_coords {
|
|
my $self = shift;
|
|
return ($self->{lend}, $self->{rend});
|
|
}
|
|
|
|
####
|
|
sub set_type {
|
|
my $self = shift;
|
|
my ($type) = @_;
|
|
unless ($type =~ /^(intron|internal_exon|terminal_left_exon|terminal_right_exon|singleton_exon)$/) {
|
|
confess "type $type is not acceptible";
|
|
}
|
|
|
|
$self->{type} = $type;
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_type {
|
|
my $self = shift;
|
|
return ($self->{type});
|
|
}
|
|
|
|
####
|
|
sub set_orient {
|
|
my $self = shift;
|
|
my ($orient) = @_;
|
|
unless ($orient =~ /^[\+\-]$/) {
|
|
confess "Error, orient($orient) is not allowed here";
|
|
}
|
|
|
|
$self->{orient} = $orient;
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_orient {
|
|
my $self = shift;
|
|
return ($self->{orient});
|
|
}
|
|
|
|
|
|
####
|
|
sub connect_this_prev_nodeID {
|
|
my $self = shift;
|
|
my ($nodeID) = @_;
|
|
$self->{_prev_nodeID_href}->{$nodeID} = 1;
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub connect_this_next_nodeID {
|
|
my $self = shift;
|
|
my ($nodeID) = @_;
|
|
$self->{_next_nodeID_href}->{$nodeID} = 1;
|
|
return;
|
|
}
|
|
|
|
####
|
|
sub get_connected_prev_nodeIDs {
|
|
my $self = shift;
|
|
my @prev_nodeIDs = keys %{$self->{_prev_nodeID_href}};
|
|
return (@prev_nodeIDs);
|
|
}
|
|
|
|
####
|
|
sub get_connected_next_nodeIDs {
|
|
my $self = shift;
|
|
my @next_nodeIDs = keys %{$self->{_next_nodeID_href}};
|
|
return (@next_nodeIDs);
|
|
}
|
|
|
|
####
|
|
sub has_next_nodeID {
|
|
my $self = shift;
|
|
my ($nodeID) = @_;
|
|
if ($self->{_next_nodeID_href}->{$nodeID}) {
|
|
return (1); #yes
|
|
}
|
|
else {
|
|
return (0); # no
|
|
}
|
|
}
|
|
|
|
####
|
|
sub has_prev_nodeID {
|
|
my $self = shift;
|
|
my ($nodeID) = @_;
|
|
if ($self->{_prev_nodeID_href}->{$nodeID}) {
|
|
return (1); #yes
|
|
}
|
|
else {
|
|
return (0); #no
|
|
}
|
|
}
|
|
|
|
#############################################
|
|
#############################################
|
|
|
|
package Splice_graph_path;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
|
|
my $PATH_ID = 0; ## class variable
|
|
|
|
####
|
|
sub new {
|
|
my $packagename = shift;
|
|
|
|
my ($lend, $rend, $orient, $ordered_nodeIDs_aref) = @_;
|
|
|
|
unless ($lend =~ /^\d+$/ && $rend =~ /^\d+$/) {
|
|
confess "Error, lend $lend and rend $rend must be integers";
|
|
}
|
|
unless ($orient =~ /^[\+\-]$/) {
|
|
confess "Error, orient($orient) is invalid";
|
|
}
|
|
|
|
unless (@$ordered_nodeIDs_aref) {
|
|
confess "Error, need list of ordered nodeIDs as constructor param";
|
|
}
|
|
|
|
$PATH_ID++;
|
|
|
|
my $self = {
|
|
_lend => $lend,
|
|
_rend => $rend,
|
|
_orient => $orient,
|
|
_ordered_nodeIDs => [@$ordered_nodeIDs_aref], # a path is an ordered list of intron and exon nodeIDs traversed
|
|
_pathID => $PATH_ID,
|
|
|
|
};
|
|
|
|
bless ($self, $packagename);
|
|
|
|
return ($self);
|
|
}
|
|
|
|
####
|
|
sub get_ordered_nodeIDs {
|
|
my $self = shift;
|
|
return (@{$self->{_ordered_nodeIDs}});
|
|
}
|
|
|
|
####
|
|
sub toString {
|
|
my $self = shift;
|
|
my @ordered_nodeIDs = $self->get_ordered_nodeIDs();
|
|
return (join (",", @ordered_nodeIDs));
|
|
}
|
|
|
|
####
|
|
sub is_subpath_of {
|
|
my $self = shift;
|
|
my ($other_path) = @_;
|
|
|
|
my $self_path_string = "," . $self->toString() . ","; ## add comma terminal delimiters
|
|
|
|
my $other_path_string = "," . $other_path->toString() . ",";
|
|
|
|
if ($other_path_string =~ /$self_path_string/) {
|
|
return (1); #true
|
|
}
|
|
else {
|
|
return (0); #false
|
|
}
|
|
}
|
|
|
|
####
|
|
sub is_compatible () {
|
|
my $self = shift;
|
|
my ($other_splice_path_obj) = @_;
|
|
|
|
## make sure they have at least one node in common:
|
|
my @node_IDs_A = $self->get_ordered_nodeIDs();
|
|
my @node_IDs_B = $other_splice_path_obj->get_ordered_nodeIDs();
|
|
|
|
my $found_nodeID_in_common = 0;
|
|
my $common_nodeID = undef;
|
|
{ # do this in a more restricted scope
|
|
my %nodeIDs;
|
|
foreach my $nodeID (@node_IDs_A) {
|
|
$nodeIDs{$nodeID} = 1;
|
|
}
|
|
foreach my $nodeID (@node_IDs_B) {
|
|
if ($nodeIDs{$nodeID}) {
|
|
$found_nodeID_in_common = 1;
|
|
$common_nodeID = $nodeID;
|
|
last;
|
|
}
|
|
}
|
|
}
|
|
unless ($found_nodeID_in_common) {
|
|
return (0); # false; no common node so definitely incompatible
|
|
}
|
|
|
|
## examine all nodes adjacent to common node; they should be completely identical
|
|
my $common_node_pos_A = $self->_find_ele_index_pos_via_value(\@node_IDs_A, $common_nodeID);
|
|
my $common_node_pos_B = $self->_find_ele_index_pos_via_value(\@node_IDs_B, $common_nodeID);
|
|
|
|
## search left:
|
|
my ($i, $j);
|
|
for ($i=$common_node_pos_A, $j=$common_node_pos_B; $i >= 0 && $j >= 0; $i--, $j--) {
|
|
my $A_node = $node_IDs_A[$i];
|
|
my $B_node = $node_IDs_B[$j];
|
|
if ($A_node ne $B_node) {
|
|
# incompatible
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
## search right:
|
|
for ($i = $common_node_pos_A, $j = $common_node_pos_B; $i <= $#node_IDs_A && $j <= $#node_IDs_B; $i++,$j++) {
|
|
my $A_node = $node_IDs_A[$i];
|
|
my $B_node = $node_IDs_B[$j];
|
|
if ($A_node ne $B_node) {
|
|
# incompatible
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
## if got here, then passed compatibility tests.
|
|
return (1); # Yes, compatible
|
|
|
|
}
|
|
|
|
|
|
sub _find_ele_index_pos_via_value {
|
|
my $self = shift;
|
|
my ($list_aref, $value) = @_;
|
|
for (my $i = 0; $i <= $#$list_aref; $i++) {
|
|
if ($list_aref->[$i] eq $value) {
|
|
return ($i);
|
|
}
|
|
}
|
|
confess "Error finding index position of $value in list";
|
|
}
|
|
|
|
####
|
|
sub get_pathID {
|
|
my $self = shift;
|
|
return ($self->{_pathID});
|
|
}
|
|
|
|
####
|
|
sub get_coords {
|
|
my $self = shift;
|
|
return ($self->{_lend}, $self->{_rend});
|
|
}
|
|
|
|
####
|
|
sub get_orient {
|
|
my $self = shift;
|
|
return ($self->{_orient});
|
|
}
|
|
|
|
|
|
1; #EOM
|