290 lines
12 KiB
Perl
290 lines
12 KiB
Perl
package main;
|
|
our $SEE;
|
|
|
|
package CDNA::CDNA_stitcher;
|
|
use strict;
|
|
use CDNA::CDNA_alignment;
|
|
use CDNA::Gene_obj_alignment_assembler; #used for converting gene_obj to alignment.
|
|
use Carp;
|
|
|
|
my $FUZZDIST = 20; #allow FUZZDIST nt extension beyond splice site.
|
|
|
|
sub new {
|
|
my $packagename = shift;
|
|
my $self = {
|
|
fuzzlength=>$FUZZDIST #default setting.
|
|
};
|
|
bless ($self, $packagename);
|
|
return ($self);
|
|
}
|
|
|
|
|
|
sub set_fuzzlength {
|
|
my $self = shift;
|
|
my $fuzzdist = shift;
|
|
$self->{fuzzlength} = $fuzzdist;
|
|
}
|
|
|
|
sub stitch_alignments {
|
|
my $self = shift;
|
|
my ($template_alignment, $thread_alignment) = @_;
|
|
|
|
my $template_acc = $template_alignment->get_acc();
|
|
my $thread_acc = $thread_alignment->get_acc();
|
|
|
|
print "Stitching Template:\n$template_acc: " . $template_alignment->toToken()
|
|
. "\nby\n$thread_acc: " . $thread_alignment->toToken() . "\n\n" if $main::SEE;
|
|
|
|
## don't even think about stitching alignments of opposite spliced orienatations!!!
|
|
|
|
my $template_spliced_orient = $template_alignment->get_spliced_orientation();
|
|
my $thread_spliced_orient = $thread_alignment->get_spliced_orientation();
|
|
|
|
if ($template_spliced_orient ne '?' && $thread_spliced_orient ne '?') {
|
|
## both have assigned transcribed orients
|
|
if ($template_spliced_orient ne $thread_spliced_orient) {
|
|
confess "Error, trying to stitch together oppositely transcribed alignments. That's impossible. ";
|
|
}
|
|
}
|
|
|
|
|
|
my $spliced_orient = ($thread_spliced_orient ne '?') ? $thread_spliced_orient : $template_spliced_orient;
|
|
|
|
my $fuzzlength = $self->{fuzzlength};
|
|
|
|
## Algorithm
|
|
## -anchor the first thread segment to the template alignment
|
|
## -determine the boundaries of the matching anchor and first threaded segment
|
|
## -add all preceding template segments to the stitched alignment if splice compatible.
|
|
## -add internal cdna alignment segments
|
|
## -add terminal template alignment segments if splice compatible.
|
|
|
|
my @template_segments = $template_alignment->get_alignment_segments();
|
|
my @threaded_segments = $thread_alignment->get_alignment_segments();
|
|
|
|
my $single_threaded_segment = ($#threaded_segments == 0) ? 1:0; #indicate only a single segment exists.
|
|
|
|
my @stitched_segments;
|
|
## Anchor first threaded segment to the template alignment:
|
|
my ($thread_lend, $thread_rend) = $threaded_segments[0]->get_coords();
|
|
my $anchor_pos = -1;
|
|
for (my $i = 0; $i <= $#template_segments; $i++) {
|
|
my ($lend, $rend) = $template_segments[$i]->get_coords();
|
|
if ($lend < $thread_rend && $rend > $thread_lend) { #overlap
|
|
$anchor_pos = $i;
|
|
last;
|
|
}
|
|
}
|
|
print "Anchoring first cDNA segment. Anchor point: $anchor_pos\n" if $SEE;
|
|
if ($anchor_pos > -1) { #found an anchor point in template alignment.
|
|
|
|
|
|
## add stitched segment
|
|
my $anchor_segment = $template_segments[$anchor_pos];
|
|
my ($anchor_lend, $anchor_rend) = $anchor_segment->get_coords();
|
|
my $first_threaded_segment = $threaded_segments[0];
|
|
my ($thread_lend, $thread_rend) = $first_threaded_segment->get_coords();
|
|
# initialize to largest spread
|
|
my ($new_lend) = ($anchor_lend < $thread_lend) ? $anchor_lend : $thread_lend;
|
|
my ($new_rend) = ($anchor_rend > $thread_rend) ? $anchor_rend : $thread_rend;
|
|
# adjust based on splice boundaries
|
|
my ($has_left_splice_junction, $has_right_splice_junction);
|
|
$has_left_splice_junction = $anchor_segment->has_left_splice_junction(); #initialize.
|
|
if ($anchor_segment->has_left_splice_junction() && ($anchor_lend - $thread_lend <= $fuzzlength)) {
|
|
$new_lend = $anchor_lend;
|
|
} elsif ($thread_lend < $anchor_lend) {
|
|
#using threaded segment as initial stitched segment
|
|
$has_left_splice_junction = $first_threaded_segment->has_left_splice_junction();
|
|
}
|
|
|
|
$has_right_splice_junction = $first_threaded_segment->has_right_splice_junction();
|
|
if ($first_threaded_segment->has_right_splice_junction()) {
|
|
$new_rend = $thread_rend;
|
|
} elsif ($anchor_segment->has_right_splice_junction() && ($thread_rend - $anchor_rend <= $fuzzlength)) {
|
|
$new_rend = $anchor_rend;
|
|
$has_right_splice_junction = $anchor_segment->has_right_splice_junction();
|
|
}
|
|
|
|
## Add preceding template segments if current segment is left-splice compatible.
|
|
if ($has_left_splice_junction) {
|
|
print "Adding all template segments before position $anchor_pos.\n" if $SEE;
|
|
## add all template segments to the stitched alignment preceding the anchor point:
|
|
for (my $i = 0; $i < $anchor_pos; $i++) {
|
|
push (@stitched_segments, $template_segments[$i]->clone());
|
|
}
|
|
}
|
|
|
|
## add the current segment.
|
|
my $newsegment = $anchor_segment->clone();
|
|
$newsegment->set_coords($new_lend, $new_rend);
|
|
$newsegment->set_left_splice_junction($has_left_splice_junction);
|
|
$newsegment->set_right_splice_junction($has_right_splice_junction);
|
|
print "adding current segment: lsplice: $has_left_splice_junction, rsplice: $has_right_splice_junction\n" if $SEE;
|
|
push (@stitched_segments, $newsegment);
|
|
|
|
} else { #no anchor pos, simply add the first threaded segment:
|
|
print "No anchor position, simply adding the first threaded segment.\n" if $SEE;
|
|
push (@stitched_segments, $threaded_segments[0]->clone());
|
|
}
|
|
|
|
## Add all but last threaded segments to the stitched alignment:
|
|
|
|
for (my $i = 1; $i < $#threaded_segments; $i++) {
|
|
print "Adding internal cDNA alignment segment, index: $i\n" if $SEE;
|
|
push (@stitched_segments, $threaded_segments[$i]->clone());
|
|
}
|
|
|
|
## Anchor the last threaded segment to a segment within the template model:
|
|
print "Anchoring the terminal segment.\n" if $SEE;
|
|
$anchor_pos = -1;
|
|
my $last_threaded_segment = $threaded_segments[$#threaded_segments];
|
|
($thread_lend, $thread_rend) = $last_threaded_segment->get_coords();
|
|
# find last template segment overlapping last cdna segment
|
|
for (my $i = $#template_segments; $i >= 0; $i--) {
|
|
my ($lend, $rend) = $template_segments[$i]->get_coords();
|
|
if ($lend < $thread_rend && $rend > $thread_lend) { #overlap
|
|
$anchor_pos = $i;
|
|
last;
|
|
}
|
|
}
|
|
print "Terminal anchor position: $anchor_pos\n" if $SEE;
|
|
if ($anchor_pos > -1) {
|
|
#build composite segment
|
|
my $anchor_segment = $template_segments[$anchor_pos];
|
|
my $has_left_splice_junction = $last_threaded_segment->has_left_splice_junction(); #initialize.
|
|
my $has_right_splice_junction = $anchor_segment->has_right_splice_junction();
|
|
my ($anchor_lend, $anchor_rend) = $anchor_segment->get_coords();
|
|
my $new_lend = ($anchor_lend < $thread_lend) ? $anchor_lend : $thread_lend;
|
|
my $new_rend = ($anchor_rend > $thread_rend) ? $anchor_rend : $thread_rend;
|
|
if ($last_threaded_segment->has_left_splice_junction()) {
|
|
$new_lend = $thread_lend;
|
|
} elsif ($anchor_segment->has_left_splice_junction() && ($anchor_lend - $thread_lend <= $fuzzlength)) {
|
|
$new_lend = $anchor_lend;
|
|
$has_left_splice_junction = $anchor_segment->has_left_splice_junction();
|
|
}
|
|
if ($anchor_segment->has_right_splice_junction() && ($thread_rend - $anchor_rend <= $fuzzlength)) {
|
|
$new_rend = $anchor_rend;
|
|
} else {
|
|
$has_right_splice_junction = $last_threaded_segment->has_right_splice_junction();
|
|
}
|
|
if ($single_threaded_segment) { #only one segment (first == last)
|
|
#just update existing status:
|
|
print "Only one threaded segment, updating it's rend coord and status.\n" if $SEE;
|
|
my $last_stitched_segment = $stitched_segments[$#stitched_segments];
|
|
my ($curr_lend, $curr_rend) = $last_stitched_segment->get_coords();
|
|
$last_stitched_segment->set_coords($curr_lend, $new_rend);
|
|
$last_stitched_segment->set_right_splice_junction($has_right_splice_junction);
|
|
} else {
|
|
my $newsegment = $anchor_segment->clone();
|
|
$newsegment->set_coords($new_lend, $new_rend);
|
|
$newsegment->set_left_splice_junction($has_left_splice_junction);
|
|
$newsegment->set_right_splice_junction($has_right_splice_junction);
|
|
print "adding composite terminal segment. Lsplice: $has_left_splice_junction, Rsplice: $has_right_splice_junction.\n" if $SEE;
|
|
push (@stitched_segments, $newsegment);
|
|
}
|
|
} else {
|
|
print "No matching terminal exon in template.\n" if $SEE;
|
|
if ($single_threaded_segment) {
|
|
print "Single threaded segment remains unchanged.\n" if $SEE;
|
|
} else {
|
|
|
|
print "Adding last cDNA exon.\n" if $SEE;
|
|
push (@stitched_segments, $last_threaded_segment->clone());
|
|
}
|
|
}
|
|
|
|
|
|
## Add terminal template segments if the last stitched segment contains a splice junction
|
|
|
|
my $last_stitched_segment = $stitched_segments[$#stitched_segments];
|
|
if ($last_stitched_segment->has_right_splice_junction()) {
|
|
print "Adding terminal template segments:\n" if $SEE;
|
|
my ($last_lend, $last_rend) = $last_stitched_segment->get_coords();
|
|
$anchor_pos = -1;
|
|
for (my $i = $#template_segments; $i >= 0; $i--) {
|
|
my ($anchor_lend, $anchor_rend) = $template_segments[$i]->get_coords();
|
|
if ($anchor_rend > $last_lend && $anchor_lend < $last_rend) { #overlap
|
|
$anchor_pos = $i;
|
|
last;
|
|
}
|
|
}
|
|
print "Last overlapping alignment segment in index found at: $anchor_pos\n" if $SEE;
|
|
if ($anchor_pos > -1) {
|
|
for (my $i = $anchor_pos + 1; $i <= $#template_segments; $i++) {
|
|
print "\tadding terminal template segment index: $i\n" if $SEE;
|
|
push (@stitched_segments, $template_segments[$i]->clone());
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
## Done stitching segments together. Create new alignment:
|
|
my $cdna_length = 0;
|
|
foreach my $segment (@stitched_segments) {
|
|
my $length = $segment->get_length();
|
|
$cdna_length += $length;
|
|
}
|
|
|
|
my $seq_ref = $template_alignment->get_genomic_seq_ref();
|
|
|
|
my $stitched_alignment = new CDNA::CDNA_alignment ($cdna_length, \@stitched_segments, $seq_ref); #aligned orientation auto assigned.
|
|
|
|
$stitched_alignment->set_spliced_orientation($spliced_orient);
|
|
$stitched_alignment->set_acc($template_alignment->get_acc());
|
|
|
|
print "Stitched alignment: " . $stitched_alignment->toToken() . "\n" if $SEE;
|
|
|
|
return ($stitched_alignment);
|
|
}
|
|
|
|
|
|
|
|
sub stitch_alignment_into_gene {
|
|
my $self = shift;
|
|
my ($gene_obj, $cdna_obj, $sequence_ref) = @_;
|
|
|
|
my $gene_obj_assembler = new CDNA::Gene_obj_alignment_assembler($sequence_ref);
|
|
my $gene_based_cdna_obj = $gene_obj_assembler->gene_obj_to_cdna_alignment($gene_obj);
|
|
|
|
my $stitched_alignment = $self->stitch_alignments($gene_based_cdna_obj, $cdna_obj);
|
|
$stitched_alignment->set_fli(1);
|
|
|
|
my $partials_href = $self->_analyze_partial_status($gene_obj);
|
|
my $stitched_gene_obj = $stitched_alignment->get_gene_obj_via_alignment($partials_href);
|
|
|
|
return ($stitched_gene_obj);
|
|
|
|
}
|
|
|
|
|
|
####
|
|
sub _analyze_partial_status {
|
|
my $self = shift;
|
|
my ($gene_obj) = shift;
|
|
my %partials;
|
|
if ($gene_obj->is_5prime_partial()) {
|
|
$partials{"5prime"} = 1;
|
|
}
|
|
if ($gene_obj->is_3prime_partial()) {
|
|
$partials{"3prime"} = 1;
|
|
}
|
|
return (\%partials);
|
|
}
|
|
|
|
|
|
|
|
|
|
1; #EOM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|