biyelunwen/99.scripts/trinity_utils/PerlLib/CDNA/Alternative_splice_comparer.pm

1001 lines
31 KiB
Perl

#!/usr/local/bin/perl
package main;
our $SEE;
package CDNA::Alternative_splice_comparer;
use Gene_obj;
use strict;
use Data::Dumper;
use Carp;
use CDNA::PASA_alignment_assembler;
sub new {
my $packagename = shift;
my $self = {
unspliced_introns => 0,
conventional_alt_splice => 0,
start_or_end_within_intron => 0,
exon_skipping => 0,
alternate_exons => 0
};
bless ($self, $packagename);
return ($self);
}
####
sub compare_isoforms_via_alignmentObjs {
my $self = shift;
my ($align1, $align2) = @_;
my $gene_1 = $align1->get_gene_obj_via_alignment();
my $gene_2 = $align2->get_gene_obj_via_alignment();
return ($self->compare_isoforms_via_geneObjs($gene_1, $gene_2));
}
####
sub compare_isoforms_via_geneObjs {
my $self = shift;
my ($gene1, $gene2) = @_;
## Looking for:
# -unspliced introns
# -conventional alt-splice isoforms
# -transcriptional start or polyadenylation site within intron
# -exon skipping
# -alternate exons
## Look for Unspliced Introns
my $struct = { unspliced_introns => 0,
conventional_alt_splice => 0,
start_or_end_within_intron => 0,
exon_skipping => 0,
alternate_exons=> 0 };
my @unspliced_introns = ($self->find_unspliced_introns($gene1, $gene2), $self->find_unspliced_introns($gene2, $gene1));
if (@unspliced_introns) {
print "*** Unspliced introns \n";
$struct->{unspliced_introns} = 1;
$self->{unspliced_introns} = \@unspliced_introns;
}
## Look for the conventional alt splice isoforms (diff donors/acceptors for introns).
my (%alternate_acceptors_n_donors) = $self->find_conventional_alt_splice_isoforms($gene1, $gene2);
if (%alternate_acceptors_n_donors) {
print "*** Conventional Alt splice (donor and/or acceptor)\n";
#print Dumper (\%alternate_acceptors_n_donors);
$self->{conventional_alt_splice} = \%alternate_acceptors_n_donors;
if (@{$alternate_acceptors_n_donors{acceptors}}) {
$struct->{conventional_alt_acceptor} = 1;
}
if (@{$alternate_acceptors_n_donors{donors}}) {
$struct->{conventional_alt_donor} = 1;
}
}
my %intron_starts_or_ends = ($self->find_starts_and_ends_within_introns ($gene1, $gene2), $self->find_starts_and_ends_within_introns ($gene2, $gene1));
if (%intron_starts_or_ends) {
print "*** Transcriptional start or polyadenylation site within an intron.\n";
$struct->{start_or_end_within_intron} = 1;
my @starts_or_ends = values %intron_starts_or_ends;
$self->{start_or_end_within_intron} = \@starts_or_ends;
}
## Look for exon skipping events.
my @exon_skips = ($self->find_exon_skipping_events($gene1, $gene2), $self->find_exon_skipping_events($gene2, $gene1));
if (@exon_skips) {
print "*** Exon skipping event detected.\n";
$struct->{exon_skipping} = 1;
$self->{exon_skipping} = \@exon_skips;
}
## Look or Alternate exons
my @alternate_exons = ($self->find_alternate_exons($gene1, $gene2), $self->find_alternate_exons($gene2, $gene1));
if (@alternate_exons) {
print "*** Found alternate exons\n";
$struct->{alternate_exons} = 1;
$self->{alternate_exons} = \@alternate_exons;
}
return ($struct);
}
# private
sub enumerate_exons_of_gene {
my $gene_obj = shift;
# put everything in forward coordinate axis:
my %exon_coords;
my @exons = $gene_obj->get_exons();
foreach my $exon (@exons) {
my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
$exon_coords{$lend} = $rend;
}
return (%exon_coords);
}
#private
####
sub enumerate_introns_of_gene {
my $gene_obj = shift;
## Put everything in forward strand coordinate axis.
my %introns;
my @exons = sort {$a->{end5}<=>$b->{end5}} $gene_obj->get_exons();
for (my $i = 0; $i < $#exons; $i++) {
my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $exons[$i]->get_coords();
my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $exons[$i+1]->get_coords();
my ($intron_end5, $intron_end3) = ($exon1_rend + 1, $exon2_lend - 1);
$introns{$intron_end5} = $intron_end3;
}
return (%introns);
}
=over 4
=item find_unspliced_introns()
B<Description:> Find unspliced introns in gene_1 when compared to gene_2
B<Parameters:> $gene1, $gene2
B<Returns:> @unspliced_introns
@unspliced_introns is a list of coordinate pairs representing the unspliced introns found in gene 1 when compared to gene2
@unspliced_introns = ([end5,end3], ...)
=back
=cut
####
sub find_unspliced_introns {
my $self = shift;
my ($gene1, $gene2) = @_;
## Look for unspliced intron found in gene1 when compared to gene2
my %gene1_exon_coords = &enumerate_exons_of_gene ($gene1);
my %gene2_intron_coords = &enumerate_introns_of_gene($gene2);
my @unspliced_introns;
foreach my $intron_lend (keys %gene2_intron_coords) {
my $intron_rend = $gene2_intron_coords{$intron_lend};
foreach my $exon_lend (keys %gene1_exon_coords) {
my $exon_rend = $gene1_exon_coords{$exon_lend};
if ($intron_lend > $exon_lend && $intron_rend < $exon_rend) { #unspliced intron found
push (@unspliced_introns, [$intron_lend, $intron_rend]);
}
}
}
return (@unspliced_introns);
}
=over 4
=item find_conventional_alt_splice_isoforms()
B<Description:> Looks for different donor and acceptor sites within overlapping introns of genes
B<Parameters:> $gene1, $gene2
B<Returns:> %alt_donors_and_acceptors
with structure:
%alt_donors_and_acceptors = ( acceptors =>
[
{ gene1 => acceptor_coord, gene2 => acceptor_coord }, ...
],
donors => [
{ gene1 => donor_coord, gene2 => donor_coord }, ...
]
);
Coordinates stored are the actual exon boundary coordinates (first or last bp of each exon)
=back
=cut
####
sub find_conventional_alt_splice_isoforms {
my $self = shift;
my ($gene1, $gene2) = @_;
print "## Looking for conventional alt splice isoforms (diff donors, acceptors)\n" if $SEE;
my %exons_1_hash = &enumerate_exons_of_gene ($gene1);
my %exons_2_hash = &enumerate_exons_of_gene ($gene2);
my $orientation = $gene1->get_orientation();
if ($orientation ne $gene2->get_orientation()) {
die "Error, inconsistent orientations between genes: " . $gene1->toString() . $gene2->toString();
}
# algorithm
# -find one-to-one mappings between exons, and locate differences at acceptors and donor sites.
my %alternate_acceptors_n_donors = ( acceptors => [],
donors => []
); # holds coordinates for all gene1 diff boundaries.
my $found_diff_flag = 0;
## compare exons of gene1 to gene2
my @exons_gene_1_list;
my @exons_gene_2_list;
# build data structure:
foreach my $data_pair ( [\%exons_1_hash, \@exons_gene_1_list],
[\%exons_2_hash, \@exons_gene_2_list] ) {
my ($exons_href, $exons_aref) = @$data_pair;
foreach my $lend (keys %$exons_href) {
my $rend = $exons_href->{$lend};
push (@$exons_aref, { lend => $lend,
rend => $rend,
match_indices => [] } );
}
}
@exons_gene_1_list = sort {$a->{lend}<=>$b->{lend}} @exons_gene_1_list;
@exons_gene_2_list = sort {$a->{lend}<=>$b->{lend}} @exons_gene_2_list;
# all-vs-all comparison:
for (my $i = 0; $i <= $#exons_gene_1_list; $i++) {
my $i_ele_ref = $exons_gene_1_list[$i];
my ($i_lend, $i_rend, $i_match_indices_aref) = ($i_ele_ref->{lend},
$i_ele_ref->{rend},
$i_ele_ref->{match_indices} );
for (my $j = 0; $j <= $#exons_gene_2_list; $j++) {
my $j_ele_ref = $exons_gene_2_list[$j];
my ($j_lend, $j_rend, $j_match_indices_aref) = ($j_ele_ref->{lend},
$j_ele_ref->{rend},
$j_ele_ref->{match_indices});
if ($i_lend < $j_rend && $i_rend > $j_lend) { #overlap
push (@$i_match_indices_aref, $j);
push (@$j_match_indices_aref, $i);
}
}
}
## find donors and acceptors:
## check gene_1's exons for 1-1 mappings and end differences at splice junctions
for (my $i = 0; $i <= $#exons_gene_1_list; $i++) {
my $i_ele_ref = $exons_gene_1_list[$i];
my ($i_lend, $i_rend, $i_match_indices_aref) = ($i_ele_ref->{lend},
$i_ele_ref->{rend},
$i_ele_ref->{match_indices} );
if (scalar (@$i_match_indices_aref) == 1) {
## found some mapping
my $j_index = $i_match_indices_aref->[0];
my $j_ele_ref = $exons_gene_2_list[$j_index];
my ($j_lend, $j_rend, $j_match_indices_aref) = ($j_ele_ref->{lend},
$j_ele_ref->{rend},
$j_ele_ref->{match_indices});
if (scalar (@$j_match_indices_aref) != 1) {
next; ## this is a 1-many mapping, want only 1-1 mappings
}
# make sure j's i is i
if ($j_match_indices_aref->[0] != $i) {
## bad, this should never happen!
confess "Error, found exon 1-1 mapping of $i to $j_index, but j maps to @$j_match_indices_aref ";
}
## check left boundary:
if ($i_lend != $j_lend ## diff coordinate
&& $i != 0 # at a splice junction
&& $j_index != 0 # at a splice junction
) {
## found splice difference at left junction:
$found_diff_flag = 1;
my $splice_ref = ($orientation eq '+')
? $alternate_acceptors_n_donors{acceptors}
: $alternate_acceptors_n_donors{donors};
push (@$splice_ref, { gene1 => $i_lend,
gene2 => $j_lend } );
}
## check right boundary:
if ($i_rend != $j_rend ## diff coordinate
&& $i != $#exons_gene_1_list # at splice junction
&& $j_index != $#exons_gene_2_list # at splice junction
) {
$found_diff_flag = 1;
my $splice_ref = ($orientation eq '+')
? $alternate_acceptors_n_donors{donors}
: $alternate_acceptors_n_donors{acceptors};
push (@$splice_ref, { gene1 => $i_rend,
gene2 => $j_rend } );
}
}
}
if ($found_diff_flag) {
return (%alternate_acceptors_n_donors);
} else {
return ();
}
}
=over 4
=item find_exon_skipping_events()
B<Description:> Finds an exon of gene_1 which reside within an intron of gene_2
B<Parameters:> gene1, gene2
B<Returns:> @skipped_exons
notice this is a list of lists
each list is a set of adjacent skipped exons, joined so that they correspond to a single event.
so what we are really getting here is a list of events of skipped exons where each event may contain one or more skipped exons.
@skipped_exons = (
[
[exon_lend,exon_rend], ...
],
[
[exon_lend, exon_rend], ...
]
)
=back
=cut
####
sub find_exon_skipping_events {
my $self = shift;
my ($gene1, $gene2) = @_;
# Algorithm:
# -find an internal exon of gene 1 that resides within an intron of gene 2. Flanking exons must be anchored to the other isoform
my %gene1_exons = &enumerate_exons_of_gene($gene1);
my %gene2_introns = &enumerate_introns_of_gene($gene2);
my @potential_skipped_exons;
foreach my $exon1_lend (keys %gene1_exons) {
my $exon1_rend = $gene1_exons{$exon1_lend};
## See if within intron of second gene
foreach my $intron2_lend (keys %gene2_introns) {
my $intron2_rend = $gene2_introns{$intron2_lend};
if ($exon1_lend > $intron2_lend && $exon1_rend < $intron2_rend) { #exon incapsulated in intron
push (@potential_skipped_exons, [$exon1_lend, $exon1_rend]);
}
}
}
## Verify flanking exons are anchorable:
my @skipped_exons;
if (@potential_skipped_exons) {
foreach my $potential_skipped_exon (@potential_skipped_exons) {
my ($exon_lend, $exon_rend) = @$potential_skipped_exon;
## Try to anchor left exon
my $anchor_left_exon = 0;
foreach my $exon1 ($gene1->get_exons()) {
my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $exon1->get_coords();
unless ($exon1_rend < $exon_lend) { next;}
foreach my $exon2 ($gene2->get_exons()) {
my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $exon2->get_coords();
unless ($exon2_rend < $exon_lend) { next;}
if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) { #anchorable
$anchor_left_exon = 1;
last;
}
}
if ($anchor_left_exon) { last;}
}
unless ($anchor_left_exon) { next;}
## Try to anchor the right exon
my $anchor_right_exon = 0;
foreach my $exon1 ($gene1->get_exons()) {
my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $exon1->get_coords();
unless ($exon1_lend > $exon_rend) { next;}
foreach my $exon2 ($gene2->get_exons()) {
my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $exon2->get_coords();
unless ($exon2_lend > $exon_rend) { next;}
if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) { #anchorable
$anchor_right_exon = 1;
last;
}
}
if ($anchor_right_exon) { last;}
}
if ($anchor_right_exon && $anchor_left_exon) {
push (@skipped_exons, $potential_skipped_exon);
}
}
}
## group into lists of adjacent exons
my @ret_skipped_exons;
if (@skipped_exons) {
@skipped_exons = sort {$a->[0]<=>$b->[0]} @skipped_exons;
my %coord_to_order;
## map each exon to an integer
my $order = 0;
foreach my $exon (sort {$a->{end5}<=>$b->{end5}} $gene1->get_exons()) {
my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
$order++;
$coord_to_order{$lend} = $order;
}
my $first_skipped_exon = shift @skipped_exons;
@ret_skipped_exons = ([$first_skipped_exon]);
while (@skipped_exons) {
my $last_event = $ret_skipped_exons[$#ret_skipped_exons];
my $last_skipped_exon = $last_event->[ $#{$last_event} ];
my $last_lend = $last_skipped_exon->[0];
my $curr_skipped_exon = shift @skipped_exons;
my $curr_lend = $curr_skipped_exon->[0];
if ($coord_to_order{$curr_lend} - $coord_to_order{$last_lend} == 1) {
## adjacent, so group them
push (@$last_event, $curr_skipped_exon);
}
else {
## not adjacent
# start new event
push (@ret_skipped_exons, [$curr_skipped_exon]);
}
}
}
return (@ret_skipped_exons);
}
=over 4
=item find_alternate_exons()
B<Description:> Finds terminal exons in gene1 that are different and non-overlapping, and adjacent to overlapping exons.
B<Parameters:> $gene1, $gene2
B<Returns:> @range_of_coords_containing_alternate_exons
@ret = ( { type => lend|rend,
coords => [region_lend,region_rend],
num_exons => intval
}
, ...
)
=back
=cut
sub find_alternate_exons {
my $self = shift;
my ($gene1, $gene2) = @_;
my @alternate_exon_regions; # store coords of alternate exons
# Algorithm:
# -Looking at terminal exons, should have non-overlapping exons prior to the first overlapping exon
## Look from front to back:
my @gene1_exons = sort {$a->{end5}<=>$b->{end5}} $gene1->get_exons();
my @gene2_exons = sort {$a->{end5}<=>$b->{end5}} $gene2->get_exons();
my @alternate_exons_front;
for (my $i = 0; $i <= $#gene1_exons; $i++) {
my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $gene1_exons[$i]->get_coords();
my $overlapping_j = undef();
for (my $j = 0; $j <= $#gene2_exons; $j++) {
my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $gene2_exons[$j]->get_coords();
## check for overlap
if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) {
$overlapping_j = $j;
last;
}
}
if (defined($overlapping_j)) {
## See if i and j are not first:
if ($i != 0 && $overlapping_j != 0) {
for (my $x=0; $x < $i; $x++) {
my $exon = $gene1_exons[$x];
my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
push (@alternate_exons_front, [$lend,$rend]);
}
}
last;
}
}
## Look from back to front:
my @alternate_exons_back;
for (my $i = $#gene1_exons; $i >= 0; $i--) {
my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $gene1_exons[$i]->get_coords();
my $overlapping_j = undef();
for (my $j = $#gene2_exons; $j >= 0; $j--) {
my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $gene2_exons[$j]->get_coords();
## check for overlap
if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) {
$overlapping_j = $j;
last;
}
}
if (defined($overlapping_j)) {
## See if i and j are not last:
if ($i != $#gene1_exons && $overlapping_j != $#gene2_exons) {
for (my $x=$#gene1_exons; $x > $i; $x--) {
my $exon = $gene1_exons[$x];
my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
push (@alternate_exons_back, [$lend,$rend]);
}
}
last;
}
}
if (@alternate_exons_front) {
my @front_coords;
my $num_alternate_exons_front = scalar (@alternate_exons_front);
foreach my $coordpair (@alternate_exons_front) {
push (@front_coords, @$coordpair);
}
@front_coords = sort {$a<=>$b} @front_coords;
my $region_lend = shift @front_coords;
my $region_rend = pop @front_coords;
push (@alternate_exon_regions, { type => 'lend',
coords => [$region_lend, $region_rend],
num_exons => $num_alternate_exons_front,
}
);
}
if (@alternate_exons_back) {
my @back_coords;
my $num_alternate_exons_back = scalar (@alternate_exons_back);
foreach my $coordpair (@alternate_exons_back) {
push (@back_coords, @$coordpair);
}
@back_coords = sort {$a<=>$b} @back_coords;
my $region_lend = shift @back_coords;
my $region_rend = pop @back_coords;
push (@alternate_exon_regions, { type => 'rend',
coords => [$region_lend, $region_rend],
num_exons => $num_alternate_exons_back
}
);
}
return (@alternate_exon_regions);
}
=over 4
=item find_starts_and_ends_within_introns()
B<Description:> The first and last exons of gene_1 are compared to the introns of gene_2.
B<Parameters:> $gene1, $gene2
B<Returns:> @coords
@coords contains the coordinates of either the very end5 or very end3 of terminal exons which fall into introns of gene_2
=back
=cut
####
sub find_starts_and_ends_within_introns {
my $self = shift;
my ($gene1, $gene2) = @_;
my $fuzzlength = $CDNA::PASA_alignment_assembler::FUZZLENGTH;
my %starts_and_ends;
my $orientation = $gene1->get_orientation();
# Algorithm:
# -first and last exon of gene1 is compared to introns of gene2
my @gene1_exons = $gene1->get_exons();
my %gene2_introns = &enumerate_introns_of_gene($gene2);
my %gene2_exons = &enumerate_exons_of_gene($gene2);
my $first_exon = $gene1_exons[0];
my ($end5, $end3) = $first_exon->get_coords();
foreach my $intron_lend (keys %gene2_introns) {
my $intron_rend = $gene2_introns{$intron_lend};
if ($end5 >= $intron_lend && $end5 <= $intron_rend) { #endpoint encapsulated by intron.
## make sure it's not fuzz:
if ($orientation eq "+") {
if ( abs ($end5-$intron_rend) + 1 <= $fuzzlength) {
next;
}
}
else { # minus strand
if (abs ($end5-$intron_lend)+1 <= $fuzzlength) {
next;
}
}
## make sure exon overlaps another exon
foreach my $exon_lend (keys %gene2_exons) {
my $exon_rend = $gene2_exons{$exon_lend};
my ($lend, $rend) = sort {$a<=>$b} ($end5, $end3);
if ($rend > $exon_lend && $lend < $exon_rend) { #overlap
$starts_and_ends{start} = $end5; # store start
last;
}
}
last;
}
}
## Now try last exon
my $last_exon = $gene1_exons[$#gene1_exons];
my ($end5, $end3) = $last_exon->get_coords();
foreach my $intron_lend (keys %gene2_introns) {
my $intron_rend = $gene2_introns{$intron_lend};
if ($end3 >= $intron_lend && $end3 <= $intron_rend) { #endpoint encapsulated by intron.
## make sure not fuzz:
if ($orientation eq "+") {
if (abs ($end3 - $intron_lend) + 1 <= $fuzzlength) {
next;
}
}
else { #minus strand
if (abs ($end3 - $intron_rend) + 1 <= $fuzzlength) {
next;
}
}
## Make sure exon overlaps another exon
foreach my $exon_lend (keys %gene2_exons) {
my $exon_rend = $gene2_exons{$exon_lend};
my ($lend, $rend) = sort {$a<=>$b} ($end5, $end3);
if ($rend > $exon_lend && $lend < $exon_rend) {
$starts_and_ends{end} = $end3; #store end
last;
}
}
last;
}
}
return (%starts_and_ends);
}
=over 4
=item compare_exons()
B<Description:> Compares all CDS exons between genes 1 and 2, returns number of identical CDS exons and total number of CDS exons between the two genes.
B<Parameters:> $gene1, $gene2
B<Returns:> ($num_identical_CDS_exons, $num_total_CDS_exons)
=back
=cut
####
sub compare_exons {
my $self = shift;
my ($gene1, $gene2) = @_;
print "gene1_strand: $gene1->{strand}\n";
my $clone_1 = $gene1->clone_gene();
print "clone1_strand: " . $clone_1->{strand} . "\n";
my $clone_2 = $gene2->clone_gene();
$clone_1->trim_UTRs();
print "clone1_strand, utrs trimmed: " . $clone_1->{strand} . "\n";
$clone_2->trim_UTRs();
my @exons_1 = $clone_1->get_exons();
my @exons_2 = $clone_2->get_exons();
my @identity_list = ();
my @all_exons = sort {$a->{end5}<=>$b->{end5}} (@exons_1, @exons_2);
for (my $i=0; $i <= $#all_exons-1; $i++) {
my $curr_exon = $all_exons[$i];
my $next_exon = $all_exons[$i+1];
my ($curr_exon_end5, $curr_exon_end3) = $curr_exon->get_coords();
my ($next_exon_end5, $next_exon_end3) = $next_exon->get_coords();
if ($curr_exon_end5 == $next_exon_end5 && $curr_exon_end3 == $next_exon_end3) {
$identity_list[$i] = 1;
$identity_list[$i+1] = 1;
$i++; #if A = B, then go onto comparing C to D, not B to C.
}
}
my ($num_identical_exons, $total_num_exons) = (0,0);
for (my $i=0; $i <= $#all_exons; $i++) {
if ($identity_list[$i]) {
$num_identical_exons++;
}
$total_num_exons++;
}
return ($num_identical_exons, $total_num_exons);
}
=over 4
=item start_or_stop_within_intron()
B<Description:> Compares the start codon and stop codon position of gene1 to the introns of gene2.
B<Parameters:> $gene1, $gene2
B<Returns:> ($start_within_intron, $stop_within_intron)
return values are 0|1 meaning true|false for each return parameter.
=back
=cut
sub start_or_stop_codon_within_intron {
my $self = shift;
my ($gene1, $gene2) = @_;
## Look for annotated start codon or stop codon within intron:
my ($annotated_start_within_intron, $annotated_stop_within_intron) = (0,0);
my ($start_codon, $stop_codon) = $gene1->get_model_span();
my (@alignment_segments) = sort {$a->{end5}<=>$b->{end5}} $gene2->get_exons();
if ($#alignment_segments > 0) { #multiple segments:
for (my $i=1; $i <= $#alignment_segments; $i++) {
my $prev_seg = $alignment_segments[$i-1];
my ($prev_lend, $prev_rend) = sort {$a<=>$b} $prev_seg->get_coords();
my $curr_seg = $alignment_segments[$i];
my ($curr_lend, $curr_rend) = sort {$a<=>$b} $curr_seg->get_coords();
my ($intron_lend, $intron_rend) = ($prev_rend+1, $curr_lend-1);
if ($start_codon >= $intron_lend && $start_codon <= $intron_rend) {
$annotated_start_within_intron = 1;
}
if ($stop_codon >= $intron_lend && $stop_codon <= $intron_rend) {
$annotated_stop_within_intron = 1;
}
}
}
return ($annotated_start_within_intron, $annotated_stop_within_intron);
}
#####
## Static methods
####
=over 4
=item adjust_alternate_exon_region_coords()
B<Description:> Adjusts the coordinates provided by find_alternate_exons() so that they are extended to include the adjacent intron.
B<Parameters:> gene_obj, region_lend, region_rend
B<Returns:> adjusted_region_lend, adjusted_region_rend
This is useful in cases where we try to see if the variation impacts the protein coding region when compared to an alternate gene.
region_lend and region_rend are the lend,rend values stored in table: splice_variation under type = 'alternate_exon'
=back
=cut
sub adjust_alternate_exon_region_coords {
my ($gene_obj, $region_lend, $region_rend) = @_;
my @other_exon_coords;
foreach my $exon ($gene_obj->get_exons()) {
my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
unless ($lend >= $region_lend && $rend <= $region_rend) {
# not encapsulated:
push (@other_exon_coords, $lend, $rend);
}
}
@other_exon_coords = sort {$a<=>$b} @other_exon_coords;
#print "other exon coords: @other_exon_coords\n";
my $other_lend = shift @other_exon_coords;
my $other_rend = pop @other_exon_coords;
## make sure there isn't any overlap
if ($other_lend <= $region_rend && $other_rend >= $region_lend) {
#overlap BAD!
confess ("Error, coordinate regions overlap ($other_lend, $other_rend) w/ region($region_lend, $region_rend)\n"
. Dumper (\@other_exon_coords));
}
## adjust boundary to include intron region
if ($other_rend < $region_lend) {
$region_lend = $other_rend + 1;
}
elsif ($other_lend > $region_rend) {
$region_rend = $other_lend - 1;
}
else {
confess "Error, cannot figure out how to adjust the boundaries." . Dumper (\@other_exon_coords);
}
return ($region_lend, $region_rend);
}
=over 4
=item extend_coords_to_intron_bounds()
B<Description:> given the coordinates of a region of skipped exons, the coordinates are extended to the far bounds of the adjacent introns
B<Parameters:> (gene_obj, region_lend, region_rend)
B<Returns:> (adjusted_region_lend, adjusted_region_rend)
=back
=cut
sub extend_coords_to_intron_bounds {
my ($gene_obj, $region_lend, $region_rend) = @_;
my @left_coords;
my @right_coords;
foreach my $exon ($gene_obj->get_exons()) {
my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
if ($lend > $region_rend) {
push (@right_coords, $lend, $rend);
}
elsif ($rend < $region_lend) {
push (@left_coords, $lend, $rend);
}
}
@left_coords = sort {$a<=>$b} @left_coords;
@right_coords = sort {$a<=>$b} @right_coords;
unless (@left_coords && @right_coords) {
confess "Error, missing either left or right coords: \n"
. "left: @left_coords\n"
. "right: @right_coords\n"
. "region: $region_lend, $region_rend\n";
}
my $new_left_bound = pop @left_coords;
$new_left_bound++;
my $new_right_bound = shift @right_coords;
$new_right_bound--;
return ($new_left_bound, $new_right_bound);
}
1;