biyelunwen/99.scripts/trinity_utils/PerlLib/CDNA/Alternative_splice_comparer.pm

#!/usr/local/bin/perl

package main;
our $SEE;


package CDNA::Alternative_splice_comparer;
use Gene_obj;
use strict;
use Data::Dumper;
use Carp;
use CDNA::PASA_alignment_assembler;

sub new {
    my $packagename = shift;
    my $self = {
        unspliced_introns => 0,
        conventional_alt_splice => 0,
        start_or_end_within_intron => 0,
        exon_skipping => 0,
        alternate_exons => 0
        };
    bless ($self, $packagename);
    return ($self);
}


####
sub compare_isoforms_via_alignmentObjs {
    my $self = shift;
    my ($align1, $align2) = @_;
    my $gene_1 = $align1->get_gene_obj_via_alignment();
    my $gene_2 = $align2->get_gene_obj_via_alignment();

    return ($self->compare_isoforms_via_geneObjs($gene_1, $gene_2));
}


####
sub compare_isoforms_via_geneObjs {
    my $self = shift;
    my ($gene1, $gene2) = @_;
    ## Looking for:
    #    -unspliced introns
    #    -conventional alt-splice isoforms
    #    -transcriptional start or polyadenylation site within intron
    #    -exon skipping
    #    -alternate exons

    ## Look for Unspliced Introns
    my $struct  = { unspliced_introns => 0,
                    conventional_alt_splice => 0,
                    start_or_end_within_intron => 0,
                    exon_skipping => 0,
                    alternate_exons=> 0 };


    my @unspliced_introns = ($self->find_unspliced_introns($gene1, $gene2), $self->find_unspliced_introns($gene2, $gene1));
    if (@unspliced_introns) {
        print "*** Unspliced introns \n";
        $struct->{unspliced_introns} = 1;
        $self->{unspliced_introns} = \@unspliced_introns;
    }


    ## Look for the conventional alt splice isoforms (diff donors/acceptors for introns).
    my (%alternate_acceptors_n_donors) = $self->find_conventional_alt_splice_isoforms($gene1, $gene2);
    if (%alternate_acceptors_n_donors) {
        print "*** Conventional Alt splice (donor and/or acceptor)\n";
        #print Dumper (\%alternate_acceptors_n_donors);

        $self->{conventional_alt_splice} = \%alternate_acceptors_n_donors;
        if (@{$alternate_acceptors_n_donors{acceptors}}) {
            $struct->{conventional_alt_acceptor} = 1;
        }
        if (@{$alternate_acceptors_n_donors{donors}}) {
            $struct->{conventional_alt_donor} = 1;
        }
    }


    my %intron_starts_or_ends = ($self->find_starts_and_ends_within_introns ($gene1, $gene2), $self->find_starts_and_ends_within_introns ($gene2, $gene1));
    if (%intron_starts_or_ends) {
        print "*** Transcriptional start or polyadenylation site within an intron.\n";
        $struct->{start_or_end_within_intron} = 1;
        my @starts_or_ends = values %intron_starts_or_ends;
        $self->{start_or_end_within_intron} = \@starts_or_ends;
    }

    ## Look for exon skipping events.
    my @exon_skips = ($self->find_exon_skipping_events($gene1, $gene2), $self->find_exon_skipping_events($gene2, $gene1));
    if (@exon_skips) {
        print "*** Exon skipping event detected.\n";
        $struct->{exon_skipping} = 1;
        $self->{exon_skipping} = \@exon_skips;
    }

    ## Look or Alternate exons
    my @alternate_exons = ($self->find_alternate_exons($gene1, $gene2), $self->find_alternate_exons($gene2, $gene1));
    if (@alternate_exons) {
        print "*** Found alternate exons\n";
        $struct->{alternate_exons} = 1;
        $self->{alternate_exons} = \@alternate_exons;
    }

    return ($struct);

}


# private
sub enumerate_exons_of_gene {
    my $gene_obj = shift;
    # put everything in forward coordinate axis:
    my %exon_coords;
    my @exons = $gene_obj->get_exons();
    foreach my $exon (@exons) {
        my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
        $exon_coords{$lend} = $rend;
    }
    return (%exon_coords);
}


#private
####
sub enumerate_introns_of_gene {
    my $gene_obj = shift;
    ## Put everything in forward strand coordinate axis.
    my %introns;
    my @exons = sort {$a->{end5}<=>$b->{end5}} $gene_obj->get_exons();
    for (my $i = 0; $i < $#exons; $i++) {
        my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $exons[$i]->get_coords();
        my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $exons[$i+1]->get_coords();
        my ($intron_end5, $intron_end3) = ($exon1_rend + 1, $exon2_lend - 1);
        $introns{$intron_end5} = $intron_end3;
    }
    return (%introns);
}


=over 4

=item find_unspliced_introns()

B<Description:> Find unspliced introns in gene_1 when compared to gene_2

B<Parameters:> $gene1, $gene2

B<Returns:> @unspliced_introns

@unspliced_introns is a list of coordinate pairs representing the unspliced introns found in gene 1 when compared to gene2

@unspliced_introns = ([end5,end3], ...)

=back

=cut

####
sub find_unspliced_introns {
    my $self = shift;
    my ($gene1, $gene2) = @_;
    ## Look for unspliced intron found in gene1 when compared to gene2
    my %gene1_exon_coords = &enumerate_exons_of_gene ($gene1);
    my %gene2_intron_coords = &enumerate_introns_of_gene($gene2);

    my @unspliced_introns;
    foreach my $intron_lend (keys %gene2_intron_coords) {
        my $intron_rend = $gene2_intron_coords{$intron_lend};

        foreach my $exon_lend (keys %gene1_exon_coords) {
            my $exon_rend = $gene1_exon_coords{$exon_lend};

            if ($intron_lend > $exon_lend && $intron_rend < $exon_rend) { #unspliced intron found
                push (@unspliced_introns, [$intron_lend, $intron_rend]);
            }
        }
    }
    return (@unspliced_introns);
}


=over 4

=item find_conventional_alt_splice_isoforms()

B<Description:> Looks for different donor and acceptor sites within overlapping introns of genes

B<Parameters:> $gene1, $gene2

B<Returns:> %alt_donors_and_acceptors

with structure:

%alt_donors_and_acceptors = ( acceptors =>
                              [
                                   { gene1 => acceptor_coord, gene2 => acceptor_coord }, ...


                                   ],

                              donors => [

                                         { gene1 => donor_coord, gene2 => donor_coord }, ...


                                         ]

                              );

     Coordinates stored are the actual exon boundary coordinates (first or last bp of each exon)


=back

=cut


####
sub find_conventional_alt_splice_isoforms {
    my $self = shift;
    my ($gene1, $gene2) = @_;
    print "## Looking for conventional alt splice isoforms (diff donors, acceptors)\n" if $SEE;
    my %exons_1_hash = &enumerate_exons_of_gene ($gene1);
    my %exons_2_hash = &enumerate_exons_of_gene ($gene2);

    my $orientation = $gene1->get_orientation();
    if ($orientation ne $gene2->get_orientation()) {
        die "Error, inconsistent orientations between genes: " . $gene1->toString() . $gene2->toString();
    }

    # algorithm
    #   -find one-to-one mappings between exons, and locate differences at acceptors and donor sites.

    my %alternate_acceptors_n_donors = ( acceptors => [],
                                         donors => []
                                         ); # holds coordinates for all gene1 diff boundaries.

    my $found_diff_flag = 0;


    ## compare exons of gene1 to gene2
    my @exons_gene_1_list;
    my @exons_gene_2_list;
    # build data structure:
    foreach my $data_pair ( [\%exons_1_hash, \@exons_gene_1_list],
                            [\%exons_2_hash, \@exons_gene_2_list] ) {

        my ($exons_href, $exons_aref) = @$data_pair;

        foreach my $lend (keys %$exons_href) {
            my $rend = $exons_href->{$lend};

            push (@$exons_aref, { lend => $lend,
                                  rend => $rend,
                                  match_indices => [] } );
        }
    }

    @exons_gene_1_list = sort {$a->{lend}<=>$b->{lend}} @exons_gene_1_list;
    @exons_gene_2_list = sort {$a->{lend}<=>$b->{lend}} @exons_gene_2_list;


    # all-vs-all comparison:
    for (my $i = 0; $i <= $#exons_gene_1_list; $i++) {

        my $i_ele_ref = $exons_gene_1_list[$i];
        my ($i_lend, $i_rend, $i_match_indices_aref) = ($i_ele_ref->{lend},
                                                        $i_ele_ref->{rend},
                                                        $i_ele_ref->{match_indices} );


        for (my $j = 0; $j <= $#exons_gene_2_list; $j++) {

            my $j_ele_ref = $exons_gene_2_list[$j];
            my ($j_lend, $j_rend, $j_match_indices_aref) = ($j_ele_ref->{lend},
                                                            $j_ele_ref->{rend},
                                                            $j_ele_ref->{match_indices});

            if ($i_lend < $j_rend && $i_rend > $j_lend) { #overlap
                push (@$i_match_indices_aref, $j);
                push (@$j_match_indices_aref, $i);
            }
        }
    }

    ## find donors and acceptors:
    ## check gene_1's exons for 1-1 mappings and end differences at splice junctions

    for (my $i = 0; $i <= $#exons_gene_1_list; $i++) {

        my $i_ele_ref = $exons_gene_1_list[$i];
        my ($i_lend, $i_rend, $i_match_indices_aref) = ($i_ele_ref->{lend},
                                                        $i_ele_ref->{rend},
                                                        $i_ele_ref->{match_indices} );

        if (scalar (@$i_match_indices_aref) == 1) {
            ## found some mapping
            my $j_index = $i_match_indices_aref->[0];
            my $j_ele_ref = $exons_gene_2_list[$j_index];
            my ($j_lend, $j_rend, $j_match_indices_aref) = ($j_ele_ref->{lend},
                                                            $j_ele_ref->{rend},
                                                            $j_ele_ref->{match_indices});


            if (scalar (@$j_match_indices_aref) != 1) {
                next; ## this is a 1-many mapping, want only 1-1 mappings
            }

            # make sure j's i is i
            if ($j_match_indices_aref->[0] != $i) {
                ## bad, this should never happen!
                confess "Error, found exon 1-1 mapping of $i to $j_index, but j maps to @$j_match_indices_aref ";
            }


            ## check left boundary:
            if ($i_lend != $j_lend ## diff coordinate
                && $i != 0  # at a splice junction
                && $j_index != 0  # at a splice junction
                ) {

                ## found splice difference at left junction:
                $found_diff_flag = 1;

                my $splice_ref = ($orientation eq '+')
                    ? $alternate_acceptors_n_donors{acceptors}
                    : $alternate_acceptors_n_donors{donors};

                push (@$splice_ref, { gene1 => $i_lend,
                                      gene2 => $j_lend } );

            }


            ## check right boundary:
            if ($i_rend != $j_rend ## diff coordinate
                && $i != $#exons_gene_1_list # at splice junction
                && $j_index != $#exons_gene_2_list # at splice junction
                ) {

                $found_diff_flag = 1;

                my $splice_ref = ($orientation eq '+')
                    ? $alternate_acceptors_n_donors{donors}
                    : $alternate_acceptors_n_donors{acceptors};

                push (@$splice_ref, { gene1 => $i_rend,
                                      gene2 => $j_rend } );
            }
        }
    }

    if ($found_diff_flag) {
        return (%alternate_acceptors_n_donors);
    } else {
        return ();
    }

}


=over 4

=item find_exon_skipping_events()

B<Description:> Finds an exon of gene_1 which reside within an intron of gene_2

B<Parameters:> gene1, gene2

B<Returns:> @skipped_exons


 notice this is a list of lists
 each list is a set of adjacent skipped exons, joined so that they correspond to a single event.
so what we are really getting here is a list of events of skipped exons where each event may contain one or more skipped exons.


@skipped_exons = (

                   [
                    [exon_lend,exon_rend], ...

                    ],

                   [
                    [exon_lend, exon_rend], ...

                    ]


                    )

=back

=cut


####
sub find_exon_skipping_events {
    my $self = shift;
    my ($gene1, $gene2) = @_;

    # Algorithm:
    #   -find an internal exon of gene 1 that resides within an intron of gene 2.  Flanking exons must be anchored to the other isoform
    my %gene1_exons = &enumerate_exons_of_gene($gene1);
    my %gene2_introns = &enumerate_introns_of_gene($gene2);

    my @potential_skipped_exons;
    foreach my $exon1_lend (keys %gene1_exons) {
        my $exon1_rend = $gene1_exons{$exon1_lend};

        ## See if within intron of second gene
        foreach my $intron2_lend (keys %gene2_introns) {
            my $intron2_rend = $gene2_introns{$intron2_lend};

            if ($exon1_lend > $intron2_lend && $exon1_rend < $intron2_rend) { #exon incapsulated in intron
                push (@potential_skipped_exons, [$exon1_lend, $exon1_rend]);
            }
        }
    }

    ## Verify flanking exons are anchorable:
    my @skipped_exons;
    if (@potential_skipped_exons) {
        foreach my $potential_skipped_exon (@potential_skipped_exons) {
            my ($exon_lend, $exon_rend) = @$potential_skipped_exon;

            ## Try to anchor left exon
            my $anchor_left_exon = 0;
            foreach my $exon1 ($gene1->get_exons()) {
                my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $exon1->get_coords();
                unless ($exon1_rend < $exon_lend) { next;}
                foreach my $exon2 ($gene2->get_exons()) {
                    my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $exon2->get_coords();
                    unless ($exon2_rend < $exon_lend) { next;}

                    if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) { #anchorable
                        $anchor_left_exon = 1;
                        last;
                    }
                }
                if ($anchor_left_exon) { last;}
            }
            unless ($anchor_left_exon) { next;}

            ## Try to anchor the right exon
            my $anchor_right_exon = 0;
            foreach my $exon1 ($gene1->get_exons()) {
                my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $exon1->get_coords();
                unless ($exon1_lend > $exon_rend) { next;}
                foreach my $exon2 ($gene2->get_exons()) {
                    my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $exon2->get_coords();
                    unless ($exon2_lend > $exon_rend) { next;}

                    if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) { #anchorable
                        $anchor_right_exon = 1;
                        last;
                    }
                }
                if ($anchor_right_exon) { last;}
            }
            if ($anchor_right_exon && $anchor_left_exon) {
                push (@skipped_exons, $potential_skipped_exon);
            }
        }

    }


    ## group into lists of adjacent exons

    my @ret_skipped_exons;
    if (@skipped_exons) {
        @skipped_exons = sort {$a->[0]<=>$b->[0]} @skipped_exons;

        my %coord_to_order;
        ## map each exon to an integer
        my $order = 0;
        foreach my $exon (sort {$a->{end5}<=>$b->{end5}} $gene1->get_exons()) {
            my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
            $order++;
            $coord_to_order{$lend} = $order;
        }

        my $first_skipped_exon = shift @skipped_exons;
        @ret_skipped_exons = ([$first_skipped_exon]);

        while (@skipped_exons) {
            my $last_event = $ret_skipped_exons[$#ret_skipped_exons];
            my $last_skipped_exon = $last_event->[ $#{$last_event} ];

            my $last_lend = $last_skipped_exon->[0];

            my $curr_skipped_exon = shift @skipped_exons;
            my $curr_lend = $curr_skipped_exon->[0];

            if ($coord_to_order{$curr_lend} - $coord_to_order{$last_lend} == 1) {
                ## adjacent, so group them
                push (@$last_event, $curr_skipped_exon);
            }
            else {
                ## not adjacent
                # start new event
                push (@ret_skipped_exons, [$curr_skipped_exon]);
            }
        }
    }

    return (@ret_skipped_exons);

}


=over 4

=item find_alternate_exons()

B<Description:> Finds terminal exons in gene1 that are different and non-overlapping, and adjacent to overlapping exons.

B<Parameters:> $gene1, $gene2

B<Returns:> @range_of_coords_containing_alternate_exons

    @ret = ( {  type => lend|rend,
                coords => [region_lend,region_rend],
                num_exons => intval
                }

             , ...

             )


=back

=cut

sub find_alternate_exons {
    my $self = shift;
    my ($gene1, $gene2) = @_;
    my @alternate_exon_regions; # store coords of alternate exons
    # Algorithm:
    #    -Looking at terminal exons, should have non-overlapping exons prior to the first overlapping exon

    ## Look from front to back:
    my @gene1_exons = sort {$a->{end5}<=>$b->{end5}} $gene1->get_exons();
    my @gene2_exons = sort {$a->{end5}<=>$b->{end5}} $gene2->get_exons();

    my @alternate_exons_front;
    for (my $i = 0; $i <= $#gene1_exons; $i++) {
        my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $gene1_exons[$i]->get_coords();
        my $overlapping_j = undef();
        for (my $j = 0; $j <= $#gene2_exons; $j++) {
            my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $gene2_exons[$j]->get_coords();

            ## check for overlap
            if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) {
                $overlapping_j = $j;
                last;
            }
        }
        if (defined($overlapping_j)) {
            ## See if i and j are not first:
            if ($i != 0 && $overlapping_j != 0) {
                for (my $x=0; $x < $i; $x++) {
                    my $exon = $gene1_exons[$x];
                    my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
                    push (@alternate_exons_front, [$lend,$rend]);
                }
            }
            last;
        }
    }

    ## Look from back to front:

    my @alternate_exons_back;
    for (my $i = $#gene1_exons; $i >= 0; $i--) {
        my ($exon1_lend, $exon1_rend) = sort {$a<=>$b} $gene1_exons[$i]->get_coords();
        my $overlapping_j = undef();
        for (my $j = $#gene2_exons; $j >= 0; $j--) {
            my ($exon2_lend, $exon2_rend) = sort {$a<=>$b} $gene2_exons[$j]->get_coords();

            ## check for overlap
            if ($exon1_lend < $exon2_rend && $exon1_rend > $exon2_lend) {
                $overlapping_j = $j;
                last;
            }
        }
        if (defined($overlapping_j)) {
            ## See if i and j are not last:
            if ($i != $#gene1_exons && $overlapping_j != $#gene2_exons) {
                for (my $x=$#gene1_exons; $x > $i; $x--) {
                    my $exon = $gene1_exons[$x];
                    my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
                    push (@alternate_exons_back, [$lend,$rend]);
                }
            }
            last;
        }
    }

    if (@alternate_exons_front) {

        my @front_coords;
        my $num_alternate_exons_front = scalar (@alternate_exons_front);
        foreach my $coordpair (@alternate_exons_front) {
            push (@front_coords, @$coordpair);
        }
        @front_coords = sort {$a<=>$b} @front_coords;
        my $region_lend = shift @front_coords;
        my $region_rend = pop @front_coords;
        push (@alternate_exon_regions, { type => 'lend',
                                         coords => [$region_lend, $region_rend],
                                         num_exons => $num_alternate_exons_front,
                                         }
              );
    }

    if (@alternate_exons_back) {
        my @back_coords;
        my $num_alternate_exons_back = scalar (@alternate_exons_back);
        foreach my $coordpair (@alternate_exons_back) {
            push (@back_coords, @$coordpair);
        }
        @back_coords = sort {$a<=>$b} @back_coords;
        my $region_lend = shift @back_coords;
        my $region_rend = pop @back_coords;
        push (@alternate_exon_regions, { type => 'rend',
                                         coords => [$region_lend, $region_rend],
                                         num_exons => $num_alternate_exons_back
                                             }
              );
    }


    return (@alternate_exon_regions);
}


=over 4

=item find_starts_and_ends_within_introns()

B<Description:> The first and last exons of gene_1 are compared to the introns of gene_2.

B<Parameters:> $gene1, $gene2

B<Returns:> @coords

@coords contains the coordinates of either the very end5 or very end3 of terminal exons which fall into introns of gene_2

=back

=cut


####
sub find_starts_and_ends_within_introns {
    my $self = shift;
    my ($gene1, $gene2) = @_;

    my $fuzzlength = $CDNA::PASA_alignment_assembler::FUZZLENGTH;


    my %starts_and_ends;

    my $orientation = $gene1->get_orientation();

    # Algorithm:
    #   -first and last exon of gene1 is compared to introns of gene2
    my @gene1_exons = $gene1->get_exons();
    my %gene2_introns = &enumerate_introns_of_gene($gene2);
    my %gene2_exons = &enumerate_exons_of_gene($gene2);
    my $first_exon = $gene1_exons[0];
    my ($end5, $end3) = $first_exon->get_coords();

    foreach my $intron_lend (keys %gene2_introns) {
        my $intron_rend = $gene2_introns{$intron_lend};
        if ($end5 >= $intron_lend && $end5 <= $intron_rend) { #endpoint encapsulated by intron.

            ## make sure it's not fuzz:
            if ($orientation eq "+") {
                if ( abs ($end5-$intron_rend) + 1 <= $fuzzlength) {
                    next;
                }
            }
            else { # minus strand
                if (abs ($end5-$intron_lend)+1 <= $fuzzlength) {
                    next;
                }
            }

            ## make sure exon overlaps another exon
            foreach my $exon_lend (keys %gene2_exons) {
                my $exon_rend = $gene2_exons{$exon_lend};
                my ($lend, $rend) = sort {$a<=>$b} ($end5, $end3);
                if ($rend > $exon_lend && $lend < $exon_rend) { #overlap
                    $starts_and_ends{start} = $end5; # store start
                    last;
                }
            }
            last;
        }
    }

    ## Now try last exon
    my $last_exon = $gene1_exons[$#gene1_exons];
    my ($end5, $end3) = $last_exon->get_coords();

    foreach my $intron_lend (keys %gene2_introns) {
        my $intron_rend = $gene2_introns{$intron_lend};
        if ($end3 >= $intron_lend && $end3 <= $intron_rend) { #endpoint encapsulated by intron.

            ## make sure not fuzz:
            if ($orientation eq "+") {
                if (abs ($end3 - $intron_lend) + 1 <= $fuzzlength) {
                    next;
                }
            }
            else { #minus strand
                if (abs ($end3 - $intron_rend) + 1 <= $fuzzlength) {
                    next;
                }
            }

            ## Make sure exon overlaps another exon
            foreach my $exon_lend (keys %gene2_exons) {
                my $exon_rend = $gene2_exons{$exon_lend};
                my ($lend, $rend) = sort {$a<=>$b} ($end5, $end3);
                if ($rend > $exon_lend && $lend < $exon_rend) {
                    $starts_and_ends{end} = $end3; #store end
                    last;
                }
            }
            last;
        }
    }
    return (%starts_and_ends);
}


=over 4

=item compare_exons()

B<Description:> Compares all CDS exons between genes 1 and 2, returns number of identical CDS exons and total number of CDS exons between the two genes.

B<Parameters:> $gene1, $gene2

B<Returns:> ($num_identical_CDS_exons, $num_total_CDS_exons)


=back

=cut


####
sub compare_exons {
    my $self = shift;
    my ($gene1, $gene2) = @_;
    print "gene1_strand: $gene1->{strand}\n";
    my $clone_1 = $gene1->clone_gene();
    print "clone1_strand: " . $clone_1->{strand} . "\n";
    my $clone_2 = $gene2->clone_gene();
    $clone_1->trim_UTRs();
    print "clone1_strand, utrs trimmed: " . $clone_1->{strand} . "\n";
    $clone_2->trim_UTRs();

    my @exons_1 = $clone_1->get_exons();

    my @exons_2 = $clone_2->get_exons();

    my @identity_list = ();
    my @all_exons = sort {$a->{end5}<=>$b->{end5}} (@exons_1, @exons_2);

    for (my $i=0; $i <= $#all_exons-1; $i++) {

        my $curr_exon = $all_exons[$i];
        my $next_exon = $all_exons[$i+1];

        my ($curr_exon_end5, $curr_exon_end3) = $curr_exon->get_coords();

        my ($next_exon_end5, $next_exon_end3) = $next_exon->get_coords();

        if ($curr_exon_end5 == $next_exon_end5 && $curr_exon_end3 == $next_exon_end3) {
            $identity_list[$i] = 1;
            $identity_list[$i+1] = 1;
            $i++; #if A = B, then go onto comparing C to D, not B to C.
        }
    }

    my ($num_identical_exons, $total_num_exons) = (0,0);
    for (my $i=0; $i <= $#all_exons; $i++) {
        if ($identity_list[$i]) {
            $num_identical_exons++;
        }
        $total_num_exons++;
    }

    return ($num_identical_exons, $total_num_exons);
}


=over 4

=item start_or_stop_within_intron()

B<Description:> Compares the start codon and stop codon position of gene1 to the introns of gene2.

B<Parameters:> $gene1, $gene2

B<Returns:> ($start_within_intron, $stop_within_intron)

return values are 0|1 meaning true|false for each return parameter.

=back

=cut


sub start_or_stop_codon_within_intron {
    my $self = shift;
    my ($gene1, $gene2) = @_;


    ## Look for annotated start codon or stop codon within intron:
    my ($annotated_start_within_intron, $annotated_stop_within_intron) = (0,0);

    my ($start_codon, $stop_codon) = $gene1->get_model_span();
    my (@alignment_segments) = sort {$a->{end5}<=>$b->{end5}} $gene2->get_exons();
    if ($#alignment_segments > 0) { #multiple segments:
        for (my $i=1; $i <= $#alignment_segments; $i++) {
            my $prev_seg = $alignment_segments[$i-1];
            my ($prev_lend, $prev_rend) = sort {$a<=>$b} $prev_seg->get_coords();
            my $curr_seg = $alignment_segments[$i];
            my ($curr_lend, $curr_rend) = sort {$a<=>$b} $curr_seg->get_coords();
            my ($intron_lend, $intron_rend) = ($prev_rend+1, $curr_lend-1);
            if ($start_codon >= $intron_lend && $start_codon <= $intron_rend) {
                $annotated_start_within_intron = 1;
            }
            if ($stop_codon >= $intron_lend && $stop_codon <= $intron_rend) {
                $annotated_stop_within_intron = 1;
            }
        }
    }
    return ($annotated_start_within_intron, $annotated_stop_within_intron);
}


#####
## Static methods
####

=over 4

=item adjust_alternate_exon_region_coords()

B<Description:> Adjusts the coordinates provided by find_alternate_exons() so that they are extended to include the adjacent intron.

B<Parameters:> gene_obj, region_lend, region_rend

B<Returns:> adjusted_region_lend, adjusted_region_rend


This is useful in cases where we try to see if the variation impacts the protein coding region when compared to an alternate gene.

region_lend and region_rend are the lend,rend values stored in table: splice_variation under type = 'alternate_exon'


=back

=cut

sub adjust_alternate_exon_region_coords {

    my ($gene_obj, $region_lend, $region_rend) = @_;

    my @other_exon_coords;
    foreach my $exon ($gene_obj->get_exons()) {
        my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();
        unless ($lend >= $region_lend && $rend <= $region_rend) {
            # not encapsulated:
            push (@other_exon_coords, $lend, $rend);
        }
    }
    @other_exon_coords = sort {$a<=>$b} @other_exon_coords;

    #print "other exon coords: @other_exon_coords\n";

    my $other_lend = shift @other_exon_coords;
    my $other_rend = pop @other_exon_coords;

    ## make sure there isn't any overlap
    if ($other_lend <= $region_rend && $other_rend >= $region_lend) {
        #overlap BAD!
        confess ("Error, coordinate regions overlap ($other_lend, $other_rend) w/ region($region_lend, $region_rend)\n"
                 . Dumper (\@other_exon_coords));
    }

    ## adjust boundary to include intron region
    if ($other_rend < $region_lend) {
        $region_lend = $other_rend + 1;
    }
    elsif ($other_lend > $region_rend) {
        $region_rend = $other_lend - 1;
    }
    else {
        confess "Error, cannot figure out how to adjust the boundaries." . Dumper (\@other_exon_coords);
    }

    return ($region_lend, $region_rend);
}


=over 4

=item extend_coords_to_intron_bounds()

B<Description:> given the coordinates of a region of skipped exons, the coordinates are extended to the far bounds of the adjacent introns

B<Parameters:> (gene_obj, region_lend, region_rend)

B<Returns:> (adjusted_region_lend, adjusted_region_rend)

=back

=cut


sub extend_coords_to_intron_bounds {
    my ($gene_obj, $region_lend, $region_rend) = @_;

    my @left_coords;
    my @right_coords;

    foreach my $exon ($gene_obj->get_exons()) {
        my ($lend, $rend) = sort {$a<=>$b} $exon->get_coords();

        if ($lend > $region_rend) {
            push (@right_coords, $lend, $rend);
        }
        elsif ($rend < $region_lend) {
            push (@left_coords, $lend, $rend);
        }

    }

    @left_coords = sort {$a<=>$b} @left_coords;
    @right_coords = sort {$a<=>$b} @right_coords;

    unless (@left_coords && @right_coords) {
        confess "Error, missing either left or right coords: \n"
            . "left: @left_coords\n"
            . "right: @right_coords\n"
            . "region: $region_lend, $region_rend\n";
    }


    my $new_left_bound = pop @left_coords;
    $new_left_bound++;

    my $new_right_bound = shift @right_coords;
    $new_right_bound--;

    return ($new_left_bound, $new_right_bound);
}


1;