biyelunwen/99.scripts/trinity_utils/PerlLib/Overlap_piler.pm

175 lines
4.3 KiB
Perl

package main;
our $SEE = 0;
## taken from CDNA::Overlap_assembler.pm
## really should be a more general purpose class as written here.
package Overlap_piler;
use strict;
use warnings;
use Carp;
sub new {
my $packagename = shift;
my $self = {
node_list => []
};
bless ($self, $packagename);
return ($self);
}
#### Static method!!!
sub simple_coordsets_collapser {
my @coordsets = @_; ## list of coordinates [a, b], [c, d], ...
my $counter = 0;
my $piler = new Overlap_piler();
my %coords_mapping;
foreach my $coordset (@coordsets) {
$counter++;
$coords_mapping{$counter} = [@$coordset];
my ($lend, $rend) = @$coordset;
if ($lend !~ /\d/ || $rend !~ /\d/) {
confess "Error, coordinates [ $lend, $rend ] include a non-number";
}
$piler->add_coordSet($counter, @$coordset);
}
my @clusters = $piler->build_clusters();
my @coord_spans;
foreach my $cluster (@clusters) {
my @eles = @$cluster;
my @coords;
foreach my $ele (@eles) {
push (@coords, @{$coords_mapping{$ele}});
}
@coords = sort {$a<=>$b} @coords;
my $min_coord = shift @coords;
my $max_coord = pop @coords;
push (@coord_spans, [$min_coord, $max_coord]);
}
return (@coord_spans);
}
####
sub add_coordSet {
my $self = shift;
my ($acc, $end5, $end3) = @_;
my ($lend, $rend) = sort {$a<=>$b} ($end5, $end3);
my $node = CoordSet_node->new($acc, $lend, $rend);
push (@{$self->{node_list}}, $node);
}
####
sub build_clusters {
my $self = shift;
my $node_list_aref = $self->{node_list};
@{$node_list_aref} = sort {$a->{lend}<=>$b->{lend}} @{$node_list_aref}; #sort by lend coord.
## set indices
for (my $i = 0; $i <= $#{$node_list_aref}; $i++) {
$node_list_aref->[$i]->{myIndex} = $i;
}
my @clusters;
my $first_node = $node_list_aref->[0];
my $start_pos = 0;
my ($exp_left, $exp_right) = ($first_node->{lend}, $first_node->{rend});
print $first_node->{acc} . " ($exp_left, $exp_right)\n" if $SEE;
for (my $i = 1; $i <= $#{$node_list_aref}; $i++) {
my $curr_node = $node_list_aref->[$i];
my ($lend, $rend) = ($curr_node->{lend}, $curr_node->{rend});
print $curr_node->{acc} . " ($lend, $rend)\n" if $SEE;
if ($exp_left <= $rend && $exp_right >= $lend) { #overlap
$exp_left = &min($exp_left, $lend);
$exp_right = &max($exp_right, $rend);
print "overlap. New expanded coords: ($exp_left, $exp_right)\n" if $SEE;
} else {
print "No overlap; Creating cluster: " if $SEE;
my @cluster;
for (my $j=$start_pos; $j < $i; $j++) {
my $acc = $node_list_aref->[$j]->{acc};
push (@cluster, $acc);
print "$acc, " if $SEE;
}
push (@clusters, [@cluster]);
$start_pos = $i;
($exp_left, $exp_right) = ($lend, $rend);
print "\nResetting expanded coords: ($lend, $rend)\n" if $SEE;
}
}
print "# Adding final cluster.\n" if $SEE;
if ($start_pos != $#{$node_list_aref}) {
print "final cluster: " if $SEE;
my @cluster;
for (my $j = $start_pos; $j <= $#{$node_list_aref}; $j++) {
my $acc = $node_list_aref->[$j]->{acc};
print "$acc, " if $SEE;
push (@cluster, $acc);
}
push (@clusters, [@cluster]);
print "\n" if $SEE;
} else {
my $acc = $node_list_aref->[$start_pos]->{acc};
push (@clusters, [$acc]);
print "adding final $acc.\n" if $SEE;
}
return (@clusters);
}
sub min {
my (@x) = @_;
@x = sort {$a<=>$b} @x;
my $min = shift @x;
return ($min);
}
sub max {
my @x = @_;
@x = sort {$a<=>$b} @x;
my $max = pop @x;
return ($max);
}
#################################################################
package CoordSet_node;
use strict;
sub new {
my $packagename = shift;
my ($acc, $lend, $rend) = @_;
my $self = { acc=>$acc,
lend=>$lend,
rend=>$rend,
myIndex=>undef(),
overlapping_indices=>[]
};
bless ($self, $packagename);
return ($self);
}
1; #EOM