175 lines
4.3 KiB
Perl
175 lines
4.3 KiB
Perl
package main;
|
|
our $SEE = 0;
|
|
|
|
## taken from CDNA::Overlap_assembler.pm
|
|
## really should be a more general purpose class as written here.
|
|
|
|
|
|
package Overlap_piler;
|
|
|
|
|
|
use strict;
|
|
use warnings;
|
|
use Carp;
|
|
|
|
sub new {
|
|
my $packagename = shift;
|
|
my $self = {
|
|
node_list => []
|
|
};
|
|
bless ($self, $packagename);
|
|
return ($self);
|
|
}
|
|
|
|
|
|
#### Static method!!!
|
|
sub simple_coordsets_collapser {
|
|
my @coordsets = @_; ## list of coordinates [a, b], [c, d], ...
|
|
|
|
my $counter = 0;
|
|
|
|
my $piler = new Overlap_piler();
|
|
|
|
my %coords_mapping;
|
|
foreach my $coordset (@coordsets) {
|
|
$counter++;
|
|
$coords_mapping{$counter} = [@$coordset];
|
|
|
|
my ($lend, $rend) = @$coordset;
|
|
if ($lend !~ /\d/ || $rend !~ /\d/) {
|
|
confess "Error, coordinates [ $lend, $rend ] include a non-number";
|
|
}
|
|
|
|
$piler->add_coordSet($counter, @$coordset);
|
|
}
|
|
|
|
my @clusters = $piler->build_clusters();
|
|
|
|
my @coord_spans;
|
|
foreach my $cluster (@clusters) {
|
|
my @eles = @$cluster;
|
|
my @coords;
|
|
foreach my $ele (@eles) {
|
|
push (@coords, @{$coords_mapping{$ele}});
|
|
}
|
|
|
|
@coords = sort {$a<=>$b} @coords;
|
|
|
|
my $min_coord = shift @coords;
|
|
my $max_coord = pop @coords;
|
|
|
|
push (@coord_spans, [$min_coord, $max_coord]);
|
|
}
|
|
|
|
return (@coord_spans);
|
|
|
|
}
|
|
|
|
|
|
|
|
####
|
|
sub add_coordSet {
|
|
my $self = shift;
|
|
my ($acc, $end5, $end3) = @_;
|
|
my ($lend, $rend) = sort {$a<=>$b} ($end5, $end3);
|
|
my $node = CoordSet_node->new($acc, $lend, $rend);
|
|
push (@{$self->{node_list}}, $node);
|
|
}
|
|
|
|
|
|
|
|
|
|
####
|
|
sub build_clusters {
|
|
my $self = shift;
|
|
my $node_list_aref = $self->{node_list};
|
|
@{$node_list_aref} = sort {$a->{lend}<=>$b->{lend}} @{$node_list_aref}; #sort by lend coord.
|
|
## set indices
|
|
for (my $i = 0; $i <= $#{$node_list_aref}; $i++) {
|
|
$node_list_aref->[$i]->{myIndex} = $i;
|
|
}
|
|
|
|
my @clusters;
|
|
my $first_node = $node_list_aref->[0];
|
|
my $start_pos = 0;
|
|
my ($exp_left, $exp_right) = ($first_node->{lend}, $first_node->{rend});
|
|
print $first_node->{acc} . " ($exp_left, $exp_right)\n" if $SEE;
|
|
for (my $i = 1; $i <= $#{$node_list_aref}; $i++) {
|
|
my $curr_node = $node_list_aref->[$i];
|
|
my ($lend, $rend) = ($curr_node->{lend}, $curr_node->{rend});
|
|
print $curr_node->{acc} . " ($lend, $rend)\n" if $SEE;
|
|
if ($exp_left <= $rend && $exp_right >= $lend) { #overlap
|
|
$exp_left = &min($exp_left, $lend);
|
|
$exp_right = &max($exp_right, $rend);
|
|
print "overlap. New expanded coords: ($exp_left, $exp_right)\n" if $SEE;
|
|
} else {
|
|
print "No overlap; Creating cluster: " if $SEE;
|
|
my @cluster;
|
|
for (my $j=$start_pos; $j < $i; $j++) {
|
|
my $acc = $node_list_aref->[$j]->{acc};
|
|
push (@cluster, $acc);
|
|
print "$acc, " if $SEE;
|
|
}
|
|
push (@clusters, [@cluster]);
|
|
$start_pos = $i;
|
|
($exp_left, $exp_right) = ($lend, $rend);
|
|
print "\nResetting expanded coords: ($lend, $rend)\n" if $SEE;
|
|
}
|
|
}
|
|
|
|
print "# Adding final cluster.\n" if $SEE;
|
|
if ($start_pos != $#{$node_list_aref}) {
|
|
print "final cluster: " if $SEE;
|
|
my @cluster;
|
|
for (my $j = $start_pos; $j <= $#{$node_list_aref}; $j++) {
|
|
my $acc = $node_list_aref->[$j]->{acc};
|
|
print "$acc, " if $SEE;
|
|
push (@cluster, $acc);
|
|
}
|
|
push (@clusters, [@cluster]);
|
|
print "\n" if $SEE;
|
|
} else {
|
|
my $acc = $node_list_aref->[$start_pos]->{acc};
|
|
push (@clusters, [$acc]);
|
|
print "adding final $acc.\n" if $SEE;
|
|
}
|
|
return (@clusters);
|
|
}
|
|
|
|
sub min {
|
|
my (@x) = @_;
|
|
@x = sort {$a<=>$b} @x;
|
|
my $min = shift @x;
|
|
return ($min);
|
|
}
|
|
|
|
sub max {
|
|
my @x = @_;
|
|
@x = sort {$a<=>$b} @x;
|
|
my $max = pop @x;
|
|
return ($max);
|
|
}
|
|
|
|
#################################################################
|
|
package CoordSet_node;
|
|
use strict;
|
|
|
|
sub new {
|
|
my $packagename = shift;
|
|
my ($acc, $lend, $rend) = @_;
|
|
my $self = { acc=>$acc,
|
|
lend=>$lend,
|
|
rend=>$rend,
|
|
myIndex=>undef(),
|
|
overlapping_indices=>[]
|
|
};
|
|
bless ($self, $packagename);
|
|
return ($self);
|
|
}
|
|
|
|
1; #EOM
|
|
|
|
|
|
|
|
|