318 lines
5.3 KiB
Perl
318 lines
5.3 KiB
Perl
package BHStats;
|
|
|
|
require Exporter;
|
|
our @ISA = qw (Exporter);
|
|
our @EXPORT = qw (binomial_probability_sum_k_to_n
|
|
binomial_probability_sum_k_to_0
|
|
binomial_probability
|
|
binomial_coefficient
|
|
factorial
|
|
stDev
|
|
stdErr
|
|
median
|
|
avg
|
|
CorrelationCoeff
|
|
geometric_mean
|
|
min
|
|
max
|
|
sum
|
|
tukey_biweight
|
|
);
|
|
|
|
|
|
use strict;
|
|
|
|
sub binomial_probability_sum_k_to_n {
|
|
my ($n,$k,$p) = @_;
|
|
my $sum = 0;
|
|
for (my $i = $k; $i <= $n; $i++) {
|
|
my $binProb = binomial_probability($n,$i,$p);
|
|
$sum += $binProb;
|
|
}
|
|
return ($sum);
|
|
}
|
|
|
|
sub binomial_probability_sum_k_to_0 {
|
|
my ($n,$k,$p) = @_;
|
|
my $sum = 0;
|
|
for (my $i = $k; $i >= 0; $i--) {
|
|
my $binProb = binomial_probability($n,$i,$p);
|
|
$sum += $binProb;
|
|
}
|
|
return ($sum);
|
|
}
|
|
|
|
|
|
|
|
|
|
sub binomial_probability {
|
|
my ($n_observations, $k_successes, $p_probability) = @_;
|
|
|
|
my ($n, $k, $p) = ($n_observations, $k_successes, $p_probability);
|
|
|
|
### Given B(n,p), find P(X=k)
|
|
|
|
my $binomial_prob = binomial_coefficient($n,$k) * ($p**$k) * (1-$p)**($n-$k);
|
|
|
|
return ($binomial_prob);
|
|
|
|
}
|
|
|
|
|
|
sub binomial_coefficient {
|
|
my ($n_things, $k_at_a_time) = @_;
|
|
|
|
my $number_of_k_arrangements = (factorial($n_things)) / ( factorial($k_at_a_time) * factorial($n_things-$k_at_a_time) );
|
|
|
|
return ($number_of_k_arrangements);
|
|
}
|
|
|
|
|
|
sub factorial {
|
|
my $x = shift;
|
|
$x = int($x);
|
|
my $factorial = 1;
|
|
while ($x > 1) {
|
|
$factorial *= $x;
|
|
$x--;
|
|
}
|
|
return ($factorial);
|
|
}
|
|
|
|
|
|
sub stDev {
|
|
# standard deviation calculation
|
|
my @nums = @_;
|
|
@nums = sort {$a<=>$b} @nums;
|
|
|
|
|
|
my $avg = avg(@nums);
|
|
my $count_eles = scalar(@nums);
|
|
|
|
## sum up the sqr of diff from avg
|
|
my $sum_avg_diffs_sqr = 0;
|
|
foreach my $num (@nums) {
|
|
my $diff = $num - $avg;
|
|
my $sqr = $diff**2;
|
|
$sum_avg_diffs_sqr += $sqr;
|
|
}
|
|
my $stdev = sqrt ($sum_avg_diffs_sqr/($count_eles-1));
|
|
return ($stdev);
|
|
}
|
|
|
|
####
|
|
sub stdErr {
|
|
my @vals = @_;
|
|
|
|
my $stdev = &stDev(@vals);
|
|
|
|
my $num_vals = scalar(@vals);
|
|
|
|
my $stdErr = $stdev / sqrt($num_vals);
|
|
|
|
return($stdErr);
|
|
}
|
|
|
|
|
|
sub median {
|
|
my @nums = @_;
|
|
|
|
@nums = sort {$a<=>$b} @nums;
|
|
|
|
my $count = scalar (@nums);
|
|
if ($count %2 == 0) {
|
|
## even number:
|
|
my $half = $count / 2;
|
|
return (avg ($nums[$half-1], $nums[$half]));
|
|
}
|
|
else {
|
|
## odd number. Return middle value
|
|
my $middle_index = int($count/2);
|
|
return ($nums[$middle_index]);
|
|
}
|
|
}
|
|
|
|
sub avg {
|
|
my @nums = @_;
|
|
my $total = $#nums + 1;
|
|
my $sum = 0;
|
|
foreach my $num (@nums) {
|
|
$sum += $num;
|
|
}
|
|
my $avg = $sum/$total;
|
|
return ($avg);
|
|
}
|
|
|
|
|
|
sub CorrelationCoeff {
|
|
my ($x_aref, $y_aref) = @_;
|
|
my @x = @$x_aref;
|
|
my @y = @$y_aref;
|
|
|
|
my $total = $#x + 1;
|
|
my $avg_x = avg(@x);
|
|
my $avg_y = avg(@y);
|
|
|
|
my $stdev_x = stDev(@x);
|
|
my $stdev_y = stDev(@y);
|
|
|
|
# sum part of equation
|
|
my $summation = 0;
|
|
for (my $i = 0; $i < $total; $i++) {
|
|
my $x_val = $x[$i];
|
|
my $y_val = $y[$i];
|
|
|
|
my $x_part = ($x_val - $avg_x)/$stdev_x;
|
|
my $y_part = ($y_val - $avg_y)/$stdev_y;
|
|
|
|
$summation += ($x_part * $y_part);
|
|
}
|
|
|
|
my $cor = (1/($total-1)) * $summation;
|
|
|
|
return ($cor);
|
|
}
|
|
|
|
|
|
####
|
|
sub geometric_mean {
|
|
my @entries = @_;
|
|
|
|
my $num_entries = scalar (@entries);
|
|
unless ($num_entries) {
|
|
return (undef);
|
|
}
|
|
|
|
## All entries must be > 0
|
|
my $logsum = 0;
|
|
foreach my $entry (@entries) {
|
|
unless ($entry > 0) {
|
|
return (undef);
|
|
}
|
|
$logsum += log ($entry);
|
|
}
|
|
|
|
my $geo_mean = exp ( (1/$num_entries) * $logsum);
|
|
|
|
return ($geo_mean);
|
|
}
|
|
|
|
|
|
####
|
|
sub min {
|
|
my @vals = @_;
|
|
|
|
@vals = sort {$a<=>$b} @vals;
|
|
|
|
my $min_val = shift @vals;
|
|
|
|
return ($min_val);
|
|
}
|
|
|
|
####
|
|
sub max {
|
|
my @vals = @_;
|
|
|
|
@vals = sort {$a<=>$b} @vals;
|
|
|
|
my $max_val = pop @vals;
|
|
|
|
return ($max_val);
|
|
}
|
|
|
|
|
|
####
|
|
sub sum {
|
|
my @vals = @_;
|
|
|
|
my $x = 0;
|
|
foreach my $val (@vals) {
|
|
$x += $val;
|
|
}
|
|
|
|
return ($x);
|
|
}
|
|
|
|
|
|
=Rcode
|
|
|
|
> tukey.biweight
|
|
function (x, c = 5, epsilon = 1e-04)
|
|
{
|
|
m <- median(x)
|
|
s <- median(abs(x - m))
|
|
u <- (x - m)/(c * s + epsilon)
|
|
w <- rep(0, length(x))
|
|
i <- abs(u) <= 1
|
|
w[i] <- ((1 - u^2)^2)[i]
|
|
t.bi <- sum(w * x)/sum(w)
|
|
return(t.bi)
|
|
}
|
|
=cut
|
|
|
|
####
|
|
sub tukey_biweight {
|
|
my (@x) = @_;
|
|
|
|
my $m = median(@x);
|
|
|
|
my $s;
|
|
{
|
|
my @y;
|
|
foreach my $val (@x) {
|
|
my $t = abs($val - $m);
|
|
push (@y, $t);
|
|
}
|
|
$s = median(@y);
|
|
}
|
|
|
|
|
|
my @u;
|
|
{
|
|
my $epsilon = 1e-4;
|
|
my $c = 5;
|
|
|
|
foreach my $val (@x) {
|
|
my $t = $val - $m;
|
|
$t /= ($c * $s + $epsilon);
|
|
push (@u, $t);
|
|
}
|
|
}
|
|
|
|
my @i;
|
|
{
|
|
foreach my $val (@u) {
|
|
my $t = (abs($val) <= 1) ? 1:0;
|
|
push (@i, $t);
|
|
}
|
|
}
|
|
|
|
my @w;
|
|
{
|
|
foreach my $val (@u) {
|
|
my $i = shift @i;
|
|
my $t = ( (1 - $val**2) **2) * $i;
|
|
push (@w, $t);
|
|
}
|
|
}
|
|
|
|
my $bi;
|
|
{
|
|
my $sum_w = 0;
|
|
for (my $i = 0; $i < @w; $i++) {
|
|
my $w = $w[$i];
|
|
my $x = $x[$i];
|
|
$bi += $w * $x;
|
|
$sum_w += $w;
|
|
}
|
|
$bi /= $sum_w;
|
|
}
|
|
|
|
return($bi);
|
|
}
|
|
|
|
|
|
|
|
|
|
1; #EOM
|