#!/usr/bin/perl use strict; use warnings; use POSIX; use IO::Select; use IO::Socket; use Digest::MD5; use Getopt::Long; use Cwd qw(abs_path); use Fcntl qw(:flock); use File::Path qw(remove_tree); use Time::HiRes qw(time usleep); my $os_name = $^O; # Turn on autoflush $|++; # Maximum number of threads to use my $max_forks; # Server port my $port = 10002; # Stores executing machine hostnames my @machines; my %machines; # Path to text file containing computers to run on my $machine_file_path; # MrBayes block which will be used for each run my $mb_block; # Where this script is located my $script_path = abs_path($0); # Directory script was called from my $init_dir = abs_path("."); # Where the script was called from my $initial_directory = $ENV{PWD}; # Allow for reusing info from an old run my $input_is_dir = 0; # How the script was called my $invocation = "perl mb.pl @ARGV"; # Name of output directory my $project_name = "mb-".int(time()); #my $project_name = "mb-dir"; # Read commandline settings GetOptions( #"no-forks" => \$no_forks, "mb-block|m:s" => \$mb_block, "machine-file:s" => \$machine_file_path, "check|c:f" => \&check_nonconvergent, "remove|r:f" => \&remove_nonconvergent, "out-dir|o=s" => \$project_name, "n-threads|T" => \$max_forks, "port=i" => \$port, "server-ip:s" => \&client, # for internal usage only "help|h" => sub { print &help; exit(0); }, "usage" => sub { print &usage; exit(0); }, ); # Get paths to required executables my $mb = check_path_for_exec("mb"); my $archive = shift(@ARGV); # Some error checking die "You must specify an archive file.\n\n", &usage if (!defined($archive)); die "Could not locate '$archive', perhaps you made a typo.\n" if (!-e $archive); die "You specified a MrBayes run archive instead of an MDL gene archive.\n" if ($archive =~ /\.mb\.tar$/); die "Could not locate '$machine_file_path'.\n" if (defined($machine_file_path) && !-e $machine_file_path); die "You must specify a file containing a valid MrBayes block which will be appended to each gene.\n\n", &usage if (!defined($mb_block)); die "Could not locate '$mb_block', perhaps you made a typo.\n\n" if (!-e $mb_block); # Input is a previous run directory, reuse information $input_is_dir++ if (-d $archive); # Determine which machines we will run the analyses on if (defined($machine_file_path)) { # Get list of machines print "Fetching machine names listed in '$machine_file_path'...\n"; open(my $machine_file, '<', $machine_file_path); chomp(@machines = <$machine_file>); close($machine_file); # Check that we can connect to specified machines foreach my $index (0 .. $#machines) { my $machine = $machines[$index]; print " Testing connection to: $machine...\n"; # Attempt to ssh onto machine with a five second timeout my $ssh_test = `timeout 5 ssh -v $machine exit 2>&1`; # Look for machine's IP in test connection my $machine_ip; if ($ssh_test =~ /Connecting to \S+ \[(\S+)\] port \d+\./s) { $machine_ip = $1; } # Could connect but passwordless login not enabled if ($ssh_test =~ /Are you sure you want to continue connecting \(yes\/no\)/s) { print " Connection to $machine failed, removing from list of useable machines (passwordless login not enabled).\n"; splice(@machines, $index, 1); } # Successful connection elsif (defined($machine_ip)) { print " Connection to $machine [$machine_ip] successful.\n"; $machines{$machine} = $machine_ip; } # Unsuccessful connection else { print " Connection to $machine failed, removing from list of useable machines.\n"; splice(@machines, $index, 1); } } } print "\nScript was called as follows:\n$invocation\n"; # Load MrBayes block into memory open(my $mb_block_file, "<", $mb_block) or die "Could not open '$mb_block': $!.\n"; my @mb_block = <$mb_block_file>; close($mb_block_file); my $archive_root; my $archive_root_no_ext; if (!$input_is_dir) { # Clean run with no prior output # Extract name information from input file ($archive_root = $archive) =~ s/.*\/(.*)/$1/; ($archive_root_no_ext = $archive) =~ s/(.*\/)?(.*)(\.tar\.gz)|(\.tgz)/$2/; # Initialize working directory # Remove conditional eventually mkdir($project_name) || die "Could not create '$project_name'$!.\n" if (!-e $project_name); my $archive_abs_path = abs_path($archive); # Remove conditional eventually run_cmd("ln -s $archive_abs_path $project_name/$archive_root") if (! -e "$project_name/$archive_root"); } else { # Prior output available, set relevant variables $project_name = $archive; my @contents = glob("$project_name/*"); # Determine the archive name by looking for a symlink my $found_name = 0; foreach my $file (@contents) { if (-l $file) { $file =~ s/\Q$project_name\E\///; #$archive = $file; $archive = "$project_name/$file"; $found_name = 1; } } die "Could not locate archive in '$project_name'.\n" if (!$found_name); # Extract name information from input file ($archive_root = $archive) =~ s/.*\/(.*)/$1/; ($archive_root_no_ext = $archive) =~ s/(.*\/)?(.*)(\.tar\.gz)|(\.tgz)/$2/; } # The name of the output archive my $mb_archive = "$archive_root_no_ext.mb.tar"; chdir($project_name); # Change how Ctrl+C is interpreted to allow for clean up $SIG{'INT'} = 'INT_handler'; # Define and initialize directories my $gene_dir = "genes/"; mkdir($gene_dir) or die "Could not create '$gene_dir': $!.\n" if (!-e $gene_dir); # Check if completed genes from a previous run exist my %complete_genes; if (-e $mb_archive) { print "\nArchive containing completed MrBayes runs found for this dataset found in '$mb_archive'.\n"; print "Completed runs contained in this archive will be removed from the job queue.\n"; # Add gene names in tarball to list of completed genes chomp(my @complete_genes = `tar tf '$mb_archive'`); foreach my $gene (@complete_genes) { $gene =~ s/\.tar\.gz//; $complete_genes{$gene}++; } } # Unarchive input genes chomp(my @genes = `tar xvf '$init_dir/$archive' -C $gene_dir 2>&1`); @genes = map { s/x //; $_ } @genes if ($os_name eq "darwin"); chdir($gene_dir); # Remove completed genes if (%complete_genes) { foreach my $index (reverse(0 .. $#genes)) { if (exists($complete_genes{$genes[$index]})) { unlink($genes[$index]); splice(@genes, $index, 1); } } } die "\nAll jobs have already completed.\n\n" if (!@genes); # Append given MrBayes block to the end of each gene print "\nAppending MrBayes block to each gene... "; foreach my $gene (@genes) { open(my $gene_file, ">>", $gene) or die "Could not open '$gene': $!.\n"; print {$gene_file} "\n", @mb_block; close($gene_file); } print "done.\n\n"; # Returns the external IP address of this computer chomp(my $server_ip = `dig +short myip.opendns.com \@resolver1.opendns.com 2>&1`); if ($server_ip !~ /(?:[0-9]{1,3}\.){3}[0-9]{1,3}/) { print "Could not determine external IP address, only local clients will be created.\n"; $server_ip = "127.0.0.1"; } # Initialize a server my $sock = IO::Socket::INET->new( LocalPort => $port, Blocking => 0, Reuse => 1, Listen => SOMAXCONN, Proto => 'tcp') or die "Could not create server socket: $!.\n"; $sock->autoflush(1); print "Job server successfully created.\n"; # Should probably do this earlier # Determine server hostname and add to machines if none were specified by the user chomp(my $server_hostname = `hostname`); if (scalar(@machines) == 0) { push(@machines, $server_hostname); $machines{$server_hostname} = "127.0.0.1"; } elsif (scalar(@machines) == 1) { # Check if the user input only the local machine in the config if ($machines{$machines[0]} eq $server_ip) { $machines{$machines[0]} = "127.0.0.1"; } } my @pids; foreach my $machine (@machines) { # Fork and create a client on the given machine my $pid = fork(); if ($pid == 0) { close(STDIN); close(STDOUT); close(STDERR); (my $script_name = $script_path) =~ s/.*\///; # Move required datafiles to machines, initialize clients if ($machines{$machine} ne "127.0.0.1" && $machines{$machine} ne $server_ip) { # Send this script to the machine system("scp", "-q", $script_path, $machine.":/tmp"); # Send MrBayes executable to the machine system("scp", "-q", $mb, $machine.":/tmp"); # Execute this perl script on the given machine # -tt forces pseudo-terminal allocation and lets us stop remote processes exec("ssh", "-tt", "$machine", "perl", "/tmp/$script_name", "--server-ip=$server_ip:$port"); } else { # Send this script to the machine system("cp", $script_path, "/tmp"); # Send MrBayes executable to the machine system("cp", $mb, "/tmp"); # Execute this perl script on the given machine exec("perl", "/tmp/$script_name", "--server-ip=127.0.0.1:$port"); } exit(0); } else { push(@pids, $pid); } } #chdir($gene_dir); my $select = IO::Select->new($sock); # Don't create zombies $SIG{CHLD} = 'IGNORE'; # Stores which job is next in queue my $job_number = 0; # Number of open connections to a client my $total_connections; # Number of complete jobs (necessary?) my $complete_count = 0; # Number of connections server has closed my $closed_connections = 0; # Minimum number of connections server should expect my $starting_connections = scalar(@machines); my $time = time(); my $num_digits = get_num_digits({'NUMBER' => scalar(@genes)}); # Begin the server's job distribution while ((!defined($total_connections) || $closed_connections != $total_connections) || $total_connections < $starting_connections) { # Contains handles to clients which have sent information to the server my @clients = $select->can_read(0); # Free up CPU by sleeping for 10 ms usleep(10000); # Handle each ready client individually CLIENT: foreach my $client (@clients) { # Client requesting new connection if ($client == $sock) { $total_connections++; $select->add($sock->accept()); } else { # Get client's message my $response = <$client>; next if (not defined($response)); # a response should never actually be undefined # Client wants to send us a file if ($response =~ /SEND_FILE: (.*)/) { my $file_name = $1; receive_file({'FILE_PATH' => $file_name, 'FILE_HANDLE' => $client}); } # Client has finished a job if ($response =~ /DONE (.*) \|\|/) { $complete_count++; printf(" Analyses complete: %".$num_digits."d/%d.\r", $complete_count, scalar(@genes)); # Perform appending of new gene to tarball in a fork as this can take some time my $pid; until (defined($pid)) { $pid = fork(); usleep(30000); } if ($pid == 0) { # Check if this is the first to complete, if so we must create the directory my $completed_gene = $1; if (!-e "../$mb_archive") { system("touch", "$mb_archive"); system("tar", "cf", "../$mb_archive", $completed_gene); unlink($completed_gene); } else { # Obtain a file lock on archive so another process doesn't simultaneously try to add to it open(my $mb_archive_file, "<", "../$mb_archive"); flock($mb_archive_file, LOCK_EX) || die "Could not lock '$mb_archive_file': $!.\n"; # Add completed gene system("tar", "rf", "../$mb_archive", $completed_gene); unlink($completed_gene); # Release lock flock($mb_archive_file, LOCK_UN) || die "Could not unlock '$mb_archive_file': $!.\n"; close($mb_archive_file); } exit(0); } else { push(@pids, $pid); } } # Client wants a new job if ($response =~ /NEW: (.*)/) { my $client_ip = $1; # Check if jobs remain in the queue if ($job_number < scalar(@genes)) { printf("\n Analyses complete: %".$num_digits."d/%d.\r", 0, scalar(@genes)) if ($job_number == 0); my $gene = $genes[$job_number]; # Check whether the client is remote or local, send it needed files if remote if ($client_ip ne $server_ip) { # Fork to perform the file transfer and prevent stalling the server my $pid; until (defined($pid)) { $pid = fork(); usleep(30000); } #my $pid = fork(); if ($pid == 0) { send_file({'FILE_PATH' => $gene, 'FILE_HANDLE' => $client}); unlink($gene); print {$client} "NEW: $gene\n"; exit(0); } else { push(@pids, $pid); } } else { print {$client} "CHDIR: ".abs_path("./")."\n"; print {$client} "NEW: $gene\n"; } $job_number++; } else { # Client has asked for a job, but there are none remaining print {$client} "HANGUP\n"; $select->remove($client); $client->close(); $closed_connections++; next CLIENT; } } } } } # Don't think this is needed foreach my $pid (@pids) { waitpid($pid, 0); } print "\n All connections closed.\n"; print "Total execution time: ", sec2human(time() - $time), ".\n\n"; # Go back to project directory, delete empty gene dir chdir(".."); &INT_handler; sub client { my ($opt_name, $address) = @_; my ($server_ip, $port) = split(":", $address); chdir("/tmp"); my $mb = "/tmp/mb"; #my $pgrp = getpgrp(); my $pgrp = $$; setpgrp(); # Determine this host's IP chomp(my $ip = `dig +short myip.opendns.com \@resolver1.opendns.com`); # Set IP to localhost if we don't have internet if ($ip !~ /(?:[0-9]{1,3}\.){3}[0-9]{1,3}/) { $ip = "127.0.0.1"; } # Spawn more clients my @pids; # my $total_forks = get_free_cpus(); # A slightly modification in order to limit forks to 10 my $total_forks = 12; if ($total_forks > 1) { foreach my $fork (1 .. $total_forks - 1) { my $pid = fork(); if ($pid == 0) { last; } else { push(@pids, $pid); } } } # The name of the gene we are working on my $gene; # Stores filenames of unneeded files my @unlink; # Change signal handling so killing the server kills these processes and cleans up $SIG{CHLD} = 'IGNORE'; $SIG{HUP} = sub { unlink($0, $mb); kill -15, $$; }; $SIG{TERM} = sub { unlink(glob($gene."*")) if defined($gene); exit(0)}; # Connect to the server my $sock = new IO::Socket::INET( PeerAddr => $server_ip.":".$port, Proto => 'tcp') or exit(0); $sock->autoflush(1); print {$sock} "NEW: $ip\n"; while (chomp(my $response = <$sock>)) { if ($response =~ /SEND_FILE: (.*)/) { my $file_name = $1; receive_file({'FILE_PATH' => $file_name, 'FILE_HANDLE' => $sock}); } elsif ($response =~ /CHDIR: (.*)/) { chdir($1); } elsif ($response =~ /NEW: (.*)/) { $gene = $1; # Redirect STDOUT to a log file open(my $std_out, ">&", *STDOUT); open(STDOUT, ">", $gene.".log"); system($mb, $gene); # Put STDOUT back to normal open(STDOUT, ">&", $std_out); close($std_out); unlink($gene); # Zip and tarball the results my @results = glob($gene."*"); my $gene_archive_name = "$gene.tar.gz"; @results = grep {!/\Q$gene_archive_name\E/} @results; system("tar", "czf", $gene_archive_name, @results); unlink(@results); # Send the results back to the server if this is a remote client if ($server_ip ne "127.0.0.1" && $server_ip ne $ip) { send_file({'FILE_PATH' => $gene_archive_name, 'FILE_HANDLE' => $sock}); unlink($gene_archive_name); } # Request a new job print {$sock} "DONE $gene_archive_name || NEW: $ip\n"; } elsif ($response eq "HANGUP") { last; } } # Have initial client wait for all others to finish and clean up if ($$ == $pgrp) { foreach my $pid (@pids) { waitpid($pid, 0); } unlink($0, $mb); } exit(0); } sub check_nonconvergent { my ($opt_name, $threshold) = @_; # We have to do weird things here to get the input name my @ARGV = split(/\s+/, $invocation); shift(@ARGV); shift(@ARGV); # Look for a directory in arguments provided my $archive; foreach my $arg (@ARGV) { if (-d $arg) { $archive = $arg; } } # Die if user didn't give us a directory if (!defined($archive)) { print "You must specify a directory previously generated by this script to check for nonconvergent genes.\n"; exit(0); } # Prior output available, set relevant variables $project_name = $archive; my @contents = glob("$project_name/*"); # Determine the archive name by looking for a symlink my $found_name = 0; foreach my $file (@contents) { if (-l $file) { $file =~ s/\Q$project_name\E\///; $archive = $file; $found_name = 1; } } die "Could not locate archive in '$project_name'.\n" if (!$found_name); chdir($project_name); # Extract name information from input file (my $archive_root = $archive) =~ s/.*\/(.*)/$1/; (my $archive_root_no_ext = $archive) =~ s/(.*\/)?(.*)(\.tar\.gz)|(\.tgz)/$2/; # Should have some completed genes in it my $incomplete_archive = $archive_root_no_ext.".mb.tar"; # Check that the incomplete archive exists if (!-e $incomplete_archive) { print "Could not locate an archive containing completed MrBayes runs.\n"; exit(0); } print "\nScript was called as follows:\n$invocation\n\n"; # Create a temporary directory for our operations my $check_dir = "tmp/"; mkdir($check_dir) if (!-e $check_dir); $SIG{INT} = sub { remove_tree($check_dir); exit(0) }; # Open tarball in genes directory chomp(my @genes = `tar xvf '$incomplete_archive' -C $check_dir 2>&1`); @genes = map { s/x //; $_ } @genes if ($os_name eq "darwin"); @genes = sort { (local $a = $a) =~ s/.*-(\d+)-\d+\..*/$1/; (local $b = $b) =~ s/.*-(\d+)-\d+\..*/$1/; $a <=> $b } @genes; my $longest_name_length = length($genes[$#genes]); print "MrBayes results available for ", scalar(@genes), " total genes:\n"; chdir($check_dir); $SIG{INT} = sub { chdir(".."); remove_tree($check_dir); exit(0) }; # Parse log of each gene to determine final standard deviation of split frequencies my $count = 0; foreach my $gene (@genes) { chomp(my @contents = `tar xvf '$gene' 2>&1`); @contents = map { s/x //; $_ } @contents if ($os_name eq "darwin"); (my $log_file_path = $gene) =~ s/\.tar\.gz$/.log/; # Check log file exists if (!-e $log_file_path) { print "Could not locate log file for '$gene'.\n"; exit(0); } open(my $log_file, "<", $log_file_path); chomp(my @data = <$log_file>); close($log_file); my @splits = grep { /Average standard deviation of split frequencies:/ } @data; my $final_split = pop(@splits); $final_split =~ s/.*frequencies: (.*)/$1/; #print " $gene: $final_split\n"; printf(" %-${longest_name_length}s: %s\n", $gene, $final_split); if (!defined($final_split) || $final_split > $threshold) { $count++; } unlink(@contents); } printf("%d gene(s) failed to meet the threshold of %s (%.2f%%).\n", $count, $threshold, ($count / scalar(@genes) * 100)); # Clean up and exit kill(2, $$); } sub remove_nonconvergent { my ($opt_name, $threshold) = @_; # We have to do weird things here to get the input name my @ARGV = split(/\s+/, $invocation); shift(@ARGV); shift(@ARGV); # Look for a directory in arguments provided my $archive; foreach my $arg (@ARGV) { if (-d $arg) { $archive = $arg; } } # Die if user didn't give us a directory if (!defined($archive)) { print "You must specify a directory previously generated by this script to check for nonconvergent genes.\n"; exit(0); } my $initial_archive = $archive; # Prior output available, set relevant variables $project_name = $archive; my @contents = glob("$project_name/*"); # Determine the archive name by looking for a symlink my $found_name = 0; foreach my $file (@contents) { if (-l $file) { $file =~ s/\Q$project_name\E\///; $archive = $file; $found_name = 1; } } die "Could not locate archive in '$project_name'.\n" if (!$found_name); chdir($project_name); # Extract name information from input file (my $archive_root = $archive) =~ s/.*\/(.*)/$1/; (my $archive_root_no_ext = $archive) =~ s/(.*\/)?(.*)(\.tar\.gz)|(\.tgz)/$2/; # Should have some completed genes in it my $incomplete_archive = $archive_root_no_ext.".mb.tar"; # Check that the incomplete archive exists if (!-e $incomplete_archive) { print "Could not locate an archive containing completed MrBayes runs.\n"; exit(0); } print "\nScript was called as follows:\n$invocation\n\n"; # Create a temporary directory for our operations my $check_dir = "tmp/"; mkdir($check_dir) if (!-e $check_dir); $SIG{INT} = sub { remove_tree($check_dir); exit(0) }; # Open tarball in genes directory chomp(my @genes = `tar xvf '$incomplete_archive' -C $check_dir 2>&1`); @genes = map { s/x //; $_ } @genes if ($os_name eq "darwin"); @genes = sort { (local $a = $a) =~ s/.*-(\d+)-\d+\..*/$1/; (local $b = $b) =~ s/.*-(\d+)-\d+\..*/$1/; $a <=> $b } @genes; my $longest_name_length = length($genes[$#genes]); print "MrBayes results available for ", scalar(@genes), " total genes:\n"; chdir($check_dir); $SIG{INT} = sub { chdir(".."); remove_tree($check_dir); exit(0) }; # Parse log of each gene to determine final standard deviation of split frequencies my $count = 0; foreach my $gene (@genes) { chomp(my @contents = `tar xvf '$gene' 2>&1`); @contents = map { s/x //; $_ } @contents if ($os_name eq "darwin"); (my $log_file_path = $gene) =~ s/\.tar\.gz$/.log/; # Check log file exists if (!-e $log_file_path) { print "Could not locate log file for '$gene'.\n"; exit(0); } open(my $log_file, "<", $log_file_path); chomp(my @data = <$log_file>); close($log_file); my @splits = grep { /Average standard deviation of split frequencies:/ } @data; my $final_split = pop(@splits); $final_split =~ s/.*frequencies: (.*)/$1/; #print " $gene: $final_split"; if (!defined($final_split) || $final_split > $threshold) { unlink($gene); #print " -- REMOVED\n"; printf(" %-${longest_name_length}s: %s -- REMOVED\n", $gene, $final_split); $count++; } else { printf(" %-${longest_name_length}s: %s\n", $gene, $final_split); #print "\n"; } unlink(@contents); } printf("%d gene(s) failed to meet the threshold of %s (%.2f%%) and have been removed.\n", $count, $threshold, ($count / scalar(@genes) * 100)); # Determine which genes met threshold and still remain @genes = glob($archive_root_no_ext."*.nex.tar.gz"); @genes = sort { (local $a = $a) =~ s/.*-(\d+)-\d+\..*/$1/; (local $b = $b) =~ s/.*-(\d+)-\d+\..*/$1/; $a <=> $b } @genes; # Recreate archive with remaining genes if (@genes) { system("tar", "cf", $incomplete_archive, @genes); unlink(@genes); system("mv", $incomplete_archive, ".."); } else { # Delete the working directory if no genes meet the threshold print "No genes met the threshold, removing specified directory.\n"; chdir($initial_directory); $SIG{INT} = sub { remove_tree($initial_archive); exit(0) }; } # Clean up and exit kill(2, $$); } sub hashsum { my $settings = shift; my $file_path = $settings->{'FILE_PATH'}; open(my $file, "<", $file_path) or die "Couldn't open file '$file_path': $!.\n"; my $md5 = Digest::MD5->new; my $md5sum = $md5->addfile(*$file)->hexdigest; close($file); return $md5sum; } sub send_file { my $settings = shift; my $file_path = $settings->{'FILE_PATH'}; my $file_handle = $settings->{'FILE_HANDLE'}; my $hash = hashsum({'FILE_PATH' => $file_path}); print {$file_handle} "SEND_FILE: $file_path\n"; open(my $file, "<", $file_path) or die "Couldn't open file '$file_path': $!.\n"; while (<$file>) { print {$file_handle} $_; } close($file); print {$file_handle} " END_FILE: $hash\n"; # Stall until we know status of file transfer while (defined(my $response = <$file_handle>)) { chomp($response); last if ($response eq "TRANSFER_SUCCESS"); die "Unsuccessful file transfer, checksums did not match.\n" if ($response eq "TRANSFER_FAILURE"); } } sub receive_file { my $settings = shift; my $file_path = $settings->{'FILE_PATH'}; my $file_handle = $settings->{'FILE_HANDLE'}; my $check_hash; open(my $file, ">", $file_path); while (<$file_handle>) { if ($_ =~ /(.*) END_FILE: (\S+)/) { print {$file} $1; $check_hash = $2; last; } else { print {$file} $_; } } close($file); # Use md5 hashsum to make sure transfer worked my $hash = hashsum({'FILE_PATH' => $file_path}); if ($hash ne $check_hash) { die "Unsuccessful file transfer, checksums do not match.\n'$hash' - '$check_hash'\n"; # hopefully this never pops up print {$file_handle} "TRANSFER_FAILURE\n" } else { print {$file_handle} "TRANSFER_SUCCESS\n"; } } sub INT_handler { # Kill ssh process(es) spawn by this script foreach my $pid (@pids) { #kill(-1, $pid); kill(15, $pid); } # Move into gene directory #chdir("$initial_directory"); chdir("$initial_directory/$project_name"); # Try to delete directory five times, if it can't be deleted print an error message # I've found this method is necessary for analyses performed on AFS drives my $count = 0; until (!-e $gene_dir || $count == 5) { $count++; remove_tree($gene_dir, {error => \my $err}); sleep(1); } #logger("Could not clean all files in './$gene_dir/'.") if ($count == 5); print "Could not clean all files in './$gene_dir/'.\n" if ($count == 5); exit(0); } sub clean_up { my $settings = shift; my $remove_dirs = $settings->{'DIRS'}; my $current_dir = getcwd(); # chdir($alignment_root); # unlink(glob($gene_dir."$alignment_name*")); # #unlink($server_check_file) if (defined($server_check_file)); # # if ($remove_dirs) { # rmdir($gene_dir); # } chdir($current_dir); } sub get_num_digits { my $settings = shift; my $number = $settings->{'NUMBER'}; my $digits = 1; while (floor($number / 10) != 0) { $number = floor($number / 10); $digits++; } return $digits; } sub sec2human { my $secs = shift; # Constants my $secs_in_min = 60; my $secs_in_hour = 60 * 60; my $secs_in_day = 24 * 60 * 60; $secs = int($secs); return "0 seconds" if (!$secs); # Calculate units of time my $days = int($secs / $secs_in_day); my $hours = ($secs / $secs_in_hour) % 24; my $mins = ($secs / $secs_in_min) % 60; $secs = $secs % 60; # Format return nicely my $time; if ($days) { $time .= ($days != 1) ? "$days days, " : "$days day, "; } if ($hours) { $time .= ($hours != 1) ? "$hours hours, " : "$hours hour, "; } if ($mins) { $time .= ($mins != 1) ? "$mins minutes, " : "$mins minute, "; } if ($secs) { $time .= ($secs != 1) ? "$secs seconds " : "$secs second "; } else { # Remove comma chop($time); } chop($time); return $time; } sub get_free_cpus { return $max_forks if (defined($max_forks)); my $os_name = $^O; # Returns a two-member array containing CPU usage observed by top, # top is run twice as its first output is usually inaccurate my @percent_free_cpu; if ($os_name eq "darwin") { # Mac OS chomp(@percent_free_cpu = `top -i 1 -l 2 | grep "CPU usage"`); } else { # Linux chomp(@percent_free_cpu = `top -b -n2 -d0.05 | grep "Cpu(s)"`); } my $percent_free_cpu = pop(@percent_free_cpu); if ($os_name eq "darwin") { # Mac OS $percent_free_cpu =~ s/.*?(\d+\.\d+)%\s+id.*/$1/; } else { # linux $percent_free_cpu =~ s/.*?(\d+\.\d)\s*%?ni,\s*(\d+\.\d)\s*%?id.*/$1 + $2/; # also includes %nice as free $percent_free_cpu = eval($percent_free_cpu); } my $total_cpus; if ($os_name eq "darwin") { # Mac OS $total_cpus = `sysctl -n hw.ncpu`; } else { # linux $total_cpus = `grep --count 'cpu' /proc/stat` - 1; } my $free_cpus = ceil($total_cpus * $percent_free_cpu / 100); if ($free_cpus == 0 || $free_cpus !~ /^\d+$/) { $free_cpus = 1; # assume that at least one cpu can be used } return $free_cpus; } sub run_cmd { my $command = shift; my $return = system($command); if ($return) { logger("'$command' died with error: '$return'.\n"); #kill(2, $parent_pid); exit(0); } } sub check_path_for_exec { my $exec = shift; my $path = $ENV{PATH}.":."; # include current directory as well my @path_dirs = split(":", $path); my $exec_path; foreach my $dir (@path_dirs) { $dir .= "/" if ($dir !~ /\/$/); $exec_path = abs_path($dir.$exec) if (-e $dir.$exec && -x $dir.$exec && !-d $dir.$exec); } die "Could not find the following executable: '$exec'. This script requires this program in your path.\n" if (!defined($exec_path)); return $exec_path; } sub usage { return "Usage: mb.pl ([PARTITION TARBALL] [-m MRBAYES BLOCK]) || ([MRBAYES TARBALL] [-c THRESHOLD] || [-r THRESHOLD])\n"; } sub help { print < EOF exit(0); }