#! /usr/bin/env Rscript suppressMessages({ library(ape) library(phytools) library(coda) library(argparser) library(processx) library(cli) }) # Function to check if the specified outgroup is at the root of the tree check_tree_outgroup <- function(tree, outgroup) { if (!is.rooted(tree)) { return(list( judge = FALSE, msg = "The tree is unrooted." )) } if (!outgroup %in% tree$tip.label) { return(list( judge = FALSE, msg = "Outgroup not found in tree tip labels." )) } root_node <- getMRCA(tree, tree$tip.label) outgroup_parent <- getParent(tree, which(tree$tip.label == outgroup)) if (root_node != outgroup_parent) { list( judge = FALSE, msg = "The specified outgroup is not at the root of the tree." ) } else { list( judge = TRUE, msg = "The outgroup is correctly placed at the root." ) } } # Function to check multiple outgroups sequentially check_outgroup_list <- function(tree, outgroup_list) { tree_iter <- tree iter <- NULL for (outgroup in outgroup_list) { res <- check_tree_outgroup(tree_iter, outgroup) if (!res$judge) { cli_alert_danger( paste( "Outgroup check failed for", outgroup, ":", res$msg ) ) break } iter <- c(iter, outgroup) tree_iter <- drop.tip(tree_iter, outgroup) } cli_alert_success("All outgroups are correctly placed at the root.") res$judge } # Function to generate input tree for mcmctree with calibration points gen_input_tre <- function(tree) { tree_bak <- tree tree_bak$node.label <- rep("", tree$Nnode) tree_bak$edge.length <- NULL ntips <- length(tree$tip.label) ep_node <- getParent(tree, which(tree$tip.label == "EP")) tree_bak$node.label[ep_node - ntips] <- "'L(55.8,0.02,0.02)'" zju_node <- getParent(tree, which(tree$tip.label == "Zju")) tree_bak$node.label[zju_node - ntips] <- "'L(99.6,0.02,0.02)'" cli_alert_info("Generated input tree for mcmctree with calibration points:") cat(paste(ntips, "1\n"), file = "input.tre") raw_check_label <- function(x) { # this is a workaround to bypass the checkLabel function in ape x } assignInNamespace("checkLabel", raw_check_label, ns = "ape") write.tree(tree_bak, file = "input.tre", append = TRUE) cli_text(write.tree(tree_bak)) } # Function to run modeltest-ng on the alignment file run_modeltest <- function(alnfile, prefix) { file.copy(alnfile, "input.fa", overwrite = TRUE) cmd <- "modeltest-ng" args <- c( "-p", 1, "-d", "nt", "-r", 12345, "-m", "JC,K80,F81,HKY", "-h", "ug", "-t", "ml", "-i", "input.fa", "-o", paste0(prefix, ".modeltest") ) result <- run(cmd, args = args, echo_cmd = TRUE, echo = TRUE) if (result$status != 0) { cli_alert_danger("modeltest-ng execution failed.") stop("modeltest-ng execution failed.") } else { cli_alert_success("modeltest-ng run completed.") } } # Function to parse modeltest-ng output and extract the model & gamma parameter parse_modeltest_output <- function(modeltest_file) { model_map <- c( "JC" = "JC69", "K80" = "K80", "F81" = "F81", "HKY" = "HKY85" ) lines <- readLines(modeltest_file) head_line <- grep("Best model according to AICc", lines, value = FALSE) if (length(head_line) == 0) { cli_alert_danger("No model found in modeltest-ng output.") stop("No model found in modeltest-ng output.") } model_line <- lines[head_line + 2] model_out <- strsplit(model_line, ":")[[1]][2] model_out <- unlist(strsplit(trimws(model_out), "+", fixed = TRUE)) cli_alert_info(paste("Selected model:", paste(model_out, collapse = "+"))) model <- model_map[model_out[1]] if (length(model_out) > 1 && model_out[2] == "G4") { gamma <- 0.5 } else { gamma <- 0 } list( model = model, gamma = gamma ) } # Function to render the mcmctree control file from the template render_mcmctree_ctl <- function(template, outputfile, model, gamma, prefix) { ctl_lines <- readLines(template) ctl_lines <- gsub("{{MODEL_PLACEHOLDER}}", model, ctl_lines, fixed = TRUE) ctl_lines <- gsub("{{GAMMA_PLACEHOLDER}}", as.character(gamma), ctl_lines, fixed = TRUE ) ctl_lines <- gsub("{{PREFIX_PLACEHOLDER}}", prefix, ctl_lines, fixed = TRUE ) writeLines(ctl_lines, con = outputfile) } # Function to run mcmctree with the specified control file run_mcmctree <- function(ctlfile) { cmd <- "mcmctree" args <- ctlfile result <- run(cmd, args = args, echo_cmd = TRUE, echo = TRUE) if (result$status != 0) { cli_alert_danger("mcmctree execution failed.") stop("mcmctree execution failed.") } else { cli_alert_success("mcmctree run completed.") } } # Function to check ESS values from mcmctree MCMC output check_ess <- function(mcmcfile, check_file, threshold = 200) { mcmc_samples <- read.table(mcmcfile, header = TRUE)[, -1] mcmc_results <- mcmc(mcmc_samples) ess_values <- effectiveSize(mcmc_results) ess_table <- data.frame( Parameter = names(ess_values), ESS = as.numeric(ess_values) ) write.table(ess_table, file = paste0(check_file, ".ess.tsv"), sep = "\t", row.names = FALSE, quote = FALSE ) if (all(ess_values >= threshold)) { cli_alert_success(paste("All parameters have ESS >=", threshold)) file.create(paste0(check_file, ".ess_check.passed")) } else { failed_params <- names(ess_values[ess_values < threshold]) names(failed_params) <- rep("x", length(failed_params)) cli_alert_warning(paste("Parameters with ESS <", threshold)) cli_bullets(failed_params) file.create(paste0(check_file, ".ess_check.failed")) } } # Main function to orchestrate the workflow main <- function(treefile, alnfile, template, prefix) { tree <- read.tree(treefile) cli_h1("1. Checking Outgroups in the Tree") outgroup_list <- c("Zju", "EP") if (!check_outgroup_list(tree, outgroup_list)) { return(NULL) } cli_h1("2. Running ModelTest-NG") run_modeltest(alnfile, prefix) modeltest_file <- paste0(prefix, ".modeltest.out") model_info <- parse_modeltest_output(modeltest_file) cli_h1("3. Preparing and Running MCMCTree") gen_input_tre(tree) render_mcmctree_ctl( template, paste0(prefix, ".mcmctree.ctl"), model_info$model, model_info$gamma, prefix ) cat("\n\n") run_mcmctree(paste0(prefix, ".mcmctree.ctl")) cli_h1("4. Checking ESS Values") check_ess("mcmc.txt", prefix) cli_h1("Workflow Completed Successfully") } p <- arg_parser("MCMCTree Workflow Script", hide.opts = TRUE) p <- add_argument(p, "--treefile", short = "-t", help = "Input tree file in Newick format", ) p <- add_argument(p, "--alnfile", short = "-a", help = "Input alignment file in FASTA format" ) p <- add_argument(p, "--template", short = "-c", help = "Template control file for mcmctree" ) p <- add_argument(p, "--prefix", short = "-p", help = "Prefix for output files" ) args <- parse_args(p) check_param <- c( !is.na(args$treefile), !is.na(args$alnfile), !is.na(args$template), !is.na(args$prefix) ) if (all(check_param)) { cli_h1("Workflow Parameters:") cli_dl(args[c("treefile", "alnfile", "template", "prefix")]) main(args$treefile, args$alnfile, args$template, args$prefix) } else { warnlist <- c( "--treefile, -t", "--alnfile, -a", "--template, -c", "--prefix, -p" ) names(warnlist) <- ifelse(check_param, "v", "x") theme <- list( ".bullets .bullet-x" = list( "text-exdent" = 4, before = function(x) paste0(" ", col_red(symbol$cross), " ") ), ".bullets .bullet-v" = list( "text-exdent" = 4, before = function(x) paste0(" ", col_green(symbol$tick), " ") ) ) cli_div(theme = theme) cli_h1("Missing required arguments:") cli_bullets(warnlist) cli_h1("") cli_end() print(p) }