277 lines
8.4 KiB
R
277 lines
8.4 KiB
R
#! /usr/bin/env Rscript
|
|
suppressMessages({
|
|
library(ape)
|
|
library(phytools)
|
|
library(coda)
|
|
library(argparser)
|
|
library(processx)
|
|
library(cli)
|
|
})
|
|
|
|
# Function to check if the specified outgroup is at the root of the tree
|
|
check_tree_outgroup <- function(tree, outgroup) {
|
|
if (!is.rooted(tree)) {
|
|
return(list(
|
|
judge = FALSE,
|
|
msg = "The tree is unrooted."
|
|
))
|
|
}
|
|
if (!outgroup %in% tree$tip.label) {
|
|
return(list(
|
|
judge = FALSE,
|
|
msg = "Outgroup not found in tree tip labels."
|
|
))
|
|
}
|
|
root_node <- getMRCA(tree, tree$tip.label)
|
|
outgroup_parent <- getParent(tree, which(tree$tip.label == outgroup))
|
|
if (root_node != outgroup_parent) {
|
|
list(
|
|
judge = FALSE,
|
|
msg = "The specified outgroup is not at the root of the tree."
|
|
)
|
|
} else {
|
|
list(
|
|
judge = TRUE,
|
|
msg = "The outgroup is correctly placed at the root."
|
|
)
|
|
}
|
|
}
|
|
|
|
# Function to check multiple outgroups sequentially
|
|
check_outgroup_list <- function(tree, outgroup_list) {
|
|
tree_iter <- tree
|
|
iter <- NULL
|
|
for (outgroup in outgroup_list) {
|
|
res <- check_tree_outgroup(tree_iter, outgroup)
|
|
if (!res$judge) {
|
|
cli_alert_danger(
|
|
paste(
|
|
"Outgroup check failed for", outgroup,
|
|
":", res$msg
|
|
)
|
|
)
|
|
break
|
|
}
|
|
iter <- c(iter, outgroup)
|
|
tree_iter <- drop.tip(tree_iter, outgroup)
|
|
}
|
|
cli_alert_success("All outgroups are correctly placed at the root.")
|
|
res$judge
|
|
}
|
|
|
|
# Function to generate input tree for mcmctree with calibration points
|
|
gen_input_tre <- function(tree) {
|
|
tree_bak <- tree
|
|
tree_bak$node.label <- rep("", tree$Nnode)
|
|
tree_bak$edge.length <- NULL
|
|
ntips <- length(tree$tip.label)
|
|
ep_node <- getParent(tree, which(tree$tip.label == "EP"))
|
|
tree_bak$node.label[ep_node - ntips] <- "'L(55.8,0.02,0.02)'"
|
|
zju_node <- getParent(tree, which(tree$tip.label == "Zju"))
|
|
tree_bak$node.label[zju_node - ntips] <- "'L(99.6,0.02,0.02)'"
|
|
cli_alert_info("Generated input tree for mcmctree with calibration points:")
|
|
cat(paste(ntips, "1\n"), file = "input.tre")
|
|
raw_check_label <- function(x) {
|
|
# this is a workaround to bypass the checkLabel function in ape
|
|
x
|
|
}
|
|
assignInNamespace("checkLabel", raw_check_label, ns = "ape")
|
|
write.tree(tree_bak, file = "input.tre", append = TRUE)
|
|
cli_text(write.tree(tree_bak))
|
|
}
|
|
|
|
# Function to run modeltest-ng on the alignment file
|
|
run_modeltest <- function(alnfile, prefix) {
|
|
file.copy(alnfile, "input.fa", overwrite = TRUE)
|
|
cmd <- "modeltest-ng"
|
|
args <- c(
|
|
"-p", 1,
|
|
"-d", "nt",
|
|
"-r", 12345,
|
|
"-m", "JC,K80,F81,HKY",
|
|
"-h", "ug",
|
|
"-t", "ml",
|
|
"-i", "input.fa",
|
|
"-o", paste0(prefix, ".modeltest")
|
|
)
|
|
result <- run(cmd, args = args, echo_cmd = TRUE, echo = TRUE)
|
|
if (result$status != 0) {
|
|
cli_alert_danger("modeltest-ng execution failed.")
|
|
stop("modeltest-ng execution failed.")
|
|
} else {
|
|
cli_alert_success("modeltest-ng run completed.")
|
|
}
|
|
}
|
|
|
|
# Function to parse modeltest-ng output and extract the model & gamma parameter
|
|
parse_modeltest_output <- function(modeltest_file) {
|
|
model_map <- c(
|
|
"JC" = "JC69",
|
|
"K80" = "K80",
|
|
"F81" = "F81",
|
|
"HKY" = "HKY85"
|
|
)
|
|
lines <- readLines(modeltest_file)
|
|
head_line <- grep("Best model according to AICc", lines, value = FALSE)
|
|
if (length(head_line) == 0) {
|
|
cli_alert_danger("No model found in modeltest-ng output.")
|
|
stop("No model found in modeltest-ng output.")
|
|
}
|
|
model_line <- lines[head_line + 2]
|
|
model_out <- strsplit(model_line, ":")[[1]][2]
|
|
model_out <- unlist(strsplit(trimws(model_out), "+", fixed = TRUE))
|
|
cli_alert_info(paste("Selected model:", paste(model_out, collapse = "+")))
|
|
model <- model_map[model_out[1]]
|
|
if (length(model_out) > 1 && model_out[2] == "G4") {
|
|
gamma <- 0.5
|
|
} else {
|
|
gamma <- 0
|
|
}
|
|
list(
|
|
model = model,
|
|
gamma = gamma
|
|
)
|
|
}
|
|
|
|
# Function to render the mcmctree control file from the template
|
|
render_mcmctree_ctl <- function(template, outputfile, model, gamma, prefix) {
|
|
ctl_lines <- readLines(template)
|
|
ctl_lines <- gsub("{{MODEL_PLACEHOLDER}}", model, ctl_lines, fixed = TRUE)
|
|
ctl_lines <- gsub("{{GAMMA_PLACEHOLDER}}",
|
|
as.character(gamma),
|
|
ctl_lines,
|
|
fixed = TRUE
|
|
)
|
|
ctl_lines <- gsub("{{PREFIX_PLACEHOLDER}}",
|
|
prefix,
|
|
ctl_lines,
|
|
fixed = TRUE
|
|
)
|
|
writeLines(ctl_lines, con = outputfile)
|
|
}
|
|
|
|
# Function to run mcmctree with the specified control file
|
|
run_mcmctree <- function(ctlfile) {
|
|
cmd <- "mcmctree"
|
|
args <- ctlfile
|
|
result <- run(cmd, args = args, echo_cmd = TRUE, echo = TRUE)
|
|
if (result$status != 0) {
|
|
cli_alert_danger("mcmctree execution failed.")
|
|
stop("mcmctree execution failed.")
|
|
} else {
|
|
cli_alert_success("mcmctree run completed.")
|
|
}
|
|
}
|
|
|
|
# Function to check ESS values from mcmctree MCMC output
|
|
check_ess <- function(mcmcfile, check_file, threshold = 200) {
|
|
mcmc_samples <- read.table(mcmcfile, header = TRUE)[, -1]
|
|
mcmc_results <- mcmc(mcmc_samples)
|
|
ess_values <- effectiveSize(mcmc_results)
|
|
ess_table <- data.frame(
|
|
Parameter = names(ess_values),
|
|
ESS = as.numeric(ess_values)
|
|
)
|
|
write.table(ess_table,
|
|
file = paste0(check_file, ".ess.tsv"),
|
|
sep = "\t",
|
|
row.names = FALSE,
|
|
quote = FALSE
|
|
)
|
|
if (all(ess_values >= threshold)) {
|
|
cli_alert_success(paste("All parameters have ESS >=", threshold))
|
|
file.create(paste0(check_file, ".ess_check.passed"))
|
|
} else {
|
|
failed_params <- names(ess_values[ess_values < threshold])
|
|
names(failed_params) <- rep("x", length(failed_params))
|
|
cli_alert_warning(paste("Parameters with ESS <", threshold))
|
|
cli_bullets(failed_params)
|
|
file.create(paste0(check_file, ".ess_check.failed"))
|
|
}
|
|
}
|
|
|
|
# Main function to orchestrate the workflow
|
|
main <- function(treefile, alnfile, template, prefix) {
|
|
tree <- read.tree(treefile)
|
|
cli_h1("1. Checking Outgroups in the Tree")
|
|
outgroup_list <- c("Zju", "EP")
|
|
if (!check_outgroup_list(tree, outgroup_list)) {
|
|
return(NULL)
|
|
}
|
|
cli_h1("2. Running ModelTest-NG")
|
|
run_modeltest(alnfile, prefix)
|
|
modeltest_file <- paste0(prefix, ".modeltest.out")
|
|
model_info <- parse_modeltest_output(modeltest_file)
|
|
cli_h1("3. Preparing and Running MCMCTree")
|
|
gen_input_tre(tree)
|
|
render_mcmctree_ctl(
|
|
template,
|
|
paste0(prefix, ".mcmctree.ctl"),
|
|
model_info$model,
|
|
model_info$gamma,
|
|
prefix
|
|
)
|
|
cat("\n\n")
|
|
run_mcmctree(paste0(prefix, ".mcmctree.ctl"))
|
|
cli_h1("4. Checking ESS Values")
|
|
check_ess("mcmc.txt", prefix)
|
|
cli_h1("Workflow Completed Successfully")
|
|
}
|
|
|
|
p <- arg_parser("MCMCTree Workflow Script", hide.opts = TRUE)
|
|
p <- add_argument(p, "--treefile",
|
|
short = "-t",
|
|
help = "Input tree file in Newick format",
|
|
)
|
|
p <- add_argument(p, "--alnfile",
|
|
short = "-a",
|
|
help = "Input alignment file in FASTA format"
|
|
)
|
|
p <- add_argument(p, "--template",
|
|
short = "-c",
|
|
help = "Template control file for mcmctree"
|
|
)
|
|
p <- add_argument(p, "--prefix",
|
|
short = "-p",
|
|
help = "Prefix for output files"
|
|
)
|
|
|
|
args <- parse_args(p)
|
|
|
|
check_param <- c(
|
|
!is.na(args$treefile),
|
|
!is.na(args$alnfile),
|
|
!is.na(args$template),
|
|
!is.na(args$prefix)
|
|
)
|
|
|
|
if (all(check_param)) {
|
|
cli_h1("Workflow Parameters:")
|
|
cli_dl(args[c("treefile", "alnfile", "template", "prefix")])
|
|
main(args$treefile, args$alnfile, args$template, args$prefix)
|
|
} else {
|
|
warnlist <- c(
|
|
"--treefile, -t",
|
|
"--alnfile, -a",
|
|
"--template, -c",
|
|
"--prefix, -p"
|
|
)
|
|
names(warnlist) <- ifelse(check_param, "v", "x")
|
|
theme <- list(
|
|
".bullets .bullet-x" = list(
|
|
"text-exdent" = 4,
|
|
before = function(x) paste0(" ", col_red(symbol$cross), " ")
|
|
),
|
|
".bullets .bullet-v" = list(
|
|
"text-exdent" = 4,
|
|
before = function(x) paste0(" ", col_green(symbol$tick), " ")
|
|
)
|
|
)
|
|
cli_div(theme = theme)
|
|
cli_h1("Missing required arguments:")
|
|
cli_bullets(warnlist)
|
|
cli_h1("")
|
|
cli_end()
|
|
print(p)
|
|
}
|