133 lines
5.9 KiB
Julia
133 lines
5.9 KiB
Julia
#! /usr/bin/env julia
|
||
## Installing Dependencies
|
||
## using Pkg
|
||
## Pkg.add("Distributed")
|
||
## Pkg.add("DataFrames")
|
||
## Pkg.add("CSV")
|
||
## Pkg.add("SNaQ")
|
||
## Pkg.add("PhyloNetworks")
|
||
## Pkg.add("RCall")
|
||
## Pkg.add("PhyloPlots")
|
||
## Pkg.add("QuartetNetworkGoodnessFit")
|
||
|
||
# Running SNaQ Analysis
|
||
using PhyloNetworks, SNaQ;
|
||
using Distributed;
|
||
addprocs(9);
|
||
@everywhere using PhyloNetworks, SNaQ;
|
||
nruns = 100; # number of runs for each hmax
|
||
astralfile = joinpath("..", "species_tree", "coal.tre");
|
||
astraltree = readnewick(astralfile);
|
||
|
||
### Reading RAxML gene trees and ASTRAL species tree
|
||
### running in raxml_snaq/ folder
|
||
# raxmltrees = joinpath("..", "..", "species_tree", "all.trees");
|
||
# inputCF = readtrees2CF(raxmltrees);
|
||
# net0 = snaq!(astraltree, inputCF, hmax=0, filename="net0", seed=123, outgroup="Zju", runs=nruns);
|
||
# net1 = snaq!(net0, inputCF, hmax=1, filename="net1", seed=123, outgroup="Zju", runs=nruns);
|
||
# net2 = snaq!(net1, inputCF, hmax=2, filename="net2", seed=123, outgroup="Zju", runs=nruns);
|
||
# net3 = snaq!(net2, inputCF, hmax=3, filename="net3", seed=123, outgroup="Zju", runs=nruns);
|
||
# net4 = snaq!(net3, inputCF, hmax=4, filename="net4", seed=123, outgroup="Zju", runs=nruns);
|
||
|
||
### Alternatively, reading in the input files from Bucky
|
||
### running in input_snaq/ folder
|
||
inputCFfile = joinpath("..","bucky","bucky_1","input.mb.CFs.csv");
|
||
inputCF = readtableCF(inputCFfile);
|
||
net0 = snaq!(astraltree, inputCF, hmax=0, filename="net0", seed=123, outgroup="Zju", runs=nruns);
|
||
net1 = snaq!(net0, inputCF, hmax=1, filename="net1", seed=123, outgroup="Zju", runs=nruns);
|
||
net2 = snaq!(net1, inputCF, hmax=2, filename="net2", seed=123, outgroup="Zju", runs=nruns);
|
||
net3 = snaq!(net2, inputCF, hmax=3, filename="net3", seed=123, outgroup="Zju", runs=nruns);
|
||
net4 = snaq!(net3, inputCF, hmax=4, filename="net4", seed=123, outgroup="Zju", runs=nruns);
|
||
|
||
|
||
# Plotting the SNaQ results
|
||
using PhyloPlots, RCall;
|
||
## Network scores vs. hmax
|
||
scores = [loglik(net0), loglik(net1), loglik(net2), loglik(net3), loglik(net4)];
|
||
hmax = collect(0:4);
|
||
R"pdf"("snaq_network_scores.pdf", width=12, height=8);
|
||
R"plot"(hmax, scores, type="b", ylab="network score", xlab="hmax", col="blue");
|
||
R"dev.off"();
|
||
|
||
## Rerooting and rotating the networks for better visualization
|
||
rootatnode!(net1, "Zju");
|
||
rootatnode!(net2, "Zju");
|
||
rootatnode!(net3, "Zju");
|
||
rootatnode!(net4, "Zju");
|
||
### rotate!(net1, -2);
|
||
### rotate!(net2, -2);
|
||
### rotate!(net3, -2);
|
||
### rotate!(net4, -2);
|
||
|
||
## Plotting the networks
|
||
R"pdf"("snaq_networks_fulltree.pdf", width=14, height=10);
|
||
R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns
|
||
R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net1, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=1, loglik=-", round(loglik(net1), digits=2)), font=2);
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net2, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net2, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=2, loglik=-", round(loglik(net2), digits=2)), font=2);
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net3, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net3, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=3, loglik=-", round(loglik(net3), digits=2)), font=2);
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net4, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net4, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2);
|
||
R"dev.off"();
|
||
|
||
R"pdf"("snaq_networks_majortree.pdf", width=14, height=10);
|
||
R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns
|
||
R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net1, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=1, loglik=-", round(loglik(net1), digits=2)), font=2);
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net2, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net2, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=2, loglik=-", round(loglik(net2), digits=2)), font=2);
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net3, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net3, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=3, loglik=-", round(loglik(net3), digits=2)), font=2);
|
||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net4, false, false)[13:14];
|
||
xmax += (xmax - xmin) * 0.3;
|
||
plot(net4, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||
R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2);
|
||
R"dev.off"();
|
||
|
||
## expected vs. observed quartet concordance factors
|
||
using CSV, DataFrames, Distributions, Random, RCall;
|
||
inputCFfile = joinpath("bucky_1.CFs.csv");
|
||
inputCF = readtableCF(inputCFfile);
|
||
net3 = readsnaqnetwork("net3.out");
|
||
topologymaxQpseudolik!(net3, inputCF);
|
||
df_long = fittedquartetCF(inputCF, :long);
|
||
|
||
### Adding jitter to the points for better visualization
|
||
Random.seed!(123);
|
||
jitter = 0.005;
|
||
df_jittered = deepcopy(df_long);
|
||
df_jittered.obsCF .+= rand(Uniform(-jitter / 2, jitter / 2), nrow(df_long));
|
||
df_jittered.expCF .+= rand(Uniform(-jitter / 2, jitter / 2), nrow(df_long));
|
||
|
||
R"pdf"("expCFs_obsvsfitted.pdf", width=6, height=6);
|
||
R"plot"(df_jittered.obsCF, df_jittered.expCF, xlab="quartet CF observed in gene trees",
|
||
ylab="quartet CF expected from network", pch=16,
|
||
col="#008080",
|
||
cex=0.7);
|
||
R"abline"(0, 1, col="#008080", lwd=0.5);
|
||
R"dev.off"();
|
||
|
||
# Goodness of fit of the SNaQ networks
|
||
using QuartetNetworkGoodnessFit;
|
||
res1 = quarnetGoFtest!(net3, inputCF, true; seed=123, nsim=1000);
|
||
res1[[1,2,3]] # p-value, uncorrected z, σ
|
||
|