#! /usr/bin/env julia ## Installing Dependencies ## using Pkg ## Pkg.add("Distributed") ## Pkg.add("DataFrames") ## Pkg.add("CSV") ## Pkg.add("SNaQ") ## Pkg.add("PhyloNetworks") ## Pkg.add("RCall") ## Pkg.add("PhyloPlots") ## Pkg.add("QuartetNetworkGoodnessFit") # Running SNaQ Analysis using PhyloNetworks, SNaQ; using Distributed; addprocs(5); @everywhere using PhyloNetworks, SNaQ; nruns = 100; # number of runs for each hmax astralfile = joinpath("..", "..", "species_tree", "aster.out"); astraltree = readnewick(astralfile); ### Reading RAxML gene trees and ASTRAL species tree ### running in raxml_snaq/ folder # raxmltrees = joinpath("..", "..", "species_tree", "all.trees"); # inputCF = readtrees2CF(raxmltrees); # net0 = snaq!(astraltree, inputCF, hmax=0, filename="net0", seed=123, outgroup="Zju", runs=nruns); # net1 = snaq!(net0, inputCF, hmax=1, filename="net1", seed=123, outgroup="Zju", runs=nruns); # net2 = snaq!(net1, inputCF, hmax=2, filename="net2", seed=123, outgroup="Zju", runs=nruns); # net3 = snaq!(net2, inputCF, hmax=3, filename="net3", seed=123, outgroup="Zju", runs=nruns); # net4 = snaq!(net3, inputCF, hmax=4, filename="net4", seed=123, outgroup="Zju", runs=nruns); ### Alternatively, reading in the input files from Bucky ### running in input_snaq/ folder inputCFfile = joinpath("bucky_1.CFs.csv"); inputCF = readtableCF(inputCFfile); net0 = snaq!(astraltree, inputCF, hmax=0, filename="net0", seed=123, outgroup="Zju", runs=nruns); net1 = snaq!(net0, inputCF, hmax=1, filename="net1", seed=123, outgroup="Zju", runs=nruns); net2 = snaq!(net1, inputCF, hmax=2, filename="net2", seed=123, outgroup="Zju", runs=nruns); net3 = snaq!(net2, inputCF, hmax=3, filename="net3", seed=123, outgroup="Zju", runs=nruns); net4 = snaq!(net3, inputCF, hmax=4, filename="net4", seed=123, outgroup="Zju", runs=nruns); # Plotting the SNaQ results using PhyloPlots, RCall; ## Network scores vs. hmax scores = [loglik(net0), loglik(net1), loglik(net2), loglik(net3), loglik(net4)]; hmax = collect(0:4); R"pdf"("snaq_network_scores.pdf", width=12, height=8); R"plot"(hmax, scores, type="b", ylab="network score", xlab="hmax", col="blue"); R"dev.off"(); ## Rerooting and rotating the networks for better visualization rootatnode!(net1, "Zju"); rootatnode!(net2, "Zju"); rootatnode!(net3, "Zju"); rootatnode!(net4, "Zju"); ### rotate!(net1, -2); ### rotate!(net2, -2); ### rotate!(net3, -2); ### rotate!(net4, -2); ## Plotting the networks R"pdf"("snaq_networks_fulltree.pdf", width=14, height=10); R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net1, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=1, loglik=-", round(loglik(net1), digits=2)), font=2); xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net2, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net2, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=2, loglik=-", round(loglik(net2), digits=2)), font=2); xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net3, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net3, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=3, loglik=-", round(loglik(net3), digits=2)), font=2); xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net4, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net4, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2); R"dev.off"(); R"pdf"("snaq_networks_majortree.pdf", width=14, height=10); R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net1, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=1, loglik=-", round(loglik(net1), digits=2)), font=2); xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net2, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net2, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=2, loglik=-", round(loglik(net2), digits=2)), font=2); xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net3, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net3, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=3, loglik=-", round(loglik(net3), digits=2)), font=2); xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net4, false, false)[13:14]; xmax += (xmax - xmin) * 0.3; plot(net4, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]); R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2); R"dev.off"(); ## expected vs. observed quartet concordance factors using CSV, DataFrames, Distributions, Random, RCall; inputCFfile = joinpath("bucky_1.CFs.csv"); inputCF = readtableCF(inputCFfile); net3 = readsnaqnetwork("net3.out"); topologymaxQpseudolik!(net3, inputCF); df_long = fittedquartetCF(inputCF, :long); ### Adding jitter to the points for better visualization Random.seed!(123); jitter = 0.005; df_jittered = deepcopy(df_long); df_jittered.obsCF .+= rand(Uniform(-jitter / 2, jitter / 2), nrow(df_long)); df_jittered.expCF .+= rand(Uniform(-jitter / 2, jitter / 2), nrow(df_long)); R"pdf"("expCFs_obsvsfitted.pdf", width=6, height=6); R"plot"(df_jittered.obsCF, df_jittered.expCF, xlab="quartet CF observed in gene trees", ylab="quartet CF expected from network", pch=16, col="#008080", cex=0.7); R"abline"(0, 1, col="#008080", lwd=0.5); R"dev.off"(); # Goodness of fit of the SNaQ networks using QuartetNetworkGoodnessFit; res1 = quarnetGoFtest!(net3, inputCF, true; seed=123, nsim=1000); res1[[1,2,3]] # p-value, uncorrected z, σ