20251213
This commit is contained in:
parent
8bab5cfa52
commit
072524e303
|
|
@ -14,3 +14,4 @@
|
||||||
99.scripts/phyparts/
|
99.scripts/phyparts/
|
||||||
.pueue.yml
|
.pueue.yml
|
||||||
run.status
|
run.status
|
||||||
|
!*/_description.md
|
||||||
|
|
@ -7,4 +7,4 @@ This directory contains raw data used for analysis.
|
||||||
- `clean_fastq` This folder contains cleaned FASTQ files after quality control.
|
- `clean_fastq` This folder contains cleaned FASTQ files after quality control.
|
||||||
- `raw_fastq` This folder contains the original raw FASTQ files.
|
- `raw_fastq` This folder contains the original raw FASTQ files.
|
||||||
- `sra` This folder contains SRA files downloaded from the NCBI database.
|
- `sra` This folder contains SRA files downloaded from the NCBI database.
|
||||||
- `sra.list` This file lists the SRA accession numbers for the datasets used in this project.
|
- `sra.list` This file lists the SRA downloading links for the datasets used in this project.
|
||||||
|
|
|
||||||
|
|
@ -31,7 +31,7 @@ astraltree = readnewick(astralfile);
|
||||||
|
|
||||||
### Alternatively, reading in the input files from Bucky
|
### Alternatively, reading in the input files from Bucky
|
||||||
### running in input_snaq/ folder
|
### running in input_snaq/ folder
|
||||||
inputCFfile = joinpath("..", "..", "input", "input.CFs.csv");
|
inputCFfile = joinpath("bucky_1.CFs.csv");
|
||||||
inputCF = readtableCF(inputCFfile);
|
inputCF = readtableCF(inputCFfile);
|
||||||
net0 = snaq!(astraltree, inputCF, hmax=0, filename="net0", seed=123, outgroup="Zju", runs=nruns);
|
net0 = snaq!(astraltree, inputCF, hmax=0, filename="net0", seed=123, outgroup="Zju", runs=nruns);
|
||||||
net1 = snaq!(net0, inputCF, hmax=1, filename="net1", seed=123, outgroup="Zju", runs=nruns);
|
net1 = snaq!(net0, inputCF, hmax=1, filename="net1", seed=123, outgroup="Zju", runs=nruns);
|
||||||
|
|
@ -60,7 +60,7 @@ rootatnode!(net4, "Zju");
|
||||||
### rotate!(net4, -2);
|
### rotate!(net4, -2);
|
||||||
|
|
||||||
## Plotting the networks
|
## Plotting the networks
|
||||||
R"pdf"("snaq_networks.pdf", width=14, height=10);
|
R"pdf"("snaq_networks_fulltree.pdf", width=14, height=10);
|
||||||
R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns
|
R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns
|
||||||
R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins
|
R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins
|
||||||
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14];
|
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14];
|
||||||
|
|
@ -81,9 +81,52 @@ plot(net4, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||||||
R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2);
|
R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2);
|
||||||
R"dev.off"();
|
R"dev.off"();
|
||||||
|
|
||||||
|
R"pdf"("snaq_networks_majortree.pdf", width=14, height=10);
|
||||||
|
R"layout(matrix(1:4, 2, 2, byrow=TRUE))"; # to get 4 plots into a single figure: 2 row, 2 columns
|
||||||
|
R"par"(mar=[0, 0, 1.5, 0]); # for smaller margins
|
||||||
|
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net1, false, false)[13:14];
|
||||||
|
xmax += (xmax - xmin) * 0.3;
|
||||||
|
plot(net1, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||||||
|
R"mtext"(string("hmax=1, loglik=-", round(loglik(net1), digits=2)), font=2);
|
||||||
|
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net2, false, false)[13:14];
|
||||||
|
xmax += (xmax - xmin) * 0.3;
|
||||||
|
plot(net2, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||||||
|
R"mtext"(string("hmax=2, loglik=-", round(loglik(net2), digits=2)), font=2);
|
||||||
|
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net3, false, false)[13:14];
|
||||||
|
xmax += (xmax - xmin) * 0.3;
|
||||||
|
plot(net3, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||||||
|
R"mtext"(string("hmax=3, loglik=-", round(loglik(net3), digits=2)), font=2);
|
||||||
|
xmin, xmax = PhyloPlots.PhyloPlots.edgenode_coordinates(net4, false, false)[13:14];
|
||||||
|
xmax += (xmax - xmin) * 0.3;
|
||||||
|
plot(net4, style=:majortree, showgamma=true, tipoffset=0.1, xlim=[xmin, xmax]);
|
||||||
|
R"mtext"(string("hmax=4, loglik=-", round(loglik(net4), digits=2)), font=2);
|
||||||
|
R"dev.off"();
|
||||||
|
|
||||||
## expected vs. observed quartet concordance factors
|
## expected vs. observed quartet concordance factors
|
||||||
using CSV, DataFrames;
|
using CSV, DataFrames, Distributions, Random, RCall;
|
||||||
|
inputCFfile = joinpath("bucky_1.CFs.csv");
|
||||||
|
inputCF = readtableCF(inputCFfile);
|
||||||
|
net3 = readsnaqnetwork("net3.out");
|
||||||
|
topologymaxQpseudolik!(net3, inputCF);
|
||||||
|
df_long = fittedquartetCF(inputCF, :long);
|
||||||
|
|
||||||
|
### Adding jitter to the points for better visualization
|
||||||
|
Random.seed!(123);
|
||||||
|
jitter = 0.005;
|
||||||
|
df_jittered = deepcopy(df_long);
|
||||||
|
df_jittered.obsCF .+= rand(Uniform(-jitter / 2, jitter / 2), nrow(df_long));
|
||||||
|
df_jittered.expCF .+= rand(Uniform(-jitter / 2, jitter / 2), nrow(df_long));
|
||||||
|
|
||||||
|
R"pdf"("expCFs_obsvsfitted.pdf", width=6, height=6);
|
||||||
|
R"plot"(df_jittered.obsCF, df_jittered.expCF, xlab="quartet CF observed in gene trees",
|
||||||
|
ylab="quartet CF expected from network", pch=16,
|
||||||
|
col="#008080",
|
||||||
|
cex=0.7);
|
||||||
|
R"abline"(0, 1, col="#008080", lwd=0.5);
|
||||||
|
R"dev.off"();
|
||||||
|
|
||||||
# Goodness of fit of the SNaQ networks
|
# Goodness of fit of the SNaQ networks
|
||||||
using QuartetNetworkGoodnessFit;
|
using QuartetNetworkGoodnessFit;
|
||||||
|
res1 = quarnetGoFtest!(net3, inputCF, true; seed=123, nsim=1000);
|
||||||
|
res1[[1,2,3]] # p-value, uncorrected z, σ
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue