Update .gitignore to add the 06.gene_trees directory; refactor macse.sh to drop the fs_lr parameter; remove several scripts that are no longer used; add check_frameshift.py and get_og_seqs.py for handling frameshifts and extracting single-copy OG sequences; update pixi.toml and pixi.lock to add the paml dependency.
This commit is contained in:
parent e8ba2ed962
commit dba4833905
@@ -10,6 +10,7 @@
 05.reduce_redundancy/*
 05.orthology_inference/*
 06.phylogeny_reconstruction/*
+06.gene_trees/*
 10.plastid/*
 98.results/*
 99.scripts/bucky/
@@ -0,0 +1,128 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Check and process frameshift in alignment files.
"""

import shutil
import pandas as pd
from pathlib import Path
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
import sys
import argparse


def parse_frameshift_list(fs_list: Path) -> pd.DataFrame:
    """
    Parse a frameshift list file into a pandas DataFrame.

    Args:
        fs_list (Path): Path to the frameshift list file.

    Returns:
        pd.DataFrame: DataFrame containing the frameshift information.
    """
    try:
        df = pd.read_csv(fs_list, sep=",", header=0)
    except Exception as e:
        print(f"Error reading frameshift list file {fs_list}: {e}")
        sys.exit(1)
    return df


def get_alignment_files(aln_dir: Path, ext: str) -> list[Path]:
    """
    Get a list of alignment files in a directory with a specific extension.

    Args:
        aln_dir (Path): Path to the directory containing alignment files.
        ext (str): Extension of the alignment files.

    Returns:
        list[Path]: List of Paths to the alignment files.
    """
    try:
        aln_files = list(aln_dir.glob(f"*{ext}"))
    except Exception as e:
        print(f"Error accessing alignment files in {aln_dir}: {e}")
        sys.exit(1)
    return aln_files


def main(alignment_dir: str, ext: str, frameshift_list: str, outdir: str):
    aln_dir = Path(alignment_dir)
    fs_list_path = Path(frameshift_list)
    out_dir = Path(outdir)
    out_dir.mkdir(parents=True, exist_ok=True)

    fs_df = parse_frameshift_list(fs_list_path)
    aln_files = get_alignment_files(aln_dir, ext)
    good_aln_count = 0
    keep_fs_count = 0
    discard_count = 0

    for file in aln_files:
        if str(file) not in fs_df["alignment_file"].values.tolist():
            try:
                shutil.copy(file, out_dir / file.name)
                good_aln_count += 1
                continue
            except Exception as e:
                print(f"Error copying file {file} to {out_dir}: {e}")
                sys.exit(1)
        if (
            fs_df.loc[
                fs_df["alignment_file"] == str(file), "possible_attributions"
            ].values[0]
            == "Framshift only in Ziziphus jujuba or Elaeagnus pungens"
        ):
            try:
                records = []
                for record in SeqIO.parse(file, "fasta"):
                    seq = str(record.seq).replace("!", "-")
                    id = record.id
                    records.append(SeqRecord(seq=Seq(seq), id=id, description=""))
                print(
                    f"Keep the alignment file with frameshift: {file}. Due to frameshift only in outgroup."
                )
                keep_fs_count += 1
                SeqIO.write(records, out_dir / file.name, "fasta")
            except Exception as e:
                print(f"Error processing file {file}: {e}")
                sys.exit(1)
        else:
            discard_count += 1
    print("Frameshift processing completed successfully.")
    print(f"Number of good alignments copied: {good_aln_count}")
    print(f"Number of alignments with frameshift kept: {keep_fs_count}")
    print(f"Number of alignments with frameshift discarded: {discard_count}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Check and process frameshift in alignment files."
    )
    parser.add_argument(
        "-a",
        "--alignment_dir",
        required=True,
        help="Directory containing alignment files",
    )
    parser.add_argument(
        "-e", "--ext", default=".nal", help="Extension of alignment files"
    )
    parser.add_argument(
        "-f",
        "--frameshift_list",
        required=True,
        help="CSV file listing frameshift information",
    )
    parser.add_argument(
        "-o", "--outdir", required=True, help="Output directory for processed files"
    )
    args = parser.parse_args()

    main(args.alignment_dir, args.ext, args.frameshift_list, args.outdir)
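For reference, a minimal example invocation of check_frameshift.py based on the argparse flags above; the directory and file names are placeholders, not paths taken from this repository:

# hypothetical paths shown for illustration only
python3 check_frameshift.py \
    -a macse_nal_dir \
    -e .nal \
    -f frameshift_list.csv \
    -o checked_alignments

The CSV passed with -f is expected to contain at least the columns alignment_file and possible_attributions, since those are the only fields the script reads.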
@@ -0,0 +1,148 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Extract Single Copy OG sequences from a comprehensive FASTA file based on OG definitions from results of orthofinder.
"""

import sys
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from pathlib import Path
import argparse


def get_og_fastas(og_dir: Path, ext: str) -> dict[str, list[str]]:
    """
    Get a dictionary of OGs and their corresponding sequence IDs from FASTA files in a directory.

    Args:
        og_dir (Path): Path to the directory containing OG FASTA files.
        ext (str): Extension of the FASTA files. Default is ".fa".

    Returns:
        dict: Dictionary with OG names as keys and lists of sequence IDs as values.
    """
    og_dict: dict[str, list[str]] = {}
    try:
        for fasta_file in og_dir.glob(f"*{ext}"):
            og_name = fasta_file.stem
            seq_ids: list[str] = []
            for record in SeqIO.parse(fasta_file, "fasta"):
                seq_ids.append(record.id)
            og_dict[og_name] = seq_ids
    except Exception as e:
        print(f"Error processing OG FASTA files in {og_dir}: {e}")
        sys.exit(1)
    return og_dict


def parse_all_fasta(fasta_file: Path) -> dict[str, Seq]:
    """
    Parse a FASTA file and return a dictionary of sequence IDs and their corresponding Seq objects.

    Args:
        fasta_file (Path): Path to the FASTA file.
    Returns:
        dict: Dictionary with sequence IDs as keys and Seq objects as values.
    """
    seq_dict: dict[str, Seq] = {}
    try:
        for record in SeqIO.parse(fasta_file, "fasta"):
            seq_dict[record.id] = record.seq
    except Exception as e:
        print(f"Error parsing FASTA file {fasta_file}: {e}")
        sys.exit(1)
    return seq_dict


def output_og_seqs(
    all_seq_dict: dict[str, Seq], og_dict: dict[str, list[str]], output_dir: Path
):
    """
    Output sequences for each OG into separate FASTA files, remove gene id and only keep taxon name.

    Args:
        all_seq_dict (dict): Dictionary of all sequences with sequence IDs as keys.
        og_dict (dict): Dictionary of OGs with OG names as keys and lists of sequence IDs as values.
        output_dir (Path): Path to the output directory.
    """
    for og_name, seq_ids in og_dict.items():
        og_seqs: list[SeqRecord] = []
        for seq_id in seq_ids:
            if seq_id in all_seq_dict:
                seq_record = SeqRecord(
                    all_seq_dict[seq_id], id=seq_id.split("@")[0], description=""
                )
                og_seqs.append(seq_record)
            else:
                print(f"Warning: Sequence ID {seq_id} not found in all sequences.")
        output_fasta = output_dir / f"{og_name}.fa"
        try:
            SeqIO.write(og_seqs, output_fasta, "fasta")
        except Exception as e:
            print(f"Error writing to FASTA file {output_fasta}: {e}")
            sys.exit(1)


def write_og_list(og_dict: dict[str, list[str]]):
    """
    Write the OG list to a text file.

    Args:
        og_dict (dict): Dictionary of OGs with OG names as keys and lists of sequence IDs as values.
    """
    try:
        with open("og_list.tsv", "w") as f:
            for og_name, seq_ids in og_dict.items():
                line = f"{og_name}\t" + "\t".join(seq_ids) + "\n"
                f.write(line)
        print("OG list written to og_list.tsv")
    except Exception as e:
        print(f"Error writing OG list to file: {e}")
        sys.exit(1)


def main(og_dir_path: str, all_fasta_path: str, output_dir_path: str, ext: str = ".fa"):
    og_dir = Path(og_dir_path)
    all_fasta = Path(all_fasta_path)
    output_dir = Path(output_dir_path)
    output_dir.mkdir(parents=True, exist_ok=True)

    og_dict = get_og_fastas(og_dir, ext)
    all_seqs = parse_all_fasta(all_fasta)
    output_og_seqs(all_seqs, og_dict, output_dir)
    write_og_list(og_dict)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Extract OG sequences from a comprehensive FASTA file based on OG definitions."
    )
    parser.add_argument(
        "-d",
        "--og_dir",
        required=True,
        help="Directory containing OG FASTA files.",
    )
    parser.add_argument(
        "-a",
        "--all_fasta",
        required=True,
        help="FASTA file containing all sequences.",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        required=True,
        help="Output directory for OG FASTA files.",
    )
    parser.add_argument(
        "-e",
        "--ext",
        default=".fa",
        help="Extension of OG FASTA files (default: .fa).",
    )
    args = parser.parse_args()
    main(args.og_dir, args.all_fasta, args.output_dir, args.ext)
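A hypothetical invocation of get_og_seqs.py matching the flags defined above (all paths are placeholders); note that og_list.tsv is always written to the current working directory:

# hypothetical paths shown for illustration only
python3 get_og_seqs.py \
    -d single_copy_ogs \
    -a all_species_cds.fa \
    -o og_cds \
    -e .fa

Sequence IDs are assumed to follow a taxon@gene pattern, since the script keeps only the part before "@" as the record ID in each per-OG FASTA.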
@@ -0,0 +1,74 @@
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Get primary CDS sequences from a FASTA file containing multiple CDS per gene.
"""

from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import re
import argparse
import sys


def get_primary_cds(input_fasta, output_fasta):
    primary_cds_records = []
    gene = ""
    length = 0
    seq = None
    id = None
    try:
        for record in SeqIO.parse(input_fasta, "fasta"):
            seq_len = len(record.seq)
            desc = record.description
            match = re.search(r"\[gene=(\S+)\]", desc)
            if match:
                gene_name = match.group(1)
            else:
                # Skip if gene name not found
                continue

            if gene_name != gene:
                # new gene encountered
                # print(f"Processing gene: {gene_name}")
                if length > 0:
                    # this is not the first record, save the previous longest record
                    primary_cds_record = SeqRecord(
                        seq, id=id, description=f"[gene={gene}]"
                    )
                    primary_cds_records.append(primary_cds_record)
                gene = gene_name
                seq = record.seq
                id = record.id
                length = seq_len
            else:
                # same gene, check length
                if seq_len > length:
                    seq = record.seq
                    id = record.id
                    length = seq_len
        # after loop, save the last gene
        if gene and length > 0:
            primary_cds_record = SeqRecord(seq, id=id, description=f"[gene={gene}]")
            primary_cds_records.append(primary_cds_record)
        SeqIO.write(primary_cds_records, output_fasta, "fasta")
        print(f"Primary CDS sequences written to {output_fasta}")
    except Exception as e:
        print(f"Error processing FASTA file {input_fasta}: {e}")
        sys.exit(1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Extract primary CDS sequences from a FASTA file."
    )
    parser.add_argument(
        "-i", "--input_fasta", help="Input FASTA file containing CDS sequences."
    )
    parser.add_argument(
        "-o", "--output_fasta", help="Output FASTA file to write primary CDS sequences."
    )
    args = parser.parse_args()

    get_primary_cds(args.input_fasta, args.output_fasta)
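A hypothetical invocation of get_primary_cds.py (file names are placeholders). The script keeps the longest CDS per [gene=...] tag and relies on records of the same gene being adjacent in the input, which matches NCBI-style *_cds_from_genomic.fna headers:

# hypothetical file names shown for illustration only
python3 get_primary_cds.py -i genome_cds_from_genomic.fna -o primary_cds.fa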
@@ -1,7 +1,5 @@
 #! /bin/bash
 
-FS_LR=7
-
 if [ "$#" -ne 3 ]; then
     echo "Usage: $0 <reliable_seq> <less_reliable_seq> <stem>"
     echo "Perform MACSE alignment on given sequences"
@@ -16,7 +14,7 @@ stem=$3
     echo "Command:"
     echo "macse -prog alignSequences -seq $seq -seq_lr ${seq_lr}"
     echo " -out_NT ${stem}.nal -out_AA ${stem}.pal"
-    echo " -optim 2 -max_refine_iter 3 -local_realign_init 0.2 -fs_lr $FS_LR"
+    echo " -optim 2 -max_refine_iter 3 -local_realign_init 0.2"
 } >alignSequences.log
 macse -prog alignSequences \
     -seq "$seq" -seq_lr "${seq_lr}" \
@@ -24,5 +22,4 @@ macse -prog alignSequences \
     -optim 2 \
     -max_refine_iter 3 \
     -local_realign_init 0.2 \
-    -fs_lr $FS_LR \
     >>alignSequences.log 2>&1
@@ -1,51 +0,0 @@
#! /bin/bash
set -e

if [ "$#" -ne 4 ]; then
    echo "Usage: $0 <ogs_dir> <outdir> <proteome> <threads>"
    echo "search homologous sequences in <proteome> using HMMs built from orthogroup alignments"
    exit 1
fi

ogs_dir=$(readlink -f "$1")
outdir=$2
proteome=$(readlink -f "$3")
threads=$4

mkdir -p "$outdir"
cd "$outdir" || exit 1
echo "Working directory: $(pwd)"
echo "Using OGS directory: $ogs_dir"
echo "Using $threads threads"
echo ""
echo "Starting orthogroup sequence alignment..."
mkdir -p msa
echo -n >mafft.cmds
for i in "$ogs_dir"/*.fa; do
    j=$(basename "$i")
    echo "linsi --quiet $i > msa/$j" >>mafft.cmds
done
xargs -t -P "$threads" -I cmd -a mafft.cmds bash -c "cmd"
echo "Orthogroup sequence alignment completed."
echo ""
echo "Starting HMM building from alignments..."
mkdir -p hmms
echo -n >hmmbuild.cmds
for i in msa/*.fa; do
    j=$(basename "$i")
    echo "hmmbuild -o hmms/${j}.hmmbuild.out --amino hmms/${j}.hmm $i" >>hmmbuild.cmds
done
xargs -t -P "$threads" -I cmd -a hmmbuild.cmds bash -c "cmd"
echo "HMM building completed."
echo ""
echo "Starting HMM search against other proteome..."
mkdir -p search
echo -n >hmmsearch.cmds
for i in hmms/*.hmm; do
    j=$(basename "$i")
    echo "hmmsearch --tblout search/${j}search.tblout $i $proteome > search/${j}search.rawout" >>hmmsearch.cmds
done
xargs -t -P "$threads" -I cmd -a hmmsearch.cmds bash -c "cmd"
echo "HMM search completed."
echo ""
echo "All steps completed successfully."
@@ -0,0 +1,24 @@
#! /bin/bash
set -e
THREADS=${THREADS:-12}
EXT=${EXT:-"fa"}

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <ogs_dir> <out_dir>"
    echo "Perform MACSE alignment for each orthologous group"
    exit 1
fi

ogs_dir=$(readlink -f "$1")
out_dir=$2

mkdir -p "$out_dir"
echo "Starting MACSE alignment of orthologous groups..."
echo -n >macse.cmds
for og_fasta in "$ogs_dir"/*."$EXT"; do
    og_name=$(basename "$og_fasta" ."$EXT")
    out_stem="$out_dir/$og_name"
    echo "macse -prog alignSequences -seq $og_fasta -out_AA ${out_stem}.pal -out_NT ${out_stem}.nal > ${out_stem}.log 2>&1" >>macse.cmds
done
xargs -t -P "$THREADS" -I cmd -a macse.cmds bash -c "cmd" &&
    echo "MACSE alignment completed."
@@ -1,34 +0,0 @@
#! /bin/bash
set -e
SCRIPTS=${SCRIPTS:-"$PROJECTHOME/99.scripts"}
THREADS=${THREADS:-12}

if [ "$#" -ne 5 ]; then
    echo "Usage: $0 <ogs_dir> <hmmsearch_result_dir> <all_cds.fa> <output_dir> <homolog_stem>"
    echo "Integrate hmmsearch results to new orthologous groups directory and perform MACSE alignment"
    exit 1
fi

ogs_dir=$(readlink -f "$1")
search_dir=$(readlink -f "$2")
all_cds=$(readlink -f "$3")
out_dir=$4
stem=$5

echo "Integrating hmmsearch results to new orthologous groups directory..."
python3 "$SCRIPTS"/miscs/hmmsearch_result_to_new_ogs_dir.py \
    -d "$ogs_dir" \
    -t "$search_dir" \
    -f "$all_cds" \
    -o "$out_dir" \
    -s "$stem"
echo "Integration completed."

echo "Starting MACSE alignment of orthologous groups..."
echo -n >macse.cmds
for og_dir in "$out_dir"/ogs/*; do
    j=$(basename "$og_dir")
    echo "cd $og_dir && bash $SCRIPTS/miscs/macse.sh ${j}_${stem}.fa ${j}.fa $j" >>macse.cmds
done
xargs -t -P "$THREADS" -I cmd -a macse.cmds bash -c "cmd"
echo "MACSE alignment completed."
@@ -0,0 +1,20 @@
#! /bin/bash
set -e
THREADS=${THREADS:-12}
if [ "$#" -ne 3 ]; then
    echo "Usage: $0 <nal_dir> <out_dir> <ext>"
    echo "Trim MACSE nucleotide alignments using trimal"
    exit 1
fi

nal_dir=$(readlink -f "$1")
out_dir=$(readlink -f "$2")
ext=$3

mkdir -p "$out_dir"
echo -n >trimal.cmds
for i in "$nal_dir"/*."$ext"; do
    j=$(basename "$i" ."$ext")
    echo "trimal -in $i -out $out_dir/${j}.trimed.fa -automated1 -resoverlap 0.5 -seqoverlap 50" >>trimal.cmds
done
xargs -t -P "$THREADS" -I cmd -a trimal.cmds bash -c "cmd"
@@ -0,0 +1,26 @@
#! /bin/bash
set -e
THREADS=${THREADS:-12}

if [ "$#" -ne 3 ]; then
    echo "Usage: $0 <aln_dir> <out_dir> <ext>"
    echo "Build FastTree phylogenetic trees and perform TreeShrink on trimmed MACSE nucleotide alignments"
    exit 1
fi

aln_dir=$(readlink -f "$1")
out_dir=$(readlink -f "$2")
ext=$3

# fasttree
mkdir -p "$out_dir"
echo -n >fasttree.cmds
for i in "$aln_dir"/*."$ext"; do
    j=$(basename "$i" ."$ext")
    mkdir -p "$out_dir"/"${j}"
    cp -s "$i" "$out_dir"/"${j}"/input.fasta
    echo "FastTree -nt -gtr -quiet $out_dir/${j}/input.fasta > $out_dir/${j}/input.tree" >>fasttree.cmds
done
xargs -t -P "$THREADS" -I cmd -a fasttree.cmds bash -c "cmd"
# treeshrink
run_treeshrink.py -f -i "$out_dir"/ -t input.tree -a input.fasta >treeshrink.log
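Taken together, one plausible way the new wrapper scripts might be chained; the script file names and directory names below are placeholders, and the frameshift CSV is assumed to be curated separately:

# hypothetical script and directory names; THREADS (and EXT for the MACSE step) are the environment overrides used by the wrappers
THREADS=24 EXT=fa bash macse_ogs.sh og_cds macse_out    # per-OG MACSE alignment, writes *.nal/*.pal
python3 check_frameshift.py -a macse_out -e .nal -f frameshift_list.csv -o checked_nal
bash trimal_ogs.sh checked_nal trimmed nal              # trimal, writes *.trimed.fa
bash gene_trees.sh trimmed 06.gene_trees trimed.fa      # FastTree + TreeShrink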
@@ -1,8 +0,0 @@
#! /usr/bin/env bash
mkdir -p trimed_nal
echo -n > trimal.cmds
for i in cds_aln/*.nal ;do
    j=$(basename "$i")
    echo "trimal -in $i -out trimed_nal/${j/.nal/.trimed.fa} -automated1 -resoverlap 0.5 -seqoverlap 50" >> trimal.cmds
done
xargs -t -P 4 -I cmd -a trimal.cmds bash -c "cmd"
@@ -1,8 +0,0 @@
#! /usr/bin/env bash
mkdir -p fasttree
echo -n > fasttree.cmds
for i in trimed_nal/*.trimed.fa ;do
    j=$(basename "$i")
    echo "FastTree -nt -gtr -quiet $i > fasttree/${j/.trimed.fa/.tree}" >> fasttree.cmds
done
xargs -t -P 8 -I cmd -a fasttree.cmds bash -c "cmd"
@@ -1,11 +0,0 @@
#! /usr/bin/env bash
mkdir -p treeshrink
for i in trimed_nal/*.trimed.fa; do
    j=$(basename "$i")
    mkdir -p treeshrink/"${j/.trimed.fa/}"
    cd treeshrink/"${j/.trimed.fa/}" || exit 1
    ln -s ../../fasttree/"${j/.trimed.fa/.tree}" input.tree
    ln -s ../../"$i" input.fasta
    cd ../../
done
run_treeshrink.py -i treeshrink/ -t input.tree -a input.fasta > treeshrink.log
@@ -338,6 +338,7 @@ environments:
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/linux-64/p7zip-16.02-h9c3ff4c_1001.tar.bz2
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/noarch/packaging-25.0-pyh29332c3_1.conda
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/noarch/pal2nal-14.1-pl5321hdfd78af_3.tar.bz2
+- conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/linux-64/paml-4.10.9-h7b50bb2_1.conda
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/linux-64/pandas-2.3.3-py311hed34c8f_1.conda
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/linux-64/pandoc-3.8.2.1-ha770c72_0.conda
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/linux-64/pango-1.56.4-hadf4263_0.conda
@@ -5125,6 +5126,15 @@ packages:
   license_family: GPL
   size: 22693
   timestamp: 1642293349880
+- conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/linux-64/paml-4.10.9-h7b50bb2_1.conda
+  sha256: 81c5237b07796984d7915d29a768068a1e5e6c02b4ae25f27836b0babe84e5aa
+  md5: ef02b401c5daaf104b3d1091f8ec37f5
+  depends:
+  - libgcc >=13
+  license: GPL-3.0-or-later
+  license_family: GPL3
+  size: 679359
+  timestamp: 1755706728619
 - conda: https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/linux-64/pandas-2.3.3-py311hed34c8f_1.conda
   sha256: c97f796345f5b9756e4404bbb4ee049afd5ea1762be6ee37ce99162cbee3b1d3
   md5: 72e3452bf0ff08132e86de0272f2fbb0
@@ -29,7 +29,6 @@ cd-hit = ">=4.8.1,<5"
orthofinder = ">=3.1.0,<4"
pip = ">=25.2,<26"
seqkit = ">=2.10.1,<3"
hmmer = ">=3.4,<4"
r-rstan = ">=2.32.7,<3"
biopython = ">=1.85,<2"
pal2nal = ">=14.1,<15"
@@ -54,6 +53,7 @@ r-ggplot2 = ">=4.0.1,<5"
 macse = ">=2.7,<3"
 td2 = ">=1.0.6,<2"
 mmseqs2 = ">=18.8cc5c,<19"
+paml = ">=4.10.9,<5"
 
 [feature.mrbayes.dependencies]
 beagle-lib = ">=3.1.2,<4"