biyelunwen/99.scripts/workflow/orthology_inference/01.hmmer_sc.sh

52 lines
1.4 KiB
Bash
Executable File

#!/bin/bash
#
# Search a proteome for homologs of each orthogroup using profile HMMs.
#
# Stages (all outputs are written below <outdir>):
#   1. align every <ogs_dir>/*.fa with linsi    -> msa/
#   2. build one HMM per alignment (hmmbuild)   -> hmms/
#   3. search <proteome> with every HMM         -> search/
#
# Arguments:
#   $1  ogs_dir   directory holding the orthogroup FASTA files (*.fa)
#   $2  outdir    output directory; created if missing, becomes the CWD
#   $3  proteome  sequence file searched by hmmsearch
#   $4  threads   number of jobs xargs runs in parallel (passed to -P)
#
# Exits 1 on a usage error or invalid input.
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

if [[ $# -ne 4 ]]; then
  {
    echo "Usage: $0 <ogs_dir> <outdir> <proteome> <threads>"
    echo "search homologous sequences in <proteome> using HMMs built from orthogroup alignments"
  } >&2
  exit 1
fi

# Resolve the inputs to absolute paths *before* changing directory, so that
# relative arguments keep pointing at the caller's files after the cd below.
ogs_dir=$(readlink -f -- "$1") || die "cannot resolve orthogroup directory: $1"
outdir=$2
proteome=$(readlink -f -- "$3") || die "cannot resolve proteome: $3"
threads=$4

# readlink -f can succeed for a path whose last component does not exist, so
# check the inputs explicitly instead of failing later inside a worker job.
[[ -d "$ogs_dir" ]] || die "orthogroup directory not found: $ogs_dir"
[[ -f "$proteome" ]] || die "proteome file not found: $proteome"
[[ "$threads" =~ ^[0-9]+$ ]] || die "threads must be a non-negative integer, got: $threads"

mkdir -p -- "$outdir"
cd -- "$outdir" || exit 1
echo "Working directory: $(pwd)"
echo "Using OGS directory: $ogs_dir"
echo "Using $threads threads"
echo ""
# Stage 1: align each orthogroup FASTA with linsi, $threads jobs at a time.
echo "Starting orthogroup sequence alignment..."
mkdir -p msa
# One shell command per line; the file stays on disk as a record of the run.
: >mafft.cmds
n_jobs=0
for i in "$ogs_dir"/*.fa; do
  # An unmatched glob stays literal; skip it rather than queue a bogus path.
  [[ -e "$i" ]] || continue
  j=$(basename -- "$i")
  # %q shell-quotes the paths so blanks or metacharacters in file names
  # survive the `bash -c` evaluation below.
  printf 'linsi --quiet %q > %q\n' "$i" "msa/$j" >>mafft.cmds
  n_jobs=$((n_jobs + 1))
done
if [[ "$n_jobs" -eq 0 ]]; then
  echo "Error: no *.fa files found in $ogs_dir" >&2
  exit 1
fi
# -d '\n' makes xargs take each line verbatim, without applying its own
# quote/backslash processing to the already shell-quoted command.
# xargs exits non-zero when a job fails, which aborts the script via set -e.
xargs -t -P "$threads" -d '\n' -I cmd -a mafft.cmds bash -c "cmd"
echo "Orthogroup sequence alignment completed."
echo ""
# Stage 2: build one profile HMM per alignment in msa/, $threads jobs at a time.
echo "Starting HMM building from alignments..."
mkdir -p hmms
# One shell command per line; the file stays on disk as a record of the run.
: >hmmbuild.cmds
n_jobs=0
for i in msa/*.fa; do
  # An unmatched glob stays literal; skip it rather than queue a bogus path.
  [[ -e "$i" ]] || continue
  j=$(basename -- "$i")
  # Outputs per alignment: hmms/<name>.hmm plus hmmbuild's report in
  # hmms/<name>.hmmbuild.out. %q shell-quotes every path so blanks or
  # metacharacters in file names survive the `bash -c` evaluation below.
  printf 'hmmbuild -o %q --amino %q %q\n' \
    "hmms/${j}.hmmbuild.out" "hmms/${j}.hmm" "$i" >>hmmbuild.cmds
  n_jobs=$((n_jobs + 1))
done
if [[ "$n_jobs" -eq 0 ]]; then
  echo "Error: no alignments (*.fa) found in $(pwd)/msa" >&2
  exit 1
fi
# -d '\n' makes xargs take each line verbatim, without applying its own
# quote/backslash processing to the already shell-quoted command.
# xargs exits non-zero when a job fails, which aborts the script via set -e.
xargs -t -P "$threads" -d '\n' -I cmd -a hmmbuild.cmds bash -c "cmd"
echo "HMM building completed."
echo ""
# Stage 3: search the proteome with every HMM in hmms/, $threads jobs at a time.
echo "Starting HMM search against other proteome..."
mkdir -p search
# One shell command per line; the file stays on disk as a record of the run.
: >hmmsearch.cmds
n_jobs=0
for i in hmms/*.hmm; do
  # An unmatched glob stays literal; skip it rather than queue a bogus path.
  [[ -e "$i" ]] || continue
  j=$(basename -- "$i")
  # $j already ends in ".hmm", so the outputs are named
  # search/<name>.hmmsearch.tblout (table) and search/<name>.hmmsearch.rawout
  # (hmmsearch's stdout). %q shell-quotes every path so blanks or
  # metacharacters survive the `bash -c` evaluation below.
  printf 'hmmsearch --tblout %q %q %q > %q\n' \
    "search/${j}search.tblout" "$i" "$proteome" "search/${j}search.rawout" \
    >>hmmsearch.cmds
  n_jobs=$((n_jobs + 1))
done
if [[ "$n_jobs" -eq 0 ]]; then
  echo "Error: no HMMs (*.hmm) found in $(pwd)/hmms" >&2
  exit 1
fi
# -d '\n' makes xargs take each line verbatim, without applying its own
# quote/backslash processing to the already shell-quoted command.
# xargs exits non-zero when a job fails, which aborts the script via set -e.
xargs -t -P "$threads" -d '\n' -I cmd -a hmmsearch.cmds bash -c "cmd"
echo "HMM search completed."
echo ""
echo "All steps completed successfully."