biyelunwen/99.scripts/workflow/transcripts_assembly/05.reduce_redundancy.sh

28 lines
861 B
Bash
Executable File

#!/bin/bash
# Reduce redundancy of CDS FASTA files with cd-hit-est, then translate the
# clustered CDS sequences to protein with seqkit.
#
# Usage: <this script> <input_dir> <output_dir> <extension>
#   input_dir   directory containing the CDS files
#   output_dir  directory for the results (created if missing)
#   extension   file extension (without the leading dot) selecting the inputs
#
# Environment (all optional):
#   SCRIPTS   scripts directory; defaults to "$PROJECTHOME/99.scripts"
#             (set here but not used further in this script)
#   IDENTITY  value passed to cd-hit-est -c (default 0.99)
#   THREADS   value passed to cd-hit-est -T (default 6)
#
# For every <input_dir>/<stem>.<extension> the script writes
#   <output_dir>/<stem>.cds_rr.fa   (cd-hit-est output)
#   <output_dir>/<stem>.prot_rr.fa  (seqkit translate of the file above)
#
# Exits 1 on wrong argument count or when no input file matches; any failing
# command aborts the run.
set -euo pipefail

# ${PROJECTHOME:-} keeps the original default while staying safe under `set -u`.
SCRIPTS=${SCRIPTS:-"${PROJECTHOME:-}/99.scripts"}
IDENTITY=${IDENTITY:-0.99}
# Must be a plain integer: it is handed to cd-hit-est -T unchanged.
THREADS=${THREADS:-6}

if [ "$#" -ne 3 ]; then
  # Diagnostics belong on stderr so they never pollute captured stdout.
  echo "Usage: $0 <input_dir> <output_dir> <extension>" >&2
  echo "Reduce redundancy of CDS files using cd-hit-est" >&2
  exit 1
fi

indir=$1
outdir=$2
ext=$3

mkdir -p "$outdir"

# Process each file in the input directory with the specified extension
processed=0
for cds in "$indir"/*."$ext"; do
  # Without nullglob an unmatched pattern is passed through literally;
  # skip it instead of handing a non-existent path to cd-hit-est.
  [[ -e "$cds" ]] || continue
  processed=$((processed + 1))

  stem=$(basename "$cds" ."$ext")
  cds_out="$outdir/$stem.cds_rr.fa"
  prot_out="$outdir/$stem.prot_rr.fa"

  echo "Processing $cds($stem) ..."

  # Log the command before running it so failures are easy to reproduce.
  echo "cd-hit-est -i $cds -o $cds_out -c $IDENTITY -n 10 -r 0 -T $THREADS"
  cd-hit-est -i "$cds" -o "$cds_out" -c "$IDENTITY" -n 10 -r 0 -T "$THREADS"

  echo "seqkit translate $cds_out > $prot_out"
  seqkit translate "$cds_out" >"$prot_out"

  echo "Done."
done

if [ "$processed" -eq 0 ]; then
  echo "Error: no *.$ext files found in $indir" >&2
  exit 1
fi