biyelunwen/99.scripts/workflow/transcripts_assembly/04.longest_cds_rename.sh

26 lines
986 B
Bash
Executable File

#!/bin/bash
#
# Extract longest isoform per gene and rename sequences.
#
# For every <input_dir>/*.<extension> file this script writes, into
# <output_dir>:
#   <stem>.longest_isoform.fa  stdout of
#                              get_longest_isoform_seq_per_trinity_gene.pl
#   <stem>.full_cds.fa         output path handed to rename_trinity_fasta.py
# where <stem> is the input file name without ".<extension>"; <stem> is also
# passed to rename_trinity_fasta.py as its second argument.
#
# Usage: 04.longest_cds_rename.sh <input_dir> <output_dir> <extension>
#
# Environment:
#   SCRIPTS      directory containing trinity_utils/ and miscs/
#                (default: $PROJECTHOME/99.scripts)
#   PROJECTHOME  only consulted when SCRIPTS is unset or empty
#
# Exit status: non-zero on bad usage, missing inputs/helpers, or as soon as
# any helper command fails (set -e).
set -e

if [[ "$#" -ne 3 ]]; then
  # Diagnostics go to stderr so they never mix with captured stdout.
  echo "Usage: $0 <input_dir> <output_dir> <extension>" >&2
  echo "Extract longest isoform per gene and rename sequences" >&2
  exit 1
fi

indir=$1
outdir=$2
ext=$3

# Resolve the helper-script root. Fail loudly instead of silently falling
# back to "/99.scripts" when neither SCRIPTS nor PROJECTHOME is set.
if [[ -z "${SCRIPTS:-}" ]]; then
  SCRIPTS="${PROJECTHOME:?set SCRIPTS or PROJECTHOME before running $0}/99.scripts"
fi

longest_isoform_pl="$SCRIPTS/trinity_utils/util/misc/get_longest_isoform_seq_per_trinity_gene.pl"
rename_py="$SCRIPTS/miscs/rename_trinity_fasta.py"

# Validate everything up front so that no empty output file is created by
# the redirection below before the first real failure.
if [[ ! -d "$indir" ]]; then
  echo "Error: input directory not found: $indir" >&2
  exit 1
fi
if [[ ! -f "$longest_isoform_pl" ]]; then
  echo "Error: helper script not found: $longest_isoform_pl" >&2
  exit 1
fi
# rename_trinity_fasta.py is invoked directly (no interpreter prefix), so it
# has to be executable.
if [[ ! -x "$rename_py" ]]; then
  echo "Error: helper script missing or not executable: $rename_py" >&2
  exit 1
fi

mkdir -p "$outdir"

# Process each file in the input directory with the specified extension
processed=0
for td_cds in "$indir"/*."$ext"; do
  # An unmatched glob stays as the literal pattern; skip it rather than
  # creating an output file literally named "*.longest_isoform.fa".
  [[ -e "$td_cds" ]] || continue

  # Strip the directory and the ".<extension>" suffix without forking
  # basename (also safe for names that begin with "-").
  stem=${td_cds##*/}
  stem=${stem%."$ext"}

  longest_fa="$outdir/$stem.longest_isoform.fa"
  full_cds_fa="$outdir/$stem.full_cds.fa"

  echo "Processing $td_cds($stem) ..."
  echo "perl $longest_isoform_pl $td_cds > $longest_fa"
  perl "$longest_isoform_pl" "$td_cds" >"$longest_fa"
  # The logged command line now matches the command actually executed
  # (the original log omitted the "miscs/" path component).
  echo "$rename_py $longest_fa $stem $full_cds_fa"
  "$rename_py" "$longest_fa" "$stem" "$full_cds_fa"
  echo "Done."
  processed=$((processed + 1))
done

if [[ "$processed" -eq 0 ]]; then
  echo "Error: no *.$ext files found in $indir" >&2
  exit 1
fi