biyelunwen/99.scripts/workflow/transcripts_assembly/01.denove_assembly.sh

32 lines
1.2 KiB
Bash

#! /bin/bash
#
# De novo transcriptome assembly workflow for one paired-end RNA-seq sample.
#
# Steps:
#   1. Assemble the reads with Trinity.
#   2. Extract the longest isoform of every Trinity gene.
#   3. Run BUSCO (transcriptome mode) on the longest-isoform set against two
#      lineage datasets ($VIRIDI and $ROSALES).
#   4. Write TrinityStats.pl length statistics for the full assembly.
#   5. Remove the Trinity working directory.
#
# Usage:
#   01.denove_assembly.sh <reads_1.fastq> <reads_2.fastq> <stem> <threads>
#
# Environment (optional overrides):
#   SCRIPTS      directory containing trinity_utils/
#                (default: $PROJECTHOME/99.scripts)
#   MAX_MEMORY   value passed to Trinity --max_memory (default: 20G)
#   VIRIDI       value passed to busco -l for the first assessment
#                (default: $PROJECTHOME/01.reference/viridiplantae_odb12/)
#   ROSALES      value passed to busco -l for the second assessment
#                (default: $PROJECTHOME/01.reference/rosales_odb12/)
#   PROJECTHOME  required only when SCRIPTS, VIRIDI or ROSALES is not set.
#
# Outputs, with <out> = "<stem>_trinity_out_dir":
#   <out>.Trinity.fasta                  assembly; read by this script from
#                                        this path after Trinity finishes
#   <out>.longest_isoform.fasta          longest isoform per gene
#   <out>_busco_viridi, <out>_busco_rosales   BUSCO run names
#   <out>.Trinity.fasta.length_stat.txt  TrinityStats.pl report
#
# The Trinity working directory <out> is removed only when every step
# succeeded, so a failed run can still be inspected.
set -euo pipefail

MAX_MEMORY=${MAX_MEMORY:-"20G"}

# Print an error message to stderr and abort.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Print the usage text to stderr and abort.
usage() {
  {
    printf 'Usage: %s <reads_1.fastq> <reads_2.fastq> <stem> <threads>\n' "$0"
    printf 'Perform de novo transcriptome assembly using Trinity\n'
  } >&2
  exit 1
}

# Check that every file in a comma-separated read list is readable.
# Lists are split on commas so callers that pass several files in one
# argument keep working.
require_read_files() {
  local list=$1 path
  local -a paths
  [[ -n "$list" ]] || die "empty read file argument"
  IFS=',' read -r -a paths <<<"$list"
  for path in "${paths[@]}"; do
    [[ -r "$path" ]] || die "read file not found or not readable: $path"
  done
}

# Run a command with stdout captured into "$1". The destination only appears
# when the command succeeded, so a failed step never leaves a truncated file
# that looks like a finished result. The command's exit status is preserved.
capture_to() {
  local dest=$1 partial="$1.partial" status=0
  shift
  "$@" >"$partial" || status=$?
  if ((status != 0)); then
    rm -f -- "$partial"
    return "$status"
  fi
  mv -f -- "$partial" "$dest"
}

main() {
  (($# == 4)) || usage

  local fq1=$1 fq2=$2 stem=$3 threads=$4
  local outdir="${stem}_trinity_out_dir"
  local assembly="$outdir.Trinity.fasta"
  local longest="$outdir.longest_isoform.fasta"

  # Resolve project paths after the usage check so that a bare invocation
  # still prints the usage text. ":?" aborts with a clear message instead of
  # silently falling back to paths under "/" when PROJECTHOME is missing.
  SCRIPTS=${SCRIPTS:-"${PROJECTHOME:?PROJECTHOME must be set when SCRIPTS is not}/99.scripts"}
  VIRIDI=${VIRIDI:-"${PROJECTHOME:?PROJECTHOME must be set when VIRIDI is not}/01.reference/viridiplantae_odb12/"}
  ROSALES=${ROSALES:-"${PROJECTHOME:?PROJECTHOME must be set when ROSALES is not}/01.reference/rosales_odb12/"}

  local isoform_script="$SCRIPTS/trinity_utils/util/misc/get_longest_isoform_seq_per_trinity_gene.pl"

  # Fail fast: everything below is checked before the long assembly starts.
  [[ "$threads" =~ ^[0-9]+$ ]] && ((10#$threads > 0)) ||
    die "threads must be a positive integer, got: $threads"
  require_read_files "$fq1"
  require_read_files "$fq2"
  [[ -f "$isoform_script" ]] || die "helper script not found: $isoform_script"
  local tool
  for tool in Trinity perl busco TrinityStats.pl; do
    command -v "$tool" >/dev/null || die "required tool not found in PATH: $tool"
  done

  # Run Trinity for de novo transcriptome assembly
  Trinity --seqType fq --left "$fq1" --right "$fq2" --CPU "$threads" \
    --max_memory "$MAX_MEMORY" --output "$outdir"

  # Get longest isoform per gene
  capture_to "$longest" perl "$isoform_script" "$assembly"

  # BUSCO assessment
  busco -i "$longest" -l "$VIRIDI" -m tran --cpu "$threads" -o "${outdir}_busco_viridi" -f
  busco -i "$longest" -l "$ROSALES" -m tran --cpu "$threads" -o "${outdir}_busco_rosales" -f

  # Length statistics
  capture_to "$assembly.length_stat.txt" TrinityStats.pl "$assembly"

  # Clear temporary directory; ":?" and "--" guard against an empty or
  # option-like path ever reaching rm -rf.
  rm -rf -- "${outdir:?}"
}

main "$@"