#!/bin/bash
#
# Reference-guided transcriptome assembly using HISAT2 and Trinity.
#
# Usage:
#   <script> <fq1> <fq2> <hisat2_index> <stem> <threads>
#
# Positional arguments:
#   fq1           Mate-1 reads, passed to `hisat2 -1`.
#   fq2           Mate-2 reads, passed to `hisat2 -2`.
#   hisat2_index  Reference index, passed to `hisat2 -x`.
#   stem          Prefix for every output; results are written to
#                 "<stem>.sorted.bam" and "<stem>_trinity_out_dir/".
#   threads       Thread/CPU count handed to hisat2, samtools, Trinity, BUSCO.
#
# Environment (all optional overrides):
#   MAX_MEMORY   Value for Trinity --max_memory (default: 50G).
#   VIRIDI       BUSCO lineage dataset #1
#                (default: $PROJECTHOME/01.reference/viridiplantae_odb12/).
#   ROSALES      BUSCO lineage dataset #2
#                (default: $PROJECTHOME/01.reference/rosales_odb12/).
#   SCRIPTS      Directory containing trinity_utils/
#                (default: $PROJECTHOME/99.scripts).
#   PROJECTHOME  Required only when one of the three defaults above is used.
#
# Exit status: 1 on usage errors; otherwise non-zero as soon as any step fails.

set -euo pipefail

usage() {
  echo "Usage: $0 <fq1> <fq2> <hisat2_index> <stem> <threads>"
  echo "Perform reference-guided transcriptome assembly using Hisat2 and Trinity"
}

# Check the argument count before anything else so a bare invocation always
# prints the usage text, even when PROJECTHOME is not configured.
if [[ "$#" -ne 5 ]]; then
  usage >&2
  exit 1
fi

fq1=$1
fq2=$2
ref=$3
stem=$4
outdir="${stem}_trinity_out_dir"
THREADS=$5

# Reject a bad thread count here instead of after the aligner has started.
if ! [[ "$THREADS" =~ ^[1-9][0-9]*$ ]]; then
  echo "Error: <threads> must be a positive integer, got '$THREADS'" >&2
  usage >&2
  exit 1
fi

# Resolve configuration. The ${PROJECTHOME:?} guard is evaluated only when the
# corresponding override is unset/empty, so fully overridden setups do not
# need PROJECTHOME, while a missing PROJECTHOME no longer yields bogus
# root-relative paths that would only fail after the assembly has finished.
MAX_MEMORY=${MAX_MEMORY:-"50G"}
VIRIDI=${VIRIDI:-"${PROJECTHOME:?PROJECTHOME must be set when VIRIDI is not provided}/01.reference/viridiplantae_odb12/"}
ROSALES=${ROSALES:-"${PROJECTHOME:?PROJECTHOME must be set when ROSALES is not provided}/01.reference/rosales_odb12/"}
SCRIPTS=${SCRIPTS:-"${PROJECTHOME:?PROJECTHOME must be set when SCRIPTS is not provided}/99.scripts"}

# Run Hisat2 for reads mapping to reference genome
hisat2 -p "$THREADS" --dta -x "$ref" -1 "$fq1" -2 "$fq2" -S "$stem".sam
samtools view -b -@ "$THREADS" -o "$stem".raw.bam "$stem".sam
samtools sort -@ "$THREADS" -o "$stem".sorted.bam "$stem".raw.bam
samtools index "$stem".sorted.bam
# Intermediates are removed only after the sorted, indexed BAM exists.
rm -- "$stem".sam "$stem".raw.bam

# Run Trinity for genome-guided transcriptome assembly (input is the sorted BAM)
Trinity \
  --genome_guided_bam "$stem".sorted.bam \
  --genome_guided_max_intron 10000 \
  --max_memory "$MAX_MEMORY" \
  --CPU "$THREADS" \
  --output "$outdir"

# Get longest isoform per gene
perl "$SCRIPTS"/trinity_utils/util/misc/get_longest_isoform_seq_per_trinity_gene.pl \
  "$outdir/Trinity-GG.fasta" >"$outdir/longest_isoform.fasta"

# BUSCO assessment of the longest-isoform set against both lineage datasets
busco -i "$outdir"/longest_isoform.fasta -l "$VIRIDI" -m tran \
  --cpu "$THREADS" -o "$outdir"/busco_viridi -f
busco -i "$outdir"/longest_isoform.fasta -l "$ROSALES" -m tran \
  --cpu "$THREADS" -o "$outdir"/busco_rosales -f

# Length statistics of the full assembly
TrinityStats.pl "$outdir"/Trinity-GG.fasta >"$outdir"/length_stat.txt