38 lines
1.5 KiB
Bash
38 lines
1.5 KiB
Bash
#! /bin/bash
|
|
set -e
|
|
MAX_MEMORY=${MAX_MEMORY:-"50G"}
|
|
VIRIDI=${VIRIDI:-"$PROJECTHOME/01.reference/viridiplantae_odb12/"}
|
|
ROSALES=${ROSALES:-"$PROJECTHOME/01.reference/rosales_odb12/"}
|
|
SCRIPTS=${SCRIPTS:-"$PROJECTHOME/99.scripts"}
|
|
|
|
if [ "$#" -ne 5 ]; then
|
|
echo "Usage: $0 <reads_1.fastq> <reads_2.fastq> <ref> <stem> <threads>"
|
|
echo "Perform reference-guided transcriptome assembly using Hisat2 and Trinity"
|
|
exit 1
|
|
fi
|
|
|
|
fq1=$1
|
|
fq2=$2
|
|
ref=$3
|
|
stem=$4
|
|
outdir="$stem"_trinity_out_dir
|
|
THREADS=$5
|
|
|
|
|
|
# Run Hisat2 for reads mapping to reference genome
|
|
hisat2 -p "$THREADS" --dta -x "$ref" -1 "$fq1" -2 "$fq2" -S "$stem".sam
|
|
samtools view -b -@ "$THREADS" -o "$stem".raw.bam "$stem".sam
|
|
samtools sort -@ "$THREADS" -o "$stem".sorted.bam "$stem".raw.bam
|
|
samtools index "$stem".sorted.bam
|
|
rm "$stem".sam "$stem".raw.bam
|
|
# Run Trinity for de novo transcriptome assembly
|
|
Trinity --genome_guided_bam "$stem".sorted.bam --genome_guided_max_intron 10000 --max_memory "$MAX_MEMORY" --CPU "$THREADS" --output "$outdir"
|
|
# Get Longest isoform per gene
|
|
perl "$SCRIPTS"/trinity_utils/util/misc/get_longest_isoform_seq_per_trinity_gene.pl "$outdir/Trinity-GG.fasta" >"$outdir/longest_isoform.fasta"
|
|
# BUSCO assessment
|
|
busco -i "$outdir"/longest_isoform.fasta -l "$VIRIDI" -m tran --cpu "$THREADS" -o "$outdir"/busco_viridi -f
|
|
busco -i "$outdir"/longest_isoform.fasta -l "$ROSALES" -m tran --cpu "$THREADS" -o "$outdir"/busco_rosales -f
|
|
# Length Statistics
|
|
TrinityStats.pl "$outdir"/Trinity-GG.fasta >"$outdir"/length_stat.txt
|
|
|