biyelunwen/99.scripts/trinity_utils/util/PBS/trinity_pbs.p4a

157 lines
7.4 KiB
Plaintext

##################################################################################################################################
########################## ########################################
########################## Trinity PBS job submission with multi part dependencies ########################################
########################## ########################################
##################################################################################################################################
### Author: Josh Bowden, Alexie Papanicolaou, CSIRO
### Version 1.0
### QuantifyGraph and Butterfly p4a Script
##################################################################################################################################
# We do not use a job array, so that only the jobs that have not finished are submitted.
# This makes killing the jobs manually harder; it is best to use the kill script for that.
## JOBPREFIX is passed via HASHBANG
# Build the text of the p4a submission script and keep it in JOBSTRING4.
#
# Quoting convention inside the assignment below (it is ONE shell word):
#   * "$VAR"  - the quote is closed, VAR is expanded NOW (while this template
#               is evaluated) and the quote is reopened. Used for HASHBANG,
#               NODESCPUS, JOBPREFIX and OUTPUTDIR.
#   * \$VAR, \`cmd\`, \"  - emitted literally; they are evaluated LATER, when
#               the generated script itself runs.
#
# What the generated script does:
#   1. Wraps every line of chrysalis/quantifyGraph_commands and
#      chrysalis/butterfly_commands in an "if <file exists>; then ...; fi"
#      guard (via sed) and splits the result into 1000-line chunks with
#      3-digit numeric suffixes.
#   2. For every quantifyGraph chunk, compares the chunk size with the size of
#      its .completed file and submits (qsub) the Chrysalis and/or Butterfly
#      job for that chunk; Butterfly is submitted with -W depend=afterok on
#      the Chrysalis job whenever Chrysalis is (re)submitted.
#   3. If nothing had to be submitted, concatenates all *allProbPaths.fasta
#      files into Trinity.fasta and touches Trinity.fasta.complete.
#
# NOTE: every reference to PBS_JOB4/PBS_JOB5 must be written as \$PBS_JOB4 /
# \$PBS_JOB5 so that the job id captured by the generated script is used,
# not whatever value the variable has while this template is evaluated.
JOBSTRING4=""$HASHBANG"
"$NODESCPUS"
JOB_CHRYSALIS="$JOBPREFIX"_p4b
JOB_BUTTERFLY="$JOBPREFIX"_p5b
cd "$OUTPUTDIR"
rm -f \$JOB_CHRYSALIS.jobnames \$JOB_BUTTERFLY.jobnames
FILENAME=""$OUTPUTDIR"/chrysalis/quantifyGraph_commands"
FILENAMEBFLY=""$OUTPUTDIR"/chrysalis/butterfly_commands"
if [ ! -e \$FILENAME.pbs ];then
sed -e 's/.*/if [[ -e SEDPLACEHOLDER ]]; then &;fi/' \$FILENAME|sed -r 's/(-e\s)SEDPLACEHOLDER(\s.+-i\s)(\S+).tmp/\1\3.tmp\2\3.tmp/' > \$FILENAME.pbs
split -d -a 3 -l "1000" \$FILENAME.pbs \$FILENAME.pbs.
sleep 5
fi
if [ ! -e \$FILENAMEBFLY.pbs ];then
sed -e 's/.*/if [ -e SEDPLACEHOLDER ]; then &;fi/' \$FILENAMEBFLY|sed -r 's/(-e\s)SEDPLACEHOLDER(\s.+-C\s)(\S+)/\1\3.out\2\3/' > \$FILENAMEBFLY.pbs
split -d -a 3 -l "1000" \$FILENAMEBFLY.pbs \$FILENAMEBFLY.pbs.
sleep 5
fi
FILENAME=\$FILENAME.pbs
FILENAMEBFLY=\$FILENAMEBFLY.pbs
NUMCMDS=\`ls -l \$FILENAME.??? | wc -l\`
NUMCMDSBFLY=\`ls -l \$FILENAMEBFLY.??? | wc -l\`
let NUMCMDS=\$NUMCMDS-1
let NUMCMDSBFLY=\$NUMCMDSBFLY-1
let SUBMITTED_C=0
let SUBMITTED_B=0
for ((JOBID=0;JOBID<=\$NUMCMDS;++JOBID));do
JOB_INDEX_PADDED=\`printf "%03d" \$JOBID\`
MYJOBQ=\""\$FILENAME".\$JOB_INDEX_PADDED\"
MYJOBB=\""\$FILENAMEBFLY".\$JOB_INDEX_PADDED\"
JOB_FILESIZE_Q=\$(stat -c%s \$MYJOBQ)
JOB_FILESIZE_B=\$(stat -c%s \$MYJOBB)
#if some Q have completed:
if [ -s \"\$MYJOBQ.completed\" ] ; then
JOB_COMPLETED_FILESIZE_Q=\$(stat -c%s \"\$MYJOBQ.completed\")
# if not all have completed then run both Q and B
if [ \"\$JOB_FILESIZE_Q\" -gt \"\$JOB_COMPLETED_FILESIZE_Q\" ] ; then
PBS_JOB4=\`qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_CHRYSALIS.sh\" \`
if [[ ! \$PBS_JOB4 ]]; then
echo \"Submission for qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_CHRYSALIS.sh\" FAILED. Aborting...\"
exit 255
fi
let SUBMITTED_C++
echo \$PBS_JOB4 >> \"\$JOB_CHRYSALIS.jobnames\"
echo \$PBS_JOB4 >> jobnumbers.out ;
PBS_JOB5=\`qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED -W depend=afterok:\$PBS_JOB4 \"\$JOB_BUTTERFLY.sh\" \`
if [[ ! \$PBS_JOB5 ]]; then
echo \"Submission for qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED -W depend=afterok:\$PBS_JOB4 \"\$JOB_BUTTERFLY.sh\" FAILED. Aborting...\"
exit 255
fi
let SUBMITTED_B++
echo \$PBS_JOB5 >> \"\$JOB_BUTTERFLY.jobnames\"
echo \$PBS_JOB5 >> jobnumbers.out ;
if [ \$(( \$JOBID % 20 )) -eq 0 ] ; then
echo Submitting up to 20 Quantify and/or Butterfly jobs
sleep 3 # be nice
fi
# else all Q have completed; have B completed?
else
# if at least some B have completed
if [ -s \"\$MYJOBB.completed\" ] ; then
JOB_COMPLETED_FILESIZE_B=\$(stat -c%s \"\$MYJOBB.completed\" )
# if not all, run them with no dependency (Q has completed)
if [ \"\$JOB_FILESIZE_B\" -gt \"\$JOB_COMPLETED_FILESIZE_B\" ] ; then
PBS_JOB5=\`qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_BUTTERFLY.sh\" \`
if [[ ! \$PBS_JOB5 ]]; then
echo \"Submission for qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_BUTTERFLY.sh\" FAILED. Aborting...\"
exit 255
fi
let SUBMITTED_B++
echo \$PBS_JOB5 >> \"\$JOB_BUTTERFLY.jobnames\"
echo \$PBS_JOB5 >> jobnumbers.out ;
if [ \$(( \$JOBID % 20 )) -eq 0 ] ; then
echo Submitting up to 20 Quantify and/or Butterfly jobs
sleep 3 # be nice
fi
fi
# else no B have completed; run them without dependency (Q has completed)
else
PBS_JOB5=\`qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_BUTTERFLY.sh\" \`
if [[ ! \$PBS_JOB5 ]]; then
echo \"Submission for qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_BUTTERFLY.sh\" FAILED. Aborting...\"
exit 255
fi
let SUBMITTED_B++
echo \$PBS_JOB5 >> \"\$JOB_BUTTERFLY.jobnames\"
echo \$PBS_JOB5 >> jobnumbers.out ;
if [ \$(( \$JOBID % 20 )) -eq 0 ] ; then
echo Submitting up to 20 Quantify and/or Butterfly jobs
sleep 3 # be nice
fi
fi
fi
# neither Q (and thus nor B) have ever ran successfully, submit both with a dependency
else
PBS_JOB4=\`qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_CHRYSALIS.sh\" \`
if [[ ! \$PBS_JOB4 ]]; then
echo \"Submission for qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED \"\$JOB_CHRYSALIS.sh\" FAILED. Aborting...\"
exit 255
fi
let SUBMITTED_C++
echo \$PBS_JOB4 >> \"\$JOB_CHRYSALIS.jobnames\"
echo \$PBS_JOB4 >> jobnumbers.out ;
PBS_JOB5=\`qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED -W depend=afterok:\$PBS_JOB4 \"\$JOB_BUTTERFLY.sh\" \`
if [[ ! \$PBS_JOB5 ]]; then
echo \"Submission for qsub -v JOB_INDEX_PADDED=\$JOB_INDEX_PADDED -W depend=afterok:\$PBS_JOB4 \"\$JOB_BUTTERFLY.sh\" FAILED. Aborting...\"
exit 255
fi
let SUBMITTED_B++
echo \$PBS_JOB5 >> \"\$JOB_BUTTERFLY.jobnames\"
echo \$PBS_JOB5 >> jobnumbers.out ;
if [ \$(( \$JOBID % 20 )) -eq 0 ] ; then
echo Submitting up to 20 Quantify and/or Butterfly jobs
sleep 3 # be nice
fi
fi
done
echo Submitted \$SUBMITTED_C Chrysalis and \$SUBMITTED_B Butterfly jobs
if [[ \$SUBMITTED_B == 0 && \$SUBMITTED_C == 0 ]]; then
echo \"No Trinity jobs need to be submitted \"
if [ -s "$OUTPUTDIR"/Trinity.fasta.complete ]; then
echo \"Trinity RNA-Seq assembly is complete! Result file is present as "$OUTPUTDIR"/Trinity.fasta \"
else
echo \"Proceeding with capturing the output with this command\"
echo ' find "$OUTPUTDIR"/chrysalis -name *allProbPaths.fasta -exec cat {} \\; > "$OUTPUTDIR"/Trinity.fasta '
find "$OUTPUTDIR"/chrysalis -name '*allProbPaths.fasta' -exec cat {} \\; > "$OUTPUTDIR"/Trinity.fasta
touch "$OUTPUTDIR"/Trinity.fasta.complete
echo DO: rm -f "$OUTPUTDIR"/bowtie.nameSorted.sam* "$OUTPUTDIR"/both.fa* "$OUTPUTDIR"/inchworm.kmer_count "$OUTPUTDIR"/iworm_* "$OUTPUTDIR"/target* "$OUTPUTDIR"/jellyfish* "$OUTPUTDIR"/scaffolding* "$OUTPUTDIR"/*.finished "$OUTPUTDIR"/mer_counts_*
fi
fi
"
######
##### Write the generated script text (JOBSTRING4) to "$JOBNAME4.sh" for later execution.
##### printf '%s\n' emits the text verbatim, so the backslashes in the embedded sed
##### patterns are never reinterpreted, and a plain redirect replaces the pipe through cat.
printf '%s\n' "${JOBSTRING4}" > "$JOBNAME4.sh"