#!/usr/bin/env perl use strict; use warnings; use FindBin; use lib("$FindBin::RealBin/../../PerlLib"); use Pipeliner; use File::Basename; use Cwd; use Carp; use Getopt::Long qw(:config no_ignore_case bundling pass_through); my $usage = <<__EOUSAGE__; ###################################################################### # # Required: # --genome target genome to align to # --gtf|G GTF file for incorporating reference splice site info. (recommended) # # --reads fastq files. If pairs, indicate both in quotes, ie. "left.fq right.fq" # # Optional: # --CPU number of threads (default: 2) # --out_prefix output prefix (default: star) # --out_dir output directory (default: current working directory) # --star_path full path to the STAR program to use. # --patch genomic targets to patch the genome fasta with. # --chim_search include Chimeric.junction outputs # --max_intron max intron length (and PE gap size) # --one_pass do one pass alignment instead of two-pass # ####################################################################### __EOUSAGE__ ; my ($genome, $reads); my $CPU = 2; my $help_flag; my $out_prefix = "star"; my $gtf_file; my $out_dir; my $ADV = 0; my $star_path = "STAR"; my $patch; my $chim_search; my $max_intron; my $one_pass = 0; &GetOptions( 'h' => \$help_flag, 'genome=s' => \$genome, 'reads=s' => \$reads, 'CPU=i' => \$CPU, 'out_prefix=s' => \$out_prefix, 'gtf|G=s' => \$gtf_file, 'out_dir=s' => \$out_dir, 'ADV' => \$ADV, 'star_path=s' => \$star_path, 'patch=s' => \$patch, 'chim_search' => \$chim_search, "max_intron=i" => \$max_intron, 'one_pass' => \$one_pass, ); unless ($genome && $reads) { die $usage; } if ($help_flag) { die $usage; } if (@ARGV) { die "Error, cannot recognize opts: @ARGV"; } my $star_prog = `sh -c "command -v $star_path"`; chomp $star_prog; unless ($star_prog =~ /\w/) { die "Error, cannot locate STAR program. Be sure it's in your PATH setting. "; } main: { ## ensure all full paths $genome = &Pipeliner::ensure_full_path($genome); $gtf_file = &Pipeliner::ensure_full_path($gtf_file) if $gtf_file; my @read_files = split(/\s+/, $reads); foreach my $read_file (@read_files) { if ($read_file) { $read_file = &Pipeliner::ensure_full_path($read_file); } } $reads = join(" ", @read_files); if ($out_dir) { unless (-d $out_dir) { mkdir $out_dir or die "Error, cannot mkdir $out_dir"; } chdir $out_dir or die "Error, cannot cd to $out_dir"; } my $star_index = "$genome.star.idx"; if (! -e "$star_index/build.ok") { ## build star index unless (-d $star_index) { mkdir($star_index) or die "Error, cannot mkdir $star_index"; } my $cmd = "$star_prog --runThreadN $CPU --runMode genomeGenerate --genomeDir $star_index " . " --genomeFastaFiles $genome " . " --limitGenomeGenerateRAM 40419136213 "; if ($gtf_file) { $cmd .= " --sjdbGTFfile $gtf_file " . " --sjdbOverhang 100 "; } &process_cmd($cmd); &process_cmd("touch $star_index/build.ok"); } ## run STAR my @tmpfiles; my $pipeliner = new Pipeliner(-verbose => 1); my $pass_mode = ($one_pass) ? "None" : "Basic"; my $cmd = "$star_prog " . " --runThreadN $CPU " . " --genomeDir $star_index " . " --outSAMtype BAM SortedByCoordinate " . " --runMode alignReads " . " --readFilesIn $reads " . " --twopassMode $pass_mode " . " --alignSJDBoverhangMin 10 " . " --outSAMstrandField intronMotif " . " --outSAMunmapped Within " . " --outReadsUnmapped Fastx " . " --alignInsertionFlush Right " . " --alignSplicedMateMapLminOverLmate 0 " . " --alignSplicedMateMapLmin 30 " . " --alignSJstitchMismatchNmax 5 -1 5 5 " #which allows for up to 5 mismatches for non-canonical GC/AG, and AT/AC junctions, and any number of mismatches for canonical junctions (the default values 0 -1 0 0 replicate the old behavior (from AlexD) . " --peOverlapNbasesMin 12 " . " --peOverlapMMp 0.1 " . " --limitBAMsortRAM 20000000000"; if ($max_intron) { $cmd .= " --alignMatesGapMax $max_intron " . " --alignIntronMax $max_intron "; } if ($chim_search) { $cmd .= " --chimJunctionOverhangMin 8 " . " --chimOutJunctionFormat 1 " . " --chimSegmentMin 12 " . " --chimSegmentReadGapMax parameter 3 " . " --chimMultimapNmax 20 " . " --chimOutType Junctions WithinBAM " . " --chimScoreJunctionNonGTAG -4 " . " --chimNonchimScoreDropMin 10 " . " --chimMultimapScoreRange 10 "; } if ($patch) { $cmd .= " --genomeFastaFiles $patch "; } if ($reads =~ /\.gz$/) { $cmd .= " --readFilesCommand 'gunzip -c' "; } $pipeliner->add_commands( new Command($cmd, "star_align.ok") ); my $bam_outfile = "Aligned.sortedByCoord.out.bam"; my $renamed_bam_outfile = "$out_prefix.sortedByCoord.out.bam"; $pipeliner->add_commands( new Command("mv $bam_outfile $renamed_bam_outfile", "$renamed_bam_outfile.ok") ); $pipeliner->add_commands( new Command("samtools index $renamed_bam_outfile", "$renamed_bam_outfile.bai.ok") ); $pipeliner->run(); exit(0); } #### sub process_cmd { my ($cmd) = @_; print STDERR "CMD: $cmd\n"; #return; my $ret = system($cmd); if ($ret) { die "Error, cmd: $cmd died with ret ($ret)"; } return; }