#!/usr/bin/env perl use strict; use warnings; use FindBin; use lib("$FindBin::RealBin/../../PerlLib"); use Pipeliner; use File::Basename; use Cwd; use Carp; use Getopt::Long qw(:config no_ignore_case bundling pass_through); our $HISAT_HOME; BEGIN { if ($ENV{HISAT_HOME}) { $HISAT_HOME = $ENV{HISAT_HOME}; } else { my $hisat_prog = `sh -c "command -v hisat"`; if ($hisat_prog) { chomp $hisat_prog; $HISAT_HOME = dirname($hisat_prog); } else { die "Error, cannot find hisat in PATH setting"; } } } my $usage = <<__EOUSAGE__; ###################################################################### # # Required: # --genome target genome to align to # --reads fastq files. If pairs, indicate both in quotes, ie. "left.fq right.fq" # # Optional: # -N max number of alignments to report. (default: 1) # -G GTF file for incorporating reference splice site info. # --CPU number of threads (default: 2) # --out_prefix output prefix (default: hisat) # --run_as_single_reads if paired, run as single reads # ####################################################################### Include additional hisat arguments as additional command line parameters. ######################################################################## __EOUSAGE__ ; my ($genome, $reads); my $CPU = 2; my $help_flag; my $out_prefix = "hisat"; my $gtf_file; my $run_as_single_flag = 0; my $num_top_hits = 1; &GetOptions( 'h' => \$help_flag, 'genome=s' => \$genome, 'reads=s' => \$reads, 'CPU=i' => \$CPU, 'out_prefix=s' => \$out_prefix, 'G=s' => \$gtf_file, 'run_as_single_reads' => \$run_as_single_flag, 'N=i' => \$num_top_hits, ); if ($help_flag) { die $usage; } unless ($genome && $reads) { die $usage; } main: { my $hisat_index = "$genome.hisat.idx"; if (! -s "$hisat_index.1.bt2") { ## build hisat index my $cmd = "$HISAT_HOME/hisat-build $genome $hisat_index"; &process_cmd($cmd); } my $splice_incl = ""; if ($gtf_file) { my $gtf_splice = "$gtf_file.hisat.splice"; unless (-s $gtf_splice) { my $cmd = "$HISAT_HOME/extract_splice_sites.py $gtf_file > $gtf_file.hisat.splice"; &process_cmd($cmd); } $splice_incl = " --known-splicesite-infile $gtf_splice "; } ## run HISAT $reads = &add_zcat_fifo_and_add_hisat_params($reads); my $top_hits_count = ""; if ($num_top_hits > 1) { $top_hits_count = " -k $num_top_hits "; } my @tmpfiles; my $pipeliner = new Pipeliner(-verbose => 1); my $cmd = "bash -c \"set -o pipefail && $HISAT_HOME/hisat -x $hisat_index -q $reads $splice_incl -p $CPU $top_hits_count @ARGV | gzip -c > $out_prefix.sam.gz\" "; $pipeliner->add_commands( new Command($cmd, "$out_prefix.sam.gz.ok") ); push (@tmpfiles, "$out_prefix.sam.gz"); $cmd = "bash -c \"set -o pipefail && gunzip -c $out_prefix.sam.gz | samtools view -@ $CPU -F 4 -Sb -o $out_prefix.bam \""; $pipeliner->add_commands( new Command($cmd, "$out_prefix.bam.ok") ); push (@tmpfiles, "$out_prefix.bam"); $cmd = "samtools sort -@ $CPU $out_prefix.bam -o $out_prefix.cSorted.bam"; $pipeliner->add_commands( new Command($cmd, "$out_prefix.cSorted.bam.ok") ); $pipeliner->run(); if (-s "$out_prefix.cSorted.bam") { $cmd = "samtools index $out_prefix.cSorted.bam"; &process_cmd($cmd); } unlink(@tmpfiles); exit(0); } #### sub add_zcat_fifo_and_add_hisat_params { my ($reads) = @_; $reads =~ s/^\s+|\s+$//g; my @adj_reads_list; my $counter = 0; my @read_files = split(/\s+/, $reads); my @updated_read_filenames; foreach my $reads_file (@read_files) { $counter++; if ($reads_file =~ /\.gz$/) { $reads_file = "<(zcat $reads_file)"; } push (@updated_read_filenames, $reads_file); # add decoration $reads_file = (scalar(@read_files) == 2) ? "-$counter $reads_file" : "-U $reads_file"; push (@adj_reads_list, $reads_file); } if ($run_as_single_flag) { return("-U " . join(",", @updated_read_filenames)); } else { my $adj_reads = join(" ", @adj_reads_list); return($adj_reads); } } #### sub process_cmd { my ($cmd) = @_; print STDERR "CMD: $cmd\n"; #return; my $ret = system($cmd); if ($ret) { die "Error, cmd: $cmd died with ret ($ret)"; } return; }