2. Bulk RNA-Seq Analysis Example.

Here is an example pipeline to use the Star2Pass, which runs STAR aligner in two pass mode, for aligning RNA-seq data.

There are two main steps:

Inputs for the pipeline include:

Outputs of the pipeline are:

Dependencies:

STAR

#!/usr/bin/env anduril
//$OPT --threads 100
//$OPT --wrapper slurm-prefix

import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._
import anduril.microarray._
import anduril.sequencing._

object twoPassAlignment {

  val reads = INPUT(path="files/reads")
  val mates = INPUT(path="files/mates")

  val annotation = INPUT(path="files/annotation.gtf")
  val reference = INPUT(path="files/genome.fa")
  val STARparameters=INPUT(path="files/parametersDefault")

  val genome = STARGenome(
      genomeFasta     = reference,
      annotation      = annotation,
      genomeParameters= "--sjdbOverhang 64",
      threads          = 5
    )

  val custom = QuickBash(
      in    = reads,
      script=
        """
          tsvecho Key Custom > $out
            for i in $( tail -n +2 $in/_index | cut -f1 )
            do
                tsvecho $i "--outSAMattrRGline ID:$key1 PU:unknown LB:Illumina SM:$key1 PL:Illumina" >> $out
            done
        """
    )

  val twoPass = Star2Pass(
      reference         = reference,
      reads             = reads,
      mates             = mates,
      genome            = genome.genome,
      annotation        = annotation,
      parameterFile     = STARparameters,
      custom            = custom.out,
      mainAlignmentType = "toTranscriptome",
      genomeParameters  = "--sjdbOverhang 64",
      genomeLoad        = "LoadAndRemove",
      readFilesCommand  = "zcat",
      useEncodeParams   = true,
      alignParameters   = "--outSAMtype BAM Unsorted --outReadsUnmapped Fastx --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --limitBAMsortRAM 10000000000",
      memory          = 39000,
      threads         = 15,
      execute         = "once"
    )

  val getGenomeAligned = QuickBash(
      in=twoPass.folder,
      script=
          """
          awk -F"\t" '{if (NR>1) {print $0"/Aligned.out.bam"} else {print $0}}' OFS="\t" $in/_index > $out
          """
    ).out

  val genomeAlignments = CSV2Array(in=getGenomeAligned,keys="column").out

  OUTPUT(twoPass.alignments)
  OUTPUT(genomeAlignments)
}