CSV handling with NamedMaps, NamedSeqs and Arrays

#!/usr/bin/env anduril

import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._

// Example pipeline: fills NamedSeq collections with Randomizer components,
// joins and re-splits their CSV outputs, plots and transposes every split file
// into NamedMaps, and finally runs StatisticalTest on items taken from a
// NamedSeq, a NamedMap and an array.
object csvFiddler {

    // Seed table: one column, five rows. Each row's value is used below as the
    // mean of a further Randomizer.
    val seed = Randomizer(
        columns=1,
        rows=5,
        distribution="normal",
        mean=0
    )

    val src  = NamedSeq[Randomizer]("src") // Populate a NamedSeq with Components
    val src2 = NamedSeq[Any]("src2") // Populate another with Any type

    // One Randomizer per row of the seed CSV; the row is logged and its
    // "Column1" value (a string, hence .toDouble) becomes the mean.
    for ( rowMap <- iterCSV(seed.content) ) {
        info(rowMap.mkString(", "))
        src += Randomizer(
            columns=5,
            rows=20,
            distribution="normal",
            mean=rowMap("Column1").toDouble
        )
    }
    // One Randomizer per item of a comma-separated list. The " 4" item still
    // parses: the JVM double parser ignores leading/trailing whitespace.
    for ( i <- "1,2,3, 4,5".split(",") ) {
        src2 += Randomizer(
            columns=5,
            rows=20,
            distribution="normal",
            mean=i.toDouble
        )
    }
    // Seq src2 now contains a mixture of objects, Randomizer component instance, and an integer
    src2 += 123

    // Convert NamedSeq to an array, and join the outputs
    val joined_src=CSVListJoin(in=src)
    // Split the joined table again, labelled by its "file" column.
    // NOTE(review): exact labelCol / includeLabelCol semantics are defined by
    // CSVSplit, not visible here - confirm against the component documentation.
    val split_src=CSVSplit(in=joined_src, labelCol="file", includeLabelCol=false)
    // Iterate over array, and plot contents
    // plots holds the Plot2D component instances, plots2 only their .out ports
    // (typed Latex here), transposes holds MatrixTranspose component instances.
    val plots      = NamedMap[Plot2D]("plots")
    val plots2     = NamedMap[Latex]("plots2")
    val transposes = NamedMap[MatrixTranspose]("transposes")

    // k is the array key, v the file behind it; every map below is keyed by k.
    for ( (k,v) <- iterArray(split_src.out) ) {
        info(k + ", " + v.getAbsolutePath)

        // Store the whole component instance
        plots(k) = Plot2D(
            x=split_src.out(k),
            y=split_src.out(k),
            xColumns="Column1",  
            yColumns="Column2"
        )

        // Same plot, but store only its output port
        plots2(k) = Plot2D(
            x=split_src.out(k),
            y=split_src.out(k),
            xColumns="Column1",
            yColumns="Column2"
        ).out

        // .force(): array items are CSV, the component expects Matrix
        // (see the note above array_compare below)
        transposes(k) = MatrixTranspose(split_src.out(k).force())
    }
    // Convert NamedMap plots to array, and join the files in one folder
    val plotFiles  = Array2Folder(plots.values, fileMode=".")

    // Log key, absolute path and last-modified time of every file in the folder
    for ( (k,v) <- iterFolder(plotFiles.out) ) {
        info(k + ", " + v.getAbsolutePath + ", " + new java.util.Date(v.lastModified()) )
    }
    // Same conversion for the map that holds output ports instead of components
    val plotFiles2 = Array2Folder(plots2.values, fileMode=".")

    // Compare two files in the Seq. Note the src2 has a mixture of objects,
    // and you have to cast a type for it:
    val seq_compare=StatisticalTest(
        matrix=src(0),
        matrix2=src2(1).asInstanceOf[Randomizer],
        byRow=false,
        targetColumns="Column1,Column2",
        referenceColumns="Column1,Column2"
    )

    // Compare two files in a NamedMap.
    // The inputs are transposed, so the columns are addressed as Row1, Row2.
    val map_compare=StatisticalTest(
        matrix=transposes("1"),
        matrix2=transposes("2"),
        byRow=false,
        targetColumns="Row1,Row2",
        referenceColumns="Row1,Row2"
    )

    // Compare two files in an array.
    // Note the use of .force(), array items are CSV, component expects Matrix
    val array_compare=StatisticalTest(
        matrix=split_src.out("1").force(),
        matrix2=split_src.out("2").force(),
        byRow=false,
        targetColumns="Column1,Column2",
        referenceColumns="Column1,Column2"
    )
}

BashEvaluate forarray function

#!/usr/bin/env anduril
import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._
// Example pipeline: four ways of running a shell command over every item of an
// array - BashEvaluate and QuickBash, each once with the forarray helper (one
// component instance, no parallelization) and once in a Scala for loop (one
// component instance per item, which allows parallelization).
object csvArray {

    /*
     *  ForArray using BashEvaluate, for quick array manipulations
     *  and no parallelization
     */

    // Populate a NamedSeq with five Randomizer components (means 1 to 5)
    val src  = NamedSeq[Randomizer]("src")
    for (i <- 1 to 5) {
        src+=Randomizer(columns=5, rows=20, distribution="normal", mean=i)
    }

    // Convert the NamedSeq to an array
    val csvArr=makeArray(src)

    // BashEvaluate for array function:
    //   write a file that uses variables  $key and $file. the stdout is stored
    //   as a new array output
    val kilmer=BashEvaluate(
        array1=csvArr,
        script=
        """
        echo 'echo $key >> out1; wc $file' > out2
        forarray out2
        """
    )

    val entine=Array2Folder(kilmer.arrayOut1)

    // Log the keys the script appended to out1. The file is read line by line:
    // scala.io.Source is an Iterator[Char], so calling mkString("\n") on the
    // Source itself would insert a newline between every single character.
    // The Source is closed afterwards so the file handle is not leaked.
    val keyLog = io.Source.fromFile(kilmer.out1.content)
    try info(keyLog.getLines().mkString("\n"))
    finally keyLog.close()

    /*
     *  Arrays using BashEvaluate, with parallelization
     */
    // One BashEvaluate instance per array item, stored under the item's key
    val iant=NamedMap[BashEvaluate.gettype]("iant")
    for ( (k,v) <- iterArray(csvArr) ) {
        iant(k) = BashEvaluate(
            var1=csvArr(k),
            script="echo "+k+" >> out1; wc @var1@ > out2"
        )
    }

    // Collect the output port from the component instances
    val iant_array = makeArray(iant mapValues {_.out2})

    /*
     *  QuickBash, an alternative to BashEvaluate, that handles only
     *  single input and output port
     */
    val quick_kilmer=Folder2Array(QuickBash(
        in=csvArr,
        script=
        """
        echo 'wc $file' > "$tmp/loopscript.sh"; mkdir $out
        forarray "$tmp/loopscript.sh" keys in out
        """
    ))

    /*
     *  QuickBash in a for loop, for parallelization
     *  Note, here we cannot use integers as array keys!
     */
    // NOTE(review): unlike the BashEvaluate loop above, every instance receives
    // the whole array (in=csvArr, not csvArr(k)), and the script reads a shell
    // variable named "key" followed by the item key (e.g. $key1) that nothing
    // in this file defines. Confirm against the QuickBash documentation whether
    // in=csvArr(k) with "wc $in > $out" was intended.
    val ium=NamedMap[QuickBash.gettype]("ium")
    for ( (k,v) <- iterArray(csvArr) ) {
        ium(k) = QuickBash(in=csvArr,script="wc $key"+k+" > $out")
    }
    val ium_array = makeArray(ium)
}

Include statement (or the lack of it)

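Anduril 2 has no equivalent of the Anduril 1.x include statement. Instead, code to be shared is placed in separate files as package objects and imported explicitly; such files behave like functions, and there is no global namespace: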

File setup.scala

package myPackage
/** Shared constants for the pipeline scripts that import this package object. */
package object setup {
    /** Named settings, all kept as strings so that a single map type suffices;
      * numeric entries are converted by the caller (e.g. with `.toInt`).
      */
    val constants: Map[String, String] = Map(
        "script" -> """echo -e hello\\nworld > $out""",
        "value"  -> "600"
    )
}

File include.scala

package myPackage
import anduril.tools._
import anduril.builtin._
// Reusable pipeline fragment, exposed as a function in a package object so
// that other scripts can import and call it.
package object myInclude {
    /** Runs two chained QuickBash steps on a file.
      *
      * @param s input file, passed as the input of the first step
      * @return a pair: the output port of the second step, and a fixed
      *         example string
      */
    def myPart(s: BinaryFile): (BinaryFile,String)={
        // Step 1: run `rev` on the input file
        val c=QuickBash(s, script="rev $in > $out")
        // Step 2: run `tac` on the output of step 1
        val d=QuickBash(c.out, script="tac $in > $out")
        (d.out, "A Fine String We Have Here")
    }
}

File runme.scala

#!/usr/bin/env anduril
//$OPT -s include.scala -s setup.scala

import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._
import myPackage.myInclude._
import myPackage.setup._
// Main pipeline: uses the constants imported from myPackage.setup and the
// myPart function imported from myPackage.myInclude.
object network01 {
    // Constants are stored as strings, so convert before doing arithmetic
    info("Constant value plus 300= "+(constants("value").toInt+300))
    // Run the shell script stored in the constants map
    val b = QuickBash(script=constants("script"))
    // f is the (BinaryFile, String) pair returned by myPart
    val f = myPart(b.out)
    // Copy the returned file to the output and append the returned string to it
    val g = QuickBash(f._1, script="cat $in > $out; echo "+f._2+" >> $out")  
}

Header Syntax Examples

#!/usr/bin/env anduril
//$OPT --threads 10
//$OPT --pipe "tee permanent.log.file"
//$OPT --pipe "anduril-pager --ls"
//$OPT --wrapper slurm-prefix
//$PRE echo this gets run before pipeline
//$POST echo and this after
  1. The anduril executable now works as a shebang launcher
  2. Options for anduril run are given via //$OPT
  3. Use --pipe to pipe the output of anduril through a command; tee saves it to a file and also passes it on to stdout
  4. The pager needs stdout in order to colorize the pipeline output
  5. --wrapper sets a command that is prefixed to all component calls
  6. //$PRE runs shell commands before the pipeline
  7. //$POST runs shell commands after the pipeline