CSV handling with NamedMaps, NamedSeqs and Arrays
#!/usr/bin/env anduril
import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._
object csvFiddler {
    val seed = Randomizer(
        columns=1,
        rows=5,
        distribution="normal",
        mean=0
    )
    val src = NamedSeq[Randomizer]("src")  // Populate a NamedSeq with Components
    val src2 = NamedSeq[Any]("src2")       // Populate another with the Any type
    for ( rowMap <- iterCSV(seed.content) ) {
        info(rowMap.mkString(", "))
        src += Randomizer(
            columns=5,
            rows=20,
            distribution="normal",
            mean=rowMap("Column1").toDouble
        )
    }
    for ( i <- "1,2,3, 4,5".split(",") ) {
        src2 += Randomizer(
            columns=5,
            rows=20,
            distribution="normal",
            mean=i.toDouble
        )
    }
    // After the next line, src2 holds a mixture of objects: Randomizer component instances and an integer
    src2 += 123
    // Convert NamedSeq to an array, and join the outputs
    val joined_src = CSVListJoin(in=src)
    val split_src = CSVSplit(in=joined_src, labelCol="file", includeLabelCol=false)
    // Iterate over array, and plot contents
    val plots = NamedMap[Plot2D]("plots")
    val plots2 = NamedMap[Latex]("plots2")
    val transposes = NamedMap[MatrixTranspose]("transposes")
    for ( (k,v) <- iterArray(split_src.out) ) {
        info(k + ", " + v.getAbsolutePath)
        plots(k) = Plot2D(
            x=split_src.out(k),
            y=split_src.out(k),
            xColumns="Column1",
            yColumns="Column2"
        )
        plots2(k) = Plot2D(
            x=split_src.out(k),
            y=split_src.out(k),
            xColumns="Column1",
            yColumns="Column2"
        ).out
        transposes(k) = MatrixTranspose(split_src.out(k).force())
    }
    // Convert NamedMap plots to array, and join the files in one folder
    val plotFiles = Array2Folder(plots.values, fileMode=".")
    for ( (k,v) <- iterFolder(plotFiles.out) ) {
        info(k + ", " + v.getAbsolutePath + ", " + new java.util.Date(v.lastModified()) )
    }
    val plotFiles2 = Array2Folder(plots2.values, fileMode=".")
    // Compare two files in the Seq. Note that src2 holds a mixture of objects,
    // so you have to cast the element to a concrete type:
    val seq_compare = StatisticalTest(
        matrix=src(0),
        matrix2=src2(1).asInstanceOf[Randomizer],
        byRow=false,
        targetColumns="Column1,Column2",
        referenceColumns="Column1,Column2"
    )
    // Compare two files in a NamedMap.
    val map_compare = StatisticalTest(
        matrix=transposes("1"),
        matrix2=transposes("2"),
        byRow=false,
        targetColumns="Row1,Row2",
        referenceColumns="Row1,Row2"
    )
    // Compare two files in an array.
    // Note the use of .force(): the array items are CSV, but the component expects Matrix
    val array_compare = StatisticalTest(
        matrix=split_src.out("1").force(),
        matrix2=split_src.out("2").force(),
        byRow=false,
        targetColumns="Column1,Column2",
        referenceColumns="Column1,Column2"
    )
}
BashEvaluate forarray function
#!/usr/bin/env anduril
import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._
object csvArray {
    /*
     * ForArray using BashEvaluate, for quick array manipulations
     * without parallelization
     */
    // Populate a NamedSeq with component instances
    val src = NamedSeq[Randomizer]("src")
    for (i <- 1 to 5) {
        src += Randomizer(columns=5, rows=20, distribution="normal", mean=i)
    }
    // Convert the NamedSeq to an array
    val csvArr = makeArray(src)
    // The BashEvaluate forarray function:
    // write a script file that uses the variables $key and $file. The stdout is
    // stored as a new array output.
    val kilmer = BashEvaluate(
        array1=csvArr,
        script=
            """
            echo 'echo $key >> out1; wc $file' > out2
            forarray out2
            """
    )
    val entine = Array2Folder(kilmer.arrayOut1)
    info(io.Source.fromFile(kilmer.out1.content).mkString("\n"))
    /*
     * Arrays using BashEvaluate, with parallelization
     */
    val iant = NamedMap[BashEvaluate.gettype]("iant")
    for ( (k,v) <- iterArray(csvArr) ) {
        iant(k) = BashEvaluate(
            var1=csvArr(k),
            script="echo "+k+" >> out1; wc @var1@ > out2"
        )
    }
    // Collect the output port from the component instances
    val iant_array = makeArray(iant mapValues {_.out2})
    /*
     * QuickBash, an alternative to BashEvaluate that handles only a
     * single input and output port
     */
    val quick_kilmer = Folder2Array(QuickBash(
        in=csvArr,
        script=
            """
            echo 'wc $file' > "$tmp/loopscript.sh"; mkdir $out
            forarray "$tmp/loopscript.sh" keys in out
            """
    ))
    /*
     * QuickBash in a for loop, for parallelization.
     * Note: here we cannot use integers as array keys!
     */
    val ium = NamedMap[QuickBash.gettype]("ium")
    for ( (k,v) <- iterArray(csvArr) ) {
        ium(k) = QuickBash(in=csvArr, script="wc $key"+k+" > $out")
    }
    val ium_array = makeArray(ium)
}
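If the integer keys in the loop above cause problems, one possible workaround (an illustrative sketch, not part of the original examples; the object name, key prefix and the simple wc script are assumptions) is to derive string keys when filling the NamedMap:

#!/usr/bin/env anduril
import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._

object csvArrayKeys {
    // Same test array as in csvArray above
    val src = NamedSeq[Randomizer]("src")
    for (i <- 1 to 5) {
        src += Randomizer(columns=5, rows=20, distribution="normal", mean=i)
    }
    val csvArr = makeArray(src)
    // Prefix the numeric key with a string so the NamedMap keys are not plain integers
    val ium2 = NamedMap[QuickBash.gettype]("ium2")
    for ( (k,v) <- iterArray(csvArr) ) {
        ium2("item"+k) = QuickBash(in=csvArr(k), script="wc $in > $out")
    }
    val ium2_array = makeArray(ium2)
}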
Include statement (or the lack of it)
Anduril 2 does not have an equivalent of the Anduril 1.x include statement. Instead, included files work like packages of functions and values, with no shared global namespace:
File setup.scala
package myPackage
package object setup {
    // It's easier to save everything as strings than as mixed types!
    val constants = Map[String,String](
        "script" -> """echo -e hello\\nworld > $out""",
        "value" -> "600"
    )
}
File include.scala
package myPackage
import anduril.tools._
import anduril.builtin._
package object myInclude {
    def myPart(s: BinaryFile): (BinaryFile,String) = {
        val c = QuickBash(s, script="rev $in > $out")
        val d = QuickBash(c.out, script="tac $in > $out")
        (d.out, "A Fine String We Have Here")
    }
}
File runme.scala
#!/usr/bin/env anduril
//$OPT -s include.scala -s setup.scala
import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._
import myPackage.myInclude._
import myPackage.setup._
object network01 {
    info("Constant value plus 300= "+(constants("value").toInt+300))
    val b = QuickBash(script=constants("script"))
    val f = myPart(b.out)
    val g = QuickBash(f._1, script="cat $in > $out; echo "+f._2+" >> $out")
}
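As a small Scala note (not part of the original example), the tuple returned by myPart can also be destructured into named values instead of using ._1 and ._2, so the last three lines of network01 could equivalently be written as:

    val b = QuickBash(script=constants("script"))
    // Destructure the (BinaryFile, String) tuple returned by myPart
    val (revFile, message) = myPart(b.out)
    val g = QuickBash(revFile, script="cat $in > $out; echo "+message+" >> $out")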
Header Syntax Examples
#!/usr/bin/env anduril
//$OPT --threads 10
//$OPT --pipe "tee permanent.log.file"
//$OPT --pipe "anduril-pager --ls"
//$OPT --wrapper slurm-prefix
//$PRE echo this gets run before pipeline
//$POST echo and this after
- The anduril executable now works as a shebang launcher.
- Options to anduril run are given via //$OPT lines.
- Use --pipe to pipe the output of anduril; tee saves it to a file and also passes it on to stdout.
- anduril-pager needs stdout to colorize the pipeline output.
- --wrapper sets a command that prefixes all component calls.
- //$PRE runs shell commands before the pipeline.
- //$POST runs shell commands after the pipeline.
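For context, here is a minimal sketch of a complete script that combines such a header with a pipeline object; the option values, the object name and the single BashEvaluate call are only illustrative:

#!/usr/bin/env anduril
//$OPT --threads 4
//$PRE echo this gets run before pipeline
//$POST echo and this after
import anduril.builtin._
import anduril.tools._
import org.anduril.runtime._

object headerDemo {
    // One component call; the redirection writes to the component's out1 output file
    val hello = BashEvaluate(script="echo hello world > out1")
}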