##inputs library(componentSkeleton) #Paneldoc execute <- function(cf){ ################################################################################################### ## PanelDoc ## ################################################################################################### run_PanelDoc <- get.parameter(cf, "run_PanelDoc") if (run_PanelDoc == "TRUE"){ criteria <- list( ### Set criteria for analysis ### prep.partition = get.parameter(cf, "prep_partition"), # if TRUE generate genomic data for bed platform, else already present prep.partition.platform = get.parameter(cf, "partition_platform"), generate.median = get.parameter(cf, "generate_median"), # if TRUE, then generate raw median coverage data gene.graphics = get.parameter(cf, "gene_graphics"), #default F, if partitions represent genes, then TRUE, else FALSE. If true, gene is plotted in graphical output output.bedgraph = get.parameter(cf, "output_bedgraph"), # if TRUE, bedgraphs are written for raw coverage and normalized ratio data output.normalized = get.parameter(cf, "output_normalized"), # if TRUE, output data is written for raw and normalized coverage. output.ratio = get.parameter(cf, "output_ratio"), # if TRUE, ratio table is written for each partition. This is useful if another segmentation method is to be used call.cnvs = get.parameter(cf, "call_cnvs"), # if TRUE, call CNVs using sliding window method. If not TRUE, no CNV calling performed annotate.cnvs = get.parameter(cf, "annotate_cnvs"), # if TRUE, annotate CNVs to genes sample.type = get.parameter(cf, "calling_algorithm"), # different cnv calling algorithm in place for tumor (enter "Tumor") DNA, else "Blood" ### Set variant calling criteria ### minimum.coverage =as.numeric(get.parameter(cf, "minimum_coverage", type='string')), # set minimum median coverage for base inclusion in variant calling minimum.bait = get.parameter(cf, "minimum_bait"), # set minimum distance from non-targeted region for base inclusion in variant calling maximum.selfchain = get.parameter(cf, "maximum_selfchain"), # set maximum self-chain repeat count for base inclusion in variant calling gain.many.seed.signal = get.parameter(cf, "gain_many_seed_signal"), # expected signal for gain seed generation loss.none.seed.signal = get.parameter(cf, "loss_none_seed_signal"), # expected signal for loss seed generation gain.many.extend.signal = get.parameter(cf, "gain_many_extend_signal"), # expected signal for gain seed extension loss.none.extend.signal = get.parameter(cf, "loss_none_extend_signal"), # expected signal for loss seed extension gain.many.call.signal = get.parameter(cf, "gain_many_call_signal"), # signal criteria for gain call loss.none.call.signal = get.parameter(cf, "loss_none_call_signal"), # signal criteria for loss call gain.seed.signal = get.parameter(cf, "gain_seed_signal"), # expected signal for gain seed generation loss.seed.signal = get.parameter(cf, "loss_seed_signal"), # expected signal for loss seed generation gain.extend.signal = get.parameter(cf, "gain_extend_signal"), # expected signal for gain seed extension loss.extend.signal = get.parameter(cf, "loss_extend_signal"), # expected signal for loss seed extension gain.call.signal = get.parameter(cf, "gain_call_signal"), # signal criteria for gain call loss.call.signal = get.parameter(cf, "loss_call_signal"), # signal criteria for loss call minimum.zscore = get.parameter(cf, "minimum_zscore"), # minimum z score for gc content normalization minimum.base.window = get.parameter(cf, "minimum_base_window"), # minimum window base count for call minimum.base.pass = get.parameter(cf, "minimum_base_pass"), # minimum base count that pass criteria for call window.size = as.numeric(get.parameter(cf, "window_size")), # window size for scanning pass.count = get.parameter(cf, "pass_count"), # number of bases in window required to pass criteria for call max.uncertainty = get.parameter(cf, "max_uncertainty"), # maximum permitted uncertainty for state calling using mclust for tumors. Lower = more stringent. Not relevant for slifing window CNV calling. mclust.states = get.parameter(cf, "mclust_states"), # maximum number of states for mixture model. Max is 9. Analysis optimized for 2 states. More states likely to generate false positives. experiment.name = "PanelDoc_Run", ### Set file locations (provide full path) ### # directory where coverage/ratio data is stored coverage.directory = get.parameter(cf, "coverage"), # directory where platform data is stored #platform.directory = paste(main_directory,"Platform/", sep = ""), # directory where genome information is stored genome.directory = get.parameter(cf,"Genome"), # directory where gene annotation information is stored #annotation.directory = paste(main_directory,"Annotation/", sep = ""), ### Set input data filenames (must match expected format) # file with sample information for processing batch in csv format sample.filename = get.parameter(cf, "samples"), # file with information for bait probes in bed format bait.filename = get.parameter(cf, "bedfile2"), # file with data on how genomic coverage data is stored (typically chromosomes) partition.filename = get.parameter(cf, "partition_regions"), # file with all known CNVs for normalization purposes (including large known CNVs should improve normalization) knowncnvs.filename = get.parameter(cf, "knowncnvs"), # refesq file from UCSC used for plotting (should be in annotation.directory) gene.filename = get.parameter(cf,"refFlat"), #chainSelf chain.self =get.parameter(cf, "chain_self"), # source file for functions source.filename = "comp_functions_split.R") source(criteria$source.filename) ### Create folder structure### dir.list <- list( bedgraph.dir = get.output(cf, 'bedgraph'), calls.dir = get.output(cf, 'calls'), raw.dir = get.output(cf, 'raw'), normalized.dir = get.output(cf, 'normalized'), PDFs.dir = get.output(cf, 'PDFs'), QC_Metrics.dir = get.output(cf, 'QC_Metrics'), experiment.dir = get.output(cf, 'General_output')) dir.create(dir.list$experiment.dir, recursive = TRUE) dir.create(dir.list$bedgraph.dir, recursive=TRUE) dir.create(dir.list$calls.dir, recursive=TRUE) dir.create(dir.list$raw.dir, recursive=TRUE) dir.create(dir.list$PDFs.dir, recursive=TRUE) dir.create(dir.list$QC_Metrics.dir, recursive=TRUE) dir.create(dir.list$normalized.dir, recursive=TRUE) ##Run analysis## run.analysis(criteria, dir.list) } ################################################################################################### ## CNVPanelizer ## ################################################################################################### run_CNVPanelizer <- get.parameter(cf, "run_CNVPanelizer") CNVPanelizer.dir <- get.output(cf, "CNVPanelizer_results") dir.create(CNVPanelizer.dir, recursive = TRUE) if (run_CNVPanelizer =="TRUE"){ ##CNVPanelizer### bedFilepath <- get.parameter(cf,"bedfile") sampleDirectory <- get.parameter(cf,"bams") referenceDirectory <- get.parameter(cf, "normals") removePcrDuplicates <- get.parameter(cf, "Remove_duplicates") source("CNVscript.R") reportTables <- call.CNVPanelizer(bedFilepath, sampleDirectory, referenceDirectory, removePcrDuplicates) write.csv(reportTables, file = file.path(CNVPanelizer.dir, "CNVPanelizer_resultstable.csv"), row.names = T, col.names = T) } ################################################################################################### ## PanelDoc: Filtering ## ################################################################################################### source("shortlisting.R") run_Filtering <- get.parameter(cf, "run_Filtering") if (run_Filtering == "TRUE" & run_PanelDoc == "TRUE"){ call.filtering(dir.list$calls.dir) } ################################################################################################### ## PanelDoc: Change results to per gene ## ################################################################################################### run_per_gene <- get.parameter(cf, "run_per_gene") genelist <- get.parameter(cf, "genelist_for_combining") if (run_per_gene == "TRUE" & run_PanelDoc == "TRUE"){ source("shortlisting.R") combined_results_PanelDoc <- call.combine_paneldoc(dir.list$calls.dir, run_Filtering) call.per_gene(combined_results_PanelDoc, dir.list$calls.dir, genelist) print("succeeded with combining") } ################################################################################################### ## Combine PanelDoc and CNVPanelizer results ## ################################################################################################### run_Combine_results <- get.parameter(cf, "run_Combine_results") if (run_Combine_results == "TRUE" & run_PanelDoc == "TRUE"){ call.Combine_results(CNVPanelizer.dir, dir.list$calls.dir) } print("what's going on?") } main(execute)