library(componentSkeleton) execute <- function(cf) { expr <- LogMatrix.read(get.input(cf, 'expr')) annotation <- AnnotationTable.read(get.input(cf, 'geneAnnotation')) out.dir <- get.output(cf, 'report') dir.create(out.dir, recursive=TRUE) column.name <- get.parameter(cf, 'column') internal.ids <- rownames(expr) probe.names <- AnnotationTable.get.vector(annotation, column.name, internal.ids) if (any(is.na(probe.names))) { write.log(cf, paste('Warning: some probe names are missing:', paste(internal.ids[is.na(probe.names)], collapse=', '))) } dup.names <- sort(unique(probe.names[duplicated(probe.names)])) tex <- c('\\subsection{Duplicate probe distribution}') # Slide goes from green to red. Red=0, green=2/6. plot.colors <- rev(rainbow(32, start=0, end=2/6)) Z.THRESHOLD <- 2 z.to.color <- function(z) { # norm: [0, 1] norm <- (z + Z.THRESHOLD) / (2*Z.THRESHOLD) norm <- max(min(norm, 1, na.rm=TRUE), 0) stopifnot(norm >= 0 && norm <= 1) # index: [1, length(plot.colors)] index <- 1 + (length(plot.colors)-1)*norm return(plot.colors[index]) } probe.rows <- AnnotationTable.get.vector(annotation, 'Row', internal.ids) probe.cols <- AnnotationTable.get.vector(annotation, 'Col', internal.ids) for (group.id in colnames(expr)) { empty <- rep('', length(dup.names)) fr <- data.frame( Count=empty, Min=empty, Median=empty, Max=empty, SD=empty, stringsAsFactors=FALSE) rownames(fr) <- dup.names first <- TRUE png.basename <- sprintf('DuplicateQuality-%s-%s.png', get.metadata(cf, 'instanceName'), group.id) png.open(file.path(out.dir, png.basename)) par(bg='black') for (probe.name in dup.names) { ids <- names(probe.names[probe.names == probe.name]) if (length(ids) < 5) next values <- expr[ids, group.id] med <- median(values, na.rm=TRUE) stddev <- sd(values, na.rm=TRUE) z.values <- (values - med) / max(0.001, stddev) x <- probe.cols[ids] y <- probe.rows[ids] # Map the range [-Z.THRESHOLD, Z.THRESHOLD] to plot.colors. this.colors <- sapply(z.values, z.to.color) pch <- 19 if (first) { first <- FALSE plot(x=x, y=y, col=this.colors, pch=pch) } else { points(x=x, y=y, col=this.colors, pch=pch) } fr[probe.name, 'Count'] <- as.character(length(ids)) fr[probe.name, 'Min'] <- sprintf('%.1f', min(values, na.rm=TRUE)) fr[probe.name, 'Median'] <- sprintf('%.1f', med) fr[probe.name, 'Max'] <- sprintf('%.1f', max(values, na.rm=TRUE)) fr[probe.name, 'SD'] <- sprintf('%.2f', stddev) } png.close() fr <- fr[nchar(fr$Count) > 0,] if (nrow(fr) > 0) { fr <- fr[order(as.numeric(fr$Count), decreasing=TRUE),] fr <- fr[1:min(nrow(fr),40), ] caption <- sprintf('Duplicate probe distributions for %s', group.id) tex <- c(tex, latex.table(fr, 'lrrrrr', caption, position='!h', use.row.names=TRUE), latex.figure(png.basename, group.id, image.width=10, position='H')) } } tex <- c(tex, '\\clearpage') latex.write.main(cf, 'report', tex) return(0) } main(execute)