require(componentSkeleton) require(ggplot2) require(gridExtra) CHROMOSOMES <- c(1:22, "X", "Y") execute <- function(cf) { ## Read input and parameters cna <- CSV.read(get.input(cf, 'cna')) rearrangements <- CSV.read(get.input(cf, 'rearrangements')) binwidth <- as.numeric(get.parameter(cf, 'binwidth', type="int")) ## Clean chromosomal rearrangements data names(rearrangements) <- c("type","chr","start","end","id") rearrangements <- subset(rearrangements, chr %in% CHROMOSOMES) rearrangements <- droplevels(rearrangements) rearrangements$chr <- factor(rearrangements$chr, CHROMOSOMES) ## Set location of inversion as start site inversion <- subset(rearrangements, type == "inversions") inversion$type <- "inversion" inversion$location <- inversion$start ## Add inversion end points as separate locations inversion.end <- transform(inversion, location = end, id = paste(id,"_end",sep="")) inversion <- rbind(inversion, inversion.end) ## Use center of translocations as location translocation <- subset(rearrangements, type %in% c("translocationEnd","translocationStart")) translocation$location <- (translocation$start + translocation$end)/2 ## Remove distinction of start and end of translocations translocation <- transform(translocation, type = "translocation") ## Clean copy number data names(cna) <- c("id","chr","start","ratio","medianRatio","cn","type") cna <- subset(cna, chr %in% CHROMOSOMES) cna <- droplevels(cna) cna$chr <- factor(cna$chr, CHROMOSOMES) cna$location <- cna$start cna$end <- cna$start cna$type <- tolower(cna$type) ## Combine cleaned data cols <- c("id","chr","location","type") breakpoints <- rbind(inversion[,cols], translocation[,cols], cna[,cols]) types <- sort(unique(breakpoints$type)) breakpoints$type <- factor(breakpoints$type, types) ## Add a reversed order type for nicer plot breakpoints$revtype <- factor(breakpoints$type, rev(types)) ## Convert location to Mb breakpoints$location <- as.double(breakpoints$location)*1e-6 #### Plotting theme_set(theme_bw()) ## Create output folder for the component. output.dir <- get.output(cf, 'report') dir.create(output.dir, recursive=TRUE) instance.name <- get.metadata(cf, 'instanceName') tex <- character() ## Plot bar diagrams of break point counts per chromosome filename <- sprintf('barplot-%s.pdf', instance.name) file <- file.path(output.dir, filename) p <- ggplot(breakpoints) p <- p + geom_bar(aes(x=chr, fill=type)) + facet_wrap(~ type, ncol=2) p <- p + scale_fill_discrete(drop=FALSE, guide=FALSE) invisible(ggsave(file, p, width=7, height=4, units="in")) fig.caption <- "Counts of breakpoints in each chromosome grouped by breakpoint type." tex <- c(tex, latex.figure(file, caption=fig.caption, fig.label=sprintf("fig:%s-barplot",instance.name))) for(crom in CHROMOSOMES) { data <- subset(breakpoints, chr == crom) ## Duplicate data for faceting #data <- transform(data, plottype="breakpoints") #data2 <- transform(data, plottype="histogram") ## Facet method, aligned x axes, but ugly y axis scale in histogram #p <- ggplot(rbind(data,data2), aes(x=location)) + facet_wrap(~plottype, ncol=1, scales="free_y") #p <- p + geom_jitter(data=data, aes(y=revtype,color=type), alpha=0.3) #p <- p + geom_bar(data=data2, aes(y=..count..), binwidth=BINWIDTH*1e-6) #p <- p + scale_y_continuous(breaks=pretty) #p <- p + scale_colour_discrete(drop=FALSE, guide=FALSE) #p <- p + labs(title=sprintf("chr %s",crom), x="location (Mb)", y="") ## Grid arrange method, needs less memory, nicer y scale, but unaligned x axes p <- ggplot(data, aes(x=location)) p1 <- p + geom_jitter(aes(y=revtype,color=type), alpha=0.4, size=2) p1 <- p1 + scale_colour_discrete(drop=FALSE, guide=FALSE) p1 <- p1 + labs(title=sprintf("chr %s",crom), x=NULL, y=NULL) p2 <- p + geom_bar(aes(y=..count..), binwidth=binwidth*1e-6) p2 <- p2 + labs(title=NULL, x="location (Mb)") filename <- sprintf("breakpointLocations-chr%s-%s.png", crom, instance.name) file <- file.path(output.dir, filename) #invisible(ggsave(file, p, width=7, height=4, units="in")) png(file, width=10, height=8, units="in", res=300) #align_plots(p1, p2, width=unit(7,'in'), height=unit(3,'in')) grid.arrange(p1, p2) invisible(dev.off()) fig.caption <- sprintf("Breakpoint locations in chromosome %s.", crom) tex <- c(tex, latex.figure(file, caption=fig.caption, fig.label=sprintf("fig:%s-breakpoints-chr%s",instance.name,crom))) } latex.write.main(cf, 'report', tex) return(0) } main(execute)