# Retrieves consensus predicted functional mature miRNA sequence across samples for set of putative novel expressed miRNAs library(componentSkeleton) execute <- function(cf) { # Create array output dir. array.out <- get.output(cf, "array") dir.create(array.out) # Array object array.output <- Array.new() # Parameters + input execDir <- get.parameter(cf,"execDir","string") execPrefix <- get.parameter(cf,"execPrefix","string") DE <- CSV.read(get.input(cf, "novel")) ref <- CSV.read(get.input(cf, "fqFiles")) # Match DE positions with predicted mature DE_pos <- do.call(rbind,strsplit(as.character(DE[,"id"]),"_")) # Ensure execDir exists and ends with a "/" if (!file.exists(execDir)) { write.error(paste("Invalid directory path:", execDir,sep="")) return(1) } else { test <- unlist(strsplit(execDir,"")) if (test[length(test)] != "/") { execDir <- paste(execDir,"/",sep="") } # Find folder names to iterate samples <- paste(execPrefix,ref[,1],sep="-") files <- list.files(path=execDir) samples <- files[grep(paste(samples,collapse="|"),files)] DE_mirnas <- list() # Retrieve "raw" sequenced reads predicted to be novel miRNAs from each sample for (pos in 1:nrow(DE_pos)) { mature_temp <- c() for (sample in samples) { # Ensure file exists filePath <- paste(execDir,sample,"/folder1/newMicroRNA_genome.txt",sep="") if (!file.exists(filePath)) { write.error(paste("Invalid directory or file does not exist:", filePath,sep="")) return(1) } else { predicted_mtr <- read.csv(filePath, sep="\t", quote="\"") predicted_mtr[,2] <- gsub("MT",25,predicted_mtr[,2]) predicted_mtr[,2] <- gsub("X",23,predicted_mtr[,2]) predicted_mtr[,2] <- gsub("Y",24,predicted_mtr[,2]) found <- which(predicted_mtr[,2] == DE_pos[pos,1] # chrom & predicted_mtr[,3] >= as.numeric(DE_pos[pos,2]) # chromStart & predicted_mtr[,4] <= as.numeric(DE_pos[pos,3]) # chromEnd & predicted_mtr[,5] == DE_pos[pos,4]) # strand # If sublocation found, extract predicted mature sequence! Else, skip. if (length(found) > 0) { mature_temp <- rbind(mature_temp,as.character(predicted_mtr[found,10])) # read.cluster.sequence } } } # Save all predicted mature sequences to list DE_mirnas[[as.character(DE[pos,"id"])]] <- mature_temp if (!is.null(nrow(mature_temp))) { CSV.write(paste(array.out,"/",as.character(DE[pos,"id"]),".csv",sep=""),mature_temp) array.output <- Array.add(array.output,as.character(DE[pos,"id"]),paste(as.character(DE[pos,"id"]),".csv",sep="")) } } DE_majority <- list() # For each DE miRNA, take the most frequently occurring predicted sequence for (len in 1:length(DE_mirnas)) { if (nrow(DE_mirnas[[len]]) != 0) { freq <- table(DE_mirnas[[len]]) majority_seq <- names(freq)[which(freq == max(freq))] # if sequences are tied for frequency, take the shorter sequence if (length(majority_seq) > 1) { majority_seq <- majority_seq[which(nchar(majority_seq) == min(nchar(majority_seq)))[1]] } DE_majority[[names(DE_mirnas)[len]]] <- majority_seq } else { DE_majority[[names(DE_mirnas)[len]]] <- NA } } # Merge predicted mature sequence with DE table ID <- DE[,1] predicted_mature <- do.call(rbind,DE_majority) DE_new <- as.matrix(cbind(ID, predicted_mature=rep("", length(ID)),DE[,-1])) for (r in 1:nrow(predicted_mature)) { found <- grep(rownames(predicted_mature)[r],DE_new[,1]) if (length(found) > 0) { DE_new[found,"predicted_mature"] <- predicted_mature[r,1] } } } # Write outputs CSV.write(get.output(cf,"mature"),DE_new) Array.write(cf,array.output,"array") return(0) } main(execute)