# R version 2.7 # Arguments: # - groupNum: number of cluster # - inFilename: an input filename, the file is a tab-delimited text file # whose rows and columns are indexed by the experiment IDs and whose cells # contains the pairwise experiment dissimilarity scores # - outResFilename: an output filename, the file contains clustering # results with silhouette scores # - outIdFilename: an output filename, the file whose rows are experiment IDs # for a cluster, one cluster per row groupNum<-gsub("groupNum=","",grep("groupNum=",commandArgs(),value=T)) groupNum<-as.numeric(groupNum) inFilename<-gsub("inFilename=","",grep("inFilename=",commandArgs(),value=T)) outResFilename<-gsub("outResFilename=","",grep("outResFilename=",commandArgs(),value=T)) outIdFilename<-gsub("outIdFilename=","",grep("outIdFilename=",commandArgs(),value=T)) # Load cluster library library(cluster) packageDescription("cluster") ############################################## # write clustering results ############################################## writeStudyIDs<-function(res, filename, gNum) { for(i in 1:gNum) { ids<-rownames(subset(res,res[,1]==i)) write(ids,filename,ncolumn=length(ids), append=TRUE, sep="\t") } } ############################################## # load matrix file ############################################## data<-read.table(inFilename,strip.white=TRUE, sep ="\t", header=TRUE) # Retrieve data without studyID column subset<-as.matrix(data[,2:ncol(data)]) # Calculate distance matrix using default distance measure method matrix_dist<-as.dist(subset) # Hierarchical cluster analysis # The agglomeration method should be (an unambiguous abbreviation of) one of # "ward", "single", "complete", "average", "mcquitty", "median" or "centroid". hclust<-hclust(matrix_dist, method="average") ####################################################################################### # cut a hierarchical cluster tree into several groups by specifying the desired number ####################################################################################### hclustK<-cutree(hclust, k=groupNum) # Compute or Extract Silhouette Information from Clustering sil.hclust<-silhouette(x=hclustK, dist=matrix_dist) sorted_sil<-sortSilhouette(sil.hclust) #is.matrix(sorted_sil) sorted_sil<-as.matrix(sorted_sil) #sorted_sil # write result in a file write.table(sorted_sil, outResFilename, quote=FALSE, sep="\t") cbind(hclustK) # write experiment IDs for each cluster per row in a file writeStudyIDs(cbind(hclustK),outIdFilename, groupNum)