#bioinformatics.demo<-function(heatmaps=TRUE) #step1 gamma.dis<-read.eda("f:\\clear\\gamma.txt") ###################CHANGED #gamma.dis<-(Gamma5.03[-c(seqs2remove),-c(seqs2remove)]/100) gamma.tax<-read.taxonomy(sequence.taxonomy, gamma.dis) gamma.dis<-gamma.dis[gamma.tax[,3] > 1809 & gamma.tax[,3] < 3030, gamma.tax[,3] > 1809 & gamma.tax[,3] < 3030] #This step is included to eleminate the RDP sequence of Marinobacter(Pseudomonas) #stanier, which exhibits some anomalies and significantly effects the final output #scale. gamma.dis<-gamma.dis[-grep("stanier*", as.character(dimnames(gamma.dis)[[1]])), -grep("stanier*", as.character(dimnames(gamma.dis)[[2]]))] gamma.tax<-gamma.tax[gamma.tax[,3] > 1809 & gamma.tax[,3] < 3030, ] gamma.tax<-gamma.tax[-grep("stanier*", as.character(dimnames(gamma.tax)[[1]])),] gamma.genus.all<-unique(outline1.3[outline1.3[,5]=="Gammaproteobacteria" & outline1.3[,9]=="not.basonym" ,8]) ###################CHANGED if(heatmaps) { pdf.graph("fig1.pdf", horizontal = TRUE) heatmap(gamma.dis) title(main="Unordered Gammaproteobacteria EDA matrix", cex=0.75) dev.off() } gamma.tax<-na.omit(gamma.tax[order(gamma.tax[,3]),]) gamma.dis<-gamma.dis[dimnames(gamma.tax)[[1]], dimnames(gamma.tax)[[1]]] gamma.dis.bak<-gamma.dis gamma.tax.bak<-gamma.tax #step2 pass1<-step2(gamma.tax, gamma.dis) misidentified<-pass1$misidentified taxon.seq.new<-pass1$taxon.seq.new pass1<-pass1[1:(length(pass1)-1)] ###################CHANGED if(heatmaps) { pdf.graph("fig2.pdf", horizontal = TRUE) heatmap(gamma.dis[unlist(pass1), unlist(pass1)]) title(main="First pass ordering, Gammaproteobacteria EDA matrix", cex=0.75) label.heatmap(build.heatmap.label.table(gamma.tax,2, unique(gamma.tax[,2]),unlist(pass1)),0.45) dev.off() } #step3 gamma.dis<-gamma.dis[unlist(pass1), unlist(pass1)] taxon.seq.new2<-setdiff(unlist(pass1),misidentified) pass2<-unlist(taxon.seq.new2) ###################CHANGED if(heatmaps) { pdf.graph("fig3.pdf", horizontal = TRUE) heatmap(gamma.dis[pass2, pass2]) title(main="First pass reorganization of Gammaprotebacteria EDA matrix misidentified strains removed." , cex=0.75) label.heatmap(build.heatmap.label.table(gamma.tax[pass2,],2, unique(gamma.tax[pass2,2]),unlist(pass2)),0.45) dev.off() } #step4 gamma.tax<-gamma.tax[unlist(pass2),] gamma.dis<-gamma.dis[unlist(pass2), unlist(pass2)] pass3<-step2(gamma.tax, gamma.dis) misidentified<-c(misidentified,pass3$misidentified) pass3<-pass3[1:(length(pass3)-1)] pass3<-setdiff(unlist(pass3),misidentified) gamma.dis<-gamma.dis.bak gamma.tax<-gamma.tax.bak ###################CHANGED if(heatmaps) { pdf.graph("fig4.pdf", horizontal = TRUE) heatmap(gamma.dis[unlist(pass3), unlist(pass3)]) title(main="Second pass reorganization of Gammaprotebacteria EDA matrix misidentified strains removed.", cex=0.75) label.heatmap(build.heatmap.label.table(gamma.tax[pass3,],2, unique(gamma.tax[pass3,2]),unlist(pass3)),0.45) dev.off() } correct.placements<-unlist(pass3) ###################CHANGED if(heatmaps) { pdf.graph("fig5.pdf", horizontal = TRUE) heatmap(gamma.dis[misidentified, correct.placements]) title(main="Misidentified vs correctly identified strains", cex=0.75) dev.off() } #step5 excluded.strains<-setdiff(dimnames(gamma.tax)[[1]], c(misidentified,as.vector(correct.placements))) pass4<-unique(c(correct.placements, unique(c(misidentified, excluded.strains)))) ###################CHANGED if(heatmaps) { pdf.graph("fig6.pdf", horizontal = TRUE) heatmap(gamma.dis[unlist(pass4), unlist(pass4)]) title(main="Misidentified and excluded strains added back to matrix" , cex=0.75) label.heatmap(build.heatmap.label.table(gamma.tax[pass3,],2, unique(gamma.tax[pass3,2]),unlist(pass3)),0.45) dev.off() } #step6 revised.gamma.tax<-step6(gamma.dis[c(misidentified, excluded.strains),],gamma.tax, correct.placements) known.errors<-revised.gamma.tax$known.errors[revised.gamma.tax$known.errors!=""] nearest.neighbors<-revised.gamma.tax$nearest.neighbors gamma.genus<-revised.gamma.tax$gamma.genus revised.gamma.tax<-revised.gamma.tax$revised.gamma.tax revised.gamma.tax[,3]<-as.character(revised.gamma.tax[,3]) revised.gamma.tax[known.errors,3]<-"known.error" revised.gamma.tax[,3]<-as.factor(revised.gamma.tax[,3]) #step 7 pass5<-unlist(step7(revised.gamma.tax)) pass5<-setdiff(unlist(pass5), c(misidentified, known.errors)) misidentified<-unique(c(misidentified,dimnames(gamma.dis)[[1]][abs(apply(scale(gamma.dis), 1, max))>2.5])) pass5<-setdiff(unlist(pass5), c(misidentified, known.errors)) ###################CHANGED if(heatmaps) { pdf.graph("fig7.pdf", horizontal = TRUE) heatmap(gamma.dis[unlist(pass5), unlist(pass5)]) title(main="Third pass reorganization of Gammaprotebacteria EDA matrix misidentified and excluded strains repositioned.", cex=0.75) label.heatmap(build.heatmap.label.table(revised.gamma.tax[pass5,],2, unique(revised.gamma.tax[pass5,2]),unlist(pass5)),0.45) dev.off() } #step8 outlier<-0 place.error<-as.vector(unlist(step8(revised.gamma.tax))) revised.gamma.tax[,3]<-as.character(revised.gamma.tax[,3]) revised.gamma.tax[place.error,3]<-"placement error" revised.gamma.tax[,3]<-as.factor(revised.gamma.tax[,3]) #Step9 id.matrix<-gamma.dis[revised.gamma.tax[,3]=="placement error", revised.gamma.tax[,3]!="placement error"] nearest.neighbors<-step9(id.matrix) ###################CHANGED #Note that at this step, I need to accomodate instances where there is no #misplacement error. #id.matrix<-NULL #nearest.neighbors<-NULL #Step10 revised.gamma.tax<-rbind(revised.gamma.tax[revised.gamma.tax[,3]!="placement error",], revised.gamma.tax[revised.gamma.tax[,3]=="placement error",]) gamma.dis<-gamma.dis.bak gamma.tax<-gamma.tax.bak ###################CHANGED gamma.genus<-substring(as.character(revised.gamma.tax[,4]), 1, regexpr("[^a-zA-Z]", as.character(revised.gamma.tax[,4]))-1) pass6<-step7(revised.gamma.tax) seq2remove<-sort(unique(step10(revised.gamma.tax)))[-1] pass6<-setdiff(unlist(pass6), seq2remove) ###################CHANGED if(heatmaps) { pdf.graph("fig8.pdf", horizontal = TRUE) heatmap(gamma.dis[pass6,pass6]) title(main="Final rearragement of Gammaproteobacteria, \nBergey's Family Order", cex=0.75) label.heatmap(build.heatmap.label.table(revised.gamma.tax[pass6,],5, unique(revised.gamma.tax[pass6,5]),pass6),0.45) dev.off() } #Step 11 revised.gamma.tax<-revised.gamma.tax[pass6, ] gamma.dis<-gamma.dis[pass6,pass6] medioids<-step11(revised.gamma.tax) genus.order<-medioids$genus.order #Step12 pass10<-step12(revised.gamma.tax) family.order<-pass10$family.order pass10<-pass10$x revised.gamma.tax<-revised.gamma.tax[pass10,] ###################CHANGED pdf.graph("fig9.pdf", horizontal = TRUE) heatmap(gamma.dis[pass10,pass10]) title(main="Optimized rearragement of Gammaproteobacteria, \nbased on genus mediods", cex=0.75) label.heatmap(build.heatmap.label.table(revised.gamma.tax[pass10,],5, family.order,pass10),0.45) dev.off()