#SOSCC algorithm
#George M. Garrity and Timothy G. Lilburn
#Copyright Michigan State University 2004, all rights reserved

# step9: for every row of `temp`, find the column holding the smallest value
# and report the taxonomy of the row item next to the taxonomy of that column.
#
# Arguments:
#   temp      - numeric matrix with row names and column names. Both sets of
#               names are used as row keys into `tax.table`.
#               NOTE(review): presumably a distance matrix (queries in rows,
#               references in columns) - confirm against the caller.
#   tax.table - table indexed by row name with at least the columns
#               "species", "family" and "taxon.seq".
#
# Value:
#   One row per row of `temp`, with columns species, family, taxon.seq (looked
#   up by the row name) followed by MPI.species and MPI.family (the species and
#   family looked up by the name of the best-matching column).
#
# Details:
#   Values are perturbed with jitter() and ranked within each row; jittering is
#   repeated until no row contains tied ranks, so every row has exactly one
#   rank-1 column. Because of the jitter, the winner among exactly tied values
#   is random.
step9 <- function(temp, tax.table) {
  print("inside step9")

  if (is.null(rownames(temp)) || is.null(colnames(temp))) {
    stop("'temp' must have both row names and column names", call. = FALSE)
  }
  if (anyNA(temp)) {
    # rank() keeps NA as NA, which would leave rows without a usable rank 1.
    stop("'temp' must not contain missing values", call. = FALSE)
  }

  n.rows <- nrow(temp)
  temp.order <- matrix(0, n.rows, ncol(temp), dimnames = dimnames(temp))

  repeat {
    temp <- abs(jitter(temp, factor = 0.001))
    tie.free <- TRUE
    for (j in seq_len(n.rows)) {
      temp.order[j, ] <- rank(temp[j, ])
      # Tied values share an averaged rank, so any repeated rank means a tie.
      # The flag is only ever cleared here, so a tie in ANY row forces another
      # jitter pass (not just a tie in the last row).
      if (anyDuplicated(temp.order[j, ]) > 0) {
        tie.free <- FALSE
      }
    }
    if (tie.free) break
  }

  ###NOTE - This step can be problematic and may require manual intervention
  # With tie-free ranks each row has exactly one column of rank 1.
  best.col <- vapply(
    seq_len(n.rows),
    function(i) match(1, temp.order[i, ]),
    integer(1)
  )
  best.match <- colnames(temp.order)[best.col]

  # NOTE(review): the original also derived the leading alphabetic prefix of
  # the matched species name but never included it in the returned table, so
  # that unused computation has been dropped.
  mpi <- tax.table[best.match, c("species", "family")]
  colnames(mpi) <- c("MPI.species", "MPI.family")

  query <- tax.table[
    as.character(rownames(temp.order)),
    c("species", "family", "taxon.seq")
  ]

  cbind(query, mpi)
}