#SOSCC algorithm
#George M. Garrity and Timothy G. Lilburn
#Copyright Michigan State University 2004, all rights reserved

# step8: flag outlying sequences within each genus.
#
# For every genus that has at least four sequences, the sub-table of
# dis.table restricted to that genus' sequences is examined. An entry is
# "far" when it is >= sd.mult times the standard deviation of the
# sub-table's lower triangle (diagonal included, NAs dropped). Sequences
# whose number of far entries exceeds the third quartile of those counts
# are reported.
#
# Arguments:
#   taxon.subset  table whose 4th column holds the taxon names; the genus is
#                 the leading run of ASCII letters of each name.
#   dis.table     square table (matrix or data frame) indexable by the
#                 entries of taxon.seq.new on both rows and columns.
#   all.genus     ignored: it is recomputed from taxon.subset. Kept so that
#                 existing callers do not break.
#   taxon.seq.new list with one element per genus, in the order of
#                 unique(genus); each element indexes rows/columns of
#                 dis.table.
#   sd.mult       multiplier applied to the standard deviation to obtain the
#                 "far" threshold. Defaults to 4, the value the code has
#                 always used (the original comment said "> 3 stdev").
#
# Value:
#   A list named by genus. Genera with fewer than four sequences keep a NULL
#   entry; the others hold a character vector of row names of the flagged
#   sequences (possibly empty).
step8 <- function(taxon.subset, dis.table, all.genus, taxon.seq.new,
                  sd.mult = 4) {
  # Genus = everything before the first non-letter character. A name with no
  # such character yields "" (regexpr returns -1); this is kept as-is so the
  # genus order stays aligned with however taxon.seq.new was built.
  taxon.names <- as.character(taxon.subset[, 4])
  all.genus <- substring(taxon.names, 1,
                         regexpr("[^a-zA-Z]", taxon.names) - 1)
  taxon.level <- unique(all.genus)

  if (length(taxon.seq.new) < length(taxon.level)) {
    stop("taxon.seq.new must have one element per genus (",
         length(taxon.level), " expected, ", length(taxon.seq.new),
         " found)", call. = FALSE)
  }

  # Preallocated with NULLs: genera that are too small are never assigned.
  outliers <- vector("list", length(taxon.level))
  names(outliers) <- taxon.level

  # seq_along() so that an empty taxon.subset yields an empty list instead
  # of iterating over c(1, 0).
  for (i in seq_along(taxon.level)) {
    members <- taxon.seq.new[[i]]
    if (length(members) < 4) {
      next
    }

    # as.matrix() lets dis.table be either a matrix or a data frame.
    block <- as.matrix(dis.table[members, members])

    # Spread of the lower triangle, diagonal included.
    lower.values <- block[lower.tri(block, diag = TRUE)]
    threshold <- sd(as.vector(na.omit(lower.values))) * sd.mult

    if (is.na(threshold)) {
      # Fewer than two usable values: nothing can be called an outlier.
      outliers[[i]] <- character(0)
      next
    }

    # Per column, how many entries reach the threshold (NAs never count).
    far.count <- colSums(block >= threshold, na.rm = TRUE)
    # Counts are labelled with the row names, as the original code did.
    names(far.count) <- rownames(block)

    # Third quartile of the counts; same value summary(far.count)[[5]] gives.
    cutoff <- quantile(far.count, 0.75, names = FALSE)
    outliers[[i]] <- as.character(names(far.count[far.count > cutoff]))
  }

  outliers
}