Loading data and packages

library(knitr)
library(kableExtra)
library(dplyr)
library(metagenomeSeq)
library(CountClust)

Read in filtered, corrected, etc data.

load("../data/nasal_GOM.rdata")
colnames(fData(MRobj))

aggregateByTaxonomy code edits

aggregateByTaxonomy <- function (obj, lvl, alternate = FALSE, norm = FALSE, log = FALSE, 
    aggfun = colSums, sl = 1000, featureOrder = NULL, returnFullHierarchy = TRUE, 
    out = "MRexperiment") {
    if (class(obj) == "MRexperiment") {
        mat = MRcounts(obj, norm = norm, log = log, sl = sl)
        if (length(lvl) == 1) {
            # added code
            include_features <- fData(obj)[match(rownames(mat),fData(obj)$Species),]
            levels=as.character(include_features[, lvl])
            #            levels = as.character(fData(obj)[, lvl])
        } else { levels = as.character(lvl) }
    }
    else {
        mat = obj
        levels = as.character(lvl)
        if (length(levels) != nrow(mat)) 
            stop("If input is a count matrix, lvl must be a vector of length = nrow(count matrix)")
    }
    if (!(out %in% c("MRexperiment", "matrix"))) {
        stop("The variable out must either be 'MRexperiment' or 'matrix'")
    }
    nafeatures = is.na(levels)
    if (length(nafeatures) > 0) {
        if (alternate == FALSE) {
            levels[nafeatures] = "no_match"
        }
        else {
            levels[nafeatures] = paste("OTU_", rownames(obj)[nafeatures], 
                sep = "")
        }
    }
    grps = split(seq_along(levels), levels)
    newMat = array(NA, dim = c(length(grps), ncol(obj)))
    for (i in seq_along(grps)) {
        newMat[i, ] = aggfun(mat[grps[[i]], , drop = FALSE])
    }
    rownames(newMat) = names(grps)
    colnames(newMat) = colnames(obj)
    if (out == "matrix") 
        return(newMat)
    if (out == "MRexperiment") {
        if (returnFullHierarchy) {
            if (is.null(featureOrder)) {
                featureOrder <- colnames(fData(obj))
            }
            taxa = featureData(obj)[match(names(grps), fData(obj)[, 
                lvl]), featureOrder[1:which(featureOrder == lvl)]]
            featureNames(taxa) = names(grps)
        }
        else {
            taxa = data.frame(names(grps))
            colnames(taxa) = "Taxa"
            rownames(taxa) = names(grps)
            taxa = as(taxa, "AnnotatedDataFrame")
        }
        if (class(obj) == "MRexperiment") {
            pd = phenoData(obj)
            newObj = newMRexperiment(newMat, featureData = taxa, 
                phenoData = pd)
        }
        else {
            newObj = newMRexperiment(newMat, featureData = taxa)
        }
        return(newObj)
    }
}

Analysis at genus level

cl = pData(MRobj)$GOM
mod = model.matrix(~cl)

obj <- aggregateByTaxonomy(MRobj, lvl="Genus", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_genus.tsv")
dim(obj)

# export normalized and logged couhnts
obj <- aggregateByTaxonomy(MRobj, lvl="Genus", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_genus_logged_normed.xlsx",
           col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)

Analysis at family level

cl = pData(MRobj)$GOM
mod = model.matrix(~cl)

obj <- aggregateByTaxonomy(MRobj, lvl="Family", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_family.tsv")
dim(obj)


obj <- aggregateByTaxonomy(MRobj, lvl="Family", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_family_logged_normed.xlsx",
           col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)

Analysis at order level

cl = pData(MRobj)$GOM
mod = model.matrix(~cl)

obj <- aggregateByTaxonomy(MRobj, lvl="Order", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_order.tsv")
dim(obj)


obj <- aggregateByTaxonomy(MRobj, lvl="Order", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_order_logged_normed.xlsx",
           col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)

Analysis at class level

cl = pData(MRobj)$GOM
mod = model.matrix(~cl)

obj <- aggregateByTaxonomy(MRobj, lvl="Class", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_class.tsv")
dim(obj)


obj <- aggregateByTaxonomy(MRobj, lvl="Class", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_class_logged_normed.xlsx",
           col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)

Analysis at phylum level

cl = pData(MRobj)$GOM
mod = model.matrix(~cl)

obj <- aggregateByTaxonomy(MRobj, lvl="Phylum", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_phylum.tsv")
dim(obj)

Session Info

## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] CountClust_1.6.2     ggplot2_3.1.0        metagenomeSeq_1.21.1
##  [4] RColorBrewer_1.1-2   glmnet_2.0-16        foreach_1.4.4       
##  [7] Matrix_1.2-15        limma_3.34.9         Biobase_2.38.0      
## [10] BiocGenerics_0.24.0  dplyr_0.7.8          kableExtra_0.9.0    
## [13] knitr_1.20          
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.3.1         viridisLite_0.3.0  gtools_3.8.1      
##  [4] assertthat_0.2.0   stats4_3.4.1       yaml_2.2.0        
##  [7] slam_0.1-43        pillar_1.3.0       backports_1.1.2   
## [10] lattice_0.20-38    glue_1.3.0         digest_0.6.18     
## [13] rvest_0.3.2        colorspace_1.3-2   picante_1.7       
## [16] cowplot_0.9.3      htmltools_0.3.6    plyr_1.8.4        
## [19] pkgconfig_2.0.2    purrr_0.2.5        scales_1.0.0      
## [22] gdata_2.18.0       Rtsne_0.15         tibble_1.4.2      
## [25] mgcv_1.8-25        withr_2.1.2        nnet_7.3-12       
## [28] lazyeval_0.2.1     magrittr_1.5       crayon_1.3.4      
## [31] evaluate_0.12      MASS_7.3-51.1      nlme_3.1-137      
## [34] gplots_3.0.1       xml2_1.2.0         vegan_2.5-3       
## [37] tools_3.4.1        hms_0.4.2          matrixStats_0.54.0
## [40] stringr_1.3.1      munsell_0.5.0      cluster_2.0.7-1   
## [43] bindrcpp_0.2.2     compiler_3.4.1     caTools_1.17.1.1  
## [46] mapplots_1.5.1     rlang_0.3.0.1      grid_3.4.1        
## [49] iterators_1.0.10   rstudioapi_0.8     bitops_1.0-6      
## [52] rmarkdown_1.10     boot_1.3-20        gtable_0.2.0      
## [55] codetools_0.2-15   flexmix_2.3-14     reshape2_1.4.3    
## [58] R6_2.3.0           bindr_0.1.1        rprojroot_1.3-2   
## [61] maptpx_1.9-5       permute_0.9-4      ape_5.2           
## [64] KernSmooth_2.23-15 readr_1.1.1        modeltools_0.2-22 
## [67] stringi_1.2.4      SQUAREM_2017.10-1  Rcpp_1.0.0        
## [70] tidyselect_0.2.5

This R Markdown site was created with workflowr