library(knitr)
library(kableExtra)
library(dplyr)
library(metagenomeSeq)
library(CountClust)
Read in filtered, corrected, etc data.
load("../data/nasal_GOM.rdata")
colnames(fData(MRobj))
aggregateByTaxonomy <- function (obj, lvl, alternate = FALSE, norm = FALSE, log = FALSE,
aggfun = colSums, sl = 1000, featureOrder = NULL, returnFullHierarchy = TRUE,
out = "MRexperiment") {
if (class(obj) == "MRexperiment") {
mat = MRcounts(obj, norm = norm, log = log, sl = sl)
if (length(lvl) == 1) {
# added code
include_features <- fData(obj)[match(rownames(mat),fData(obj)$Species),]
levels=as.character(include_features[, lvl])
# levels = as.character(fData(obj)[, lvl])
} else { levels = as.character(lvl) }
}
else {
mat = obj
levels = as.character(lvl)
if (length(levels) != nrow(mat))
stop("If input is a count matrix, lvl must be a vector of length = nrow(count matrix)")
}
if (!(out %in% c("MRexperiment", "matrix"))) {
stop("The variable out must either be 'MRexperiment' or 'matrix'")
}
nafeatures = is.na(levels)
if (length(nafeatures) > 0) {
if (alternate == FALSE) {
levels[nafeatures] = "no_match"
}
else {
levels[nafeatures] = paste("OTU_", rownames(obj)[nafeatures],
sep = "")
}
}
grps = split(seq_along(levels), levels)
newMat = array(NA, dim = c(length(grps), ncol(obj)))
for (i in seq_along(grps)) {
newMat[i, ] = aggfun(mat[grps[[i]], , drop = FALSE])
}
rownames(newMat) = names(grps)
colnames(newMat) = colnames(obj)
if (out == "matrix")
return(newMat)
if (out == "MRexperiment") {
if (returnFullHierarchy) {
if (is.null(featureOrder)) {
featureOrder <- colnames(fData(obj))
}
taxa = featureData(obj)[match(names(grps), fData(obj)[,
lvl]), featureOrder[1:which(featureOrder == lvl)]]
featureNames(taxa) = names(grps)
}
else {
taxa = data.frame(names(grps))
colnames(taxa) = "Taxa"
rownames(taxa) = names(grps)
taxa = as(taxa, "AnnotatedDataFrame")
}
if (class(obj) == "MRexperiment") {
pd = phenoData(obj)
newObj = newMRexperiment(newMat, featureData = taxa,
phenoData = pd)
}
else {
newObj = newMRexperiment(newMat, featureData = taxa)
}
return(newObj)
}
}
cl = pData(MRobj)$GOM
mod = model.matrix(~cl)
obj <- aggregateByTaxonomy(MRobj, lvl="Genus", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_genus.tsv")
dim(obj)
# export normalized and logged couhnts
obj <- aggregateByTaxonomy(MRobj, lvl="Genus", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_genus_logged_normed.xlsx",
col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)
–
cl = pData(MRobj)$GOM
mod = model.matrix(~cl)
obj <- aggregateByTaxonomy(MRobj, lvl="Family", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_family.tsv")
dim(obj)
obj <- aggregateByTaxonomy(MRobj, lvl="Family", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_family_logged_normed.xlsx",
col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)
cl = pData(MRobj)$GOM
mod = model.matrix(~cl)
obj <- aggregateByTaxonomy(MRobj, lvl="Order", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_order.tsv")
dim(obj)
obj <- aggregateByTaxonomy(MRobj, lvl="Order", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_order_logged_normed.xlsx",
col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)
cl = pData(MRobj)$GOM
mod = model.matrix(~cl)
obj <- aggregateByTaxonomy(MRobj, lvl="Class", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_class.tsv")
dim(obj)
obj <- aggregateByTaxonomy(MRobj, lvl="Class", aggfun=colSums, norm=T, log=T)
# export aggregated count table
library(xlsx)
write.xlsx(MRcounts(obj), file="../data/exprs_class_logged_normed.xlsx",
col.names=TRUE, row.names=TRUE, append=FALSE, showNA=TRUE, password=NULL)
cl = pData(MRobj)$GOM
mod = model.matrix(~cl)
obj <- aggregateByTaxonomy(MRobj, lvl="Phylum", aggfun=colSums)
obj@expSummary$expSummary$normFactors <- normFactors(MRobj)
fit <- fitFeatureModel(obj, mod=mod)
MRfulltable(fit,number = nrow(obj),file="../data/tbl_phylum.tsv")
dim(obj)
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] CountClust_1.6.2 ggplot2_3.1.0 metagenomeSeq_1.21.1
## [4] RColorBrewer_1.1-2 glmnet_2.0-16 foreach_1.4.4
## [7] Matrix_1.2-15 limma_3.34.9 Biobase_2.38.0
## [10] BiocGenerics_0.24.0 dplyr_0.7.8 kableExtra_0.9.0
## [13] knitr_1.20
##
## loaded via a namespace (and not attached):
## [1] httr_1.3.1 viridisLite_0.3.0 gtools_3.8.1
## [4] assertthat_0.2.0 stats4_3.4.1 yaml_2.2.0
## [7] slam_0.1-43 pillar_1.3.0 backports_1.1.2
## [10] lattice_0.20-38 glue_1.3.0 digest_0.6.18
## [13] rvest_0.3.2 colorspace_1.3-2 picante_1.7
## [16] cowplot_0.9.3 htmltools_0.3.6 plyr_1.8.4
## [19] pkgconfig_2.0.2 purrr_0.2.5 scales_1.0.0
## [22] gdata_2.18.0 Rtsne_0.15 tibble_1.4.2
## [25] mgcv_1.8-25 withr_2.1.2 nnet_7.3-12
## [28] lazyeval_0.2.1 magrittr_1.5 crayon_1.3.4
## [31] evaluate_0.12 MASS_7.3-51.1 nlme_3.1-137
## [34] gplots_3.0.1 xml2_1.2.0 vegan_2.5-3
## [37] tools_3.4.1 hms_0.4.2 matrixStats_0.54.0
## [40] stringr_1.3.1 munsell_0.5.0 cluster_2.0.7-1
## [43] bindrcpp_0.2.2 compiler_3.4.1 caTools_1.17.1.1
## [46] mapplots_1.5.1 rlang_0.3.0.1 grid_3.4.1
## [49] iterators_1.0.10 rstudioapi_0.8 bitops_1.0-6
## [52] rmarkdown_1.10 boot_1.3-20 gtable_0.2.0
## [55] codetools_0.2-15 flexmix_2.3-14 reshape2_1.4.3
## [58] R6_2.3.0 bindr_0.1.1 rprojroot_1.3-2
## [61] maptpx_1.9-5 permute_0.9-4 ape_5.2
## [64] KernSmooth_2.23-15 readr_1.1.1 modeltools_0.2-22
## [67] stringi_1.2.4 SQUAREM_2017.10-1 Rcpp_1.0.0
## [70] tidyselect_0.2.5
This R Markdown site was created with workflowr