On logged and normed counts. Complete and average distance give the same results.
library(metagenomeSeq)
MRobj <- readRDS("../data/nasal_filtered_normed_batchcorrected.rds")
counts_logged <- MRcounts(MRobj,norm=T,log=T)
colnames(counts_logged) <- colnames(MRobj)
dist_mat <- dist(t(counts_logged), method="euclidean")
hclust_avg <- hclust(dist_mat, method = 'complete')
cut_avg_1 <- cutree(hclust_avg, k = 2)
table(cut_avg_1)
## cut_avg_1
## 1 2
## 190 7
plot(hclust_avg)
rect.hclust(hclust_avg , k = 2, border = c(2,4))

#saveRDS(cut_avg_1, file = "output/cluster-alternative-methods.Rmd/cut_res_1.rds")
Total tax count and cluster membership
plot(cut_avg_1, colSums(MRcounts(MRobj,norm=T,log=F)))

t.test(colSums(MRcounts(MRobj,norm=T,log=F)) ~ cut_avg_1)
##
## Welch Two Sample t-test
##
## data: colSums(MRcounts(MRobj, norm = T, log = F)) by cut_avg_1
## t = 1.1585, df = 25.818, p-value = 0.2573
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -21945.83 78589.22
## sample estimates:
## mean in group 1 mean in group 2
## 142283.4 113961.7
On normed counts.
library(metagenomeSeq)
MRobj <- readRDS("../data/nasal_filtered_normed_batchcorrected.rds")
counts <- MRcounts(MRobj,norm=T,log=F)
colnames(counts) <- colnames(MRobj)
library(compositions)
counts_clr <- t(counts)
# use cutree to determine sample cluster membership
dist_mat <- dist(counts_clr, method="euclidean")
hclust_avg <- hclust(dist_mat, method = 'complete')
cut_avg_2 <- cutree(hclust_avg, k = 2)
table(cut_avg_2)
## cut_avg_2
## 1 2
## 196 1
plot(hclust_avg)
rect.hclust(hclust_avg , k = 2, border = c(2,4))

#saveRDS(cut_avg_2, file = "output/cluster-alternative-methods.Rmd/cut_res_2.rds")
library(metagenomeSeq)
MRobj <- readRDS("../data/nasal_filtered_normed_batchcorrected.rds")
counts <- MRcounts(MRobj,norm=T,log=F)
colnames(counts) <- colnames(MRobj)
library(vegan)
mat_prop <- t(t(counts)/colSums(counts))
beta_bray <- vegdist(t(mat_prop), method = "bray", upper=T)
# use cutree to determine sample cluster membership
#dist_mat <- dist(t(counts_clr), method="euclidean")
hclust_avg <- hclust(beta_bray, method = 'complete')
cut_avg_3 <- cutree(hclust_avg, k = 2)
table(cut_avg_3)
## cut_avg_3
## 1 2
## 161 36
plot(hclust_avg)
rect.hclust(hclust_avg , k = 2, border = c(2,4))

#saveRDS(cut_avg_3, file = "output/cluster-alternative-methods.Rmd/cut_res_3.rds")
cut_avg_1 <- readRDS(file = "../output/cluster-alternative-methods.Rmd/cut_res_1.rds")
cut_avg_2 <- readRDS(file = "../output/cluster-alternative-methods.Rmd/cut_res_2.rds")
cut_avg_3 <- readRDS(file = "../output/cluster-alternative-methods.Rmd/cut_res_3.rds")
all.equal(names(cut_avg_1), names(cut_avg_2))
## [1] TRUE
all.equal(names(cut_avg_1), names(cut_avg_3))
## [1] TRUE
df <- data.frame(sample=names(cut_avg_1),
method1=cut_avg_1,
method2=cut_avg_2,
method3=cut_avg_3)
# write.csv(df,
# file= "../output-manuscript/supp-clusterig.csv",
# quote=F, row.names=F)
table(df$method1)
##
## 1 2
## 190 7
table(df$method2)
##
## 1 2
## 196 1
table(df$method3)
##
## 1 2
## 161 36
table(df$method1, df$method3)
##
## 1 2
## 1 154 36
## 2 7 0
table(df$method1, df$method3)
##
## 1 2
## 1 154 36
## 2 7 0
obj <- readRDS("../data/nasal_GOM.rds")
gom <- pData(obj)$GOM
names(gom) <- rownames(pData(obj))
table(gom, df$method1)
##
## gom 1 2
## 1 167 0
## 2 23 7
table(gom, df$method2)
##
## gom 1 2
## 1 167 0
## 2 29 1
table(gom, df$method3)
##
## gom 1 2
## 1 132 35
## 2 29 1
sessionInfo()
## R version 3.4.1 (2017-06-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] vegan_2.5-3 lattice_0.20-38 permute_0.9-4
## [4] compositions_1.40-2 bayesm_3.1-0.1 energy_1.7-5
## [7] robustbase_0.93-3 tensorA_0.36.1 metagenomeSeq_1.21.1
## [10] RColorBrewer_1.1-2 glmnet_2.0-16 foreach_1.4.4
## [13] Matrix_1.2-15 limma_3.34.9 Biobase_2.38.0
## [16] BiocGenerics_0.24.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.0 compiler_3.4.1 DEoptimR_1.0-8
## [4] bitops_1.0-6 iterators_1.0.10 tools_3.4.1
## [7] boot_1.3-20 digest_0.6.18 nlme_3.1-137
## [10] evaluate_0.12 mgcv_1.8-25 yaml_2.2.0
## [13] cluster_2.0.7-1 stringr_1.3.1 knitr_1.20
## [16] gtools_3.8.1 caTools_1.17.1.1 rprojroot_1.3-2
## [19] grid_3.4.1 rmarkdown_1.10 gdata_2.18.0
## [22] magrittr_1.5 MASS_7.3-51.1 backports_1.1.2
## [25] gplots_3.0.1 codetools_0.2-15 matrixStats_0.54.0
## [28] htmltools_0.3.6 KernSmooth_2.23-15 stringi_1.2.4
This R Markdown site was created with workflowr