Grades of membership model finalized

Summary

Load data post filtering, normalization and batch correction (nasal_filtered_normed_batchcorrected.rds).

Apply grades of membership model using CountClust. We chose a random seed based on a series of analysis comparing goodness-of-fit of the model under 100 random seeds (https://jhsiao999.github.io/nasalmicrobiome/gom-seeds.html). The best random seed produces the largest Bayes Factor and is used for running the Grades of Membership (GoM) model.

\(~\)

Loading data and packages

library(knitr)
library(kableExtra)
library(dplyr)
library(metagenomeSeq)
library(CountClust)

Read in filtered data.

MRobj = readRDS("../data/nasal_filtered_normed_batchcorrected.rds")
MRobj

MRexperiment (storageMode: environment)
assayData: 553 features, 197 samples 
  element names: counts 
protocolData: none
phenoData
  sampleNames: EM0042 EM0047 ... E0168 (197 total)
  varLabels: StudyID age ... infnone (94 total)
  varMetadata: labelDescription
featureData
  featureNames:
    k__Bacteria;p__[Thermi];c__Deinococci;o__Deinococcales;f__Deinococcaceae;g__CM44;s__
    k__Bacteria;p__[Thermi];c__Deinococci;o__Deinococcales;f__Deinococcaceae;g__Deinococcus;s__
    ... s__zeae (553 total)
  fvarLabels: Kingdom Phylum ... Species (7 total)
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:

\(~\)

Clustering

Apply CountClust to fit grades of membership model.

set.seed(67)

counts <- MRcounts(MRobj,norm=FALSE,log=FALSE)
counts_clust <- FitGoM(t(counts), K=c(2), tol=0.00001)
saveRDS(counts_clust, file = "../data/count-clust.rds")

Load previous computed count_clust results.

counts_clust <- readRDS(file = "../data/count-clust.rds")

cct <- counts_clust[[1]]$omega
rowMaxs = sapply(seq(nrow(cct)),function(i){
    seq(along=cct[i,])[cct[i,]==max(cct[i,])]
  })
rowMaxs = unlist(rowMaxs)
uniqueGroups = unique(unlist(rowMaxs))
groupIds = split(seq(rowMaxs),factor(rowMaxs))

cl = rep("1",sum(sapply(groupIds,length))); cl[groupIds[[2]]] = 2

table(cl)

cl
  1   2 
167  30

cbind(groupIDs = cl,round(cct,3)) %>% kable %>%
  kable_styling()

	groupIDs	1	2
EM0042	1	1	0
EM0047	1	0.996	0.004
E0133	1	0.992	0.008
E0197	1	1	0
EM0046	1	1	0
EM0071	1	0.677	0.323
EM0029	1	1	0
E0358	1	0.992	0.008
EM0086	1	0.958	0.042
E0060	1	0.995	0.005
EM0068	1	0.991	0.009
E0275	2	0.246	0.754
E0584	1	0.921	0.079
E0119	2	0.027	0.973
E0117	2	0	1
E0126	2	0.237	0.763
E0684	1	1	0
E0103	1	0.921	0.079
E0327	1	1	0
E0089	2	0.009	0.991
E0183	1	0.702	0.298
E0207	1	0.947	0.053
E0050	2	0.479	0.521
EM0012	1	1	0
E0399	1	0.935	0.065
E0120	2	0.321	0.679
EM0060	1	1	0
E0083	1	0.965	0.035
E0042	1	0.817	0.183
EM0013	1	1	0
E0132	1	0.968	0.032
EM0050	1	0.959	0.041
E0076	1	0.891	0.109
E0106	1	0.959	0.041
E0608	1	0.991	0.009
E0122	2	0.325	0.675
E0250	1	0.988	0.012
EM0067	1	0.955	0.045
E0686	1	0.884	0.116
E0211	1	1	0
E0384	1	0.997	0.003
EM0078	1	0.983	0.017
E0077	1	0.984	0.016
E0652	1	0.803	0.197
E0229	1	1	0
E0125	2	0.079	0.921
E0487	1	0.754	0.246
EM0065	1	1	0
E0129	2	0.023	0.977
E0661	1	0.929	0.071
E0040	1	0.598	0.402
E0393	1	0.993	0.007
E0024	1	0.673	0.327
EM0080	1	0.999	0.001
EM0085	1	0.76	0.24
E0019	1	0.923	0.077
E0039	1	0.861	0.139
E0593	2	0.376	0.624
E0049	2	0.359	0.641
E0112	1	0.999	0.001
E0299	1	0.968	0.032
E0165	1	0.76	0.24
E0071	1	0.9	0.1
E0475	1	0.839	0.161
E0489	1	0.966	0.034
E0488	1	0.893	0.107
E0174	2	0.475	0.525
E0537	1	0.55	0.45
E0560	2	0.317	0.683
E0343	1	0.78	0.22
E0577	2	0.288	0.712
E0297	1	0.977	0.023
E0136	1	0.72	0.28
E0149	2	0.031	0.969
E0555	1	0.968	0.032
E0234	2	0.483	0.517
E0100	1	0.974	0.026
E0253	1	0.997	0.003
E0359	1	0.878	0.122
E0329	2	0.332	0.668
E0595	1	0.75	0.25
E0220	1	1	0
E0551	1	1	0
EM0061	1	0.952	0.048
E0261	1	0.994	0.006
E0205	2	0.446	0.554
E0445	1	0.995	0.005
E0529	1	0.998	0.002
E0436	1	0.954	0.046
E0586	1	1	0
E0354	1	0.987	0.013
E0267	1	0.908	0.092
E0379	1	0.973	0.027
E0187	1	1	0
E0035	1	0.994	0.006
E0496	1	0.993	0.007
E0678	1	0.517	0.483
E0179	1	0.881	0.119
E0438	1	1	0
E0341	2	0.037	0.963
E0463	1	0.912	0.088
E0104	1	0.98	0.02
E0069	1	0.889	0.111
E0591	1	0.965	0.035
E0396	1	1	0
E0392	1	1	0
E0061	1	0.843	0.157
E0180	1	0.94	0.06
E0579	1	0.89	0.11
E0038	1	0.996	0.004
E0654	2	0.222	0.778
E0457	1	0.998	0.002
E0556	2	0.134	0.866
E0097	1	0.986	0.014
E0641	1	0.945	0.055
E0667	1	0.983	0.017
E0510	1	0.971	0.029
E0109	1	0.998	0.002
E0401	1	0.924	0.076
E0420	1	0.986	0.014
E0520	1	0.933	0.067
E0550	1	0.845	0.155
E0371	1	0.993	0.007
E0255	1	0.999	0.001
E0439	1	0.989	0.011
E0156	1	0.907	0.093
E0532	1	1	0
E0247	1	0.916	0.084
E0045	1	0.996	0.004
E0153	1	0.972	0.028
E0364	1	0.958	0.042
E0148	2	0.36	0.64
E0523	1	0.914	0.086
E0403	1	0.96	0.04
E0264	1	0.527	0.473
E0224	2	0.396	0.604
E0105	1	1	0
E0693	2	0.229	0.771
E0462	1	0.851	0.149
E0662	1	0.974	0.026
E0504	2	0.483	0.517
E0499	1	0.978	0.022
E0656	1	0.995	0.005
E0513	1	0.915	0.085
E0116	1	0.98	0.02
E0484	1	0.956	0.044
E0571	1	0.991	0.009
E0381	1	0.903	0.097
E0283	1	0.95	0.05
E0374	1	0.963	0.037
E0557	1	0.794	0.206
E0644	1	0.866	0.134
E0170	1	0.644	0.356
E0248	1	1	0
E0587	1	1	0
E0260	1	0.903	0.097
E0574	1	0.999	0.001
E0175	1	1	0
E0161	1	1	0
E0048	1	0.943	0.057
E0047	1	0.992	0.008
E0085	1	0.975	0.025
E0202	1	0.829	0.171
E0456	1	0.999	0.001
E0111	1	0.985	0.015
E0360	1	0.994	0.006
E0382	1	1	0
E0620	1	0.969	0.031
E0303	1	0.979	0.021
E0079	1	0.981	0.019
E0447	1	0.947	0.053
E0389	1	0.997	0.003
E0294	1	0.999	0.001
E0521	1	0.915	0.085
E0675	1	0.795	0.205
E0687	1	0.977	0.023
E0027	1	0.921	0.079
E0672	2	0.134	0.866
E0398	1	0.758	0.242
E0582	1	0.943	0.057
E0350	1	0.561	0.439
E0515	1	0.939	0.061
E0082	1	0.993	0.007
E0355	1	0.992	0.008
E0572	2	0.351	0.649
E0171	1	0.709	0.291
E0166	1	1	0
E0549	1	0.856	0.144
E0533	1	0.725	0.275
E0616	2	0.291	0.709
E0683	1	0.839	0.161
E0528	1	0.974	0.026
E0298	1	0.999	0.001
EM0049	1	1	0
E0251	2	0	1
E0336	1	0.606	0.394
E0168	1	0.994	0.006

\(~\)

Preserve data with GOM

Next we save the dataset with the sample clustering labels

# MRobj2 = MRobj
# assayData(MRobj2)[["counts"]] = counts
# featureData(MRobj2) = AnnotatedDataFrame(data.frame(Species=rownames(counts)))
pData(MRobj)$GOM = cl
#pData(MRobj2)$GOM = cl

saveRDS(MRobj, file="../data/nasal_GOM.rds")

\(~\)

Session Info

R version 3.4.1 (2017-06-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] CountClust_1.4.1     ggplot2_2.2.1        metagenomeSeq_1.20.1
 [4] RColorBrewer_1.1-2   glmnet_2.0-13        foreach_1.4.4       
 [7] Matrix_1.2-12        limma_3.34.9         Biobase_2.38.0      
[10] BiocGenerics_0.24.0  dplyr_0.7.4          kableExtra_0.7.0    
[13] knitr_1.20          

loaded via a namespace (and not attached):
 [1] maptpx_1.9-3       Rcpp_0.12.15       ape_5.0           
 [4] lattice_0.20-35    gtools_3.5.0       assertthat_0.2.0  
 [7] rprojroot_1.3-2    digest_0.6.15      slam_0.1-42       
[10] R6_2.2.2           plyr_1.8.4         backports_1.1.2   
[13] stats4_3.4.1       evaluate_0.10.1    highr_0.6         
[16] httr_1.3.1         pillar_1.2.1       gplots_3.0.1      
[19] rlang_0.2.0        lazyeval_0.2.1     gdata_2.18.0      
[22] vegan_2.4-6        rmarkdown_1.9      readr_1.1.1       
[25] stringr_1.3.0      munsell_0.4.3      compiler_3.4.1    
[28] pkgconfig_2.0.1    SQUAREM_2017.10-1  mgcv_1.8-23       
[31] htmltools_0.3.6    nnet_7.3-12        tibble_1.4.2      
[34] codetools_0.2-15   matrixStats_0.53.1 permute_0.9-4     
[37] viridisLite_0.3.0  MASS_7.3-49        bitops_1.0-6      
[40] grid_3.4.1         nlme_3.1-131.1     gtable_0.2.0      
[43] git2r_0.21.0       magrittr_1.5       scales_0.5.0      
[46] KernSmooth_2.23-15 stringi_1.1.6      reshape2_1.4.3    
[49] flexmix_2.3-14     bindrcpp_0.2       xml2_1.2.0        
[52] boot_1.3-20        cowplot_0.9.2      iterators_1.0.9   
[55] tools_3.4.1        picante_1.6-2      glue_1.2.0        
[58] hms_0.4.1          yaml_2.1.18        colorspace_1.3-2  
[61] cluster_2.0.6      caTools_1.17.1     rvest_0.3.2       
[64] bindr_0.1          modeltools_0.2-21

Grades of membership model finalized

JNP

Summary

Loading data and packages

Clustering

Preserve data with GOM

Session Info