Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2018c.
1.0/zoneinfo/America/Chicago'

Last updated: 2018-04-22

Code version: 863632d


Summary

Load data post filtering, normalization and batch correction (nasal_filtered_normed_batchcorrected.rds).

Apply grades of membership model using CountClust. We chose a random seed based on a series of analysis comparing goodness-of-fit of the model under 100 random seeds (https://jhsiao999.github.io/nasalmicrobiome/gom-seeds.html). The best random seed produces the largest Bayes Factor and is used for running the Grades of Membership (GoM) model.

\(~\)


Loading data and packages

library(knitr)
library(kableExtra)
library(dplyr)
library(metagenomeSeq)
library(CountClust)

Read in filtered data.

MRobj = readRDS("../data/nasal_filtered_normed_batchcorrected.rds")
MRobj
MRexperiment (storageMode: environment)
assayData: 553 features, 197 samples 
  element names: counts 
protocolData: none
phenoData
  sampleNames: EM0042 EM0047 ... E0168 (197 total)
  varLabels: StudyID age ... infnone (94 total)
  varMetadata: labelDescription
featureData
  featureNames:
    k__Bacteria;p__[Thermi];c__Deinococci;o__Deinococcales;f__Deinococcaceae;g__CM44;s__
    k__Bacteria;p__[Thermi];c__Deinococci;o__Deinococcales;f__Deinococcaceae;g__Deinococcus;s__
    ... s__zeae (553 total)
  fvarLabels: Kingdom Phylum ... Species (7 total)
  fvarMetadata: labelDescription
experimentData: use 'experimentData(object)'
Annotation:  

\(~\)


Clustering

Apply CountClust to fit grades of membership model.

set.seed(67)

counts <- MRcounts(MRobj,norm=FALSE,log=FALSE)
counts_clust <- FitGoM(t(counts), K=c(2), tol=0.00001)
saveRDS(counts_clust, file = "../data/count-clust.rds")

Load previous computed count_clust results.

counts_clust <- readRDS(file = "../data/count-clust.rds")

cct <- counts_clust[[1]]$omega
rowMaxs = sapply(seq(nrow(cct)),function(i){
    seq(along=cct[i,])[cct[i,]==max(cct[i,])]
  })
rowMaxs = unlist(rowMaxs)
uniqueGroups = unique(unlist(rowMaxs))
groupIds = split(seq(rowMaxs),factor(rowMaxs))

cl = rep("1",sum(sapply(groupIds,length))); cl[groupIds[[2]]] = 2

table(cl)
cl
  1   2 
167  30 
cbind(groupIDs = cl,round(cct,3)) %>% kable %>%
  kable_styling()
groupIDs 1 2
EM0042 1 1 0
EM0047 1 0.996 0.004
E0133 1 0.992 0.008
E0197 1 1 0
EM0046 1 1 0
EM0071 1 0.677 0.323
EM0029 1 1 0
E0358 1 0.992 0.008
EM0086 1 0.958 0.042
E0060 1 0.995 0.005
EM0068 1 0.991 0.009
E0275 2 0.246 0.754
E0584 1 0.921 0.079
E0119 2 0.027 0.973
E0117 2 0 1
E0126 2 0.237 0.763
E0684 1 1 0
E0103 1 0.921 0.079
E0327 1 1 0
E0089 2 0.009 0.991
E0183 1 0.702 0.298
E0207 1 0.947 0.053
E0050 2 0.479 0.521
EM0012 1 1 0
E0399 1 0.935 0.065
E0120 2 0.321 0.679
EM0060 1 1 0
E0083 1 0.965 0.035
E0042 1 0.817 0.183
EM0013 1 1 0
E0132 1 0.968 0.032
EM0050 1 0.959 0.041
E0076 1 0.891 0.109
E0106 1 0.959 0.041
E0608 1 0.991 0.009
E0122 2 0.325 0.675
E0250 1 0.988 0.012
EM0067 1 0.955 0.045
E0686 1 0.884 0.116
E0211 1 1 0
E0384 1 0.997 0.003
EM0078 1 0.983 0.017
E0077 1 0.984 0.016
E0652 1 0.803 0.197
E0229 1 1 0
E0125 2 0.079 0.921
E0487 1 0.754 0.246
EM0065 1 1 0
E0129 2 0.023 0.977
E0661 1 0.929 0.071
E0040 1 0.598 0.402
E0393 1 0.993 0.007
E0024 1 0.673 0.327
EM0080 1 0.999 0.001
EM0085 1 0.76 0.24
E0019 1 0.923 0.077
E0039 1 0.861 0.139
E0593 2 0.376 0.624
E0049 2 0.359 0.641
E0112 1 0.999 0.001
E0299 1 0.968 0.032
E0165 1 0.76 0.24
E0071 1 0.9 0.1
E0475 1 0.839 0.161
E0489 1 0.966 0.034
E0488 1 0.893 0.107
E0174 2 0.475 0.525
E0537 1 0.55 0.45
E0560 2 0.317 0.683
E0343 1 0.78 0.22
E0577 2 0.288 0.712
E0297 1 0.977 0.023
E0136 1 0.72 0.28
E0149 2 0.031 0.969
E0555 1 0.968 0.032
E0234 2 0.483 0.517
E0100 1 0.974 0.026
E0253 1 0.997 0.003
E0359 1 0.878 0.122
E0329 2 0.332 0.668
E0595 1 0.75 0.25
E0220 1 1 0
E0551 1 1 0
EM0061 1 0.952 0.048
E0261 1 0.994 0.006
E0205 2 0.446 0.554
E0445 1 0.995 0.005
E0529 1 0.998 0.002
E0436 1 0.954 0.046
E0586 1 1 0
E0354 1 0.987 0.013
E0267 1 0.908 0.092
E0379 1 0.973 0.027
E0187 1 1 0
E0035 1 0.994 0.006
E0496 1 0.993 0.007
E0678 1 0.517 0.483
E0179 1 0.881 0.119
E0438 1 1 0
E0341 2 0.037 0.963
E0463 1 0.912 0.088
E0104 1 0.98 0.02
E0069 1 0.889 0.111
E0591 1 0.965 0.035
E0396 1 1 0
E0392 1 1 0
E0061 1 0.843 0.157
E0180 1 0.94 0.06
E0579 1 0.89 0.11
E0038 1 0.996 0.004
E0654 2 0.222 0.778
E0457 1 0.998 0.002
E0556 2 0.134 0.866
E0097 1 0.986 0.014
E0641 1 0.945 0.055
E0667 1 0.983 0.017
E0510 1 0.971 0.029
E0109 1 0.998 0.002
E0401 1 0.924 0.076
E0420 1 0.986 0.014
E0520 1 0.933 0.067
E0550 1 0.845 0.155
E0371 1 0.993 0.007
E0255 1 0.999 0.001
E0439 1 0.989 0.011
E0156 1 0.907 0.093
E0532 1 1 0
E0247 1 0.916 0.084
E0045 1 0.996 0.004
E0153 1 0.972 0.028
E0364 1 0.958 0.042
E0148 2 0.36 0.64
E0523 1 0.914 0.086
E0403 1 0.96 0.04
E0264 1 0.527 0.473
E0224 2 0.396 0.604
E0105 1 1 0
E0693 2 0.229 0.771
E0462 1 0.851 0.149
E0662 1 0.974 0.026
E0504 2 0.483 0.517
E0499 1 0.978 0.022
E0656 1 0.995 0.005
E0513 1 0.915 0.085
E0116 1 0.98 0.02
E0484 1 0.956 0.044
E0571 1 0.991 0.009
E0381 1 0.903 0.097
E0283 1 0.95 0.05
E0374 1 0.963 0.037
E0557 1 0.794 0.206
E0644 1 0.866 0.134
E0170 1 0.644 0.356
E0248 1 1 0
E0587 1 1 0
E0260 1 0.903 0.097
E0574 1 0.999 0.001
E0175 1 1 0
E0161 1 1 0
E0048 1 0.943 0.057
E0047 1 0.992 0.008
E0085 1 0.975 0.025
E0202 1 0.829 0.171
E0456 1 0.999 0.001
E0111 1 0.985 0.015
E0360 1 0.994 0.006
E0382 1 1 0
E0620 1 0.969 0.031
E0303 1 0.979 0.021
E0079 1 0.981 0.019
E0447 1 0.947 0.053
E0389 1 0.997 0.003
E0294 1 0.999 0.001
E0521 1 0.915 0.085
E0675 1 0.795 0.205
E0687 1 0.977 0.023
E0027 1 0.921 0.079
E0672 2 0.134 0.866
E0398 1 0.758 0.242
E0582 1 0.943 0.057
E0350 1 0.561 0.439
E0515 1 0.939 0.061
E0082 1 0.993 0.007
E0355 1 0.992 0.008
E0572 2 0.351 0.649
E0171 1 0.709 0.291
E0166 1 1 0
E0549 1 0.856 0.144
E0533 1 0.725 0.275
E0616 2 0.291 0.709
E0683 1 0.839 0.161
E0528 1 0.974 0.026
E0298 1 0.999 0.001
EM0049 1 1 0
E0251 2 0 1
E0336 1 0.606 0.394
E0168 1 0.994 0.006

\(~\)


Preserve data with GOM

Next we save the dataset with the sample clustering labels

# MRobj2 = MRobj
# assayData(MRobj2)[["counts"]] = counts
# featureData(MRobj2) = AnnotatedDataFrame(data.frame(Species=rownames(counts)))
pData(MRobj)$GOM = cl
#pData(MRobj2)$GOM = cl

saveRDS(MRobj, file="../data/nasal_GOM.rds")

\(~\)


Session Info

R version 3.4.1 (2017-06-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] CountClust_1.4.1     ggplot2_2.2.1        metagenomeSeq_1.20.1
 [4] RColorBrewer_1.1-2   glmnet_2.0-13        foreach_1.4.4       
 [7] Matrix_1.2-12        limma_3.34.9         Biobase_2.38.0      
[10] BiocGenerics_0.24.0  dplyr_0.7.4          kableExtra_0.7.0    
[13] knitr_1.20          

loaded via a namespace (and not attached):
 [1] maptpx_1.9-3       Rcpp_0.12.15       ape_5.0           
 [4] lattice_0.20-35    gtools_3.5.0       assertthat_0.2.0  
 [7] rprojroot_1.3-2    digest_0.6.15      slam_0.1-42       
[10] R6_2.2.2           plyr_1.8.4         backports_1.1.2   
[13] stats4_3.4.1       evaluate_0.10.1    highr_0.6         
[16] httr_1.3.1         pillar_1.2.1       gplots_3.0.1      
[19] rlang_0.2.0        lazyeval_0.2.1     gdata_2.18.0      
[22] vegan_2.4-6        rmarkdown_1.9      readr_1.1.1       
[25] stringr_1.3.0      munsell_0.4.3      compiler_3.4.1    
[28] pkgconfig_2.0.1    SQUAREM_2017.10-1  mgcv_1.8-23       
[31] htmltools_0.3.6    nnet_7.3-12        tibble_1.4.2      
[34] codetools_0.2-15   matrixStats_0.53.1 permute_0.9-4     
[37] viridisLite_0.3.0  MASS_7.3-49        bitops_1.0-6      
[40] grid_3.4.1         nlme_3.1-131.1     gtable_0.2.0      
[43] git2r_0.21.0       magrittr_1.5       scales_0.5.0      
[46] KernSmooth_2.23-15 stringi_1.1.6      reshape2_1.4.3    
[49] flexmix_2.3-14     bindrcpp_0.2       xml2_1.2.0        
[52] boot_1.3-20        cowplot_0.9.2      iterators_1.0.9   
[55] tools_3.4.1        picante_1.6-2      glue_1.2.0        
[58] hms_0.4.1          yaml_2.1.18        colorspace_1.3-2  
[61] cluster_2.0.6      caTools_1.17.1     rvest_0.3.2       
[64] bindr_0.1          modeltools_0.2-21 

This R Markdown site was created with workflowr