Predict Test-sample Ordering Using Training Labels

Apply the estimates from cycle_npreg_insample to another gene expression dataset to infer an angle, or cell cycle phase, for each cell.

cycle_npreg_outsample(
  Y_test,
  sigma_est,
  funs_est,
  method.trend = c("trendfilter", "loess", "bspline"),
  normed = TRUE,
  polyorder = 2,
  method.grid = "uniform",
  ncores = 2,
  grids = 100,
  get_trend_estimates = FALSE
)

Arguments

Y_test	A `SingleCellExperiment` object.
sigma_est	A vector of gene-specific standard errors of the cyclic trends.
funs_est	A vector of cyclic functions used to estimate cyclic trends.
method.trend	How to estimate cyclic trend of gene expression values. We offer three options: `method.trend = "trendfilter"`, uses `fit_trendfilter`, `method.trend = "loess"` uses `fit_loess`), and `method.trend = "bsplines"` uses `fit_bspline`. We found that `"trendfilter"` provided the best fits in our experiments. However, trend-filtering may require more computational effort since it uses cross-validation, so for fast results we recommend using `"bspline"`.
normed	Is the data already normalized? `TRUE` or `FALSE`.
polyorder	We estimate cyclic trends of gene expression levels using nonparamtric trend filtering.
method.grid	The approach used to initialize angles in the computation. `method.grid = "uniform"` creates k equally-spaced bins ("grids"). `method.grid = "pca"` uses gene expression values to infer angles, and then these angles are used to project the cells to the closest bin.
ncores	We use doParallel package for parallel computing.
grids	number of bins to be selected along 0 to 2pi.
get_trend_estimates	Re-estimate the cylic trend based on the predicted cell cycle phase, or not (`TRUE` or `FALSE`). This step calls trendfilter and is computationally intensive.

Value

A list with the following elements:

The inputted gene expression marix.

cell_times_est

Inferred angles or cell cycle phases (not ordered).

loglik_est

Log-likelihoods for each gene.

cell_times_reordered

The inferred angles, in ascending order.

Y_reorded

The input gene expression matrix reordered by cell_times_reordered.

sigma_reordered

Standard error of the cyclic trend for each gene, reordered by cell_times_reordered.

funs_reordered

A list of functions for approximating the cyclic trends of gene expression levels for each gene, reordered by cell_times_reordered.

mu_reordered

Estimated cyclic trend of gene expression values for each gene, reordered by cell_times_reordered.

prob_per_cell_by_celltimes

Probabilities of each cell belonging to each bin.

Examples

library(SingleCellExperiment)
#> Loading required package: SummarizedExperiment
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> 
#> Attaching package: ‘BiocGenerics’
#> The following objects are masked from ‘package:parallel’:
#> 
#>     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
#>     clusterExport, clusterMap, parApply, parCapply, parLapply,
#>     parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from ‘package:stats’:
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#>     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#>     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#>     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#>     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#>     union, unique, unsplit, which, which.max, which.min
#> Loading required package: S4Vectors
#> Warning: package ‘S4Vectors’ was built under R version 3.6.3
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:base’:
#> 
#>     expand.grid
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Warning: package ‘GenomeInfoDb’ was built under R version 3.6.3
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: DelayedArray
#> Warning: package ‘DelayedArray’ was built under R version 3.6.3
#> Loading required package: matrixStats
#> 
#> Attaching package: ‘matrixStats’
#> The following objects are masked from ‘package:Biobase’:
#> 
#>     anyMissing, rowMedians
#> Loading required package: BiocParallel
#> 
#> Attaching package: ‘DelayedArray’
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
#> The following objects are masked from ‘package:base’:
#> 
#>     aperm, apply, rowsum
data(sce_top101genes)

# Select top 5 cyclic genes.
sce_top5 <- sce_top101genes[order(rowData(sce_top101genes)$pve_fucci,
                                  decreasing = TRUE)[1:5],]

# Select NA18511 samples for training and test sets.
coldata               <- colData(sce_top5)
which_samples_train   <- rownames(coldata)[coldata$chip_id != "NA18511"]
which_samples_predict <- rownames(coldata)[coldata$chip_id == "NA18511"]

# Learn gene cyclic functions from the training data using the
# trend filtering method.
sce_top5    <- data_transform_quantile(sce_top5)
#> computing with 2 threads
expr_quant  <- assay(sce_top5, "cpm_quantNormed")
Y_train     <- expr_quant[, colnames(expr_quant) %in% which_samples_train]
theta_train <- coldata$theta_shifted[rownames(coldata) %in%
                 which_samples_train]
names(theta_train) <- rownames(coldata)[rownames(coldata) %in%
                                        which_samples_train]
model_5genes_train <- cycle_npreg_insample(Y = Y_train,theta = theta_train,
                                           polyorder = 2,ncores = 2,
                                           method.trend = "trendfilter")
#> computing on 2 cores

# Predict cell cycle in the test samples.
Y_test <- sce_top5[,colnames(sce_top5) %in% which_samples_predict]
model_5genes_predict <-
  cycle_npreg_outsample(Y_test = Y_test,
                        sigma_est = model_5genes_train$sigma_est,
                        funs_est = model_5genes_train$funs_est,
                        method.trend = "trendfilter",
                        ncores = 2,get_trend_estimates = FALSE)

# Estimate cyclic gene expression levels given the cell cycle for each
# gene.
predict_cyclic <-
  fit_cyclical_many(Y = assay(model_5genes_predict$Y,"cpm_quantNormed"),
                    theta = colData(model_5genes_predict$Y)$cellcycle_peco)
#> computing with 2 threads

# Plot the cell cycle predictions vs. FUCCI phase.
par(mfrow = c(2,3),mar = c(4,4,3,1))
for (g in seq_along(rownames(model_5genes_predict$Y))) {
  plot(assay(model_5genes_predict$Y,"cpm_quantNormed")[
        rownames(model_5genes_predict$Y)[g],],
       x = colData(model_5genes_predict$Y)$cellcycle_peco,axes = FALSE,
       xlab = "FUCCI phase",ylab = "predicted phase")
  x <- seq(0,2*pi,length.out = 100)
  lines(x = x,y = predict_cyclic$cellcycle_function[[
                     rownames(model_5genes_predict$Y)[g]]](x),
        lwd = 1.5,col = "tomato")
  axis(2)
  axis(1,at = seq(0,2*pi,length.out = 5),
       labels = c(0,expression(pi/2),expression(pi),expression(3*pi/2),
                  expression(2*pi)))
  abline(h = 0,lwd = 1,col = "skyblue",lty = "dotted")
  title(names(model_5genes_predict$Y)[g])
}

Arguments

Value

See also

Examples