Apply the estimates from cycle_npreg_insample to another gene expression dataset to infer an angle, or cell cycle phase, for each cell.

cycle_npreg_outsample(
  Y_test,
  sigma_est,
  funs_est,
  method.trend = c("trendfilter", "loess", "bspline"),
  normed = TRUE,
  polyorder = 2,
  method.grid = "uniform",
  ncores = 2,
  grids = 100,
  get_trend_estimates = FALSE
)

Arguments

Y_test

A SingleCellExperiment object.

sigma_est

A vector of gene-specific standard errors of the cyclic trends.

funs_est

A vector of cyclic functions used to estimate cyclic trends.

method.trend

How to estimate cyclic trend of gene expression values. We offer three options: method.trend = "trendfilter", uses fit_trendfilter, method.trend = "loess" uses fit_loess), and method.trend = "bsplines" uses fit_bspline. We found that "trendfilter" provided the best fits in our experiments. However, trend-filtering may require more computational effort since it uses cross-validation, so for fast results we recommend using "bspline".

normed

Is the data already normalized? TRUE or FALSE.

polyorder

We estimate cyclic trends of gene expression levels using nonparamtric trend filtering.

method.grid

The approach used to initialize angles in the computation. method.grid = "uniform" creates k equally-spaced bins ("grids"). method.grid = "pca" uses gene expression values to infer angles, and then these angles are used to project the cells to the closest bin.

ncores

We use doParallel package for parallel computing.

grids

number of bins to be selected along 0 to 2pi.

get_trend_estimates

Re-estimate the cylic trend based on the predicted cell cycle phase, or not (TRUE or FALSE). This step calls trendfilter and is computationally intensive.

Value

A list with the following elements:

Y

The inputted gene expression marix.

cell_times_est

Inferred angles or cell cycle phases (not ordered).

loglik_est

Log-likelihoods for each gene.

cell_times_reordered

The inferred angles, in ascending order.

Y_reorded

The input gene expression matrix reordered by cell_times_reordered.

sigma_reordered

Standard error of the cyclic trend for each gene, reordered by cell_times_reordered.

funs_reordered

A list of functions for approximating the cyclic trends of gene expression levels for each gene, reordered by cell_times_reordered.

mu_reordered

Estimated cyclic trend of gene expression values for each gene, reordered by cell_times_reordered.

prob_per_cell_by_celltimes

Probabilities of each cell belonging to each bin.

See also

cycle_npreg_insample for estimating parameters for cyclic functions from training data, cycle_npreg_loglik for log-likehood at angles between 0 to 2pi, and cycle_npreg_mstep for estimating cyclic functions given inferred phases from cycle_npreg_loglik.

Examples

library(SingleCellExperiment)
#> Loading required package: SummarizedExperiment
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> Loading required package: parallel
#> #> Attaching package: ‘BiocGenerics’
#> The following objects are masked from ‘package:parallel’: #> #> clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, #> clusterExport, clusterMap, parApply, parCapply, parLapply, #> parLapplyLB, parRapply, parSapply, parSapplyLB
#> The following objects are masked from ‘package:stats’: #> #> IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’: #> #> anyDuplicated, append, as.data.frame, basename, cbind, colnames, #> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, #> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, #> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, #> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply, #> union, unique, unsplit, which, which.max, which.min
#> Loading required package: S4Vectors
#> Warning: package ‘S4Vectors’ was built under R version 3.6.3
#> #> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:base’: #> #> expand.grid
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Warning: package ‘GenomeInfoDb’ was built under R version 3.6.3
#> Loading required package: Biobase
#> Welcome to Bioconductor #> #> Vignettes contain introductory material; view with #> 'browseVignettes()'. To cite Bioconductor, see #> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: DelayedArray
#> Warning: package ‘DelayedArray’ was built under R version 3.6.3
#> Loading required package: matrixStats
#> #> Attaching package: ‘matrixStats’
#> The following objects are masked from ‘package:Biobase’: #> #> anyMissing, rowMedians
#> Loading required package: BiocParallel
#> #> Attaching package: ‘DelayedArray’
#> The following objects are masked from ‘package:matrixStats’: #> #> colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
#> The following objects are masked from ‘package:base’: #> #> aperm, apply, rowsum
data(sce_top101genes) # Select top 5 cyclic genes. sce_top5 <- sce_top101genes[order(rowData(sce_top101genes)$pve_fucci, decreasing = TRUE)[1:5],] # Select NA18511 samples for training and test sets. coldata <- colData(sce_top5) which_samples_train <- rownames(coldata)[coldata$chip_id != "NA18511"] which_samples_predict <- rownames(coldata)[coldata$chip_id == "NA18511"] # Learn gene cyclic functions from the training data using the # trend filtering method. sce_top5 <- data_transform_quantile(sce_top5)
#> computing with 2 threads
expr_quant <- assay(sce_top5, "cpm_quantNormed") Y_train <- expr_quant[, colnames(expr_quant) %in% which_samples_train] theta_train <- coldata$theta_shifted[rownames(coldata) %in% which_samples_train] names(theta_train) <- rownames(coldata)[rownames(coldata) %in% which_samples_train] model_5genes_train <- cycle_npreg_insample(Y = Y_train,theta = theta_train, polyorder = 2,ncores = 2, method.trend = "trendfilter")
#> computing on 2 cores
# Predict cell cycle in the test samples. Y_test <- sce_top5[,colnames(sce_top5) %in% which_samples_predict] model_5genes_predict <- cycle_npreg_outsample(Y_test = Y_test, sigma_est = model_5genes_train$sigma_est, funs_est = model_5genes_train$funs_est, method.trend = "trendfilter", ncores = 2,get_trend_estimates = FALSE) # Estimate cyclic gene expression levels given the cell cycle for each # gene. predict_cyclic <- fit_cyclical_many(Y = assay(model_5genes_predict$Y,"cpm_quantNormed"), theta = colData(model_5genes_predict$Y)$cellcycle_peco)
#> computing with 2 threads
# Plot the cell cycle predictions vs. FUCCI phase. par(mfrow = c(2,3),mar = c(4,4,3,1)) for (g in seq_along(rownames(model_5genes_predict$Y))) { plot(assay(model_5genes_predict$Y,"cpm_quantNormed")[ rownames(model_5genes_predict$Y)[g],], x = colData(model_5genes_predict$Y)$cellcycle_peco,axes = FALSE, xlab = "FUCCI phase",ylab = "predicted phase") x <- seq(0,2*pi,length.out = 100) lines(x = x,y = predict_cyclic$cellcycle_function[[ rownames(model_5genes_predict$Y)[g]]](x), lwd = 1.5,col = "tomato") axis(2) axis(1,at = seq(0,2*pi,length.out = 5), labels = c(0,expression(pi/2),expression(pi),expression(3*pi/2), expression(2*pi))) abline(h = 0,lwd = 1,col = "skyblue",lty = "dotted") title(names(model_5genes_predict$Y)[g]) }