Reference mapping — ReferenceMapping • Coralysis

This function allows to project new query data sets onto a reference built with Coralysis as well as transfer cell labels from the reference to queries.

Usage

ReferenceMapping.SingleCellExperiment(
  ref,
  query,
  ref.label,
  scale.query.by,
  project.umap,
  select.icp.models,
  k.nn,
  dimred.name.prefix
)

# S4 method for class 'SingleCellExperiment,SingleCellExperiment'
ReferenceMapping(
  ref,
  query,
  ref.label,
  scale.query.by = NULL,
  project.umap = FALSE,
  select.icp.models = metadata(ref)$coralysis$pca.params$select.icp.tables,
  k.nn = 10,
  dimred.name.prefix = ""
)

Arguments

ref: An object of SingleCellExperiment class trained with Coralysis and after running RunPCA(..., return.model = TRUE) function.
query: An object of SingleCellExperiment class to project onto ref.
ref.label: A character cell metadata column name from the ref object to transfer to the queries.
scale.query.by: Should the query data be scaled by cell or by feature. By default is NULL, i.e., is not scaled. Scale it if reference was scaled.
project.umap: Project query data onto reference UMAP (logical). By default FALSE. If TRUE, the ref object needs to have a UMAP embedding obtained with RunUMAP(..., return.model = TRUE) function.
select.icp.models: Select the reference ICP models to use for query cluster probability prediction. By default metadata(ref)$coralysis$pca.params$select.icp.tables, i.e., the models selected to compute the reference PCA are selected. If NULL all are used. Otherwise a numeric vector should be given to select the ICP models of interest.
k.nn: The number of k nearest neighbors to use in the classification KNN algorithm used to transfer labels from the reference to queries (integer). By default 10.
dimred.name.prefix: Dimensional reduction name prefix to add to the computed PCA and UMAP. By default nothing is added, i.e., dimred.name.prefix = "".

Value

An object of SingleCellExperiment class.

Examples

# Import package
suppressPackageStartupMessages(library("SingleCellExperiment"))

# Create toy SCE data
batches <- c("b1", "b2")
set.seed(239)
batch <- sample(x = batches, size = nrow(iris), replace = TRUE)
sce <- SingleCellExperiment(assays = list(logcounts = t(iris[,1:4])), 
                            colData = DataFrame("Species" = iris$Species, 
                                                "Batch" = batch))
colnames(sce) <- paste0("samp", 1:ncol(sce))

# Create reference & query SCE objects
ref <- sce[,sce$Batch=="b1"]
query <- sce[,sce$Batch=="b2"]

# 1) Train the reference
set.seed(123)
ref <- RunParallelDivisiveICP(object = ref, k = 2, L = 25, C = 1, 
                             train.k.nn = 10, train.k.nn.prop = NULL, 
                             use.cluster.seed = FALSE, 
                             build.train.set = FALSE, ari.cutoff = 0.1, 
                             threads = 2) 
#> WARNING: Setting 'divisive.method' to 'cluster' as 'batch.label=NULL'. 
#> If 'batch.label=NULL', 'divisive.method' can be one of: 'cluster', 'random'. 
#> 
#> Initializing divisive ICP clustering...
#> 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
#> 
#> Divisive ICP clustering completed successfully.
#> 
#> Predicting cell cluster probabilities using ICP models...
#> Prediction of cell cluster probabilities completed successfully.
#> 
#> Multi-level integration completed successfully.
# 2) Compute reference PCA & UMAP
ref <- RunPCA(ref, p = 5, return.model = TRUE, pca.method = "stats")
#> Divisive ICP: selecting ICP tables multiple of 1
set.seed(123)
ref <- RunUMAP(ref, return.model = TRUE)

# Plot 
PlotDimRed(object = ref, color.by = "Species", legend.nrow = 1)


# 3) Project & predict query cell labels  
map <- ReferenceMapping(ref = ref, query = query, ref.label = "Species",
                        project.umap = TRUE)

# Confusion matrix: predictions (rows) x ground-truth (cols)
preds_x_truth <- table(map$coral_labels, map$Species)
print(preds_x_truth)
#>             
#>              setosa versicolor virginica
#>   setosa         24          0         0
#>   versicolor      0         18         2
#>   virginica       0          2        20

# Accuracy score
acc <- sum(diag(preds_x_truth)) / sum(preds_x_truth) * 100  
print(paste0("Coralysis accuracy score: ", round(acc), "%"))
#> [1] "Coralysis accuracy score: 94%"

# Visualize: ground-truth, prediction, confidence scores
cowplot::plot_grid(PlotDimRed(object = map, color.by = "Species", 
                              legend.nrow = 1),
                   PlotDimRed(object = map, color.by = "coral_labels",
                             legend.nrow = 1),
                PlotExpression(object = map, color.by = "coral_probability", 
                                 color.scale = "viridis"),
                ncol = 2, align = "vh")