Detailed walk-through of de novo CIDER (dnCIDER) on pancreas data • CIDER

Introduction

This vignette performs dnCIDER on a cross-species pancreas dataset. It is aimed to show the underneath structure of dnCIDER compared to the other high-level vignette (see Getting Start with De Novo CIDER (dnCIDER): Cross-Species Pancreas Integration).

Set up

In addition to CIDER, we will load the following packages:

library(CIDER)
library(Seurat)
#> Loading required package: SeuratObject
#> Loading required package: sp
#> 'SeuratObject' was built under R 4.4.0 but the current version is
#> 4.4.1; it is recomended that you reinstall 'SeuratObject' as the ABI
#> for R may have changed
#> 
#> Attaching package: 'SeuratObject'
#> The following objects are masked from 'package:base':
#> 
#>     intersect, t
library(parallel)
library(cowplot)

Load example data

The example data can be downloaded from https://figshare.com/s/d5474749ca8c711cc205.

Pancreatic cell data $^1$ contain cells from human (8241 cells) and mouse (1886 cells).

# Download the data
data_url <- "https://figshare.com/ndownloader/files/31469387"
data_file <- file.path(tempdir(), "pancreas_counts.RData")

if (!file.exists(data_file)) {
  message("Downloading data...")
  download.file(data_url, destfile = data_file, mode = "wb")
}
#> Downloading data...

data_env <- new.env()
loaded_objects <- load(file = data_file, envir = data_env)
pancreas_counts <- data_env[[loaded_objects[1]]]
load("../data/pancreas_meta.RData") # meta data/cell information
seu <- CreateSeuratObject(counts = pancreas_counts, meta.data = pancreas_meta)
table(seu$Batch)
#> 
#> human mouse 
#>  8241  1886

Perform initial clustering

seu_list <- Seurat::SplitObject(seu, split.by = "Batch")
seu_list <- mclapply(seu_list, function(x) {
  x <- NormalizeData(x, normalization.method = "LogNormalize", 
                     scale.factor = 10000, verbose = FALSE)
  x <- FindVariableFeatures(x, selection.method = "vst", 
                            nfeatures = 2000, verbose = FALSE)
  x <- ScaleData(x, verbose = FALSE, vars.to.regress = "Sample")
  x <- RunPCA(x, features = VariableFeatures(object = x), verbose = FALSE)  
  x <- FindNeighbors(x, dims = 1:15, verbose = FALSE)
  x <- FindClusters(x, resolution = 0.6, verbose = FALSE)
  return(x)
})

knitr::kable(table(seu_list[[2]]$Group, seu_list[[2]]$seurat_clusters))

	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14	15	16	17	18
acinar	0	0	823	0	0	0	1	1	2	102	0	0	0	0	0	0	0	3	0
activated_stellate	0	0	0	1	0	0	0	0	269	0	0	0	0	0	0	0	1	4	0
alpha	1067	3	0	0	0	665	0	0	0	0	251	243	1	6	0	0	1	0	4
b_cell	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
beta	1	886	2	812	740	1	0	0	0	0	2	0	3	2	1	0	4	0	1
delta	0	3	2	1	3	0	0	0	0	0	1	0	215	1	216	0	148	0	2
ductal	1	1	6	0	0	0	469	386	0	169	0	0	1	0	0	0	0	0	0
endothelial	0	0	0	0	0	0	0	0	1	0	0	0	0	0	0	212	0	1	0
epsilon	0	0	0	0	0	0	0	0	0	0	0	0	8	8	0	0	1	0	0
gamma	1	1	0	0	0	0	0	0	0	0	1	0	3	210	0	0	25	0	0
immune_other	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
macrophage	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	39
mast	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	25
quiescent_stellate	1	0	0	1	0	0	1	0	4	0	0	0	0	0	0	0	0	153	0
schwann	0	0	0	0	0	0	0	0	10	0	0	0	0	0	0	0	0	0	0
t_cell	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	7

seu_list <- mclapply(seu_list, RunTSNE, dims = 1:15)
p1 <- scatterPlot(seu_list[[1]], "tsne", colour.by = "seurat_clusters")
p2 <- scatterPlot(seu_list[[2]], "tsne", colour.by = "seurat_clusters")
cowplot::plot_grid(p1,p2)

dist_coef <- getDistMat(seu_list, downsampling.size = 50)
#>   |                                                                              |                                                                      |   0%  |                                                                              |===================================                                   |  50%  |                                                                              |======================================================================| 100%

par(mfrow = c(length(seu_list),1))
for(i in which(sapply(dist_coef, function(x) return(!is.null(x))))){
  tmp <- dist_coef[[i]] + t(dist_coef[[i]])
  diag(tmp) <- 1
  pheatmap::pheatmap(tmp, display_numbers = TRUE)
}

for(seu_itor in 1:2){
  tmp <- dist_coef[[seu_itor]] + t(dist_coef[[seu_itor]])
  diag(tmp) <- 1
  tmp <- 1 - tmp
  hc <- hclust(as.dist(tmp), method = "average")
  hres <- cutree(hc, h = 0.4)
  df_hres <- data.frame(hres)
  df_hres$hres <- paste0(df_hres$hres, "_", unique(seu_list[[seu_itor]]$Batch))
  seu_list[[seu_itor]]$inicluster_tmp <- paste0(seu_list[[seu_itor]]$seurat_clusters, "_", seu_list[[seu_itor]]$Batch)
  seu_list[[seu_itor]]$inicluster <- df_hres$hres[match(seu_list[[seu_itor]]$inicluster_tmp,rownames(df_hres))]
}

# plot(as.dendrogram(hc), horiz = T)

p1 <- scatterPlot(seu_list[[1]], "tsne", "inicluster")
p2 <- scatterPlot(seu_list[[2]], "tsne", "inicluster")
plot_grid(p1,p2)

scatterPlot(seu_list[[2]], "tsne", "Group")

Calculate of IDER similarity matrix

res <- unlist(lapply(seu_list, function(x) return(x$inicluster)))
res_names <- unlist(lapply(seu_list, function(x) return(colnames(x))))
seu@meta.data$initial_cluster <- res[match(colnames(seu), res_names)]

ider <- getIDEr(seu, 
                group.by.var = "initial_cluster",
                batch.by.var = "Batch",
                downsampling.size = 35, 
                use.parallel = FALSE, verbose = FALSE)

net <- plotNetwork(seu, ider, colour.by = "Group" , vertex.size = 0.6, weight.factor = 5)

hc <- hclust(as.dist(1-(ider[[1]] + t(ider[[1]])))/2)
plot(as.dendrogram(hc)) #, horiz = TRUE

Perform final Clustering

seu <- finalClustering(seu, ider, cutree.h = 0.35) # final clustering

seu <- NormalizeData(seu, verbose = FALSE)
seu <- FindVariableFeatures(seu, selection.method = "vst", 
                            nfeatures = 2000, verbose = FALSE)
seu <- ScaleData(seu, verbose = FALSE)
seu <- RunPCA(seu, npcs = 20, verbose = FALSE)
seu <- RunTSNE(seu, reduction = "pca", dims = 1:12)

plot_list <- list()
plot_list[[1]] <- scatterPlot(seu, "tsne", colour.by = "CIDER_cluster", title = "asCIDER clustering results") 
plot_list[[2]] <- scatterPlot(seu, "tsne", colour.by = "Group", title = "Ground truth of cell populations") 
plot_grid(plotlist = plot_list, ncol = 2)

Reproducibility

sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: x86_64-apple-darwin20
#> Running under: macOS Monterey 12.5.1
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> time zone: Europe/London
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] parallel  stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#> [1] cowplot_1.1.3      Seurat_5.1.0       SeuratObject_5.0.2 sp_2.1-4          
#> [5] CIDER_0.99.4      
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3     rstudioapi_0.16.0      jsonlite_1.8.8        
#>   [4] magrittr_2.0.3         spatstat.utils_3.1-0   farver_2.1.2          
#>   [7] rmarkdown_2.27         fs_1.6.4               ragg_1.3.2            
#>  [10] vctrs_0.6.5            ROCR_1.0-11            spatstat.explore_3.3-2
#>  [13] htmltools_0.5.8.1      sass_0.4.9             sctransform_0.4.1     
#>  [16] parallelly_1.38.0      KernSmooth_2.23-24     bslib_0.7.0           
#>  [19] htmlwidgets_1.6.4      desc_1.4.3             ica_1.0-3             
#>  [22] plyr_1.8.9             plotly_4.10.4          zoo_1.8-12            
#>  [25] cachem_1.1.0           igraph_2.0.3           mime_0.12             
#>  [28] lifecycle_1.0.4        iterators_1.0.14       pkgconfig_2.0.3       
#>  [31] Matrix_1.7-0           R6_2.5.1               fastmap_1.2.0         
#>  [34] fitdistrplus_1.2-1     future_1.34.0          shiny_1.9.1           
#>  [37] digest_0.6.37          colorspace_2.1-1       patchwork_1.2.0       
#>  [40] tensor_1.5             RSpectra_0.16-2        irlba_2.3.5.1         
#>  [43] textshaping_0.4.0      labeling_0.4.3         progressr_0.14.0      
#>  [46] fansi_1.0.6            spatstat.sparse_3.1-0  httr_1.4.7            
#>  [49] polyclip_1.10-7        abind_1.4-5            compiler_4.4.1        
#>  [52] withr_3.0.1            doParallel_1.0.17      viridis_0.6.5         
#>  [55] fastDummies_1.7.4      highr_0.11             MASS_7.3-61           
#>  [58] tools_4.4.1            lmtest_0.9-40          httpuv_1.6.15         
#>  [61] future.apply_1.11.2    goftest_1.2-3          glue_1.7.0            
#>  [64] dbscan_1.2.2           nlme_3.1-165           promises_1.3.0        
#>  [67] grid_4.4.1             Rtsne_0.17             cluster_2.1.6         
#>  [70] reshape2_1.4.4         generics_0.1.3         gtable_0.3.5          
#>  [73] spatstat.data_3.1-2    tidyr_1.3.1            data.table_1.16.0     
#>  [76] utf8_1.2.4             spatstat.geom_3.3-2    RcppAnnoy_0.0.22      
#>  [79] ggrepel_0.9.5          RANN_2.6.2             foreach_1.5.2         
#>  [82] pillar_1.9.0           stringr_1.5.1          limma_3.60.6          
#>  [85] spam_2.10-0            RcppHNSW_0.6.0         later_1.3.2           
#>  [88] splines_4.4.1          dplyr_1.1.4            lattice_0.22-6        
#>  [91] survival_3.7-0         deldir_2.0-4           tidyselect_1.2.1      
#>  [94] locfit_1.5-9.10        miniUI_0.1.1.1         pbapply_1.7-2         
#>  [97] knitr_1.48             gridExtra_2.3          edgeR_4.2.2           
#> [100] scattermore_1.2        xfun_0.46              statmod_1.5.0         
#> [103] matrixStats_1.4.1      pheatmap_1.0.12        stringi_1.8.4         
#> [106] lazyeval_0.2.2         yaml_2.3.10            evaluate_0.24.0       
#> [109] codetools_0.2-20       kernlab_0.9-33         tibble_3.2.1          
#> [112] cli_3.6.3              uwot_0.2.2             xtable_1.8-4          
#> [115] reticulate_1.39.0      systemfonts_1.1.0      munsell_0.5.1         
#> [118] jquerylib_0.1.4        Rcpp_1.0.13            globals_0.16.3        
#> [121] spatstat.random_3.3-1  png_0.1-8              spatstat.univar_3.0-1 
#> [124] pkgdown_2.1.0          ggplot2_3.5.1          dotCall64_1.1-1       
#> [127] listenv_0.9.1          viridisLite_0.4.2      scales_1.3.0          
#> [130] ggridges_0.5.6         leiden_0.4.3.1         purrr_1.0.2           
#> [133] rlang_1.1.4

References

Baron, M. et al. A Single-Cell Transcriptomic Map of the Human and Mouse Pancreas Reveals Inter- and Intra-cell Population Structure. Cell Syst 3, 346–360.e4 (2016).
Satija R, et al. Spatial reconstruction of single-cell gene expression data. Nature Biotechnology 33, 495-502 (2015).
The count matrix and sample information were downloaded from NCBI GEO accession GSE84133.