-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add chuck-size parameter to cylinter config. (#181)
- Loading branch information
Showing
2 changed files
with
355 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,344 @@ | ||
# GENERAL PROGRAM CONFIGURATIONS | ||
|
||
inDir: /Users/<username>/Desktop/cylinter_demo | ||
# Path to CyLinter input directory containing multi-channel | ||
# image files (TIF or OME-TIF), segmentation outlines (OME-TIF), | ||
# segmentation masks (TIF), and corresponding single-cell feature tables (CSV) | ||
|
||
outDir: /Users/<username>/Desktop/cylinter_demo/output | ||
# CyLinter output directory. Path is created if it does not exist. | ||
|
||
sampleMetadata: | ||
"1": ["1", "Normal kidney cortex", "NKC", "CANCER-FALSE", 1] | ||
"15": ["15", "Glioblastoma", "GBM", "CANCER-TRUE", 1] | ||
"18": ["18", "Mesothelioma", "MTO", "CANCER-TRUE", 1] | ||
"68": ["68", "Tonsil", "TSL", "CANCER-FALSE", 3] | ||
# Sample metadata dictionary: keys = file names; values = list of strings. | ||
# First elements: sample names (str) | ||
# Second elements: descriptive text of experimental condition (str) | ||
# Third elements: abbreviation of experimental condition (str) | ||
# Fourth elements: comma-delimited string of arbitrary binary declarations | ||
# for computing t-statistics between two groups of samples (str dtype) | ||
# Fifth elements: replicate number specifying biological or | ||
# technical replicates (int) | ||
|
||
samplesToExclude: [] | ||
# (list of strs) Sample names to exclude from analysis specified | ||
# according to the first elements of sampleMetadata configuration. | ||
|
||
counterstainChannel: "DNA1" | ||
# (str) Name of marker in markers.csv file for use in visualizing nuclear counterstain | ||
|
||
markersToExclude: ["Rabbit IgG", "Goat IgG", "Mouse IgG", "CD56", "CD13", | ||
"pAUR", "CCNE", "CDKN2A", "PCNA_1", "CDKN1B_2", | ||
"CD63", "CD32", "CCNA2", "CDKN1C", "PCNA_1", | ||
"CDKN1B_1", "CCND1", "cPARP", "pCREB", | ||
"CCNB1", "PCNA_2", "CDK2" | ||
] | ||
# (list of strs) Immunomarkers to exclude from analysis | ||
# Does not include nuclear dyes. They are needed for the | ||
# cycleCorrelation module to remove cell dropout. | ||
|
||
############################################################################### | ||
# MODULE-SPECIFIC CONFIGURATIONS | ||
|
||
# selectROIs------------------------------------------------------------------- | ||
delintMode: True | ||
# (bool) Whether to drop (True; negative selection) or | ||
# retain (False; positive selection) cells selected by ROIs. | ||
|
||
showAbChannels: True | ||
# (bool) Whether to show all immunomarker channels (True) when Napari | ||
# is open (may be memory limiting) or show cycle 1 DNA only (False). | ||
|
||
samplesForROISelection: ["1", "15", "18", "68"] | ||
# (list of strs) Sample names for ROI selection specified | ||
# according to the first elements of sampleMetadata configuration. | ||
|
||
autoArtifactDetection: True | ||
# (bool) Whether to display tools for automated artifact detection in Napari window | ||
|
||
artifactDetectionMethod: "classical" | ||
# (str) Algorithm used for automated artifact detection (current option: "classical"). | ||
# Multi-layer perceptron method ("MLP") currently under development. | ||
|
||
|
||
# intensityFilter------------------------------------------------------------------- | ||
numBinsIntensity: 50 | ||
# (int) Number of bins for DNA intensity histograms. | ||
|
||
|
||
# areaFilter------------------------------------------------------------------- | ||
numBinsArea: 50 | ||
# (int) Number of bins for DNA area histograms. | ||
|
||
|
||
# cycleCorrelation------------------------------------------------------------------- | ||
numBinsCorrelation: 50 | ||
# (int) Number of bins for DNA1/DNAn histograms. | ||
|
||
|
||
# pruneOutliers------------------------------------------------------------------- | ||
hexbins: False | ||
# (bool) Whether to use hexbins (True) or scatter plots (False) to plot | ||
# single-cell signal intensities. Scatter plots allow for higher resolution, | ||
# but may require longer rendering times. | ||
|
||
hexbinGridSize: 20 | ||
# (int) Hexbin grid size when hexbins=True. | ||
# Higher values increase bin resolution. | ||
|
||
|
||
# metaQC (optional)------------------------------------------------------------------- | ||
metaQC: False | ||
# (bool) Whether to perform data reclassification based on | ||
# unsupervised clustering results of combinations of clean and | ||
# noisy (previously-redacted) data. | ||
|
||
embeddingAlgorithmQC: "UMAP" | ||
# (str) Embedding algorithm used for clustering (options: "TSNE" or "UMAP"). | ||
|
||
channelExclusionsClusteringQC: [] | ||
# (list of strs) Immunomarkers to exclude from clustering. | ||
|
||
samplesToRemoveClusteringQC: [] | ||
# (list of strs) Samples to exclude from clustering. | ||
|
||
fracForEmbeddingQC: 1.0 | ||
# (float) Fraction of cells to be embedded (range: 0.0-1.0) | ||
# Limits the amount of data passed to downstream modules. | ||
|
||
dimensionEmbeddingQC: 2 | ||
# (int) Dimension of the embedding (fixed to 2 in current version). | ||
|
||
topMarkersQC: "clusters" | ||
# (str) Normalization axis ("channels" or "clusters") used to define | ||
# highest expressed markers per cluster. | ||
|
||
colormapAnnotationQC: "Sample" | ||
# (str) Metadata annotation to colormap the embedding: Sample or Condition. | ||
|
||
metricQC: "euclidean" | ||
# (str) Distance metric for computing embedding. | ||
# Choose from valid metrics used by scipy.spatial.distance.pdist: | ||
# "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", | ||
# "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", | ||
# "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", | ||
# "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". | ||
|
||
# -------------------------------------- | ||
# tSNE-specific configurations: | ||
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html | ||
perplexityQC: 50.0 | ||
# (float) Related to the number of nearest neighbors used in other | ||
# manifold learning algorithms. Larger datasets usually require | ||
# larger perplexity. Different values can result in significantly | ||
# different results. | ||
|
||
earlyExaggerationQC: 12.0 | ||
# (float) For larger values, the space between natural clusters | ||
# will be larger in the embedded space. | ||
|
||
learningRateTSNEQC: 200.0 | ||
# (float) tSNE learning rate (typically between 10.0 and 1000.0). | ||
|
||
randomStateQC: 5 | ||
# (int) Determines the random number generator for reproducible results | ||
# across multiple function calls. | ||
|
||
# -------------------------------------- | ||
# UMAP-specific configurations: | ||
# https://umap-learn.readthedocs.io/en/latest/api.html | ||
nNeighborsQC: 6 | ||
# (int) The size of local neighborhood (in terms of number of | ||
# neighboring sample points) used for manifold approximation. | ||
# Larger values result in more global views of the manifold, | ||
# while smaller values result in more local data being preserved. | ||
# In general values should be in the range 2 to 100. | ||
|
||
learningRateUMAPQC: 1.0 | ||
# (float) The initial learning rate for the embedding optimization. | ||
|
||
minDistQC: 0.1 | ||
# (float) The effective minimum distance between embedded points. | ||
# Smaller values will result in a more clustered/clumped | ||
# embedding where nearby points on the manifold are drawn | ||
# closer together, while larger values will result in a more | ||
# even dispersal of points. The value should be set relative | ||
# to the spread value, which determines the scale at which | ||
# embedded points will be spread out. | ||
|
||
repulsionStrengthQC: 5.0 | ||
# (float) Weighting applied to negative samples in low dimensional | ||
# embedding optimization. Values higher than one will | ||
# result in greater weight being given to negative samples. | ||
|
||
|
||
# PCA------------------------------------------------------------------- | ||
channelExclusionsPCA: [] | ||
# (list of strs) Immunomarkers to exclude from PCA analysis. | ||
|
||
samplesToRemovePCA: [] | ||
# (list of strs) Samples to exclude from PCA analysis. | ||
|
||
dimensionPCA: 2 | ||
# (int) Number of PCs to compute. | ||
|
||
pointSize: 90.0 | ||
# (float) scatter point size for sample scores plot. | ||
|
||
labelPoints: True | ||
# (bool) Annotate scatter points with condition abbreviations | ||
# from sampleMetadata configuration. | ||
|
||
distanceCutoff: 0.15 | ||
# (float) Maximum distance between data points in PCA scores plot to | ||
# be annotated with a common label. Useful for increasing visual clarity | ||
# of PCA plots containing many data points. Applicable when | ||
# labelPoints is True. | ||
|
||
conditionsToSilhouette: [] | ||
# (list of strs) List of abbreviated condition names whose corresponding | ||
# scores plot points will be greyed out, left unannotated, and sent to the back | ||
# of the plot (zorder). Useful for increasing visual clarity of PCA | ||
# plots containing many data points. | ||
|
||
|
||
# gating (optional)------------------------------------------------------------------- | ||
gating: True | ||
# (bool) Whether to perform SYLARAS-style gating on single-cell data. | ||
# Cell Syst. 2020 Sep 23;11(3):272-285.e9 PMID: 32898474 | ||
|
||
channelExclusionsGating: [] | ||
# (list of strs) Immunomarkers to exclude from gating. | ||
|
||
samplesToRemoveGating: [] | ||
# (list of strs) Samples to exclude from gating. | ||
|
||
vectorThreshold: 100 | ||
# (int) visualize Boolean vectors with cell counts >= vectorThreshold | ||
|
||
classes: | ||
Tumor: | ||
definition: [+pan-CK, +KI67] | ||
subsets: [] | ||
# (dict) Boolean immunophenotype signatures. | ||
# +marker = immunopositive, -marker = immunonegative, marker = don't care | ||
|
||
|
||
# clustering------------------------------------------------------------------- | ||
embeddingAlgorithm: "UMAP" | ||
# (str) Embedding algorithm to use for clustering (options: "TSNE" or "UMAP"). | ||
|
||
channelExclusionsClustering: [] | ||
# (list of strs) Immunomarkers to exclude from clustering. | ||
|
||
samplesToRemoveClustering: [] | ||
# (list of strs) Samples to exclude from clustering. | ||
|
||
normalizeTissueCounts: True | ||
# (bool) Make the number of cells per tissue for clustering more similar | ||
# through sample-weighted random sampling. | ||
|
||
fracForEmbedding: 1.0 | ||
# (float) Fraction of cells to be embedded (range: 0.0-1.0). | ||
# Limits amount of data passed to downstream modules. | ||
|
||
dimensionEmbedding: 2 | ||
# (int) Dimension of the embedding (options: 2 or 3). | ||
|
||
topMarkers: "clusters" | ||
# (str) Normalization axis ("channels" or "clusters") used to define | ||
# highest expressed markers per cluster. | ||
|
||
colormapAnnotationClustering: "Sample" | ||
# (str) Metadata annotation to colormap the embedding: Sample or Condition. | ||
|
||
metric: "euclidean" | ||
# (str) Distance metric for computing embedding. | ||
# Choose from valid metrics used by scipy.spatial.distance.pdist: | ||
# "braycurtis", "canberra", "chebyshev", "cityblock", "correlation", "cosine", | ||
# "dice", "euclidean", "hamming", "jaccard", "jensenshannon", "kulsinski", | ||
# "mahalanobis", "matching", "minkowski", "rogerstanimoto", "russellrao", | ||
# "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule". | ||
|
||
# -------------------------------------- | ||
# tSNE-specific configurations: | ||
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html | ||
perplexity: 50.0 | ||
# (float) Related to the number of nearest neighbors used in other | ||
# manifold learning algorithms. Larger datasets usually require | ||
# larger perplexity. Different values can result in significantly | ||
# different results. | ||
|
||
earlyExaggeration: 12.0 | ||
# (float) For larger values, the space between natural clusters | ||
# will be larger in the embedded space. | ||
|
||
learningRateTSNE: 200.0 | ||
# (float) tSNE learning rate (typically between 10.0 and 1000.0). | ||
|
||
randomStateTSNE: 5 | ||
# (int) Determines the random number generator for reproducible results | ||
# across multiple function calls. | ||
|
||
# -------------------------------------- | ||
# UMAP-specific configurations: | ||
# https://umap-learn.readthedocs.io/en/latest/api.html | ||
nNeighbors: 6 | ||
# (int) The size of local neighborhood (in terms of number of | ||
# neighboring sample points) used for manifold approximation. | ||
# Larger values result in more global views of the manifold, | ||
# while smaller values result in more local data being preserved. | ||
# In general values should be in the range 2 to 100. | ||
|
||
learningRateUMAP: 1.0 | ||
# (float) The initial learning rate for the embedding optimization. | ||
|
||
minDist: 0.1 | ||
# (float) The effective minimum distance between embedded points. | ||
# Smaller values will result in a more clustered/clumped | ||
# embedding where nearby points on the manifold are drawn | ||
# closer together, while larger values will result in a more | ||
# even dispersal of points. The value should be set relative | ||
# to the spread value, which determines the scale at which | ||
# embedded points will be spread out. | ||
|
||
repulsionStrength: 5.0 | ||
# (float) Weighting applied to negative samples in low dimensional | ||
# embedding optimization. Values higher than one will | ||
# result in greater weight being given to negative samples. | ||
|
||
randomStateUMAP: 5 | ||
# (int) Determines the random number generator for reproducible results | ||
# across multiple function calls. | ||
|
||
|
||
# frequencyStats------------------------------------------------------------------- | ||
controlGroups: ["CANCER-FALSE"] | ||
# (list of strs) Corresponds to control groups for each binary declaration | ||
# specified as the fourth elements of sampleMetadata values. | ||
|
||
denominatorCluster: null | ||
# (None type) Cluster to be used as the denominator when computing cluster | ||
# frequency ratios. Set to null first, change to cluster integer number | ||
# to normalize cluster frequencies to a particular cluster if desired. | ||
|
||
FDRCorrection: False | ||
# (bool) Whether to compute p-vals and false discovery rate (FDR)-corrected | ||
# q-vals (True) or compute uncorrected p-vals only (False). | ||
|
||
|
||
# curateThumbnails------------------------------------------------------------- | ||
numThumbnails: 25 | ||
# (int) Number of examples per cluster to be curated. | ||
|
||
topMarkersThumbnails: "clusters" | ||
# (str) Normalization axis ("channels" or "clusters") used to define | ||
# highest expressed markers per cluster. | ||
|
||
windowSize: 30 | ||
# (int) Number of pixels in x and y dimensions per thumbnail. | ||
|
||
segOutlines: True | ||
# (bool) Whether to overlay cell segmentation outlines on thumbnail images. |
Oops, something went wrong.