-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhighly_variable_genes.json
More file actions
53 lines (53 loc) · 2.13 KB
/
highly_variable_genes.json
File metadata and controls
53 lines (53 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
{
"tool_id": "highly_variable_genes",
"name": "Highly Variable Gene Selection",
"category": "feature_selection",
"framework": "scanpy",
"function": "sc.pp.highly_variable_genes",
"valid_after": ["normalization"],
"valid_before": ["pca"],
"paradigms": ["all"],
"parameters": {
"n_top_genes": {
"type": "int",
"default": 2000,
"range": [1000, 5000],
"guidance": "Number of highly variable genes to select. 2000-3000 is standard. Use higher (4000-5000) for heterogeneous tissues with many cell types."
},
"flavor": {
"type": "string",
"default": "seurat_v3",
"options": ["seurat", "cell_ranger", "seurat_v3"],
"guidance": "Method for HVG selection. 'seurat_v3' (default, recommended) uses variance-stabilizing transformation on raw counts — produces more biologically meaningful HVGs that recover known cell-type markers (Hafemeister & Satija 2019). Requires raw counts in adata.X, adata.layers['counts'], or adata.layers['raw_counts']. Falls back to 'seurat' only if no raw counts are available. 'seurat' works on log-normalized data but is biased toward lowly-expressed genes."
},
"min_mean": {
"type": "float",
"default": 0.0125,
"guidance": "Minimum mean expression cutoff (for 'seurat' and 'cell_ranger' flavors)."
},
"max_mean": {
"type": "float",
"default": 3.0,
"guidance": "Maximum mean expression cutoff."
},
"min_disp": {
"type": "float",
"default": 0.5,
"guidance": "Minimum dispersion cutoff."
},
"batch_key": {
"type": "string",
"default": null,
"guidance": "If set, select HVGs per batch and take the union. Use when integrating multi-batch data to avoid batch-driven gene selection."
}
},
"outputs": {
"hvg_mask": "adata.var['highly_variable'] (boolean mask)",
"n_hvgs": "int"
},
"validation": {
"n_hvgs_range": [500, 8000],
"check": "Number of HVGs should be between 500 and 8000. If too few, lower min_disp or increase n_top_genes."
},
"provenance_captures": ["n_top_genes", "flavor", "min_mean", "max_mean", "min_disp", "batch_key", "n_hvgs_selected"]
}