-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpca.json
More file actions
39 lines (39 loc) · 1.47 KB
/
pca.json
File metadata and controls
39 lines (39 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
{
"tool_id": "pca",
"name": "Principal Component Analysis",
"category": "dimensionality_reduction",
"framework": "scanpy",
"function": "sc.pp.pca",
"valid_after": ["feature_selection"],
"valid_before": ["integration", "neighbor_graph"],
"paradigms": ["all"],
"parameters": {
"n_comps": {
"type": "int",
"default": 50,
"range": [10, 100],
"guidance": "Number of principal components to compute. 30-50 is standard. Use elbow plot of variance ratio (sc.pl.pca_variance_ratio) to determine how many PCs capture meaningful variance."
},
"use_highly_variable": {
"type": "bool",
"default": true,
"guidance": "If true, restrict PCA to highly variable genes only. Should be true after HVG selection."
},
"svd_solver": {
"type": "string",
"default": "arpack",
"options": ["arpack", "randomized", "lobpcg"],
"guidance": "SVD solver. 'arpack' is default and reliable. 'randomized' is faster for large datasets (>50K cells)."
}
},
"outputs": {
"cell_embeddings": "adata.obsm['X_pca']",
"variance_ratio": "adata.uns['pca']['variance_ratio']",
"loadings": "adata.varm['PCs']"
},
"validation": {
"pc1_max_variance": 0.5,
"check": "First PC should explain <50% of variance. If it does, it likely reflects a technical artifact (e.g., total counts, cell cycle) rather than biology."
},
"provenance_captures": ["n_comps", "use_highly_variable", "svd_solver", "total_variance_explained"]
}