Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
405 commits
Select commit Hold shift + click to select a range
0ea4b2c
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena Mar 20, 2025
0786496
add ESMC encoder
pavlovicmilena Mar 21, 2025
d66a2ea
update version
pavlovicmilena Mar 21, 2025
41abfa3
add esmc default params
pavlovicmilena Mar 21, 2025
f3b3aa2
update import tutorial
pavlovicmilena Mar 21, 2025
1093f53
update clustering docs
pavlovicmilena Mar 21, 2025
c611082
update dendrogram for CompAIRRClusteringReport
pavlovicmilena Mar 21, 2025
db93206
finish CompAIRRClusteringReport
pavlovicmilena Mar 21, 2025
44533ee
update report docs
pavlovicmilena Mar 21, 2025
c3b1182
update ExternalLabelClusterSummary desc
pavlovicmilena Mar 21, 2025
78d4a0b
update sequence filter and add chain pair check on import
pavlovicmilena Mar 24, 2025
7cf387c
update index.rst
pavlovicmilena Mar 24, 2025
a52ea39
fix encoder param export in full_specs
pavlovicmilena Mar 25, 2025
f7fb2c7
add check for cuda in TCRBert
pavlovicmilena Mar 25, 2025
f2b4a25
fix device for tcrbert
pavlovicmilena Mar 25, 2025
cf9aa4f
reduce memory usage for clustering instruction
pavlovicmilena Mar 26, 2025
1e27a51
fix label for DimensionalityReduction report
pavlovicmilena Mar 26, 2025
76a3582
add LeaveOneOutSplitter for repertoire datasets
pavlovicmilena Mar 28, 2025
c7c13f1
fix protein encoders
pavlovicmilena Mar 28, 2025
2e30f11
fix PerformancePerLabel plotting
pavlovicmilena Mar 31, 2025
779c819
add psutil to requirements
pavlovicmilena Mar 31, 2025
44006d0
add detailed logging to clustering
pavlovicmilena Mar 31, 2025
40654e8
fix max threads / cpus in clustering instruction to num_processes
pavlovicmilena Mar 31, 2025
cf994d4
keep encoded dataset (use memmap)
pavlovicmilena Apr 2, 2025
730b44e
update dim red to look better
pavlovicmilena Apr 2, 2025
a90caa0
show all labels specified in dim red report
pavlovicmilena Apr 2, 2025
8937d02
update reports for usability
pavlovicmilena Apr 2, 2025
ea1b13c
add ExternalLabelMetricHeatmap clustering report
pavlovicmilena Apr 3, 2025
8cd8a16
extend clustering test
pavlovicmilena Apr 3, 2025
dec6225
add seed for gen models; allow multiple reports for exp analysis inst
pavlovicmilena Apr 3, 2025
8bfe114
add docs for training_percentage in TrainGenModelInstruction
pavlovicmilena Apr 3, 2025
046446d
feat: add dataset_split column when exporting combined dataset
Charlotte-Wurtzen Apr 3, 2025
110a773
fix similarity matrix for CompAIRRClusteringReport
pavlovicmilena Apr 3, 2025
30f1039
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Apr 3, 2025
503dd8a
feat: add a skeleton for node degree distribution report
mmamica Apr 3, 2025
eb7b5f8
fix docs
pavlovicmilena Apr 4, 2025
6177296
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Apr 4, 2025
b807a4e
add scaling to prot embeddings
pavlovicmilena Apr 9, 2025
ad7838a
wip: adding compairr execution to node degree distribution reports
mmamica Apr 9, 2025
3d0578c
fix in feature reports: choose top or bottom n features based on mean…
Charlotte-Wurtzen Apr 10, 2025
483f87c
fix memmap handling in prot embeddings
pavlovicmilena Apr 10, 2025
9afb061
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Apr 10, 2025
90e51e1
fix scaling for encoders in clustering workflow
pavlovicmilena Apr 10, 2025
39d56ab
feat: first version of node degree distribution report
mmamica Apr 11, 2025
23d1c12
feat: deduplicate sequences in node degree distribution report
mmamica Apr 18, 2025
c55e282
feat: extend node degree distribution report to repertoire datasets
mmamica Apr 18, 2025
d38a3e9
feat: add averaged out node degree distribution report across reperto…
mmamica Apr 18, 2025
16d99f2
add repertoire subsampling to subsampling instruction
pavlovicmilena Apr 22, 2025
f0a9364
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Apr 22, 2025
db27c76
add diagonal to all facets in FeatureComparison
pavlovicmilena Apr 22, 2025
09910c6
fix alignment of table headers
pavlovicmilena Apr 22, 2025
0cf623f
fix hoverinfo
pavlovicmilena Apr 22, 2025
9000970
sort labels in FeatureDist and FeatureBarplot
pavlovicmilena Apr 23, 2025
88c1c5f
add how to analyze your own dataset tutorial
pavlovicmilena Apr 23, 2025
6f9eed2
add more plots and dataset ref to tutorial
pavlovicmilena Apr 23, 2025
1d77e99
feat: add averaged out node degree distribution report across labels
mmamica Apr 24, 2025
1299e1c
fix typo in clustering_tutorial.rst
pavlovicmilena Apr 28, 2025
9f17601
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Apr 28, 2025
5485e6a
feat: add parameter to LSTM model specifying how often to print train…
mmamica Apr 28, 2025
e9bb621
add docs workflows
pavlovicmilena May 2, 2025
0339a8b
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena May 2, 2025
c402d9b
add multiple label annotations to CompAIRRClusteringReport
pavlovicmilena May 2, 2025
f49b48d
add ConfusionMatrixPerLabel report
pavlovicmilena May 4, 2025
a600b8d
fix: drop empty rows on import
pavlovicmilena May 5, 2025
ab13ffa
fix tests
pavlovicmilena May 5, 2025
1a1a496
update version
pavlovicmilena May 5, 2025
c329cff
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena May 5, 2025
0c0ef80
minor docs update
pavlovicmilena May 5, 2025
5d65dd0
bugfix in ConfusionMatrixPerLabel
pavlovicmilena May 6, 2025
6188913
update pytest action and requirements
pavlovicmilena May 6, 2025
8377341
update version
pavlovicmilena May 6, 2025
cc45409
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena May 6, 2025
99b04d7
update docs
pavlovicmilena May 6, 2025
3f275c4
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena May 6, 2025
c59b8e5
update docs file
pavlovicmilena May 6, 2025
c14fb74
update docs conf.py
pavlovicmilena May 6, 2025
1a6a050
update docs conf.py
pavlovicmilena May 6, 2025
660ab1f
update docs conf.py to python 3.12
pavlovicmilena May 6, 2025
ac21e3a
remove sentencepiece from requirements.txt
pavlovicmilena May 6, 2025
db23963
update requirements.txt
pavlovicmilena May 6, 2025
459017c
update scripts and requirements
pavlovicmilena May 6, 2025
08c462e
update pythonpath for sphinx docs generation
pavlovicmilena May 6, 2025
ccb7475
update pythonpath for sphinx docs generation
pavlovicmilena May 6, 2025
09a2205
update pythonpath for sphinx docs generation
pavlovicmilena May 6, 2025
fde84f2
update pythonpath for sphinx docs generation
pavlovicmilena May 6, 2025
4f54d86
update sphinx docs generation
pavlovicmilena May 6, 2025
cd5bd45
update sphinx docs generation
pavlovicmilena May 6, 2025
115ba1d
update sphinx docs generation
pavlovicmilena May 6, 2025
16b186a
update sphinx docs generation
pavlovicmilena May 6, 2025
8ad12ce
update sphinx docs generation
pavlovicmilena May 6, 2025
d60d19a
update docs
pavlovicmilena May 7, 2025
bd4cde2
update docs
pavlovicmilena May 7, 2025
960e8ec
update vae summary report
pavlovicmilena May 8, 2025
34c67a3
update vae summary report test
pavlovicmilena May 8, 2025
343ceb9
update vae summary report docs
pavlovicmilena May 8, 2025
e29bba1
fix caching for protein embeddings
pavlovicmilena May 13, 2025
fa059cd
allow sequence datasets in tcrdist encoding
pavlovicmilena May 15, 2025
eafb1c8
allow multiple labels in tcrdist encoder
pavlovicmilena May 15, 2025
353907a
tcrdist and precomputed metrics for clustering validation
pavlovicmilena May 19, 2025
d9589e3
fix FurthestNeighborClassifier
pavlovicmilena May 20, 2025
0e9ffae
update version
pavlovicmilena May 21, 2025
d2be442
add error func to feature reports; fix column sorting
pavlovicmilena May 21, 2025
dfa7ada
update docs
pavlovicmilena May 21, 2025
3ac7e49
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena May 21, 2025
8094ea3
fix metric in ClusteringStabilityReport
pavlovicmilena May 23, 2025
a8d886e
update data import to work with new airr package
pavlovicmilena May 27, 2025
11d722c
add ShannonDiversityEncoder
pavlovicmilena May 28, 2025
f3a7f14
add ShannonDiversityEncoder docs
pavlovicmilena May 28, 2025
1fd5bd2
update repertoire clonotype summary
pavlovicmilena May 30, 2025
574e74f
add shannon diversity overview
pavlovicmilena Jun 2, 2025
36e23dd
fix shannon diversity overview when subject_id is missing
pavlovicmilena Jun 2, 2025
2d91fa6
fix axis in clonotype summary
pavlovicmilena Jun 2, 2025
8df236e
fix: don't reimport dataset if possible
pavlovicmilena Jun 2, 2025
8bb4305
export internal cv results if available
pavlovicmilena Jun 2, 2025
7a3ac2f
fix subsampling test
pavlovicmilena Jun 2, 2025
167d5d8
add warning if empty repertoire on import
pavlovicmilena Jun 2, 2025
28ac398
fix path build in ShannonDiversityOverview
pavlovicmilena Jun 2, 2025
989d034
update exp analysis tutorial
pavlovicmilena Jun 3, 2025
48903c5
add draft of gen model tutorial
pavlovicmilena Jun 3, 2025
c128c95
minor fixes in reports
pavlovicmilena Jun 3, 2025
684a42b
switch to uv for sphinx docs building
pavlovicmilena Jun 3, 2025
e2d6b55
fix sphinx action
pavlovicmilena Jun 3, 2025
f569fdc
fix sphinx action
pavlovicmilena Jun 3, 2025
4fb203f
fix sphinx action
pavlovicmilena Jun 3, 2025
0a74510
fix sphinx action
pavlovicmilena Jun 3, 2025
a839dca
fix sphinx action
pavlovicmilena Jun 3, 2025
59e49a2
enable cache for sphinx and remove pkg from DeepRC
pavlovicmilena Jun 3, 2025
3ba764c
enable cache for sphinx and remove pkg from DeepRC
pavlovicmilena Jun 3, 2025
4e47739
minor bug fixes / better error messages
pavlovicmilena Jun 3, 2025
4f16baa
minor bug fixes / better error messages
pavlovicmilena Jun 3, 2025
6b1923a
minor bug fixes in reports
pavlovicmilena Jun 4, 2025
94e2e2f
support seq abundance with sklearn
pavlovicmilena Jun 4, 2025
5c806f6
fix cv result export and pandas warning
pavlovicmilena Jun 11, 2025
d2c8665
extract signal annotation to separate module
pavlovicmilena Jun 15, 2025
2404b5d
update sig annotation test
pavlovicmilena Jun 15, 2025
5326211
add multiple methods to gen models; fix hyperparam optim for classifiers
pavlovicmilena Jun 16, 2025
c9b6448
fix test_annotate_sequences with encodings
pavlovicmilena Jun 16, 2025
d7ef0ae
bug fix in signal_annotation.py; minor fixes in gen models
pavlovicmilena Jun 16, 2025
15f0f31
bugfix
pavlovicmilena Jun 17, 2025
626f0bf
add data origin to signal annotation
pavlovicmilena Jun 17, 2025
36664be
wip: add TrueMotifsSummaryBarplot data report
Charlotte-Wurtzen Jun 25, 2025
bedfd89
fix: change path to not overwrite generated sequences when running mu…
Charlotte-Wurtzen Jun 25, 2025
bc06f64
minor update clustering reports
pavlovicmilena Jun 26, 2025
310f731
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Jun 26, 2025
e79298f
add manual split to TrainGenModelInstruction
pavlovicmilena Jun 26, 2025
f796a9a
set same color template in TrueMotifsSummaryBarplot
pavlovicmilena Jun 26, 2025
63d4d44
feat: update TrueMotifsSummaryBarplot report to stack bars by novel a…
Charlotte-Wurtzen Jul 1, 2025
f60bc5e
fix: correct class mapping in ROCCurveSummary
pavlovicmilena Jul 8, 2025
a81465d
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Jul 8, 2025
81536b6
update TrueMotifsSummaryBarplot: change label to percentage
Charlotte-Wurtzen Jul 14, 2025
2d2502d
update TrueMotifsSummaryBarplot: fix sorting to always plot train first
Charlotte-Wurtzen Jul 16, 2025
f14e16c
fix: shuffle indices when using manual data splitter
Charlotte-Wurtzen Jul 16, 2025
1cf8b5d
fix: label display in RepertoireClonotypeSummary
pavlovicmilena Jul 21, 2025
e1baed4
add confusion matrix report for single method
pavlovicmilena Jul 21, 2025
e21d134
fix: labels in RepertoireClonotypeSummary
pavlovicmilena Jul 22, 2025
d6b0b72
fix: don't fail VJGeneDistribution if label is missing
pavlovicmilena Jul 22, 2025
9194163
fix: add metadata to repertoire from metadata file on dataimport
pavlovicmilena Jul 22, 2025
2bf84d8
add support for multiclass metrics
pavlovicmilena Jul 22, 2025
92be787
add param for multiclass metrics
pavlovicmilena Jul 22, 2025
03f5038
fix confusion matrix and ClassificationMetric
pavlovicmilena Jul 24, 2025
01797ac
add optional metadata features to KmerFrequencyEncoder
pavlovicmilena Jul 25, 2025
125f414
add LogRegressionCustomPenalty classifier
pavlovicmilena Jul 25, 2025
ce8a703
fix LogRegressionCustomPenalty
pavlovicmilena Jul 25, 2025
ff0df2b
add new functions to random dataset workflow test
pavlovicmilena Jul 25, 2025
b9a4f3f
fix: plotting of ConfusionMatrix
pavlovicmilena Aug 1, 2025
7d70448
fix: remove logs, fix text
pavlovicmilena Aug 5, 2025
d352355
fix: html metrics preview for classification
pavlovicmilena Aug 5, 2025
5a953be
fix: limit sonnia cpu usage
pavlovicmilena Aug 7, 2025
e110d19
fix: also limit sonnia cpu usage in sequence generation
Charlotte-Wurtzen Aug 7, 2025
8b59f52
fix: tcrdist to use proper label names
pavlovicmilena Aug 8, 2025
fa6674c
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Aug 8, 2025
1246830
update: option for tcrdist to work on cdr3 only
pavlovicmilena Aug 8, 2025
f4ceea2
fix: text alignment in HPOptimization.html
pavlovicmilena Aug 11, 2025
a4c09c4
fix: reorganize clustering plots
pavlovicmilena Aug 11, 2025
62ee087
fix: update error message in ReflectionHandler
pavlovicmilena Aug 18, 2025
52568f3
feature: add ComponentNumberExploration report
pavlovicmilena Aug 18, 2025
277cebe
fix: resolve state name in html file name
pavlovicmilena Aug 19, 2025
f0d1944
fix: label comparison in Label.py
pavlovicmilena Aug 19, 2025
7ac861f
update: color schemes in some reports
pavlovicmilena Aug 28, 2025
f95af52
add VGeneIMGTKmerEncoder and fix tests
pavlovicmilena Sep 3, 2025
3405c5c
bugfixes for clustering & added gradient boosting
pavlovicmilena Sep 10, 2025
a4acad5
minor bugfixes: paths and class names
pavlovicmilena Sep 15, 2025
2883550
update version & add ref util warning
pavlovicmilena Sep 16, 2025
075d373
add LabelDist report
pavlovicmilena Sep 18, 2025
1a08090
update docs
pavlovicmilena Sep 18, 2025
895feec
update docs
pavlovicmilena Sep 18, 2025
6c5a345
add Dendrogram report
pavlovicmilena Sep 19, 2025
2209a05
switch to pandas for tables in html without jinja2
pavlovicmilena Sep 19, 2025
20a769c
update ChainRepertoireFilter.py
pavlovicmilena Sep 19, 2025
8f73ee1
support labels with list values in DimRed
pavlovicmilena Sep 22, 2025
8177044
add HDBSCAN
pavlovicmilena Sep 22, 2025
1da1d3b
update dev installation tutorial
pavlovicmilena Sep 23, 2025
0be62a4
update datasets docs
pavlovicmilena Sep 23, 2025
4ec0b91
add check for imported refs
pavlovicmilena Sep 23, 2025
6ab4e8d
Merge remote-tracking branch 'origin/HLA_control' into development
pavlovicmilena Sep 29, 2025
e1ad76a
add: CompositeEncoder, MetadataEncoder, GeneFrequencyEncoder, update …
pavlovicmilena Oct 1, 2025
256a22c
fix requirements
pavlovicmilena Oct 1, 2025
e975a08
fix cleanup after test
pavlovicmilena Oct 1, 2025
6d1aa08
fix CompositeEncoder+LogRegressionCustomPenalty combination
pavlovicmilena Oct 1, 2025
f02c49a
adapt DimensionalityReduction for HLA plots
pavlovicmilena Oct 3, 2025
c3f757d
support sklearn for new encoders
pavlovicmilena Oct 3, 2025
344716d
support multiple genes in SequenceMatcher; add locus to KmerFrequency…
pavlovicmilena Oct 7, 2025
d24a13f
update tutorial refs
pavlovicmilena Oct 7, 2025
b28f3d6
fix: GeneFrequencyEncoder parsing, default kmer freq normalization
pavlovicmilena Oct 8, 2025
fb654fe
fix: encoder names in LogRegressionCustomPenalty.py
pavlovicmilena Oct 8, 2025
de970e3
fix: AIRRExporter makes dataset yaml when missing (after filters)
pavlovicmilena Oct 9, 2025
f79208c
drop feature annotations from kmer freq encoding and fix matched ref …
pavlovicmilena Oct 9, 2025
85e9135
fix memmap (eg for composite encoding)
pavlovicmilena Oct 9, 2025
621326c
fix memmap: cast to float32
pavlovicmilena Oct 9, 2025
20b1872
fix class mapping in log reg custom
pavlovicmilena Oct 9, 2025
f473e9b
fix ref in docs
pavlovicmilena Oct 10, 2025
425d8db
fix ref in docs
pavlovicmilena Oct 10, 2025
5102592
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena Oct 10, 2025
4f29bf6
feat: add small changes to vae embedding sizes to match original impl…
mmamica Oct 13, 2025
de8adb8
fix highlighting in hyperparameter results
pavlovicmilena Oct 15, 2025
5bf4c9d
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Oct 15, 2025
9273f64
add tutorial on publication ready figures
pavlovicmilena Oct 22, 2025
4f5ca32
fix tests and numpy memmap issue; remove double confusion matrix
pavlovicmilena Oct 23, 2025
2672017
add json figure export
pavlovicmilena Oct 28, 2025
9685153
switch to pyproject.toml
pavlovicmilena Oct 28, 2025
de0eccb
fix pyproject.toml
pavlovicmilena Oct 28, 2025
d8e8afb
fix pyproject.toml
pavlovicmilena Oct 28, 2025
6d542b4
fix pyproject.toml
pavlovicmilena Oct 28, 2025
5029c12
fix pyproject.toml
pavlovicmilena Oct 28, 2025
0c8113f
fix pyproject.toml
pavlovicmilena Oct 28, 2025
c9694b4
Added get_locus utility function to dataset + display in HTML
LonnekeScheffer Oct 28, 2025
3713006
auto-detect locus in dataset a generative model is fitted to, allow u…
LonnekeScheffer Oct 29, 2025
0e8b811
auto-detect locus in dataset a generative model is fitted to, allow u…
LonnekeScheffer Oct 29, 2025
ff0a03d
Merge remote-tracking branch 'origin/nar_dev_2025' into development
pavlovicmilena Oct 29, 2025
5957b6c
change python version for tests and update package versions in requir…
pavlovicmilena Oct 29, 2025
c94ce02
add sentencepiece to requirements_generative_models.txt
pavlovicmilena Oct 29, 2025
5bfa0b5
try github action test with uv
pavlovicmilena Oct 29, 2025
3781826
try github action test with uv
pavlovicmilena Oct 29, 2025
7a637fe
remove scipy incompatibility in DataReshaper, add httpx to requirements
pavlovicmilena Oct 29, 2025
8ed7c70
fix outdated func call in kmer freq tests
pavlovicmilena Oct 30, 2025
a427dfe
update colors in plots; remove outdated report; allow multiple loci i…
pavlovicmilena Nov 3, 2025
11eeb36
fixes to clustering instruction runs
pavlovicmilena Nov 3, 2025
9deedad
minor fixes in HTML and AIRRExporter
pavlovicmilena Nov 4, 2025
8e429fb
update ChainRepertoireFilter to allow multiple chains
pavlovicmilena Nov 4, 2025
c21b2c2
update TrueMotifsSummaryBarplot to have different colors for differen…
pavlovicmilena Nov 7, 2025
c6ca4ac
update DimensionalityReduction to consider 15 as cont/discrete threshold
pavlovicmilena Nov 7, 2025
963b0c2
update ClusteringParser to check that settings are unique
pavlovicmilena Nov 7, 2025
772e20c
update Stability report to have different colors for different settings
pavlovicmilena Nov 7, 2025
ba83cd6
fixes: run summary reports in TrainGenModel instruction and avoid ove…
Charlotte-Wurtzen Nov 7, 2025
5ab9cba
fix: change str to pathlib.Path to run .with_suffix() in PlotlyUtil
Charlotte-Wurtzen Nov 7, 2025
992a649
Add progen (#195)
mmamica Nov 13, 2025
c9f1e43
Merge remote-tracking branch 'origin/development' into development
pavlovicmilena Nov 13, 2025
7121d4e
update version
pavlovicmilena Nov 13, 2025
64e4724
Merge remote-tracking branch 'origin/master' into development
pavlovicmilena Nov 13, 2025
4fb320f
fix tests
pavlovicmilena Nov 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion immuneML/IO/dataset_export/AIRRExporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def export_repertoire_dataset(dataset: RepertoireDataset, path: Path, omit_colum
else:

new_metadata_df = pd.read_csv(path / f"{dataset.name}.csv")
labels = {label: list(new_metadata_df[label].unique())
labels = {label: new_metadata_df[label].unique().tolist()
for label in dataset.get_label_names(refresh=True)}

dataset_yaml = RepertoireDataset.create_metadata_dict(metadata_file=path / f"{dataset.name}.csv",
Expand Down
14 changes: 14 additions & 0 deletions immuneML/config/default_params/ml_methods/progen_params.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
locus: beta
device: cpu
num_frozen_layers: 22
num_epochs: 2
learning_rate: 0.00004
fp16: False
prefix_text: '<|bos|>1'
suffix_text: '2<|eos|>'
max_new_tokens: 1024
temperature: 1
top_p: 0.9
prompt: '1'
num_gen_batches: 1
per_device_train_batch_size: 2
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
split_by_label: False
split_by_label: false
label: null
is_sequence_label: False
is_sequence_label: false
show_joint_dist: true
3 changes: 2 additions & 1 deletion immuneML/data_model/EncodedData.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class EncodedData:

def __init__(self, examples, labels: dict = None, example_ids: list = None, feature_names: list = None,
feature_annotations: pd.DataFrame = None, encoding: str = None, example_weights: list = None, info: dict = None,
dimensionality_reduced_data: np.ndarray = None):
dimensionality_reduced_data: np.ndarray = None, dim_names: list = None):

assert feature_names is None or examples.shape[1] == len(feature_names), \
(f"EncodedData: the length of feature_names ({len(feature_names)}) must match the feature dimension of the "
Expand Down Expand Up @@ -59,6 +59,7 @@ def __init__(self, examples, labels: dict = None, example_ids: list = None, feat
self.encoding = encoding
self.example_weights = example_weights
self.info = info
self.dim_names = dim_names
self.dimensionality_reduced_data = dimensionality_reduced_data

def __getstate__(self):
Expand Down
9 changes: 8 additions & 1 deletion immuneML/dsl/instruction_parsers/ClusteringParser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import copy
import inspect
import logging
from pathlib import Path
from typing import List

Expand Down Expand Up @@ -152,7 +153,13 @@ def parse_clustering_settings(key: str, instruction: dict, symbol_table: SymbolT
instruction)
settings_objs.append(setting_obj)

return settings_objs
unique_objs = list(set(settings_objs))

if len(unique_objs) < len(settings_objs):
logging.warning(f"Clustering parser: clustering_settings contains {len(settings_objs) - len(unique_objs)} "
f"duplicate settings, keep the following: {[obj.get_key() for obj in unique_objs]}")

return unique_objs


def make_setting_obj(setting, valid_encodings, valid_clusterings, valid_dim_red, symbol_table, instruction):
Expand Down
2 changes: 1 addition & 1 deletion immuneML/environment/Constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class Constants:

VERSION = "3.0.16"
VERSION = "3.0.17"

# encoding constants
FEATURE_DELIMITER = "_"
Expand Down
5 changes: 2 additions & 3 deletions immuneML/ml_methods/dim_reduction/DimRedMethod.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import abc
import logging
from abc import ABC
from typing import List

import numpy as np

from immuneML.data_model.datasets.Dataset import Dataset


class DimRedMethod:
class DimRedMethod(ABC):
"""
Dimensionality reduction methods are algorithms which can be used to reduce the dimensionality
of encoded datasets, in order to uncover and analyze patterns present in the data.
Expand All @@ -21,14 +22,12 @@ def __init__(self, name: str = None):
self.method = None
self.name = name

@abc.abstractmethod
def fit(self, dataset: Dataset = None, design_matrix: np.ndarray = None):
if dataset is None:
self.method.fit(design_matrix)
else:
self.method.fit(dataset.encoded_data.get_examples_as_np_matrix())

@abc.abstractmethod
def transform(self, dataset: Dataset = None, design_matrix: np.ndarray = None):
if dataset is None:
return self.method.transform(design_matrix)
Expand Down
Loading
Loading