Skip to content

Commit 86aaff6

Browse files
authored
Merge pull request #391 from broadinstitute/development
Release 1.41.1
2 parents a412c3b + dad95b7 commit 86aaff6

File tree

5 files changed

+78
-50
lines changed

5 files changed

+78
-50
lines changed

ingest/cli_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,13 @@ def create_parser():
280280
help="Accepted values: 'pairwise' or 'rest' (default)",
281281
)
282282

283+
parser_differential_expression.add_argument(
284+
"--raw-location",
285+
required=True,
286+
help="location of raw counts. '.raw' for raw slot, "
287+
"else adata.layers key value",
288+
)
289+
283290
parser_differential_expression.add_argument(
284291
"--study-accession",
285292
required=True,

ingest/de.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ def write_de_result(adata, group, annotation, rank_key, cluster_name, extra_para
367367
clean_group = DifferentialExpression.sanitize_string(group)
368368
out_file = f'{cluster_name}--{clean_annotation}--{clean_group}--{annot_scope}--{method}.tsv'
369369
DifferentialExpression.de_logger.info(
370-
f"Writing DE output for {clean_group} vs restq"
370+
f"Writing DE output for {clean_group} vs rest"
371371
)
372372
elif de_type == "pairwise":
373373
# rank_genes_groups accepts a list. For SCP pairwise, should be a list with one item

ingest/ingest_pipeline.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -546,12 +546,25 @@ def extract_from_anndata(self):
546546
if not self.kwargs["obsm_keys"]:
547547
self.kwargs["obsm_keys"] = ["X_tsne"]
548548
# TODO (SCP-5104): perform check for successful extraction or report failure and exit
549-
for key in self.kwargs["obsm_keys"]:
550-
AnnDataIngestor.generate_cluster_header(self.anndata.adata, key)
551-
AnnDataIngestor.generate_cluster_type_declaration(
552-
self.anndata.adata, key
549+
try:
550+
for key in self.kwargs["obsm_keys"]:
551+
AnnDataIngestor.generate_cluster_header(self.anndata.adata, key)
552+
AnnDataIngestor.generate_cluster_type_declaration(
553+
self.anndata.adata, key
554+
)
555+
AnnDataIngestor.generate_cluster_body(self.anndata.adata, key)
556+
except KeyError as e:
557+
msg = f"Unable to extract cluster data from anndata file. Please check the provided obsm key, {e}."
558+
self.report_validation("failure")
559+
log_exception(
560+
IngestPipeline.dev_logger, IngestPipeline.user_logger, msg
553561
)
554-
AnnDataIngestor.generate_cluster_body(self.anndata.adata, key)
562+
return 1
563+
except Exception as e:
564+
log_exception(
565+
IngestPipeline.dev_logger, IngestPipeline.user_logger, e
566+
)
567+
return 1
555568
# process matrix data
556569
### TODO (SCP-5102, SCP-5103): how to associate "raw_count" cells to anndata file
557570
if self.kwargs.get("extract") and "processed_expression" in self.kwargs.get(

tests/test_anndata.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,17 @@ def test_set_output_filename(self):
9898
"h5ad_frag.cluster.X_Umap.tsv",
9999
)
100100

101+
def test_invalid_obsm_key(self):
102+
valid_synthetic_anndata = AnnDataIngestor(*self.synthetic_args)
103+
invalid_cluster_name = "foo"
104+
self.assertRaisesRegex(
105+
KeyError,
106+
"foo",
107+
lambda: self.anndata_ingest.generate_cluster_header(
108+
valid_synthetic_anndata.obtain_adata(), invalid_cluster_name
109+
),
110+
)
111+
101112
def test_generate_cluster_header(self):
102113
self.anndata_ingest.generate_cluster_header(
103114
self.anndata_ingest.obtain_adata(), self.cluster_name

tests/test_ingest.py

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
pytest --cov=../ingest/
3232
3333
"""
34+
3435
import unittest
3536
from unittest.mock import patch, MagicMock
3637
from test_dense import mock_load_r_files
@@ -47,7 +48,7 @@
4748
IngestPipeline,
4849
exit_pipeline,
4950
run_ingest,
50-
get_action_from_args
51+
get_action_from_args,
5152
)
5253
from expression_files.expression_files import GeneExpression
5354

@@ -102,8 +103,7 @@ def execute_ingest(args, mock_storage_client, mock_storage_blob):
102103
return_value=True,
103104
)
104105
def test_ingest_dense_matrix(self, mock_check_unique_cells):
105-
"""Ingest Pipeline should extract, transform, and load dense matrices
106-
"""
106+
"""Ingest Pipeline should extract, transform, and load dense matrices"""
107107
args = [
108108
"--study-id",
109109
"5d276a50421aa9117c982845",
@@ -132,8 +132,7 @@ def test_ingest_dense_matrix(self, mock_check_unique_cells):
132132
return_value=True,
133133
)
134134
def test_ingest_local_dense_matrix(self, mock_check_unique_cells):
135-
"""Ingest Pipeline should extract and transform local dense matrices
136-
"""
135+
"""Ingest Pipeline should extract and transform local dense matrices"""
137136

138137
args = [
139138
"--study-id",
@@ -164,7 +163,7 @@ def test_ingest_local_dense_matrix(self, mock_check_unique_cells):
164163
)
165164
def test_ingest_local_compressed_dense_matrix(self, mock_check_unique_cells):
166165
"""Ingest Pipeline should extract and transform local dense matrices
167-
from compressed file in the same manner as uncompressed file
166+
from compressed file in the same manner as uncompressed file
168167
"""
169168

170169
args = [
@@ -191,8 +190,7 @@ def test_ingest_local_compressed_dense_matrix(self, mock_check_unique_cells):
191190
self.execute_ingest(args)
192191

193192
def test_empty_dense_file(self):
194-
"""Ingest Pipeline should fail gracefully when an empty file is given
195-
"""
193+
"""Ingest Pipeline should fail gracefully when an empty file is given"""
196194

197195
args = [
198196
"--study-id",
@@ -222,8 +220,7 @@ def test_empty_dense_file(self):
222220
self.assertEqual(cm.exception.code, 1)
223221

224222
def test_empty_mtx_file(self):
225-
"""Ingest Pipeline should fail gracefully when an empty file is given
226-
"""
223+
"""Ingest Pipeline should fail gracefully when an empty file is given"""
227224

228225
args = [
229226
"--study-id",
@@ -261,8 +258,7 @@ def test_empty_mtx_file(self):
261258
return_value=True,
262259
)
263260
def test_ingest_mtx_matrix(self, mock_check_unique_cells):
264-
"""Ingest Pipeline should extract and transform MTX matrix bundles
265-
"""
261+
"""Ingest Pipeline should extract and transform MTX matrix bundles"""
266262

267263
args = [
268264
"--study-id",
@@ -296,8 +292,7 @@ def test_ingest_mtx_matrix(self, mock_check_unique_cells):
296292
return_value=True,
297293
)
298294
def test_ingest_unsorted_mtx_matrix(self, mock_check_unique_cells):
299-
"""Ingest Pipeline should extract and transform unsorted MTX matrix bundles
300-
"""
295+
"""Ingest Pipeline should extract and transform unsorted MTX matrix bundles"""
301296

302297
args = [
303298
"--study-id",
@@ -331,8 +326,7 @@ def test_ingest_unsorted_mtx_matrix(self, mock_check_unique_cells):
331326
return_value=True,
332327
)
333328
def test_ingest_zipped_mtx_matrix(self, mock_check_unique_cells):
334-
"""Ingest Pipeline should extract and transform MTX matrix bundles
335-
"""
329+
"""Ingest Pipeline should extract and transform MTX matrix bundles"""
336330

337331
args = [
338332
"--study-id",
@@ -366,8 +360,7 @@ def test_ingest_zipped_mtx_matrix(self, mock_check_unique_cells):
366360
return_value=True,
367361
)
368362
def test_remote_mtx_bundles(self, mock_check_unique_cells):
369-
"""Ingest Pipeline should handle MTX matrix files fetched from bucket
370-
"""
363+
"""Ingest Pipeline should handle MTX matrix files fetched from bucket"""
371364

372365
args = [
373366
"--study-id",
@@ -402,8 +395,7 @@ def test_remote_mtx_bundles(self, mock_check_unique_cells):
402395
return_value=True,
403396
)
404397
def test_mtx_bundle_argument_validation(self, mock_check_unique_cells):
405-
"""Omitting --gene-file and --barcode-file in MTX ingest should error
406-
"""
398+
"""Omitting --gene-file and --barcode-file in MTX ingest should error"""
407399

408400
args = [
409401
"--study-id",
@@ -452,8 +444,7 @@ def test_r_file_dense(self, mock_check_unique_cells, mock_load):
452444
self.execute_ingest(args)
453445

454446
def test_good_metadata_file(self):
455-
"""Ingest Pipeline should succeed for properly formatted metadata file
456-
"""
447+
"""Ingest Pipeline should succeed for properly formatted metadata file"""
457448
args = [
458449
"--study-id",
459450
"5d276a50421aa9117c982845",
@@ -488,7 +479,7 @@ def test_exponential_back_off_expression_file(
488479
self, mock_check_unique_cells, mock_load
489480
):
490481
"""Ingest Pipeline should not succeed if mongo cannot connect after 5
491-
reconnection tries.
482+
reconnection tries.
492483
"""
493484
args = [
494485
"--study-id",
@@ -522,8 +513,7 @@ def test_exponential_back_off_expression_file(
522513
self.assertEqual(cm.exception.code, 1)
523514

524515
def test_bad_metadata_file(self):
525-
"""Ingest Pipeline should not succeed for misformatted metadata file
526-
"""
516+
"""Ingest Pipeline should not succeed for misformatted metadata file"""
527517

528518
args = [
529519
"--study-id",
@@ -565,8 +555,7 @@ def test_bad_metadata_file_contains_coordinates(self):
565555
self.assertEqual(cm.exception.code, 1)
566556

567557
def test_good_cluster_file(self):
568-
"""Ingest Pipeline should succeed for properly formatted cluster file
569-
"""
558+
"""Ingest Pipeline should succeed for properly formatted cluster file"""
570559
args = [
571560
"--study-id",
572561
"5d276a50421aa9117c982845",
@@ -590,8 +579,7 @@ def test_good_cluster_file(self):
590579
self.assertEqual(status[0], None)
591580

592581
def test_bad_cluster_file(self):
593-
"""Ingest Pipeline should fail for misformatted cluster file
594-
"""
582+
"""Ingest Pipeline should fail for misformatted cluster file"""
595583
args = [
596584
"--study-id",
597585
"5d276a50421aa9117c982845",
@@ -611,8 +599,7 @@ def test_bad_cluster_file(self):
611599
self.assertEqual(cm.exception.code, 1)
612600

613601
def test_bad_cluster_missing_coordinate_file(self):
614-
"""Ingest Pipeline should fail for missing coordinate in cluster file
615-
"""
602+
"""Ingest Pipeline should fail for missing coordinate in cluster file"""
616603
args = [
617604
"--study-id",
618605
"5d276a50421aa9117c982845",
@@ -633,8 +620,7 @@ def test_bad_cluster_missing_coordinate_file(self):
633620

634621
@patch("ingest_pipeline.IngestPipeline.load_subsample", return_value=0)
635622
def test_subsample(self, mock_load_subsample):
636-
"""When cell values in cluster are present in cell metadata file ingest should succeed.
637-
"""
623+
"""When cell values in cluster are present in cell metadata file ingest should succeed."""
638624
args = [
639625
"--study-id",
640626
"5d276a50421aa9117c982845",
@@ -689,8 +675,7 @@ def test_get_cluster_query(self):
689675

690676
@patch("ingest_pipeline.IngestPipeline.load_subsample", return_value=0)
691677
def test_subsample_no_cell_intersection(self, mock_load_subsample):
692-
"""When cell values in cluster are not present in cell metadata file ingest should fail.
693-
"""
678+
"""When cell values in cluster are not present in cell metadata file ingest should fail."""
694679
args = [
695680
"--study-id",
696681
"5d276a50421aa9117c982845",
@@ -737,6 +722,24 @@ def test_extract_cluster_file_from_anndata(self):
737722
except:
738723
print(f"Error while deleting file : {filename}")
739724

725+
def test_invalid_obsm_key_for_anndata(self):
726+
args = [
727+
"--study-id",
728+
"5d276a50421aa9117c982845",
729+
"--study-file-id",
730+
"5dd5ae25421aa910a723a337",
731+
"ingest_anndata",
732+
"--ingest-anndata",
733+
"--extract",
734+
"['cluster']",
735+
"--anndata-file",
736+
"../tests/data/anndata/trimmed_compliant_pbmc3K.h5ad",
737+
"--obsm-keys",
738+
"['foo']",
739+
]
740+
ingest, arguments, status, status_cell_metadata = self.execute_ingest(args)
741+
self.assertEqual(status[0], 1)
742+
740743
def test_extract_metadata_file_from_anndata(self):
741744
args = [
742745
"--study-id",
@@ -857,16 +860,12 @@ def test_insert_reconnect(self):
857860
client_mock["data_arrays"].insert_many.assert_called_with(docs)
858861

859862
client_mock["data_arrays"].insert_many.side_effect = ValueError("Foo")
860-
self.assertRaises(
861-
Exception, ingest.insert_many, "data_arrays", docs
862-
)
863+
self.assertRaises(Exception, ingest.insert_many, "data_arrays", docs)
863864
client_mock.reset_mock()
864865

865866
# Test exponential back off for auto reconnect
866867
client_mock["data_arrays"].insert_many.side_effect = AutoReconnect
867-
self.assertRaises(
868-
AutoReconnect, ingest.insert_many, "data_arrays", docs
869-
)
868+
self.assertRaises(AutoReconnect, ingest.insert_many, "data_arrays", docs)
870869
self.assertEqual(client_mock["data_arrays"].insert_many.call_count, 3)
871870
client_mock.reset_mock()
872871

@@ -890,9 +889,7 @@ def raiseError(*args, **kwargs):
890889

891890
# Test exponential back off for BulkWriteError
892891
client_mock["data_arrays"].insert_many.side_effect = raiseError
893-
self.assertRaises(
894-
BulkWriteError, ingest.insert_many, "data_arrays", docs
895-
)
892+
self.assertRaises(BulkWriteError, ingest.insert_many, "data_arrays", docs)
896893
self.assertEqual(client_mock["data_arrays"].insert_many.call_count, 3)
897894

898895

0 commit comments

Comments
 (0)