diff --git a/backend/btrixcloud/basecrawls.py b/backend/btrixcloud/basecrawls.py
index 2cf09a0f85..4c2eba9efe 100644
--- a/backend/btrixcloud/basecrawls.py
+++ b/backend/btrixcloud/basecrawls.py
@@ -44,6 +44,7 @@
     UpdatedResponse,
     DeletedResponseQuota,
     CrawlSearchValuesResponse,
+    FAILED_STATES,
 )
 from .pagination import paginated_format, DEFAULT_PAGE_SIZE
 from .utils import dt_now, get_origin, date_to_str
@@ -611,6 +612,15 @@ async def bulk_presigned_files(
 
         return resources, pages_optimized
 
+    async def validate_all_crawls_successful(
+        self, crawl_ids: List[str], org: Organization
+    ):
+        """Validate that crawls in list exist and did not fail or else raise exception"""
+        for crawl_id in crawl_ids:
+            crawl = await self.get_base_crawl(crawl_id, org)
+            if crawl.state in FAILED_STATES:
+                raise HTTPException(status_code=400, detail="invalid_failed_crawl")
+
     async def add_to_collection(
         self, crawl_ids: List[str], collection_id: UUID, org: Organization
     ):
diff --git a/backend/btrixcloud/colls.py b/backend/btrixcloud/colls.py
index e232332bec..8a58610006 100644
--- a/backend/btrixcloud/colls.py
+++ b/backend/btrixcloud/colls.py
@@ -121,9 +121,11 @@ async def init_index(self):
             [("oid", pymongo.ASCENDING), ("description", pymongo.ASCENDING)]
         )
 
-    async def add_collection(self, oid: UUID, coll_in: CollIn):
+    async def add_collection(self, org: Organization, coll_in: CollIn):
         """Add new collection"""
         crawl_ids = coll_in.crawlIds if coll_in.crawlIds else []
+        await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org)
+
         coll_id = uuid4()
         created = dt_now()
@@ -131,7 +133,7 @@ async def add_collection(self, oid: UUID, coll_in: CollIn):
 
         coll = Collection(
             id=coll_id,
-            oid=oid,
+            oid=org.id,
             name=coll_in.name,
             slug=slug,
             description=coll_in.description,
@@ -144,7 +146,6 @@ async def add_collection(self, oid: UUID, coll_in: CollIn):
         )
         try:
             await self.collections.insert_one(coll.to_dict())
-            org = await self.orgs.get_org_by_id(oid)
             await self.clear_org_previous_slugs_matching_slug(slug, org)
 
             if crawl_ids:
@@ -229,7 +230,7 @@ async def add_crawls_to_collection(
         headers: Optional[dict] = None,
     ) -> CollOut:
         """Add crawls to collection"""
-        await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
+        await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org)
 
         modified = dt_now()
         result = await self.collections.find_one_and_update(
@@ -240,6 +241,8 @@ async def add_crawls_to_collection(
         if not result:
             raise HTTPException(status_code=404, detail="collection_not_found")
 
+        await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
+
         await self.update_collection_counts_and_tags(coll_id)
         await self.update_collection_dates(coll_id, org.id)
 
@@ -1019,7 +1022,7 @@ def init_collections_api(
     async def add_collection(
         new_coll: CollIn, org: Organization = Depends(org_crawl_dep)
     ):
-        return await colls.add_collection(org.id, new_coll)
+        return await colls.add_collection(org, new_coll)
 
     @app.get(
         "/orgs/{oid}/collections",
diff --git a/backend/test/conftest.py b/backend/test/conftest.py
index 369afd4ed6..76daeb4569 100644
--- a/backend/test/conftest.py
+++ b/backend/test/conftest.py
@@ -563,7 +563,7 @@ def custom_behaviors_crawl_id(admin_auth_headers, default_org_id):
 def canceled_crawl_id(admin_auth_headers, default_org_id):
     crawl_data = {
         "runNow": True,
-        "name": "Canceled crawl",
+        "name": "Canceled Crawl",
         "tags": ["canceled"],
         "config": {
             "seeds": [{"url": "https://old.webrecorder.net/"}],
diff --git a/backend/test/test_collections.py b/backend/test/test_collections.py
index dd28695a69..fee0bc3d49 100644
--- a/backend/test/test_collections.py
+++ b/backend/test/test_collections.py
@@ -1762,6 +1762,33 @@ def test_get_public_collection_slug_redirect(admin_auth_headers, default_org_id)
     assert r.status_code == 404
 
 
+def test_create_collection_with_failed_crawl(
+    admin_auth_headers, default_org_id, canceled_crawl_id
+):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections",
+        headers=admin_auth_headers,
+        json={
+            "crawlIds": [canceled_crawl_id],
+            "name": "Should get rejected",
+        },
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_failed_crawl"
+
+
+def test_add_failed_crawl_to_collection(
+    admin_auth_headers, default_org_id, canceled_crawl_id
+):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_coll_id}/add",
+        json={"crawlIds": [canceled_crawl_id]},
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_failed_crawl"
+
+
 def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
     # Delete second collection
     r = requests.delete(
diff --git a/backend/test/test_crawl_config_search_values.py b/backend/test/test_crawl_config_search_values.py
index 1ebd19bed6..d890128208 100644
--- a/backend/test/test_crawl_config_search_values.py
+++ b/backend/test/test_crawl_config_search_values.py
@@ -44,7 +44,7 @@ def test_get_search_values_1(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, "Admin Test Crawl", "Crawler User Test Crawl"]
+        [NAME_1, "Admin Test Crawl", "Canceled Crawl", "Crawler User Test Crawl"]
     )
     assert sorted(data["descriptions"]) == sorted(
         ["Admin Test Crawl description", "crawler test crawl", DESCRIPTION_1]
@@ -74,7 +74,13 @@ def test_get_search_values_2(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, NAME_2, "Admin Test Crawl", "Crawler User Test Crawl"]
+        [
+            NAME_1,
+            NAME_2,
+            "Admin Test Crawl",
+            "Canceled Crawl",
+            "Crawler User Test Crawl",
+        ]
     )
     assert sorted(data["descriptions"]) == sorted(
         [
@@ -111,7 +117,13 @@ def test_get_search_values_3(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, NAME_2, "Admin Test Crawl", "Crawler User Test Crawl"]
+        [
+            NAME_1,
+            NAME_2,
+            "Admin Test Crawl",
+            "Canceled Crawl",
+            "Crawler User Test Crawl",
+        ]
     )
     assert sorted(data["descriptions"]) == sorted(
         [
diff --git a/backend/test/test_crawl_config_tags.py b/backend/test/test_crawl_config_tags.py
index e611f39cbb..5e33abd17b 100644
--- a/backend/test/test_crawl_config_tags.py
+++ b/backend/test/test_crawl_config_tags.py
@@ -47,7 +47,7 @@ def test_get_config_by_tag_1(admin_auth_headers, default_org_id):
         headers=admin_auth_headers,
     )
     data = r.json()
-    assert sorted(data) == ["tag-1", "tag-2", "wr-test-1", "wr-test-2"]
+    assert sorted(data) == ["canceled", "tag-1", "tag-2", "wr-test-1", "wr-test-2"]
 
 
 def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id):
@@ -59,6 +59,7 @@ def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id):
     assert data == {
         "tags": [
             {"tag": "wr-test-2", "count": 2},
+            {"tag": "canceled", "count": 1},
             {"tag": "tag-1", "count": 1},
             {"tag": "tag-2", "count": 1},
             {"tag": "wr-test-1", "count": 1},
@@ -91,6 +92,7 @@ def test_get_config_by_tag_2(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data) == [
+        "canceled",
         "tag-0",
         "tag-1",
         "tag-2",
@@ -109,6 +111,7 @@ def test_get_config_by_tag_counts_2(admin_auth_headers, default_org_id):
     assert data == {
         "tags": [
             {"tag": "wr-test-2", "count": 2},
+            {"tag": "canceled", "count": 1},
             {"tag": "tag-0", "count": 1},
             {"tag": "tag-1", "count": 1},
             {"tag": "tag-2", "count": 1},
diff --git a/backend/test/test_filter_sort_results.py b/backend/test/test_filter_sort_results.py
index 69d21ef315..631f0bfac1 100644
--- a/backend/test/test_filter_sort_results.py
+++ b/backend/test/test_filter_sort_results.py
@@ -102,8 +102,8 @@ def test_ensure_crawl_and_admin_user_crawls(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls",
         headers=crawler_auth_headers,
     )
-    assert len(r.json()["items"]) == 2
-    assert r.json()["total"] == 2
+    assert len(r.json()["items"]) == 3
+    assert r.json()["total"] == 3
 
 
 def test_get_crawl_job_by_user(
@@ -212,9 +212,9 @@ def test_sort_crawls(
         headers=crawler_auth_headers,
     )
     data = r.json()
-    assert data["total"] == 2
+    assert data["total"] == 3
     items = data["items"]
-    assert len(items) == 2
+    assert len(items) == 3
 
     last_created = None
     for crawl in items:
@@ -362,9 +362,9 @@ def test_sort_crawl_configs(
         headers=crawler_auth_headers,
    )
     data = r.json()
-    assert data["total"] == 16
+    assert data["total"] == 17
     items = data["items"]
-    assert len(items) == 16
+    assert len(items) == 17
 
     last_created = None
     for config in items:
diff --git a/backend/test/test_uploads.py b/backend/test/test_uploads.py
index cc23dbb11c..4e3b20398b 100644
--- a/backend/test/test_uploads.py
+++ b/backend/test/test_uploads.py
@@ -592,7 +592,7 @@ def test_get_all_crawls_by_first_seed(
     )
     assert r.status_code == 200
     data = r.json()
-    assert data["total"] == 5
+    assert data["total"] == 6
 
     for item in data["items"]:
         assert item["firstSeed"] == first_seed
@@ -607,7 +607,7 @@ def test_get_all_crawls_by_type(
     )
     assert r.status_code == 200
     data = r.json()
-    assert data["total"] == 6
+    assert data["total"] == 7
 
     for item in data["items"]:
         assert item["type"] == "crawl"
@@ -823,9 +823,10 @@ def test_all_crawls_search_values(
     assert r.status_code == 200
     data = r.json()
 
-    assert len(data["names"]) == 8
+    assert len(data["names"]) == 9
     expected_names = [
         "Crawler User Test Crawl",
+        "Canceled Crawl",
         "Custom Behavior Logs",
         "My Upload Updated",
         "test2.wacz",
@@ -849,10 +850,11 @@ def test_all_crawls_search_values(
     assert r.status_code == 200
     data = r.json()
 
-    assert len(data["names"]) == 5
+    assert len(data["names"]) == 6
     expected_names = [
         "Admin Test Crawl",
         "All Crawls Test Crawl",
+        "Canceled Crawl",
         "Crawler User Crawl for Testing QA",
         "Crawler User Test Crawl",
         "Custom Behavior Logs",
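
For reference, a minimal sketch (not part of the diff) of how an API client might handle the new validation: per the tests above, adding a failed or canceled crawl to a collection is now rejected with HTTP 400 and detail "invalid_failed_crawl". The base URL, org/collection IDs, and token below are placeholder assumptions, not values from this change.

import requests

API_PREFIX = "https://btrix.example.com/api"  # placeholder deployment URL (assumption)
ORG_ID = "<org-uuid>"                          # placeholder org ID
COLL_ID = "<collection-uuid>"                  # placeholder collection ID
HEADERS = {"Authorization": "Bearer <token>"}  # placeholder auth header

# Attempt to add crawls to an existing collection
r = requests.post(
    f"{API_PREFIX}/orgs/{ORG_ID}/collections/{COLL_ID}/add",
    json={"crawlIds": ["<crawl-id>"]},
    headers=HEADERS,
)

if r.status_code == 400 and r.json().get("detail") == "invalid_failed_crawl":
    # At least one crawl is in a failed state (e.g. canceled); the collection
    # is left unmodified because validation now runs before the update.
    print("cannot add failed crawls to this collection")
else:
    r.raise_for_status()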