10 changes: 10 additions & 0 deletions backend/btrixcloud/basecrawls.py
@@ -44,6 +44,7 @@
     UpdatedResponse,
     DeletedResponseQuota,
     CrawlSearchValuesResponse,
+    FAILED_STATES,
 )
 from .pagination import paginated_format, DEFAULT_PAGE_SIZE
 from .utils import dt_now, get_origin, date_to_str
@@ -611,6 +612,15 @@ async def bulk_presigned_files(
 
         return resources, pages_optimized
 
+    async def validate_all_crawls_successful(
+        self, crawl_ids: List[str], org: Organization
+    ):
+        """Validate that crawls in list exist and did not fail or else raise exception"""
+        for crawl_id in crawl_ids:
+            crawl = await self.get_base_crawl(crawl_id, org)
+            if crawl.state in FAILED_STATES:
+                raise HTTPException(status_code=400, detail="invalid_failed_crawl")
+
     async def add_to_collection(
         self, crawl_ids: List[str], collection_id: UUID, org: Organization
     ):
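
Note for reviewers: a self-contained sketch of the new helper's contract follows. The contents of FAILED_STATES, the Crawl stub, and the 404 behavior of get_base_crawl are assumptions for illustration, not taken verbatim from this PR.

# Hypothetical, stubbed illustration of validate_all_crawls_successful's
# contract; FAILED_STATES contents and the 404 from get_base_crawl are
# assumptions, not part of this diff.
import asyncio
from dataclasses import dataclass
from fastapi import HTTPException

FAILED_STATES = ("canceled", "failed")  # assumed terminal failure states

@dataclass
class Crawl:
    id: str
    state: str

CRAWLS = {"c1": Crawl("c1", "complete"), "c2": Crawl("c2", "canceled")}

async def get_base_crawl(crawl_id: str) -> Crawl:
    # Stand-in for BaseCrawlOps.get_base_crawl: unknown ids raise a 404,
    # which is how the "exist" half of the validation is enforced.
    try:
        return CRAWLS[crawl_id]
    except KeyError as exc:
        raise HTTPException(status_code=404, detail="crawl_not_found") from exc

async def validate_all_crawls_successful(crawl_ids: list[str]) -> None:
    for crawl_id in crawl_ids:
        crawl = await get_base_crawl(crawl_id)
        if crawl.state in FAILED_STATES:
            raise HTTPException(status_code=400, detail="invalid_failed_crawl")

async def main() -> None:
    await validate_all_crawls_successful(["c1"])  # passes silently
    try:
        await validate_all_crawls_successful(["c1", "c2"])
    except HTTPException as exc:
        print(exc.status_code, exc.detail)  # 400 invalid_failed_crawl

asyncio.run(main())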
13 changes: 8 additions & 5 deletions backend/btrixcloud/colls.py
@@ -121,17 +121,19 @@ async def init_index(self):
             [("oid", pymongo.ASCENDING), ("description", pymongo.ASCENDING)]
         )
 
-    async def add_collection(self, oid: UUID, coll_in: CollIn):
+    async def add_collection(self, org: Organization, coll_in: CollIn):
         """Add new collection"""
         crawl_ids = coll_in.crawlIds if coll_in.crawlIds else []
+        await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org)
+
         coll_id = uuid4()
         created = dt_now()
 
         slug = coll_in.slug or slug_from_name(coll_in.name)
 
         coll = Collection(
             id=coll_id,
-            oid=oid,
+            oid=org.id,
             name=coll_in.name,
             slug=slug,
             description=coll_in.description,
@@ -144,7 +146,6 @@ async def add_collection(self, org: Organization, coll_in: CollIn):
         )
         try:
             await self.collections.insert_one(coll.to_dict())
-            org = await self.orgs.get_org_by_id(oid)
             await self.clear_org_previous_slugs_matching_slug(slug, org)
 
             if crawl_ids:
@@ -229,7 +230,7 @@ async def add_crawls_to_collection(
         headers: Optional[dict] = None,
     ) -> CollOut:
         """Add crawls to collection"""
-        await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
+        await self.crawl_ops.validate_all_crawls_successful(crawl_ids, org)
 
         modified = dt_now()
         result = await self.collections.find_one_and_update(
@@ -240,6 +241,8 @@
         if not result:
             raise HTTPException(status_code=404, detail="collection_not_found")
 
+        await self.crawl_ops.add_to_collection(crawl_ids, coll_id, org)
+
         await self.update_collection_counts_and_tags(coll_id)
         await self.update_collection_dates(coll_id, org.id)
 
@@ -1019,7 +1022,7 @@ def init_collections_api(
     async def add_collection(
         new_coll: CollIn, org: Organization = Depends(org_crawl_dep)
     ):
-        return await colls.add_collection(org.id, new_coll)
+        return await colls.add_collection(org, new_coll)
 
     @app.get(
         "/orgs/{oid}/collections",
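
Note for reviewers: the ordering in add_crawls_to_collection matters — validation now runs before any write, and crawls are attached only after the collection lookup succeeds, so a rejected request leaves no partial state. A minimal stubbed sketch of that flow (the in-memory dicts below are assumptions standing in for MongoDB):

# Stubbed sketch of the new ordering in add_crawls_to_collection; the
# in-memory "database" is an assumption for illustration only.
import asyncio
from fastapi import HTTPException

COLLECTIONS = {"coll-1": {"crawl_ids": []}}
CRAWL_STATES = {"good-crawl": "complete", "bad-crawl": "canceled"}

async def validate_all_crawls_successful(crawl_ids):
    # 1. Validate first: a failed crawl aborts before anything is written.
    for crawl_id in crawl_ids:
        if CRAWL_STATES.get(crawl_id) == "canceled":
            raise HTTPException(status_code=400, detail="invalid_failed_crawl")

async def add_crawls_to_collection(crawl_ids, coll_id):
    await validate_all_crawls_successful(crawl_ids)
    # 2. Resolve the collection: an unknown id 404s before crawls are tagged.
    coll = COLLECTIONS.get(coll_id)
    if not coll:
        raise HTTPException(status_code=404, detail="collection_not_found")
    # 3. Only now attach the crawls (moved below the lookup in this PR).
    coll["crawl_ids"].extend(crawl_ids)
    return coll

print(asyncio.run(add_crawls_to_collection(["good-crawl"], "coll-1")))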
2 changes: 1 addition & 1 deletion backend/test/conftest.py
@@ -563,7 +563,7 @@ def custom_behaviors_crawl_id(admin_auth_headers, default_org_id):
 def canceled_crawl_id(admin_auth_headers, default_org_id):
     crawl_data = {
         "runNow": True,
-        "name": "Canceled crawl",
+        "name": "Canceled Crawl",
         "tags": ["canceled"],
         "config": {
             "seeds": [{"url": "https://old.webrecorder.net/"}],
27 changes: 27 additions & 0 deletions backend/test/test_collections.py
@@ -1762,6 +1762,33 @@ def test_get_public_collection_slug_redirect(admin_auth_headers, default_org_id)
     assert r.status_code == 404
 
 
+def test_create_collection_with_failed_crawl(
+    admin_auth_headers, default_org_id, canceled_crawl_id
+):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections",
+        headers=admin_auth_headers,
+        json={
+            "crawlIds": [canceled_crawl_id],
+            "name": "Should get rejected",
+        },
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_failed_crawl"
+
+
+def test_add_failed_crawl_to_collection(
+    admin_auth_headers, default_org_id, canceled_crawl_id
+):
+    r = requests.post(
+        f"{API_PREFIX}/orgs/{default_org_id}/collections/{_second_coll_id}/add",
+        json={"crawlIds": [canceled_crawl_id]},
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 400
+    assert r.json()["detail"] == "invalid_failed_crawl"
+
+
 def test_delete_collection(crawler_auth_headers, default_org_id, crawler_crawl_id):
     # Delete second collection
     r = requests.delete(
18 changes: 15 additions & 3 deletions backend/test/test_crawl_config_search_values.py
@@ -44,7 +44,7 @@ def test_get_search_values_1(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, "Admin Test Crawl", "Crawler User Test Crawl"]
+        [NAME_1, "Admin Test Crawl", "Canceled Crawl", "Crawler User Test Crawl"]
     )
     assert sorted(data["descriptions"]) == sorted(
         ["Admin Test Crawl description", "crawler test crawl", DESCRIPTION_1]
@@ -74,7 +74,13 @@ def test_get_search_values_2(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, NAME_2, "Admin Test Crawl", "Crawler User Test Crawl"]
+        [
+            NAME_1,
+            NAME_2,
+            "Admin Test Crawl",
+            "Canceled Crawl",
+            "Crawler User Test Crawl",
+        ]
     )
     assert sorted(data["descriptions"]) == sorted(
         [
@@ -111,7 +117,13 @@ def test_get_search_values_3(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data["names"]) == sorted(
-        [NAME_1, NAME_2, "Admin Test Crawl", "Crawler User Test Crawl"]
+        [
+            NAME_1,
+            NAME_2,
+            "Admin Test Crawl",
+            "Canceled Crawl",
+            "Crawler User Test Crawl",
+        ]
     )
     assert sorted(data["descriptions"]) == sorted(
         [
5 changes: 4 additions & 1 deletion backend/test/test_crawl_config_tags.py
@@ -47,7 +47,7 @@ def test_get_config_by_tag_1(admin_auth_headers, default_org_id):
         headers=admin_auth_headers,
     )
     data = r.json()
-    assert sorted(data) == ["tag-1", "tag-2", "wr-test-1", "wr-test-2"]
+    assert sorted(data) == ["canceled", "tag-1", "tag-2", "wr-test-1", "wr-test-2"]
 
 
 def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id):
@@ -59,6 +59,7 @@ def test_get_config_by_tag_counts_1(admin_auth_headers, default_org_id):
     assert data == {
         "tags": [
             {"tag": "wr-test-2", "count": 2},
+            {"tag": "canceled", "count": 1},
             {"tag": "tag-1", "count": 1},
             {"tag": "tag-2", "count": 1},
             {"tag": "wr-test-1", "count": 1},
@@ -91,6 +92,7 @@ def test_get_config_by_tag_2(admin_auth_headers, default_org_id):
     )
     data = r.json()
     assert sorted(data) == [
+        "canceled",
         "tag-0",
         "tag-1",
         "tag-2",
@@ -109,6 +111,7 @@ def test_get_config_by_tag_counts_2(admin_auth_headers, default_org_id):
     assert data == {
         "tags": [
             {"tag": "wr-test-2", "count": 2},
+            {"tag": "canceled", "count": 1},
             {"tag": "tag-0", "count": 1},
             {"tag": "tag-1", "count": 1},
             {"tag": "tag-2", "count": 1},
12 changes: 6 additions & 6 deletions backend/test/test_filter_sort_results.py
@@ -102,8 +102,8 @@ def test_ensure_crawl_and_admin_user_crawls(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls",
         headers=crawler_auth_headers,
     )
-    assert len(r.json()["items"]) == 2
-    assert r.json()["total"] == 2
+    assert len(r.json()["items"]) == 3
+    assert r.json()["total"] == 3
 
 
 def test_get_crawl_job_by_user(
@@ -212,9 +212,9 @@ def test_sort_crawls(
         headers=crawler_auth_headers,
     )
     data = r.json()
-    assert data["total"] == 2
+    assert data["total"] == 3
     items = data["items"]
-    assert len(items) == 2
+    assert len(items) == 3
 
     last_created = None
     for crawl in items:
@@ -362,9 +362,9 @@ def test_sort_crawl_configs(
         headers=crawler_auth_headers,
    )
     data = r.json()
-    assert data["total"] == 16
+    assert data["total"] == 17
     items = data["items"]
-    assert len(items) == 16
+    assert len(items) == 17
 
     last_created = None
     for config in items:
10 changes: 6 additions & 4 deletions backend/test/test_uploads.py
@@ -592,7 +592,7 @@ def test_get_all_crawls_by_first_seed(
     )
     assert r.status_code == 200
     data = r.json()
-    assert data["total"] == 5
+    assert data["total"] == 6
     for item in data["items"]:
         assert item["firstSeed"] == first_seed
 
@@ -607,7 +607,7 @@ def test_get_all_crawls_by_type(
     )
     assert r.status_code == 200
     data = r.json()
-    assert data["total"] == 6
+    assert data["total"] == 7
     for item in data["items"]:
         assert item["type"] == "crawl"
 
@@ -823,9 +823,10 @@ def test_all_crawls_search_values(
     assert r.status_code == 200
     data = r.json()
 
-    assert len(data["names"]) == 8
+    assert len(data["names"]) == 9
     expected_names = [
         "Crawler User Test Crawl",
+        "Canceled Crawl",
         "Custom Behavior Logs",
         "My Upload Updated",
         "test2.wacz",
@@ -849,10 +850,11 @@ def test_all_crawls_search_values(
     assert r.status_code == 200
     data = r.json()
 
-    assert len(data["names"]) == 5
+    assert len(data["names"]) == 6
     expected_names = [
         "Admin Test Crawl",
         "All Crawls Test Crawl",
+        "Canceled Crawl",
         "Crawler User Crawl for Testing QA",
         "Crawler User Test Crawl",
         "Custom Behavior Logs",