Skip to content

Commit e30f5d8

Browse files
committed
logging
1 parent f3494ec commit e30f5d8

File tree

2 files changed

+62
-22
lines changed

2 files changed

+62
-22
lines changed

backend/app/api/routes/collections.py

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import inspect
22
import logging
3+
import time
34
import warnings
45
from uuid import UUID, uuid4
56
from typing import Any, List, Optional
@@ -169,58 +170,90 @@ def _backout(crud: OpenAIAssistantCrud, assistant_id: str):
169170

170171
def do_create_collection(
    session: SessionDep,
    current_user: CurrentUser,
    request: CreationRequest,
    payload: ResponsePayload,
):
    """Create the OpenAI vector store and assistant backing a collection.

    The work happens in three stages; every stage reports failure through
    the callback and returns early:

    1. Create an empty vector store.
    2. Read the requested documents, upload them to the vector store and
       create an assistant bound to it. On any error the vector store is
       deleted again.
    3. Record the assistant on the collection row identified by
       ``payload.key`` and link the uploaded documents to it. On a
       database error the assistant is rolled back via ``_backout``.

    On success a summary line (elapsed time, file count, sizes, extensions)
    is logged and the collection is sent to the callback as JSON.

    Args:
        session: Database session used by the CRUD helpers.
        current_user: User on whose behalf documents and the collection
            are accessed.
        request: Creation request; calling it with a ``DocumentCrud``
            yields the documents in batches.
        payload: Response payload; ``payload.key`` is the collection id.
    """
    # Monotonic clock: the elapsed time must not be affected by wall-clock
    # adjustments while the task runs.
    start_time = time.perf_counter()
    client = OpenAI(api_key=settings.OPENAI_API_KEY)
    if request.callback_url is None:
        callback = SilentCallback(payload)
    else:
        callback = WebHookCallback(request.callback_url, payload)

    #
    # Create the assistant and vector store
    #

    vector_store_crud = OpenAIVectorStoreCrud(client)
    try:
        vector_store = vector_store_crud.create()
    except OpenAIError as err:
        callback.fail(str(err))
        return

    storage = AmazonCloudStorage(current_user)
    document_crud = DocumentCrud(session, current_user.id)
    assistant_crud = OpenAIAssistantCrud(client)

    kwargs = dict(request.extract_super_type(AssistantOptions))
    try:
        # Materialise the batches exactly once. The request may hand back a
        # one-shot iterator; consuming it for the log metrics and then
        # passing the same (now exhausted) iterator to the vector store
        # would upload nothing.
        doc_batches = [list(batch) for batch in request(document_crud)]
        flat_docs = [doc for batch in doc_batches for doc in batch]
        doc_count = len(flat_docs)  # number of files, not number of batches
        file_exts = list(
            {doc.fname.split(".")[-1] for doc in flat_docs if "." in doc.fname}
        )
        # Storage look-ups stay inside the guarded section so a storage
        # failure still cleans up the vector store and notifies the caller.
        file_sizes_kb = [
            storage.get_file_size_kb(doc.object_store_url) for doc in flat_docs
        ]

        updates = vector_store_crud.update(vector_store.id, storage, doc_batches)
        documents = list(updates)
        assistant = assistant_crud.create(vector_store.id, **kwargs)
    except Exception as err:  # blanket to handle SQL and OpenAI errors
        logging.error(f"File Search setup error: {err} ({type(err).__name__})")
        vector_store_crud.delete(vector_store.id)
        callback.fail(str(err))
        return

    #
    # Store the results
    #

    # NOTE(review): on failure the collection row is left untouched by this
    # function; confirm whether its status should be marked as failed.
    collection_crud = CollectionCrud(session, current_user.id)
    try:
        collection = collection_crud.read_one(UUID(payload.key))
        collection.llm_service_id = assistant.id
        collection.llm_service_name = request.model
        collection.status = "Successful"
        collection.updated_at = now()

        dc_crud = DocumentCollectionCrud(session)
        dc_crud.create(collection, documents)

        collection_crud._update(collection)
    except SQLAlchemyError as err:
        _backout(assistant_crud, assistant.id)
        logging.error(f"Error during creating collection - {err}")
        callback.fail(str(err))
        return

    elapsed = time.perf_counter() - start_time
    logging.info(
        f"Collection created: {collection.id} | "
        f"Time: {elapsed}s | Files: {doc_count} |Sizes:{file_sizes_kb} KB |Types: {file_exts}"
    )

    #
    # Send back successful response
    #

    callback.success(collection.model_dump(mode="json"))
224257

225258

226259
@router.post(

backend/app/core/cloud/storage.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,10 @@ def stream(self, url: str) -> StreamingBody:
124124
return self.aws.client.get_object(**kwargs).get("Body")
125125
except ClientError as err:
126126
raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err
127+
128+
def get_file_size_kb(self, url: str) -> float:
    """Return the size of the stored object at *url* in kilobytes.

    The size is taken from the ``ContentLength`` field of a ``head_object``
    response, so the object body is not downloaded.

    Args:
        url: Object-store URL, parsed with ``SimpleStorageName.from_url``.

    Returns:
        Object size in KB (bytes / 1024), rounded to two decimal places.

    Raises:
        CloudStorageError: If the AWS client reports an error, matching
            the error handling of ``stream``.
    """
    name = SimpleStorageName.from_url(url)
    kwargs = asdict(name)
    try:
        response = self.aws.client.head_object(**kwargs)
    except ClientError as err:
        # Surface storage failures as the module's own error type rather
        # than leaking the raw client exception to callers.
        raise CloudStorageError(f'AWS Error: "{err}" ({url})') from err
    size_bytes = response["ContentLength"]
    return round(size_bytes / 1024, 2)

0 commit comments

Comments
 (0)