|
1 | 1 | import inspect |
2 | 2 | import logging |
| 3 | +import time |
3 | 4 | import warnings |
4 | 5 | from uuid import UUID, uuid4 |
5 | 6 | from typing import Any, List, Optional |
@@ -169,58 +170,90 @@ def _backout(crud: OpenAIAssistantCrud, assistant_id: str): |
169 | 170 |
|
170 | 171 | def do_create_collection( |
171 | 172 | session: SessionDep, |
172 | | - current_user: CurrentUserOrgproject, |
| 173 | + current_user: CurrentUser, |
173 | 174 | request: CreationRequest, |
174 | 175 | payload: ResponsePayload, |
175 | 176 | ): |
| 177 | + start_time = time.time() |
176 | 178 | client = OpenAI(api_key=settings.OPENAI_API_KEY) |
177 | 179 | if request.callback_url is None: |
178 | 180 | callback = SilentCallback(payload) |
179 | 181 | else: |
180 | 182 | callback = WebHookCallback(request.callback_url, payload) |
181 | 183 |
|
| 184 | + # |
| 185 | + # Create the assistant and vector store |
| 186 | + # |
| 187 | + |
182 | 188 | vector_store_crud = OpenAIVectorStoreCrud(client) |
183 | | - assistant_crud = OpenAIAssistantCrud(client) |
| 189 | + try: |
| 190 | + vector_store = vector_store_crud.create() |
| 191 | + except OpenAIError as err: |
| 192 | + callback.fail(str(err)) |
| 193 | + return |
| 194 | + |
184 | 195 | storage = AmazonCloudStorage(current_user) |
185 | 196 | document_crud = DocumentCrud(session, current_user.id) |
186 | | - collection_crud = CollectionCrud(session, current_user.id) |
| 197 | + assistant_crud = OpenAIAssistantCrud(client) |
187 | 198 |
|
188 | | - try: |
189 | | - vector_store = vector_store_crud.create() |
| 199 | + docs = request(document_crud) |
| 200 | + log_doc = list(docs) |
| 201 | + doc_count = len(log_doc) |
| 202 | + flat_docs = [doc for sublist in log_doc for doc in sublist] |
| 203 | + file_exts = list( |
| 204 | + {doc.fname.split(".")[-1] for doc in flat_docs if "." in doc.fname} |
| 205 | + ) |
190 | 206 |
|
191 | | - docs = request(document_crud) |
| 207 | + file_sizes_kb = [] |
| 208 | + for doc in flat_docs: |
| 209 | + size_kb = storage.get_file_size_kb(doc.object_store_url) |
| 210 | + file_sizes_kb.append(size_kb) |
| 211 | + |
| 212 | + kwargs = dict(request.extract_super_type(AssistantOptions)) |
| 213 | + try: |
192 | 214 | updates = vector_store_crud.update(vector_store.id, storage, docs) |
193 | 215 | documents = list(updates) |
194 | | - |
195 | | - kwargs = dict(request.extract_super_type(AssistantOptions)) |
196 | 216 | assistant = assistant_crud.create(vector_store.id, **kwargs) |
| 217 | + except Exception as err: # blanket to handle SQL and OpenAI errors |
| 218 | + logging.error(f"File Search setup error: {err} ({type(err).__name__})") |
| 219 | + vector_store_crud.delete(vector_store.id) |
| 220 | + callback.fail(str(err)) |
| 221 | + return |
| 222 | + |
| 223 | + # |
| 224 | + # Store the results |
| 225 | + # |
197 | 226 |
|
198 | | - # 3. Read and update collection with assistant info |
| 227 | + collection_crud = CollectionCrud(session, current_user.id) |
| 228 | + try: |
| 229 | + collection_crud = CollectionCrud(session, current_user.id) |
199 | 230 | collection = collection_crud.read_one(UUID(payload.key)) |
200 | 231 | collection.llm_service_id = assistant.id |
201 | 232 | collection.llm_service_name = request.model |
202 | | - collection.status = "successfull" |
| 233 | + collection.status = "Successful" |
203 | 234 | collection.updated_at = now() |
204 | 235 |
|
205 | 236 | dc_crud = DocumentCollectionCrud(session) |
206 | 237 | dc_crud.create(collection, documents) |
207 | 238 |
|
208 | 239 | collection_crud._update(collection) |
| 240 | + except SQLAlchemyError as err: |
| 241 | + _backout(assistant_crud, assistant.id) |
| 242 | + logging.error(f"[Error during creating colletion - {err}") |
| 243 | + callback.fail(str(err)) |
| 244 | + return |
209 | 245 |
|
210 | | - callback.success({"id": payload.key}) |
211 | | - except Exception as err: |
212 | | - logging.error(f"[CollectionTask] Failed: {err} ({type(err).__name__})") |
| 246 | + elapsed = time.time() - start_time |
| 247 | + logging.info( |
| 248 | + f"Collection created: {collection.id} | " |
| 249 | + f"Time: {elapsed}s | Files: {doc_count} |Sizes:{file_sizes_kb} KB |Types: {file_exts}" |
| 250 | + ) |
213 | 251 |
|
214 | | - # 4. On failure, update collection status only |
215 | | - try: |
216 | | - collection = collection_crud.read_one(UUID(payload.key)) |
217 | | - collection.status = "failed" |
218 | | - collection.updated_at = now() |
219 | | - collection_crud._update(collection) |
220 | | - except Exception as suberr: |
221 | | - logging.error(f"Failed to update collection status: {suberr}") |
| 252 | + # |
| 253 | + # Send back successful response |
| 254 | + # |
222 | 255 |
|
223 | | - callback.fail(str(err)) |
| 256 | + callback.success(collection.model_dump(mode="json")) |
224 | 257 |
|
225 | 258 |
|
226 | 259 | @router.post( |
|
0 commit comments