diff --git a/anonyfiles_api/routers/anonymization.py b/anonyfiles_api/routers/anonymization.py index 98ae486..8ad43bb 100644 --- a/anonyfiles_api/routers/anonymization.py +++ b/anonyfiles_api/routers/anonymization.py @@ -37,6 +37,18 @@ async def _iter_uploadfile_chunks(upload_file: UploadFile, chunk_size: int = 102 def _prepare_engine_options(config_options: dict, custom_rules: Optional[list]) -> Dict[str, Any]: + """Create options for :class:`AnonyfilesEngine` from the request payload. + + Args: + config_options: Dictionary of anonymization options coming from the + client. + custom_rules: Optional list of custom replacement rules. + + Returns: + A dictionary with keys ``exclude_entities_cli`` and + ``custom_replacement_rules`` ready to be passed to the engine. + """ + exclude_entities = [] if not config_options.get("anonymizePersons", True): exclude_entities.append("PER") if not config_options.get("anonymizeLocations", True): exclude_entities.append("LOC") @@ -51,20 +63,39 @@ def _prepare_engine_options(config_options: dict, custom_rules: Optional[list]) def _prepare_processor_kwargs(input_path: Path, has_header: Optional[bool]) -> Dict[str, Any]: + """Build keyword arguments for the engine processor. + + Args: + input_path: Path to the uploaded file. + has_header: Optional flag specifying whether a CSV file contains a + header row. + + Returns: + Dictionary of parameters to forward to the engine processing function. + """ + processor_kwargs = {} if input_path.suffix.lower() == ".csv" and has_header is not None: - processor_kwargs['has_header'] = has_header + processor_kwargs["has_header"] = has_header return processor_kwargs def _execute_engine_anonymization( engine: AnonyfilesEngine, input_path: Path, output_path: Path, log_entities_path: Path, mapping_output_path: Path, processor_kwargs: dict ) -> Dict[str, Any]: - logger.info(f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine.") + """Run the anonymization engine synchronously.""" + logger.info( + f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine." + ) return engine.anonymize( - input_path=input_path, output_path=output_path, entities=None, - dry_run=False, log_entities_path=log_entities_path, - mapping_output_path=mapping_output_path, **processor_kwargs) + input_path=input_path, + output_path=output_path, + entities=None, + dry_run=False, + log_entities_path=log_entities_path, + mapping_output_path=mapping_output_path, + **processor_kwargs, + ) async def _execute_engine_anonymization_async( engine: AnonyfilesEngine, @@ -74,7 +105,10 @@ async def _execute_engine_anonymization_async( mapping_output_path: Path, processor_kwargs: dict, ) -> Dict[str, Any]: - logger.info(f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine (async).") + """Run the anonymization engine asynchronously.""" + logger.info( + f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine (async)." + ) return await engine.anonymize_async( input_path=input_path, output_path=output_path, @@ -93,6 +127,18 @@ def _process_engine_result( mapping_output_path: Path, log_entities_path: Path ) -> None: + """Persist engine results and log the final status. + + Args: + current_job: Job instance being processed. + engine_result: Result dictionary returned by the engine. + input_path: Path of the original uploaded file. + output_path: Path to the anonymized output file. + mapping_output_path: Path to the mapping CSV file. + log_entities_path: Path to the entity log file. + """ + + engine_status_reported = engine_result.get("status") engine_status_reported = engine_result.get("status") engine_error_message = engine_result.get("error") @@ -135,7 +181,21 @@ def _handle_job_error( mapping_output_path: Optional[Path] = None, log_entities_path: Optional[Path] = None ) -> None: - logger.error(f"Tâche {current_job.job_id}: {error_context} - {e}", exc_info=True) + """Log an error and update the job status accordingly. + + Args: + current_job: Job instance concerned by the error. + e: The exception that was raised. + error_context: Contextual information about where the error happened. + input_path: Path to the original input file. + output_path: Optional path to the produced output file. + mapping_output_path: Optional path to the mapping CSV. + log_entities_path: Optional path to the entity log file. + """ + + logger.error( + f"Tâche {current_job.job_id}: {error_context} - {e}", exc_info=True + ) if isinstance(e, FileNotFoundError): error_message = f"Fichier non trouvé: {getattr(e, 'filename', 'N/A')}" elif isinstance(e, PermissionError): error_message = f"Erreur de permission: {getattr(e, 'strerror', 'N/A')} sur {getattr(e, 'filename', 'N/A')}" @@ -165,6 +225,17 @@ def run_anonymization_job_sync( custom_rules: Optional[list], passed_base_config: Dict[str, Any] ): + """Execute an anonymization job in a background thread. + + Args: + job_id: Identifier for the job directory. + input_path: Path to the uploaded file. + config_options: Parsed anonymization options. + has_header: Optional CSV header flag. + custom_rules: Optional list of user provided replacement rules. + passed_base_config: Base configuration copied from application state. + """ + set_job_id(job_id) current_job = Job(job_id) output_path: Optional[Path] = None @@ -223,7 +294,22 @@ async def anonymize_file_endpoint( custom_replacement_rules: Optional[str] = Form(None), file_type: Optional[str] = Form(None), has_header: Optional[str] = Form(None) -): +): + """Handle file upload and start an anonymization job. + + Args: + request: Current request object used to access application state. + background_tasks: FastAPI background task manager. + file: Uploaded file to anonymize. + config_options: JSON string with anonymization options. + custom_replacement_rules: Optional JSON list of replacement rules. + file_type: Optional hint about the uploaded file type. + has_header: Optional flag indicating if a CSV has a header row. + + Returns: + A dictionary containing the job ID and its initial status. + """ + job_id = str(uuid.uuid4()) set_job_id(job_id) current_job = Job(job_id) @@ -304,6 +390,15 @@ async def anonymize_file_endpoint( @router.get("/anonymize_status/{job_id}", tags=["Anonymisation"]) async def anonymize_status_endpoint(job_id: uuid.UUID): + """Return the status and, when finished, the result files for a job. + + Args: + job_id: Identifier of the job to query. + + Returns: + A JSON payload describing the current status and optionally the + anonymization results. + """ job_id_str = str(job_id) set_job_id(job_id_str) current_job = Job(job_id_str) diff --git a/anonyfiles_api/routers/deanonymization.py b/anonyfiles_api/routers/deanonymization.py index 13341a8..cf16eca 100644 --- a/anonyfiles_api/routers/deanonymization.py +++ b/anonyfiles_api/routers/deanonymization.py @@ -48,7 +48,15 @@ def run_deanonymization_job_sync( permissive: bool, # Si BASE_CONFIG était nécessaire, il faudrait l'ajouter ici : # passed_base_config: Optional[Dict[str, Any]] = None -): +) -> None: + """Execute the deanonymization process for a given job. + + Args: + job_id: Identifier for the job directory. + input_path: Path to the anonymized file to restore. + mapping_path: Path to the mapping CSV used for restoration. + permissive: Whether to continue when encountering mapping issues. + """ set_job_id(job_id) current_job = Job(job_id) run_dir = current_job.job_dir @@ -163,7 +171,19 @@ async def deanonymize_file_endpoint( file: UploadFile = File(...), mapping: UploadFile = File(...), permissive: bool = Form(False) -): +): + """Upload files and trigger a deanonymization job. + + Args: + request: Incoming request to access application state if needed. + background_tasks: FastAPI background task manager. + file: The anonymized file to restore. + mapping: Mapping CSV file. + permissive: Whether the engine should ignore missing mapping entries. + + Returns: + A dictionary containing the created job ID and its initial status. + """ job_id = str(uuid.uuid4()) set_job_id(job_id) current_job = Job(job_id) @@ -231,6 +251,15 @@ async def deanonymize_file_endpoint( @router.get("/deanonymize_status/{job_id}", tags=["Désanonymisation"]) async def get_deanonymize_status(job_id: str): + """Return the status and results for a deanonymization job. + + Args: + job_id: Identifier of the job to inspect. + + Returns: + A JSON payload describing the job state and, if available, the restored + text and report. + """ set_job_id(job_id) current_job = Job(job_id) diff --git a/anonyfiles_api/routers/files.py b/anonyfiles_api/routers/files.py index 42e9303..ca894f2 100644 --- a/anonyfiles_api/routers/files.py +++ b/anonyfiles_api/routers/files.py @@ -18,6 +18,16 @@ @router.get("/files/{job_id}/{file_key}", tags=["Fichiers"]) async def get_file_endpoint(job_id: uuid.UUID, file_key: str, as_attachment: bool = False): + """Serve a result file for a given job. + + Args: + job_id: Identifier of the job directory. + file_key: Type of file to retrieve (output, mapping, log_entities, audit_log). + as_attachment: If ``True``, force download rather than inline display. + + Returns: + A :class:`FileResponse` with the requested file. + """ job_id_str = str(job_id) set_job_id(job_id_str) current_job = Job(job_id_str) diff --git a/anonyfiles_api/routers/health.py b/anonyfiles_api/routers/health.py index 124ba82..786f4c1 100644 --- a/anonyfiles_api/routers/health.py +++ b/anonyfiles_api/routers/health.py @@ -4,4 +4,9 @@ @router.get("/health", tags=["Health"]) async def health() -> dict: + """Simple health-check endpoint. + + Returns: + ``{"status": "ok"}`` if the service is running. + """ return {"status": "ok"} diff --git a/anonyfiles_api/routers/jobs.py b/anonyfiles_api/routers/jobs.py index 6ede2d5..b85a561 100644 --- a/anonyfiles_api/routers/jobs.py +++ b/anonyfiles_api/routers/jobs.py @@ -15,6 +15,14 @@ @router.delete("/jobs/{job_id}", status_code=status.HTTP_204_NO_CONTENT, tags=["Tâches"]) async def delete_job_endpoint(job_id: uuid.UUID): # job_id peut être str aussi + """Delete all files related to a job. + + Args: + job_id: Identifier of the job directory to remove. + + Returns: + An empty ``204 NO CONTENT`` response when successful. + """ job_id_str = str(job_id) set_job_id(job_id_str) current_job = Job(job_id_str) diff --git a/anonyfiles_api/routers/websocket_status.py b/anonyfiles_api/routers/websocket_status.py index 6e998d9..2f14d4a 100644 --- a/anonyfiles_api/routers/websocket_status.py +++ b/anonyfiles_api/routers/websocket_status.py @@ -10,7 +10,12 @@ @router.websocket("/ws/{job_id}") async def websocket_job_status(websocket: WebSocket, job_id: str) -> None: - """Envoie en temps réel le statut d'un job via WebSocket.""" + """Send real-time job status updates over a WebSocket connection. + + Args: + websocket: Active WebSocket connection to the client. + job_id: Identifier of the job to monitor. + """ await websocket.accept() job = Job(job_id) if not await job.check_exists_async():