111 changes: 103 additions & 8 deletions anonyfiles_api/routers/anonymization.py
@@ -37,6 +37,18 @@ async def _iter_uploadfile_chunks(upload_file: UploadFile, chunk_size: int = 102


def _prepare_engine_options(config_options: dict, custom_rules: Optional[list]) -> Dict[str, Any]:
"""Create options for :class:`AnonyfilesEngine` from the request payload.

Args:
config_options: Dictionary of anonymization options coming from the
client.
custom_rules: Optional list of custom replacement rules.

Returns:
A dictionary with keys ``exclude_entities_cli`` and
``custom_replacement_rules`` ready to be passed to the engine.
"""

exclude_entities = []
if not config_options.get("anonymizePersons", True): exclude_entities.append("PER")
if not config_options.get("anonymizeLocations", True): exclude_entities.append("LOC")
@@ -51,20 +63,39 @@ def _prepare_engine_options(config_options: dict, custom_rules: Optional[list])


def _prepare_processor_kwargs(input_path: Path, has_header: Optional[bool]) -> Dict[str, Any]:
"""Build keyword arguments for the engine processor.

Args:
input_path: Path to the uploaded file.
has_header: Optional flag specifying whether a CSV file contains a
header row.

Returns:
Dictionary of parameters to forward to the engine processing function.
"""

processor_kwargs = {}
if input_path.suffix.lower() == ".csv" and has_header is not None:
processor_kwargs['has_header'] = has_header
processor_kwargs["has_header"] = has_header
return processor_kwargs
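Taken together, the two helpers above turn the incoming payload into engine inputs. The sketch below is illustrative only and is not part of the diff: the option keys anonymizePersons/anonymizeLocations and the returned dictionary keys come from the code and docstrings above, while the custom-rule shape and file name are assumptions.

# Illustrative usage of the request-parsing helpers documented above.
from pathlib import Path

config_options = {
    "anonymizePersons": True,     # default True: PER stays anonymized
    "anonymizeLocations": False,  # False adds "LOC" to the exclusion list
}
custom_rules = [{"pattern": "ACME", "replacement": "COMPANY"}]  # hypothetical rule shape

options = _prepare_engine_options(config_options, custom_rules)
# Expected shape per the docstring:
# {"exclude_entities_cli": ["LOC"], "custom_replacement_rules": custom_rules}

processor_kwargs = _prepare_processor_kwargs(Path("upload.csv"), has_header=True)
# {"has_header": True}  (only set for .csv inputs when the flag is provided)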

def _execute_engine_anonymization(
engine: AnonyfilesEngine, input_path: Path, output_path: Path,
log_entities_path: Path, mapping_output_path: Path, processor_kwargs: dict
) -> Dict[str, Any]:
logger.info(f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine.")
"""Run the anonymization engine synchronously."""
logger.info(
f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine."
)
return engine.anonymize(
input_path=input_path, output_path=output_path, entities=None,
dry_run=False, log_entities_path=log_entities_path,
mapping_output_path=mapping_output_path, **processor_kwargs)
input_path=input_path,
output_path=output_path,
entities=None,
dry_run=False,
log_entities_path=log_entities_path,
mapping_output_path=mapping_output_path,
**processor_kwargs,
)
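The PR keeps this synchronous helper alongside the new async variant that follows. How the sync path is scheduled is not visible in this diff; one common pattern, shown purely as an assumption, is to offload the blocking call to a worker thread from async code:

# Hypothetical scheduling sketch, not taken from the PR: run the blocking
# engine call in a thread so the event loop stays responsive.
import asyncio

async def _run_sync_engine_in_thread(engine, input_path, output_path,
                                     log_entities_path, mapping_output_path,
                                     processor_kwargs):
    return await asyncio.to_thread(
        _execute_engine_anonymization,
        engine, input_path, output_path,
        log_entities_path, mapping_output_path, processor_kwargs,
    )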

async def _execute_engine_anonymization_async(
engine: AnonyfilesEngine,
@@ -74,7 +105,10 @@ async def _execute_engine_anonymization_async(
mapping_output_path: Path,
processor_kwargs: dict,
) -> Dict[str, Any]:
logger.info(f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine (async).")
"""Run the anonymization engine asynchronously."""
logger.info(
f"Tâche {input_path.parent.name}: Exécution du moteur AnonyfilesEngine (async)."
)
return await engine.anonymize_async(
input_path=input_path,
output_path=output_path,
@@ -93,6 +127,18 @@ def _process_engine_result(
mapping_output_path: Path,
log_entities_path: Path
) -> None:
"""Persist engine results and log the final status.

Args:
current_job: Job instance being processed.
engine_result: Result dictionary returned by the engine.
input_path: Path of the original uploaded file.
output_path: Path to the anonymized output file.
mapping_output_path: Path to the mapping CSV file.
log_entities_path: Path to the entity log file.
"""

engine_status_reported = engine_result.get("status")
engine_status_reported = engine_result.get("status")
engine_error_message = engine_result.get("error")

@@ -135,7 +181,21 @@ def _handle_job_error(
mapping_output_path: Optional[Path] = None,
log_entities_path: Optional[Path] = None
) -> None:
logger.error(f"Tâche {current_job.job_id}: {error_context} - {e}", exc_info=True)
"""Log an error and update the job status accordingly.

Args:
current_job: Job instance affected by the error.
e: The exception that was raised.
error_context: Contextual information about where the error happened.
input_path: Path to the original input file.
output_path: Optional path to the produced output file.
mapping_output_path: Optional path to the mapping CSV.
log_entities_path: Optional path to the entity log file.
"""

logger.error(
f"Tâche {current_job.job_id}: {error_context} - {e}", exc_info=True
)

if isinstance(e, FileNotFoundError): error_message = f"Fichier non trouvé: {getattr(e, 'filename', 'N/A')}"
elif isinstance(e, PermissionError): error_message = f"Erreur de permission: {getattr(e, 'strerror', 'N/A')} sur {getattr(e, 'filename', 'N/A')}"
@@ -165,6 +225,17 @@ def run_anonymization_job_sync(
custom_rules: Optional[list],
passed_base_config: Dict[str, Any]
):
"""Execute an anonymization job in a background thread.

Args:
job_id: Identifier for the job directory.
input_path: Path to the uploaded file.
config_options: Parsed anonymization options.
has_header: Optional CSV header flag.
custom_rules: Optional list of user-provided replacement rules.
passed_base_config: Base configuration copied from application state.
"""

set_job_id(job_id)
current_job = Job(job_id)
output_path: Optional[Path] = None
@@ -223,7 +294,22 @@ async def anonymize_file_endpoint(
custom_replacement_rules: Optional[str] = Form(None),
file_type: Optional[str] = Form(None),
has_header: Optional[str] = Form(None)
):
):
"""Handle file upload and start an anonymization job.

Args:
request: Current request object used to access application state.
background_tasks: FastAPI background task manager.
file: Uploaded file to anonymize.
config_options: JSON string with anonymization options.
custom_replacement_rules: Optional JSON list of replacement rules.
file_type: Optional hint about the uploaded file type.
has_header: Optional flag indicating if a CSV has a header row.

Returns:
A dictionary containing the job ID and its initial status.
"""

job_id = str(uuid.uuid4())
set_job_id(job_id)
current_job = Job(job_id)
Expand Down Expand Up @@ -304,6 +390,15 @@ async def anonymize_file_endpoint(

@router.get("/anonymize_status/{job_id}", tags=["Anonymisation"])
async def anonymize_status_endpoint(job_id: uuid.UUID):
"""Return the status and, when finished, the result files for a job.

Args:
job_id: Identifier of the job to query.

Returns:
A JSON payload describing the current status and optionally the
anonymization results.
"""
job_id_str = str(job_id)
set_job_id(job_id_str)
current_job = Job(job_id_str)
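End to end, the upload endpoint returns a job identifier that the client then polls. A hedged client-side sketch using httpx (the upload route path, base URL, response key and status values are assumptions; the form field names and the /anonymize_status/{job_id} route are taken from this diff):

# Illustrative client flow; adjust paths and host to the real deployment.
import json
import time
import httpx

with httpx.Client(base_url="http://localhost:8000") as client:
    with open("document.txt", "rb") as fh:
        resp = client.post(
            "/anonymize",  # assumed path; the route decorator is folded out of the diff
            files={"file": fh},
            data={
                "config_options": json.dumps({"anonymizePersons": True}),
                "has_header": "true",  # only meaningful for CSV uploads
            },
        )
    job_id = resp.json()["job_id"]  # assumed key for the returned job ID

    # Poll the status route documented above; terminal status values are assumed.
    while True:
        status = client.get(f"/anonymize_status/{job_id}").json()
        if status.get("status") not in {"pending", "processing"}:
            break
        time.sleep(1)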
33 changes: 31 additions & 2 deletions anonyfiles_api/routers/deanonymization.py
@@ -48,7 +48,15 @@ def run_deanonymization_job_sync(
permissive: bool,
# Si BASE_CONFIG était nécessaire, il faudrait l'ajouter ici :
# passed_base_config: Optional[Dict[str, Any]] = None
):
) -> None:
"""Execute the deanonymization process for a given job.

Args:
job_id: Identifier for the job directory.
input_path: Path to the anonymized file to restore.
mapping_path: Path to the mapping CSV used for restoration.
permissive: Whether to continue when encountering mapping issues.
"""
set_job_id(job_id)
current_job = Job(job_id)
run_dir = current_job.job_dir
@@ -163,7 +171,19 @@ async def deanonymize_file_endpoint(
file: UploadFile = File(...),
mapping: UploadFile = File(...),
permissive: bool = Form(False)
):
):
"""Upload files and trigger a deanonymization job.

Args:
request: Incoming request to access application state if needed.
background_tasks: FastAPI background task manager.
file: The anonymized file to restore.
mapping: Mapping CSV file.
permissive: Whether the engine should ignore missing mapping entries.

Returns:
A dictionary containing the created job ID and its initial status.
"""
job_id = str(uuid.uuid4())
set_job_id(job_id)
current_job = Job(job_id)
@@ -231,6 +251,15 @@ async def deanonymize_file_endpoint(

@router.get("/deanonymize_status/{job_id}", tags=["Désanonymisation"])
async def get_deanonymize_status(job_id: str):
"""Return the status and results for a deanonymization job.

Args:
job_id: Identifier of the job to inspect.

Returns:
A JSON payload describing the job state and, if available, the restored
text and report.
"""
set_job_id(job_id)
current_job = Job(job_id)

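The deanonymization flow mirrors the one above: upload the anonymized file and its mapping CSV, then check the status route. A short hedged sketch (the upload path is an assumption; the form fields file, mapping and permissive and the /deanonymize_status/{job_id} route appear in this diff):

# Illustrative only; assumes the upload route is POST /deanonymize.
import httpx

with httpx.Client(base_url="http://localhost:8000") as client:
    with open("anonymized.txt", "rb") as doc, open("mapping.csv", "rb") as mapping:
        resp = client.post(
            "/deanonymize",  # assumed path
            files={"file": doc, "mapping": mapping},
            data={"permissive": "false"},
        )
    job_id = resp.json()["job_id"]  # assumed response key
    print(client.get(f"/deanonymize_status/{job_id}").json())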
10 changes: 10 additions & 0 deletions anonyfiles_api/routers/files.py
@@ -18,6 +18,16 @@

@router.get("/files/{job_id}/{file_key}", tags=["Fichiers"])
async def get_file_endpoint(job_id: uuid.UUID, file_key: str, as_attachment: bool = False):
"""Serve a result file for a given job.

Args:
job_id: Identifier of the job directory.
file_key: Type of file to retrieve (output, mapping, log_entities, audit_log).
as_attachment: If ``True``, force download rather than inline display.

Returns:
A :class:`FileResponse` with the requested file.
"""
job_id_str = str(job_id)
set_job_id(job_id_str)
current_job = Job(job_id_str)
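Once a job has finished, each artifact can be fetched individually through this route. A minimal sketch under the assumption the API runs on localhost:8000 (the path, file_key values and as_attachment flag are documented in the docstring above):

# Download the anonymized output as an attachment; other file_key values
# per the docstring: mapping, log_entities, audit_log.
import httpx

job_id = "..."  # job ID returned by the upload endpoint
resp = httpx.get(
    f"http://localhost:8000/files/{job_id}/output",
    params={"as_attachment": "true"},
)
resp.raise_for_status()
with open("anonymized_output.txt", "wb") as fh:
    fh.write(resp.content)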
5 changes: 5 additions & 0 deletions anonyfiles_api/routers/health.py
@@ -4,4 +4,9 @@

@router.get("/health", tags=["Health"])
async def health() -> dict:
"""Simple health-check endpoint.

Returns:
``{"status": "ok"}`` if the service is running.
"""
return {"status": "ok"}
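The endpoint doubles as a liveness probe; a one-line check assuming the API listens on localhost:8000:

# Expect {"status": "ok"} when the service is up.
import httpx
assert httpx.get("http://localhost:8000/health").json() == {"status": "ok"}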
8 changes: 8 additions & 0 deletions anonyfiles_api/routers/jobs.py
@@ -15,6 +15,14 @@

@router.delete("/jobs/{job_id}", status_code=status.HTTP_204_NO_CONTENT, tags=["Tâches"])
async def delete_job_endpoint(job_id: uuid.UUID): # job_id peut être str aussi
"""Delete all files related to a job.

Args:
job_id: Identifier of the job directory to remove.

Returns:
An empty ``204 NO CONTENT`` response when successful.
"""
job_id_str = str(job_id)
set_job_id(job_id_str)
current_job = Job(job_id_str)
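Cleanup is a single DELETE call; a hedged sketch reusing the assumed local base URL:

# Remove all files of a finished job; a 204 response means success.
import httpx

job_id = "..."  # ID of the job to delete
resp = httpx.delete(f"http://localhost:8000/jobs/{job_id}")
assert resp.status_code == 204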
7 changes: 6 additions & 1 deletion anonyfiles_api/routers/websocket_status.py
@@ -10,7 +10,12 @@

@router.websocket("/ws/{job_id}")
async def websocket_job_status(websocket: WebSocket, job_id: str) -> None:
"""Envoie en temps réel le statut d'un job via WebSocket."""
"""Send real-time job status updates over a WebSocket connection.

Args:
websocket: Active WebSocket connection to the client.
job_id: Identifier of the job to monitor.
"""
await websocket.accept()
job = Job(job_id)
if not await job.check_exists_async():
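Instead of polling the status route, a client can subscribe to live updates on this WebSocket. A hedged sketch using the third-party websockets package (the ws:// base URL and the message payload format are assumptions; only the /ws/{job_id} path is shown in the diff):

# Illustrative listener: prints whatever status messages the server pushes.
import asyncio
import websockets  # third-party client package, not part of this PR

async def watch(job_id: str) -> None:
    async with websockets.connect(f"ws://localhost:8000/ws/{job_id}") as ws:
        async for message in ws:  # payload format not shown in the diff
            print(message)

asyncio.run(watch("some-job-id"))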