diff --git a/README.md b/README.md
index ee84a2d..6c26bc8 100644
--- a/README.md
+++ b/README.md
@@ -112,7 +112,7 @@ The CLI is a thin wrapper over the package API.
 Bundled configs live under `slide2vec/configs/preprocessing/` and `slide2vec/configs/models/`.
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml
+slide2vec /path/to/config.yaml
 ```
 
 By default, manifest-driven CLI runs use all available GPUs. Set `speed.num_gpus=4` when you want to cap the sharding explicitly.
diff --git a/docs/cli.md b/docs/cli.md
index 200fe96..a7759c5 100644
--- a/docs/cli.md
+++ b/docs/cli.md
@@ -23,7 +23,7 @@ The CLI is usually the better fit for:
 ## Basic Command
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml
+slide2vec /path/to/config.yaml
 ```
 
 This command:
@@ -68,8 +68,7 @@ In practice, the config controls:
 You can override config values from the command line with `path.key=value` syntax:
 
 ```shell
-python -m slide2vec \
-  --config-file /path/to/config.yaml \
+slide2vec /path/to/config.yaml \
   output_dir=/tmp/slide2vec-run \
   speed.num_gpus=4 \
   model.name=virchow2
@@ -112,7 +111,7 @@ tiling:
 Or override from the command line:
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml tiling.gpu_decode=true
+slide2vec /path/to/config.yaml tiling.gpu_decode=true
 ```
 
 When enabled, two things happen:
@@ -134,7 +133,7 @@ By default, the CLI uses all available GPUs.
 To cap GPU usage, set:
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml speed.num_gpus=4
+slide2vec /path/to/config.yaml speed.num_gpus=4
 ```
 
 If you pass `--run-on-cpu`, the CLI uses CPU execution instead.
@@ -170,23 +169,23 @@ When stdout is not interactive, the CLI falls back to plain text stage updates a
 Full batch run:
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml
+slide2vec /path/to/config.yaml
 ```
 
 Full batch run with limited GPU count:
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml speed.num_gpus=2
+slide2vec /path/to/config.yaml speed.num_gpus=2
 ```
 
 Tiling only:
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml --tiling-only
+slide2vec /path/to/config.yaml --tiling-only
 ```
 
 CPU run:
 
 ```shell
-python -m slide2vec --config-file /path/to/config.yaml --run-on-cpu
+slide2vec /path/to/config.yaml --run-on-cpu
 ```
diff --git a/pyproject.toml b/pyproject.toml
index f8f3cd6..86a149c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,7 +42,7 @@ Homepage = "https://github.com/clemsgrs/slide2vec"
 "Bug Tracker" = "https://github.com/clemsgrs/slide2vec/issues"
 
 [project.scripts]
-slide2vec = "slide2vec.cli:main"
+slide2vec = "slide2vec.cli:entrypoint"
 
 [project.optional-dependencies]
 hoptimus = [
diff --git a/scripts/generate_gt.py b/scripts/generate_gt.py
index c1407f2..fe5822f 100644
--- a/scripts/generate_gt.py
+++ b/scripts/generate_gt.py
@@ -126,8 +126,8 @@ def main():
         print("Running pipeline...")
         subprocess.run(
             [
-                sys.executable, "-m", "slide2vec",
-                "--config-file", str(cfg_path),
+                "slide2vec",
+                str(cfg_path),
                 "--skip-datetime",
                 "--run-on-cpu",
             ],
diff --git a/slide2vec/cli.py b/slide2vec/cli.py
index 9aa3f45..fe520ec 100644
--- a/slide2vec/cli.py
+++ b/slide2vec/cli.py
@@ -7,20 +7,21 @@
 
 def get_args_parser(add_help: bool = True):
     parser = argparse.ArgumentParser("slide2vec", add_help=add_help)
-    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
+    parser.add_argument("config_file", metavar="CONFIG", help="path to config file")  # required positional
     parser.add_argument("--skip-datetime", action="store_true", help="skip run id datetime prefix")
     parser.add_argument("--tiling-only", action="store_true", help="only run slide tiling")
     parser.add_argument("--run-on-cpu", action="store_true", help="run inference on cpu")
     parser.add_argument("--output-dir", type=str, default=None, help="output directory to save artifacts")
-    parser.add_argument(
-        "opts",
-        help='Modify config options at the end of the command using "path.key=value".',
-        default=None,
-        nargs=argparse.REMAINDER,
-    )
     return parser
 
 
+def parse_args(argv=None):  # argv=None lets argparse fall back to sys.argv[1:]
+    parser = get_args_parser(add_help=True)
+    args, opts = parser.parse_known_args(argv)  # opts: every token the parser did not recognize
+    args.opts = opts  # "path.key=value" overrides; NOTE(review): mistyped --flags land here too
+    return args
+
+
 def build_model_and_pipeline(args):
     cfg, _cfg_path = setup(args)
     hf_login()
@@ -39,8 +40,7 @@ def build_model_and_pipeline(args):
 
 
 def main(argv=None):
-    parser = get_args_parser(add_help=True)
-    args = parser.parse_args(argv)
+    args = parse_args(argv)
     pipeline, cfg = build_model_and_pipeline(args)
     reporter = progress.create_cli_progress_reporter(output_dir=getattr(cfg, "output_dir", None))
     with progress.activate_progress_reporter(reporter):
@@ -50,3 +50,6 @@ def main(argv=None):
         )
 
 
+def entrypoint(argv=None):  # target of the `slide2vec` console script in pyproject.toml
+    main(argv)  # main()'s return value is discarded
+    return 0  # reached only when main() did not raise
diff --git a/tasks/lessons.md b/tasks/lessons.md
index 9eaf26f..7cd11f4 100644
--- a/tasks/lessons.md
+++ b/tasks/lessons.md
@@ -1,5 +1,9 @@
 # Lessons Learned
 
+## 2026-04-12
+
+- When refactoring CLI parsing to support `parse_known_args()`, prefer updating the test double to match the real parser API instead of adding a production fallback for mocks. Keep the runtime code clean unless the fallback is genuinely needed by real callers.
+
 ## 2026-04-10
 
 - In this environment, never route `apply_patch` through `exec_command`; use the dedicated `apply_patch` tool directly for file edits.
diff --git a/tests/test_output_consistency.py b/tests/test_output_consistency.py
index 8f47123..aa77cb5 100644
--- a/tests/test_output_consistency.py
+++ b/tests/test_output_consistency.py
@@ -132,8 +132,8 @@ def test_output_consistency(wsi_path, mask_path, tmp_path):
     # 3. Run the pipeline
     subprocess.run(
         [
-            sys.executable, "-m", "slide2vec",
-            "--config-file", str(cfg_path),
+            "slide2vec",
+            str(cfg_path),
             "--skip-datetime",
             "--run-on-cpu",
         ],
diff --git a/tests/test_progress.py b/tests/test_progress.py
index aa7fab1..b782ba2 100644
--- a/tests/test_progress.py
+++ b/tests/test_progress.py
@@ -112,6 +112,9 @@ class FakeParser:
         def parse_args(self, argv=None):
             return SimpleNamespace(tiling_only=False)
 
+        def parse_known_args(self, argv=None):  # mirrors argparse's (namespace, extras) return
+            return self.parse_args(argv), []  # this fake never reports leftover tokens
+
     monkeypatch.setattr(cli, "get_args_parser", lambda add_help=True: FakeParser())
     monkeypatch.setattr(
         cli,
@@ -128,6 +131,39 @@ def parse_args(self, argv=None):
     assert isinstance(progress.get_progress_reporter(), progress.NullProgressReporter)
 
 
+def test_cli_entrypoint_returns_zero(monkeypatch):  # entrypoint() forwards argv and returns 0
+    import slide2vec.cli as cli
+
+    observed = {}  # records what the stubbed main() was called with
+
+    def fake_main(argv=None):
+        observed["argv"] = argv
+        return "ok"  # a non-zero value that entrypoint() must not propagate
+
+    monkeypatch.setattr(cli, "main", fake_main)  # entrypoint resolves `main` at call time
+
+    assert cli.entrypoint(["/tmp/config.yaml"]) == 0
+    assert observed["argv"] == ["/tmp/config.yaml"]  # argv is passed through unchanged
+
+
+def test_cli_parse_args_preserves_flags_and_config_overrides():  # uses the real parser, no mocks
+    import slide2vec.cli as cli
+
+    args = cli.parse_args(
+        [
+            "/tmp/config.yaml",  # positional config path
+            "--skip-datetime",
+            "--run-on-cpu",
+            "speed.num_gpus=4",  # not a declared argument, so it must come back via args.opts
+        ]
+    )
+
+    assert args.config_file == "/tmp/config.yaml"
+    assert args.skip_datetime is True
+    assert args.run_on_cpu is True
+    assert args.opts == ["speed.num_gpus=4"]  # leftover token kept verbatim, in order
+
+
 def test_run_pipeline_emits_local_progress_events_in_order(monkeypatch, tmp_path: Path):
     import slide2vec.inference as inference
     import slide2vec.progress as progress