2 changes: 1 addition & 1 deletion assume/common/base.py
@@ -752,7 +752,7 @@ class LearningConfig:
"""
A class for the learning configuration.

Attributes:
Parameters:
learning_mode (bool): Should we use learning mode at all? If False, the learning bidding strategy is
loaded from trained_policies_load_path and no training occurs. Default is False.
evaluation_mode (bool): This setting is modified internally. Whether to run in evaluation mode. If True, the agent uses the learned policy
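
For orientation, here is a minimal sketch of how the parameters documented in this docstring could be supplied; the keys mirror the docstring above, while the concrete values and the path are illustrative assumptions rather than defaults taken from the repository.

```python
# Illustrative only: keys follow the LearningConfig docstring above;
# the path and values are hypothetical assumptions.
learning_config = {
    # If False, no training occurs and the learning bidding strategy is
    # loaded from trained_policies_load_path instead.
    "learning_mode": False,
    # Hypothetical location of previously trained policies.
    "trained_policies_load_path": "outputs/example_sim/policies",
    # evaluation_mode is modified internally and is normally not set by the user.
}
```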
12 changes: 4 additions & 8 deletions docs/source/conf.py
@@ -4,19 +4,15 @@

# Configuration file for the Sphinx documentation builder.

import tomllib

with open("../../pyproject.toml", "rb") as f:
pyproject_toml = tomllib.load(f)["project"]
from setuptools_scm import get_version

# -- Project information

project = "ASSUME"
copyright = "2022-2025 ASSUME Developers"
author = ",".join([a["name"] for a in pyproject_toml["authors"]])
author = "ASSUME Developers"
copyright = "2022-2025 " + author

version = pyproject_toml["version"]
release = version
release = get_version("../../")

# -- General configuration

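
In short, conf.py now derives the documentation version from the git metadata via setuptools-scm instead of parsing pyproject.toml. A minimal sketch of what the new logic does (the example version strings are assumptions, not actual releases):

```python
# Sketch of the setuptools-scm lookup used by conf.py after this change.
# In a git checkout, get_version() infers the version from the latest tag,
# e.g. "0.5.5" on a tagged commit or a dev string such as "0.5.6.dev3+g1a2b3c4"
# between tags (both strings are illustrative).
from setuptools_scm import get_version

release = get_version("../../")  # repository root, relative to docs/source
print(release)
```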
1 change: 1 addition & 0 deletions docs/source/learning.rst
@@ -227,6 +227,7 @@ the notion of an “optimal” solution is often unclear. What we *do* observe a
depends heavily on the reward design and on how other agents behave. Therefore:

**Do not rely on rewards alone.** Behavior itself must be examined carefully.

**Why solely reward-based evaluation is problematic**

Let :math:`R_i` denote the episodic return of agent :math:`i` under the joint policy :math:`\pi=(\pi_1,\dots,\pi_n)`. A common but potentially misleading
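
For readers skimming the diff, the episodic return introduced above can be written out in the usual way (standard notation, not quoted from learning.rst), with :math:`r_{i,t}` the reward of agent :math:`i` at step :math:`t` and :math:`T` the episode length:

.. math::

   R_i(\pi) = \mathbb{E}_{\pi}\left[\sum_{t=0}^{T} r_{i,t}\right]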
11 changes: 9 additions & 2 deletions docs/source/release_notes.rst
@@ -21,19 +21,26 @@ Upcoming Release
- **Fix tests on Windows**: One test was consistently failing on Windows; it has been fixed so that all tests now pass on all architectures

**Improvements:**

- **Application of new naming convention for bidding strategies**: [unit]_[market]_[method]_[comment] for bidding strategy keys (in snake_case) and [Unit][Market][Method][Comment]Strategy for bidding strategy classes (in PascalCase); an example mapping is sketched below this file's diff
- **Restructured learning_role tasks**: Major changes to the learning components that make the learning setup more generalizable across the framework.

- **Simplified learning data flow:** Removed the special ``learning_unit_operator`` that previously aggregated unit data and forwarded it to the learning role. Eliminates the single-sender dependency and avoids double bookkeeping across units and operators.
- **Direct write access:** All learning-capable entities (units, unit operators, market agents) now write learning data directly to the learning role.
- **Centralized logic:** Learning-related functionality is now almost always contained within the learning role, improving maintainability.
- **Note:** Distributed learning across multiple machines is no longer supported, but this feature was not in active use.
.. note::
Distributed learning across multiple machines is no longer supported, but this feature was not in active use.

- **Restructured learning configuration**: All learning-related configuration parameters are now contained within a single `learning_config` dictionary in the `config.yaml` file. This change simplifies configuration management and avoids ambiguous setting of defaults.
- **Note:** ``learning_mode`` is moved from the top-level config to `learning_config`. Existing config files need to be updated accordingly.

.. note::
``learning_mode`` is moved from the top-level config to `learning_config`. Existing config files need to be updated accordingly.

- **Learning_role in all cases involving DRL**: The `learning_role` is now available in all simulations involving DRL, even when pre-trained strategies are loaded and no policy updates are performed. This change ensures consistent handling of learning configurations and simplifies the codebase by removing special cases.
- **Final DRL simulation with last policies**: After training, the final simulation now uses the last trained policies instead of the best policies. This change provides a more accurate representation of the learned behavior, as the last policies reflect the most recent training state. Additionally, multi-agent simulations do not always converge to the maximum reward. For example, competing agents may underbid each other to gain market share, leading to lower overall rewards while still reaching a stable state.

**New Features:**

- **Unit Operator Portfolio Strategy**: A new bidding strategy type that enables portfolio optimization; the default is called `UnitsOperatorEnergyNaiveDirectStrategy`. This strategy simply passes through the bidding decisions of the individual units within a portfolio, matching the previous default behavior. We also added `UnitsOperatorEnergyHeuristicCournotStrategy`, which models the bidding behavior of a portfolio of units in a day-ahead market. The strategy calculates the optimal bid price and quantity for each unit in the portfolio, taking into account a markup and the production costs of the units (a toy illustration follows below this file's diff). This enables users to simulate and analyze the impact of strategic portfolio bidding on market outcomes and unit profitability.

0.5.5 - (13th August 2025)
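
To make the new naming convention from the Improvements list concrete, here is a hedged sketch of how strategy keys could map to class names under the [unit]_[market]_[method]_[comment] scheme. The two class names appear in the release notes above; the snake_case keys are inferred from the convention and may not match the framework's actual registry entries.

```python
# Assumed key-to-class mapping following the naming convention described above;
# the exact snake_case keys used in the strategy registry are an assumption.
bidding_strategies = {
    "units_operator_energy_naive_direct": "UnitsOperatorEnergyNaiveDirectStrategy",
    "units_operator_energy_heuristic_cournot": "UnitsOperatorEnergyHeuristicCournotStrategy",
}
```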
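And a toy sketch of the idea behind the heuristic portfolio strategy, i.e. pricing each unit's offer from its production cost plus a markup. This is not the actual UnitsOperatorEnergyHeuristicCournotStrategy implementation; the function, field names, and the simple pricing rule are made up for illustration.

```python
# Toy illustration only: offer each unit at its production cost plus a relative markup.
# All names and numbers are assumptions, not the framework's code or data.
def toy_portfolio_bids(units: list[dict], markup: float) -> list[dict]:
    bids = []
    for unit in units:
        bids.append(
            {
                "unit_id": unit["id"],
                "price": unit["marginal_cost"] * (1 + markup),  # cost-plus pricing
                "volume": unit["max_power"],  # offer the full available capacity
            }
        )
    return bids


example_units = [
    {"id": "pp_1", "marginal_cost": 35.0, "max_power": 300.0},
    {"id": "pp_2", "marginal_cost": 60.0, "max_power": 150.0},
]
print(toy_portfolio_bids(example_units, markup=0.1))
```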
1 change: 1 addition & 0 deletions environment_docs.yaml
@@ -20,6 +20,7 @@ dependencies:
- pip:
- .[all]
- sphinx<8.2
- setuptools-scm
- sphinx-book-theme
- nbsphinx
- nbsphinx-link
6 changes: 3 additions & 3 deletions examples/notebooks/11a_redispatch_dsm.ipynb
@@ -214,7 +214,7 @@
"id": "f647cf65"
},
"source": [
"##### Let's also import some basic libraries that we will use throughout the tutorial."
"Let's also import some basic libraries that we will use throughout the tutorial."
]
},
{
@@ -1857,7 +1857,7 @@
"id": "2c600b08"
},
"source": [
"##### A) Read the line loading data"
"#### A) Read the line loading data"
]
},
{
@@ -1931,7 +1931,7 @@
"id": "17f8eafc"
},
"source": [
"##### B) Plot a bar graph for redispatch by powerplants for one snapshot"
"#### B) Plot a bar graph for redispatch by powerplants for one snapshot"
]
},
{
8 changes: 5 additions & 3 deletions pyproject.toml
@@ -3,12 +3,12 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

[build-system]
requires = ["setuptools>=61.0"]
requires = ["setuptools>=80", "setuptools-scm>=8"]
build-backend = "setuptools.build_meta"

[project]
name = "assume-framework"
version = "0.5.5"
dynamic = ["version"]
description = "ASSUME - Agent-Based Electricity Markets Simulation Toolbox"
authors = [{ name = "ASSUME Developers", email = "[email protected]"}]
license = "AGPL-3.0-or-later"
@@ -73,7 +73,7 @@ all = [
"assume-framework[oeds, network, learning]",
]
docs = [
"sphinx <8.2",
"sphinx <9",
"sphinx-book-theme >=1.1.4",
"nbsphinx >=0.9.7",
"nbsphinx-link >= 1.3.1",
@@ -95,6 +95,8 @@ assume = "assume_cli.cli:cli"
include = ["assume*", "assume_cli*"]
exclude = ["assume-db*"]

[tool.setuptools_scm]

[tool.ruff]

[tool.ruff.lint]
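
As a closing note on the packaging change: with `version` now declared dynamic and supplied by setuptools-scm, the installed distribution metadata becomes the single source of truth at runtime. A small sketch, assuming the package has been installed (e.g. via `pip install -e .`):

```python
# Read the version of the installed assume-framework distribution;
# the printed string is whatever setuptools-scm derived at build/install time.
from importlib.metadata import version

print(version("assume-framework"))
```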