neondatabase · arsenyinfo · Sep 3, 2025
diff --git a/agent/core/knowledge_enricher.py b/agent/core/knowledge_enricher.py
@@ -0,0 +1,201 @@
+from pathlib import Path
+import logging
+
+from llm.common import Message, TextRaw, Tool, ToolUse, Completion
+from llm.utils import get_universal_llm_client
+
+logger = logging.getLogger(__name__)
+
+
+class KnowledgeBaseEnricher:
+    """dynamic prompt enrichment based on knowledge base topics.
+
+    one instance is shared per resolved knowledge base directory: constructing the
+    class again for the same directory returns the already loaded instance.
+    """
+
+    _instances: dict[str, "KnowledgeBaseEnricher"] = {}
+
+    @staticmethod
+    def _resolve_kb_dir(knowledge_base_dir: str | Path | None) -> Path:
+        """return the given directory, or `knowledge_base` next to this file when none is given."""
+        if knowledge_base_dir:
+            return Path(knowledge_base_dir)
+        return Path(__file__).parent / "knowledge_base"
+
+    def __new__(cls, knowledge_base_dir: str | Path | None = None) -> "KnowledgeBaseEnricher":
+        """return the shared instance for the directory, creating it on first use."""
+        # key on the resolved path so different spellings of one directory share an instance
+        key = str(cls._resolve_kb_dir(knowledge_base_dir).resolve())
+
+        if key not in cls._instances:
+            instance = super().__new__(cls)
+            instance._initialized = False
+            cls._instances[key] = instance
+
+        return cls._instances[key]
+
+    def __init__(self, knowledge_base_dir: str | Path | None = None):
+        """load the knowledge base once; later constructions of the same instance are no-ops."""
+        # __init__ runs on every construction, including when __new__ returned a cached instance
+        if self._initialized:
+            return
+
+        self.llm = get_universal_llm_client()
+        self.knowledge_base_dir = Path(knowledge_base_dir) if knowledge_base_dir else None
+        self.knowledge_base = self._load_knowledge_base()
+        self._select_topics_tool = self._create_selection_tool()
+        self._initialized = True
+
+    def _load_knowledge_base(self) -> dict[str, str]:
+        """load all .md files from the knowledge base directory, keyed by file name without .md.
+
+        files that cannot be read or decoded are logged and skipped.
+
+        raises:
+            FileNotFoundError: the knowledge base path is missing or is not a directory.
+        """
+        kb_dir = self._resolve_kb_dir(self.knowledge_base_dir)
+        if not kb_dir.is_dir():
+            raise FileNotFoundError(f"knowledge base directory not found: {kb_dir}")
+
+        knowledge_base = {}
+        # glob order is filesystem dependent; sort so the topic list is identical on every run
+        for md_file in sorted(kb_dir.glob("*.md")):
+            key = md_file.stem
+            try:
+                knowledge_base[key] = md_file.read_text(encoding="utf-8")
+            except (OSError, UnicodeError) as e:
+                logger.error(f"failed to load {md_file}: {e}")
+            else:
+                logger.debug(f"loaded knowledge topic: {key}")
+
+        logger.info(f"loaded {len(knowledge_base)} knowledge base topics")
+        return knowledge_base
+
+    def _create_selection_tool(self) -> Tool:
+        """create tool for LLM to select relevant knowledge topics."""
+        return {
+            "name": "select_knowledge_topics",
+            "description": "select relevant knowledge base topics for the given task",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "topics": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": "list of topic keys to include in the system prompt"
+                    }
+                },
+                "required": ["topics"]
+            }
+        }
+
+    def _get_phase_description(self, phase: str) -> str:
+        """get human-readable description of development phase.
+
+        unknown phases fall back to a generic "development phase: <phase>" string.
+        """
+        phase_descriptions = {
+            # tRPC phases
+            "draft": "creating schemas, types, and database models",
+            "handler": "implementing API handlers and business logic",
+            "frontend": "building user interface components and interactions",
+            "edit": "modifying existing code based on feedback",
+            # NiceGUI phases
+            "data_model": "designing SQLModel data structures and database schemas",
+            "application": "building UI components and application logic with NiceGUI",
+            # Generic fallback
+            "default": "general development phase"
+        }
+        return phase_descriptions.get(phase, f"development phase: {phase}")
+
+    async def enrich_prompt(self, user_prompt: str, development_phase: str | None = None) -> str:
+        """ask the LLM which topics fit the task and return their concatenated content.
+
+        best effort: returns "" when there are no topics, none is selected, or the LLM call fails.
+        """
+        if not self.knowledge_base:
+            logger.warning("no knowledge base topics available")
+            return ""
+
+        available_topics = list(self.knowledge_base)
+
+        # build context message including development phase if provided
+        context_parts = []
+        if development_phase:
+            phase_description = self._get_phase_description(development_phase)
+            context_parts.append(f"current development phase: {development_phase} ({phase_description})")
+        context_parts.extend([
+            f"user task: {user_prompt}",
+            f"available knowledge topics: {available_topics}",
+            "",
+            "select only the relevant topics needed for this specific development phase and task. "
+            "prioritize topics that provide guidance for the current phase of development. "
+            "focus on topics that are directly applicable to what needs to be implemented right now."
+        ])
+        messages = [Message(role="user", content=[TextRaw("\n\n".join(context_parts))])]
+
+        try:
+            response = await self.llm.completion(
+                messages=messages,
+                max_tokens=1000,
+                tools=[self._select_topics_tool],
+                tool_choice="auto"
+            )
+            selected_topics = self._extract_selected_topics(response)
+            enrichment = self._build_system_prompt(selected_topics)
+        except Exception as e:
+            # enrichment is optional, so a failed selection degrades to no enrichment
+            logger.error(f"failed to get topic selection from LLM: {e}")
+            return ""
+
+        # log selection and size info
+        logger.info(f"selected {len(selected_topics)} topics from {len(available_topics)} available: {selected_topics}")
+        if enrichment:
+            line_count = enrichment.count("\n") + 1
+            logger.info(f"enrichment added: {len(enrichment)} characters, {line_count} lines")
+        else:
+            logger.info("no enrichment added (no topics selected)")
+        return enrichment
+
+    def _extract_selected_topics(self, response: Completion) -> list[str]:
+        """extract the topic keys chosen in the LLM tool call response.
+
+        keys are de-duplicated in order of first appearance; keys that are not in the
+        knowledge base are logged and dropped.
+        """
+        requested: list[str] = []
+        for content_block in response.content:
+            if not isinstance(content_block, ToolUse) or content_block.name != "select_knowledge_topics":
+                continue
+            tool_input = content_block.input
+            topics = tool_input.get("topics") if isinstance(tool_input, dict) else None
+            if isinstance(topics, list):
+                requested.extend(str(topic) for topic in topics)
+
+        # dict.fromkeys keeps first-seen order, so a topic named twice is only injected once
+        unique_topics = list(dict.fromkeys(requested))
+        invalid = [topic for topic in unique_topics if topic not in self.knowledge_base]
+        if invalid:
+            logger.warning(f"invalid topics requested: {invalid}")
+
+        return [topic for topic in unique_topics if topic in self.knowledge_base]
+
+    def _build_system_prompt(self, topic_keys: list[str]) -> str:
+        """concatenate selected topics into system prompt section.
+
+        unknown keys are skipped; returns "" when no known topic remains.
+        """
+        sections = []
+        for key in topic_keys:
+            if key in self.knowledge_base:
+                sections += [f"## {key}", self.knowledge_base[key].strip()]
+        if not sections:
+            return ""
+
+        return "\n\n".join(["# relevant knowledge base:", *sections])
+
+    def get_available_topics(self) -> list[str]:
+        """return list of available knowledge base topic keys."""
+        return list(self.knowledge_base)
diff --git a/agent/nicegui_agent/actors.py b/agent/nicegui_agent/actors.py
@@ -1,6 +1,7 @@
 import jinja2
 import logging
 import anyio
+from pathlib import Path
 from typing import Callable, Awaitable
 from core.base_node import Node
 from core.workspace import Workspace
@@ -9,6 +10,7 @@
 from nicegui_agent import playbooks
 from core.notification_utils import notify_if_callback, notify_stage
 from integrations.dbrx import DatabricksClient
+from core.knowledge_enricher import KnowledgeBaseEnricher
 
 logger = logging.getLogger(__name__)
 
@@ -47,6 +49,44 @@ def __init__(
         ]
         self.files_allowed = files_allowed or ["app/", "tests/"]
 
+        # Knowledge base enricher with nicegui-specific knowledge base (singleton)
+        nicegui_kb_dir = Path(__file__).parent / "knowledge_base"
+        self.enricher = KnowledgeBaseEnricher(nicegui_kb_dir)
+
+    def _determine_development_phase(self, system_prompt: str) -> str:
+        """classify the system prompt as data_model, application or default by keyword lookup."""
+        text = system_prompt.lower()
+
+        def mentions(*phrases: str) -> bool:
+            return any(phrase in text for phrase in phrases)
+
+        # data modeling cues are checked first, so they win over application cues
+        if mentions("data modeling", "database schemas") or (mentions("sqlmodel") and mentions("data structures")):
+            return "data_model"
+
+        if mentions("application development", "ui components", "application logic", "existing data models"):
+            return "application"
+        return "default"
+
+    async def _enrich_system_prompt(self, base_system_prompt: str, user_prompt: str) -> str:
+        """append knowledge base topics chosen for user_prompt to base_system_prompt."""
+        try:
+            base_len = len(base_system_prompt)
+            phase = self._determine_development_phase(base_system_prompt)
+            addition = await self.enricher.enrich_prompt(user_prompt, phase)
+            if not addition:
+                # an empty result means there is nothing to append
+                logger.info(f"no enrichment added, keeping original: {base_len} chars")
+                return base_system_prompt
+            combined = f"{base_system_prompt}\n\n{addition}"
+            total = len(combined)
+            logger.info(f"system prompt enriched: {base_len} → {total} chars (+{total - base_len})")
+            return combined
+        except Exception as e:
+            # best effort: a failed enrichment falls back to the base prompt
+            logger.warning(f"failed to enrich system prompt: {e}")
+            return base_system_prompt
+
     async def execute(
         self,
         files: dict[str, str],
@@ -90,6 +130,9 @@ async def execute(
         message = Message(role="user", content=[TextRaw(user_prompt_rendered)])
         self.root = Node(BaseData(workspace, [message], {}))
 
+        # Enrich system prompt with relevant knowledge base topics
+        enriched_system_prompt = await self._enrich_system_prompt(self.system_prompt, user_prompt)
+
         solution: Node[BaseData] | None = None
         iteration = 0
         while solution is None:
@@ -112,7 +155,7 @@ async def execute(
             )
             nodes = await self.run_llm(
                 candidates,
-                system_prompt=self.system_prompt,
+                system_prompt=enriched_system_prompt,
                 tools=self.tools,
                 max_tokens=8192,
             )

diff --git a/agent/nicegui_agent/knowledge_base/app_async_sync_patterns.md b/agent/nicegui_agent/knowledge_base/app_async_sync_patterns.md
@@ -0,0 +1,7 @@
+# Async vs Sync Page Functions
+
+Use async page functions when you need to access `app.storage.tab` (requires `await ui.context.client.connected()`), show dialogs and wait for user response, or perform asynchronous operations like API calls and file I/O. The async pattern is necessary when your page needs to wait for external resources or user interactions.
+
+Use sync page functions for simple UI rendering without async operations, basic event handlers, and state updates. Sync functions are more straightforward and perform better when you don't need to await anything. Most basic pages with forms, navigation, and timers can use sync functions.
+
+Choose the right pattern based on your needs: async for tab storage, dialogs, file uploads with processing; sync for simple forms, navigation, timers, and basic UI updates. Don't make pages async unless you actually need to await something, as it adds unnecessary complexity.
diff --git a/agent/nicegui_agent/knowledge_base/app_error_handling.md b/agent/nicegui_agent/knowledge_base/app_error_handling.md
@@ -0,0 +1,7 @@
+# Error Handling and User Feedback
+
+Use try/except blocks for operations that might fail and provide immediate user feedback through `ui.notify()`. Always log errors with appropriate detail for debugging while showing user-friendly messages. Use `ui.notify('File processed successfully!', type='positive')` for success and `ui.notify(f'Error: {str(e)}', type='negative')` for failures.
+
+Never use quiet failures or generic exception handling that hides important errors. Always log the specific error context before showing user notifications, for example `logger.exception(f'Error processing file: {filename}')` inside the `except` block, so the error and its traceback are recorded. This dual approach ensures both user experience and debugging capability.
+
+Provide contextual feedback for different operation types: `type='positive'` for successful operations, `type='negative'` for errors, `type='warning'` for cautionary messages. Keep error messages concise but informative, avoiding technical jargon that users won't understand while maintaining enough detail for troubleshooting.
diff --git a/agent/nicegui_agent/knowledge_base/app_modularity.md b/agent/nicegui_agent/knowledge_base/app_modularity.md
@@ -0,0 +1,7 @@
+# Application Modularity
+
+Break your application into focused modules that narrow their scope and separate core logic from view components. Each module should be defined in a separate file and expose a `create()` function that assembles the module's UI. This pattern promotes code organization and reusability across your application.
+
+Define modules with clear boundaries: create functions like `word_counter.create()` that set up routes and UI components for specific features. Keep the module's logic self-contained and avoid cross-module dependencies where possible. Each module should handle its own UI setup and event handlers.
+
+Build your root application in `app/startup.py` by importing and calling each module's create function. Always call `create_tables()` first to ensure database schema exists, then initialize each module: `word_counter.create()`. This centralized startup pattern makes it easy to manage your application's initialization sequence.
diff --git a/agent/nicegui_agent/knowledge_base/app_timers_navigation.md b/agent/nicegui_agent/knowledge_base/app_timers_navigation.md
@@ -0,0 +1,7 @@
+# Timers and Navigation Patterns
+
+Use `ui.timer` for periodic tasks and auto-refreshing content. Create update functions that modify existing UI elements rather than creating new ones: `time_label.set_text(f'Current time: {datetime.now().strftime("%H:%M:%S")}')`. Call the update function once initially, then set up the timer: `ui.timer(1.0, update_time)`.
+
+Implement navigation using `ui.link` for internal links and `ui.navigate.to()` for programmatic navigation. Use `ui.link('Go to Dashboard', '/dashboard')` for user-clickable navigation and `ui.navigate.to('/settings')` within event handlers for conditional or automated navigation.
+
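+For dialogs and user interactions, use async patterns with proper awaiting. Build the dialog with `with ui.dialog() as dialog, ui.card():`, add buttons that call `dialog.submit(...)` (for example `ui.button('Yes', on_click=lambda: dialog.submit('Yes'))`), then wait for the answer with `result = await dialog`; the result is `None` when the user dismisses the dialog. Handle the result appropriately and provide feedback through notifications. This pattern works well for confirmation dialogs and complex user input scenarios.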
diff --git a/agent/nicegui_agent/knowledge_base/components_common_pitfalls.md b/agent/nicegui_agent/knowledge_base/components_common_pitfalls.md
@@ -0,0 +1,7 @@
+# Common NiceGUI Component Pitfalls
+
+Avoid passing both positional and keyword arguments for the same parameter. For `ui.date()`, never write `ui.date('Date', value=date.today())` as this causes "multiple values for argument 'value'". Instead use `ui.date(value=date.today())`. For date values, use `.isoformat()` when setting: `date_input.set_value(date.today().isoformat())`.
+
+Don't use non-existent parameters like `size` for `ui.button()`. Instead of `ui.button('Click', size='sm')`, use CSS classes: `ui.button('Click').classes('text-sm')`. Similarly, use proper dialog creation patterns: `with ui.dialog() as dialog, ui.card():` rather than trying to use async context managers.
+
+Capture nullable values safely in lambda functions: use `on_click=lambda user_id=user.id: delete_user(user_id) if user_id else None` instead of `on_click=lambda: delete_user(user.id)` where `user.id` might be None. Always register modules properly in startup.py by importing and calling their `create()` functions.
diff --git a/agent/nicegui_agent/knowledge_base/databricks_integration.md b/agent/nicegui_agent/knowledge_base/databricks_integration.md
@@ -0,0 +1,7 @@
+# Databricks Integration Patterns
+
+Always check real table structure and data in Databricks before implementing models. Use the `DatabricksModel` base class with proper catalog, schema, and table class variables: `__catalog__ = "samples"`, `__schema__ = "accuweather"`, `__table__ = "forecast_daily_calendar_imperial"`. The `table_name()` method constructs the full table reference.
+
+Implement the `fetch()` method for each DatabricksModel to execute SQL queries and return model instances. Use `execute_databricks_query(query)` to run SQL and convert results with `[cls(**row) for row in raw_results]`. When dynamic values such as date ranges are interpolated into the SQL with f-strings, only insert values you have validated and formatted yourself (for example `date.isoformat()`); never interpolate raw user input.
+
+Follow best practices: validate query results before processing, use descriptive error messages, log query execution for monitoring, and consider performance with appropriate limits. Use reasonable default parameter values in fetch methods to prevent long-running queries. For quick results, consider fetching aggregated data and storing it in PostgreSQL for faster subsequent access.
diff --git a/agent/nicegui_agent/knowledge_base/nicegui_slot_management.md b/agent/nicegui_agent/knowledge_base/nicegui_slot_management.md
@@ -0,0 +1,7 @@
+# NiceGUI Slot Stack Management
+
+Understand that NiceGUI wraps Vue.js/Quasar components, and slots come from Vue.js architecture. The slot stack tracks which Vue slot is currently active for placing new elements. The error "slot stack empty" occurs when you try to create UI elements outside the proper context (no active Vue slot).
+
+Use the container pattern for async functions: pass containers explicitly and use them with context managers. Instead of `async def update(): ui.label('data')`, write `async def update(container): with container: container.clear(); ui.label('data')`. This ensures UI elements are created within the proper slot context.
+
+For async updates, prefer the refreshable pattern using `@ui.refreshable` decorator. Create a refreshable function that contains your UI: `@ui.refreshable def show_data(): ui.label(data)`, then call `show_data.refresh()` from async functions instead of creating UI elements directly. Never create UI elements in background tasks - always use containers or refreshable patterns.
diff --git a/agent/nicegui_agent/knowledge_base/nicegui_testing_element_access.md b/agent/nicegui_agent/knowledge_base/nicegui_testing_element_access.md
@@ -0,0 +1,7 @@
+# Element Access Patterns in Tests
+
+For single element access, use `.elements.pop()` rather than indexing. Write `upload = user.find(ui.upload).elements.pop()` and `date_input = user.find(ui.date).elements.pop()`. Never use indexing like `elements[0]` as it causes "'set' object is not subscriptable" TypeError since `.elements` returns a set.
+
+For multiple elements, convert the set to a list first: `buttons = list(user.find(ui.button).elements)` then check if the list has elements before accessing: `if buttons: buttons[0].click()`. This pattern safely handles cases where no elements are found.
+
+Always wait after UI-changing actions with `await user.should_see()` before making assertions. Write `user.find('Add Item').click(); await user.should_see('New item added')` rather than immediate assertions that may fail due to async updates. The framework needs time to process UI changes.