From 80ea273c10cf8b979e93baebab63fb5b1ff2423e Mon Sep 17 00:00:00 2001
From: Breta01
Date: Wed, 11 Jun 2025 00:38:42 +0800
Subject: [PATCH 01/13] WIP: Agentic alpha search

---
 brain/agentic.py                       | 97 ++++++++++++++++++++++++++
 brain/agents/__init__.py               | 0
 brain/{agent.py => agents/executor.py} | 0
 brain/agents/planner.py                | 48 +++++++++++++
 brain/graph_state.py                   | 12 ++++
 brain/vector_store.py                  | 39 +++++++++++
 requirements.txt                       | 3 +
 7 files changed, 199 insertions(+)
 create mode 100644 brain/agentic.py
 create mode 100644 brain/agents/__init__.py
 rename brain/{agent.py => agents/executor.py} (100%)
 create mode 100644 brain/agents/planner.py
 create mode 100644 brain/graph_state.py
 create mode 100644 brain/vector_store.py

diff --git a/brain/agentic.py b/brain/agentic.py
new file mode 100644
index 0000000..f7ecf31
--- /dev/null
+++ b/brain/agentic.py
@@ -0,0 +1,97 @@
+# graph_v048.py -----------------------------------------------------
+"""
+LangGraph >=0.4 requires you to build a *StateGraph* and to pass an
+explicit state schema. The constants `START` and `END` replace the old
+string names for entry/exit points.
+"""
+from __future__ import annotations
+
+from langgraph.graph import END, START, StateGraph
+
+from brain.graph_state import GraphState
+
+
+# -------------------------------------------------------------------
+# 2. ── Node adapters ──
+#      Wrap the agents/functions you already built so that each node
+#      takes `state` and returns a *partial* state update (a dict).
+# -------------------------------------------------------------------
+def seed_finder_node(state: GraphState) -> GraphState:
+    # Iterate database till we find some decent alpha, or some other seed idea
+    seed_alpha = "ts_sum(close, 5) / ts_sum(open, 5) - 1"
+    return {"alpha": seed_alpha, "metrics": metrics, "mode": "continue"}
+
+
+def planner_node(state: GraphState) -> GraphState:
+    plan = planner_agent.invoke({"alpha": state["alpha"], "metrics": state["metrics"]})
+    return {"plan": plan}
+
+
+def executor_node(state: GraphState) -> GraphState:
+    result = modifier_agent.invoke(
+        {
+            "alpha": state["alpha"],
+            "plan": state["plan"],
+            "best": state.get("best_metrics", {}),
+        }
+    )
+    # result contains new_alpha, new_metrics, maybe better flag
+    update: GraphState = {
+        "alpha": result["alpha"],
+        "metrics": result["metrics"],
+        "mode": "continue",
+    }
+    if result.get("is_better"):  # update champion
+        update["best_alpha"] = result["alpha"]
+        update["best_metrics"] = result["metrics"]
+    return update
+
+
+def stagnation_node(state: GraphState) -> GraphState:
+    """
+    Decide what happens next.
+    - If plateaued: return {"mode": "explore"}
+    - If hard-stop (budget / Sharpe ceiling reached): {"mode": "stop"}
+    - Else continue refining the same alpha
+    """
+    if plateau_condition(state):
+        return {"mode": "explore"}
+    if hard_stop_condition(state):
+        return {"mode": "stop"}
+    return {"mode": "continue"}
+
+
+# -------------------------------------------------------------------
+# 3. 
── Build the graph ── +# ------------------------------------------------------------------- +builder = StateGraph(GraphState) # ← 0.4.x MUST pass schema + +builder.add_node("seed_finder", seed_finder_node) +builder.add_node("planner", planner_node) +builder.add_node("executor", executor_node) +builder.add_node("stagnation_chk", stagnation_node) + +# Static flow +builder.add_edge(START, "seed_finder") +builder.add_edge("seed_finder", "planner") +builder.add_edge("planner", "executor") +builder.add_edge("executor", "stagnation_chk") + +# Conditional branching (no more …then= kwarg in 0.4.8) +builder.add_conditional_edges( + "stagnation_chk", + # Branch selector can be a lambda OR a runnable; here we read state. + lambda state: state["mode"], + path_map={ + "continue": "planner", + "explore": "seed_finder", + "stop": END, + }, +) + +graph = builder.compile() # returns a CompiledStateGraph + + +initial = {"mode": "explore"} # minimal keys; others filled in later +out = graph.invoke(initial) +print(out.get("best_metrics")) diff --git a/brain/agents/__init__.py b/brain/agents/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/brain/agent.py b/brain/agents/executor.py similarity index 100% rename from brain/agent.py rename to brain/agents/executor.py diff --git a/brain/agents/planner.py b/brain/agents/planner.py new file mode 100644 index 0000000..21a9f3e --- /dev/null +++ b/brain/agents/planner.py @@ -0,0 +1,48 @@ +from langchain_core.messages import AnyMessage +from langgraph.prebuilt import create_react_agent +from langgraph.prebuilt.chat_agent_executor import AgentState +from pydantic import BaseModel, Field + +from brain.model import MODEL + + +def prompt( + state: AgentState, +) -> list[AnyMessage]: + """Configure the agent prompt, including all the messages.""" + prompt = """ +You are a senior quant researcher using World Quant Brain platform to create alphas. + +Based on the provided seed alpha, create a plan for exploring new alphas. +""" + return [{"role": "system", "content": prompt}] + state["messages"] + + +class ResponseFormat(BaseModel): + """Structured format of alpha edits.""" + + edits: list[str] = Field(description="List of edits to the alpha") + + +agent = create_react_agent(model=MODEL, response_format=ResponseFormat, prompt=prompt) + + +def invoke(message: str) -> ResponseFormat: + """Invoke the agent with the given messages and configuration.""" + while True: + response = agent.invoke( + { + "messages": [ + { + "role": "user", + "content": message, + } + ] + } + ) + + try: + return ResponseFormat.model_validate(response["structured_response"]) + except Exception as e: + print(f"Error: parsing response: {e}") + continue diff --git a/brain/graph_state.py b/brain/graph_state.py new file mode 100644 index 0000000..dcbcd1d --- /dev/null +++ b/brain/graph_state.py @@ -0,0 +1,12 @@ +from typing import TypedDict + + +class GraphState(TypedDict, total=False): + alpha: str + metrics: dict[str, float] # latest eval (sharpe, etc.) 
+ plan: dict[str, list[str]] # Planner-generated JSON + # Book-keeping + best_alpha: str + best_metrics: dict[str, float] + mode: str # "continue" | "explore" | "stop" + history: list[str] # optional trace for debugging diff --git a/brain/vector_store.py b/brain/vector_store.py new file mode 100644 index 0000000..bd10876 --- /dev/null +++ b/brain/vector_store.py @@ -0,0 +1,39 @@ +import getpass +import os +from uuid import uuid4 + +import faiss +from dotenv import load_dotenv +from langchain_community.docstore.in_memory import InMemoryDocstore +from langchain_community.vectorstores import FAISS +from langchain_core.documents import Document +from langchain_openai import OpenAIEmbeddings + +load_dotenv() + +if not os.environ.get("OPENAI_API_KEY"): + os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ") + + +embeddings = OpenAIEmbeddings(model="text-embedding-3-small") + +index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world"))) + +vector_store = FAISS( + embedding_function=embeddings, + index=index, + docstore=InMemoryDocstore(), + index_to_docstore_id={}, +) + +document = Document( + page_content="Test document for vector store", + metadata={"source": "tweet"}, +) + +documents = [ + document, +] +uuids = [str(uuid4()) for _ in range(len(documents))] + +vector_store.add_documents(documents=documents, ids=uuids) diff --git a/requirements.txt b/requirements.txt index 027b68e..6eede87 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,11 @@ +faiss-cpu +langchain-community langchain[google-genai,openai] langgraph langmem pandas psycopg[binary] +pydantic python-dotenv requests tqdm From 36a51f00f3f1bb27b2a7f69da76de3766e900108 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Thu, 12 Jun 2025 17:21:27 +0800 Subject: [PATCH 02/13] Adding score tracking + color labeling --- .env.example | 3 +++ brain/alpha_class.py | 18 ++++++++++++++++++ brain/database.py | 10 +++++++--- brain/genetic_algorithm.py | 7 ++++++- brain/search_algorithm.py | 2 +- migrations/20250612143906_add_score.sql | 3 +++ 6 files changed, 38 insertions(+), 5 deletions(-) create mode 100644 migrations/20250612143906_add_score.sql diff --git a/.env.example b/.env.example index 2c7d0e8..d82081f 100644 --- a/.env.example +++ b/.env.example @@ -7,3 +7,6 @@ OPENAI_API_KEY= # Database URL DATABASE_URL= + +# Competition ID (update accordingly) +COMPETITION_ID=IQC2025S2 diff --git a/brain/alpha_class.py b/brain/alpha_class.py index 48bc809..8603dc1 100644 --- a/brain/alpha_class.py +++ b/brain/alpha_class.py @@ -1,9 +1,12 @@ import datetime +import os import sqlite3 import uuid from dataclasses import asdict, dataclass, field, replace from typing import Optional +from brain.api import BrainAPI + @dataclass class Alpha: @@ -39,6 +42,8 @@ class Alpha: failing_tests: Optional[list[str]] = field(default_factory=list) long_count: Optional[int] = None short_count: Optional[int] = None + # Score change + score: Optional[float] = None # timestamp (let DB default if you don’t set it) created_at: Optional[str] = field( @@ -120,6 +125,19 @@ def replace(self, **kwargs) -> "Alpha": """ return replace(self, **kwargs, alpha_id=str(uuid.uuid4()), is_temporary=True) + def update_score(self) -> float: + """Check and return the score of the Alpha.""" + results = BrainAPI.performance_comparison( + self.alpha_id, competition=os.environ.get("COMPETITION_ID") + ) + if "score" not in results or "after" not in results["score"]: + return None + + self.score = results["score"].get("after", 0) - results["score"].get( + 
"before", + ) + return self.score + @classmethod def create_alpha( cls, diff --git a/brain/database.py b/brain/database.py index 55718ea..197f622 100644 --- a/brain/database.py +++ b/brain/database.py @@ -22,12 +22,16 @@ def __init__(self, db_url: str = None): self.conn.set_autocommit(True) self.cursor = self.conn.cursor() - def insert_alpha(self, alpha: Alpha) -> int: - """Insert a Alpha class instance into the table.""" + def upsert_alpha(self, alpha: Alpha) -> int: + """Insert a Alpha class instance into the table (update if id already exists).""" record = alpha.as_dict() cols = ", ".join(record) bangs = ", ".join("%s" for _ in record) - sql = f"INSERT INTO alphas ({cols}) VALUES ({bangs})" + update_cols = ", ".join(f"{k} = EXCLUDED.{k}" for k in record if k != "alpha_id") + sql = ( + f"INSERT INTO alphas ({cols}) VALUES ({bangs}) ON CONFLICT (alpha_id)" + f" DO UPDATE SET {update_cols}" + ) self.cursor.execute(sql, tuple(record.values())) def find_by_alpha( diff --git a/brain/genetic_algorithm.py b/brain/genetic_algorithm.py index ed82107..1024619 100644 --- a/brain/genetic_algorithm.py +++ b/brain/genetic_algorithm.py @@ -86,8 +86,13 @@ def _update_alphas_storage( return alpha = Alpha.from_stats(stats) + if alpha.fitness is not None and alpha.fitness > 1.0 and len(alpha.failing_tests) < 2: + score = alpha.update_score() + if score > 100 and len(alpha.failing_tests) == 0: + BrainAPI.set_alpha_properties(alpha.alpha_id, color="YELLOW") + try: - Database().insert_alpha(alpha) + Database().upsert_alpha(alpha) except Exception as e: print(f"Error during database insertion: {e}") pass diff --git a/brain/search_algorithm.py b/brain/search_algorithm.py index 7eb55bf..79483de 100644 --- a/brain/search_algorithm.py +++ b/brain/search_algorithm.py @@ -1,7 +1,7 @@ import random -from brain.agent import agent from brain.agent_config import DEFAULT_CONFIG +from brain.agents.executor import agent from brain.alpha_storage import Storage from brain.database import Database from brain.genetic_algorithm import genetic_algorithm diff --git a/migrations/20250612143906_add_score.sql b/migrations/20250612143906_add_score.sql new file mode 100644 index 0000000..b2a2976 --- /dev/null +++ b/migrations/20250612143906_add_score.sql @@ -0,0 +1,3 @@ +-- add an integer "score" column (allows negative/positive values) +ALTER TABLE alphas +ADD COLUMN IF NOT EXISTS score REAL; From 569e692694f839accffb34017bc9e3a2bcbe4453 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Thu, 12 Jun 2025 22:26:22 +0800 Subject: [PATCH 03/13] First working version of agentic system --- .env.example | 6 ++ brain/agentic.py | 125 +++++++++++++++-------------------- brain/agents/__init__.py | 5 ++ brain/agents/executor.py | 115 ++++++++++++++++++++++++++------ brain/agents/executor_old.py | 76 +++++++++++++++++++++ brain/agents/fine_tuner.py | 42 ++++++++++++ brain/agents/planner.py | 45 ++++++++----- brain/alpha_storage.py | 21 +++++- brain/genetic_algorithm.py | 51 +++++++++++++- brain/graph_state.py | 17 +++-- brain/helpers.py | 17 +++++ brain/main.py | 12 ++-- brain/score.py | 13 +++- brain/search_algorithm.py | 2 +- 14 files changed, 415 insertions(+), 132 deletions(-) create mode 100644 brain/agents/executor_old.py create mode 100644 brain/agents/fine_tuner.py create mode 100644 brain/helpers.py diff --git a/.env.example b/.env.example index d82081f..702692f 100644 --- a/.env.example +++ b/.env.example @@ -10,3 +10,9 @@ DATABASE_URL= # Competition ID (update accordingly) COMPETITION_ID=IQC2025S2 + +# Langsmith Tracing 
From 569e692694f839accffb34017bc9e3a2bcbe4453 Mon Sep 17 00:00:00 2001
From: Breta01
Date: Thu, 12 Jun 2025 22:26:22 +0800
Subject: [PATCH 03/13] First working version of agentic system

---
 .env.example                 | 6 ++
 brain/agentic.py             | 125 +++++++++++++++--------------
 brain/agents/__init__.py     | 5 ++
 brain/agents/executor.py     | 115 ++++++++++++++++++++------
 brain/agents/executor_old.py | 76 +++++++++++++++++++++
 brain/agents/fine_tuner.py   | 42 ++++++++++++
 brain/agents/planner.py      | 45 ++++++++-----
 brain/alpha_storage.py       | 21 +++++-
 brain/genetic_algorithm.py   | 51 +++++++++++++-
 brain/graph_state.py         | 17 +++--
 brain/helpers.py             | 17 +++++
 brain/main.py                | 12 ++--
 brain/score.py               | 13 +++-
 brain/search_algorithm.py    | 2 +-
 14 files changed, 415 insertions(+), 132 deletions(-)
 create mode 100644 brain/agents/executor_old.py
 create mode 100644 brain/agents/fine_tuner.py
 create mode 100644 brain/helpers.py

diff --git a/.env.example b/.env.example
index d82081f..702692f 100644
--- a/.env.example
+++ b/.env.example
@@ -10,3 +10,9 @@ DATABASE_URL=
 
 # Competition ID (update accordingly)
 COMPETITION_ID=IQC2025S2
+
+# Langsmith Tracing
+LANGSMITH_TRACING=true
+LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
+LANGSMITH_API_KEY=
+LANGSMITH_PROJECT=
diff --git a/brain/agentic.py b/brain/agentic.py
index f7ecf31..11ac2ef 100644
--- a/brain/agentic.py
+++ b/brain/agentic.py
@@ -1,97 +1,82 @@
-# graph_v048.py -----------------------------------------------------
-"""
-LangGraph >=0.4 requires you to build a *StateGraph* and to pass an
-explicit state schema. The constants `START` and `END` replace the old
-string names for entry/exit points.
-"""
-from __future__ import annotations
-
 from langgraph.graph import END, START, StateGraph
 
+from brain.agents import invoke_executor, invoke_fine_tuner, invoke_planner
+from brain.alpha_storage import Storage
 from brain.graph_state import GraphState
+from brain.score import get_score
 
 
-# -------------------------------------------------------------------
-# 2. ── Node adapters ──
-#      Wrap the agents/functions you already built so that each node
-#      takes `state` and returns a *partial* state update (a dict).
-# -------------------------------------------------------------------
 def seed_finder_node(state: GraphState) -> GraphState:
+    """Find a seed alpha to start the exploration."""
     # Iterate database till we find some decent alpha, or some other seed idea
     seed_alpha = "ts_sum(close, 5) / ts_sum(open, 5) - 1"
-    return {"alpha": seed_alpha, "metrics": metrics, "mode": "continue"}
+    print(f"Seed alpha: {seed_alpha}")
+    return {
+        "alpha_idea": seed_alpha,
+        "node": "plan",
+        "state": "explore",
+        "storage": Storage(score_func=get_score, max_size=50),
+    }
 
 
 def planner_node(state: GraphState) -> GraphState:
+    """Plan the next steps based on the current alpha and state."""
-    plan = planner_agent.invoke({"alpha": state["alpha"], "metrics": state["metrics"]})
-    return {"plan": plan}
+    plan = invoke_planner(state)
+    if not plan:
+        return {**state, "node": "seed", "state": "explore"}
+    return {**state, "node": "execute", "plan": plan}
 
 
 def executor_node(state: GraphState) -> GraphState:
-    result = modifier_agent.invoke(
-        {
-            "alpha": state["alpha"],
-            "plan": state["plan"],
-            "best": state.get("best_metrics", {}),
-        }
-    )
-    # result contains new_alpha, new_metrics, maybe better flag
-    update: GraphState = {
-        "alpha": result["alpha"],
-        "metrics": result["metrics"],
-        "mode": "continue",
-    }
-    if result.get("is_better"):  # update champion
-        update["best_alpha"] = result["alpha"]
-        update["best_metrics"] = result["metrics"]
-    return update
+    invoke_executor(state)
+    return {**state, "node": "plan", "state": "fine-tune"}
+
+
+def fine_tuner_node(state: GraphState) -> GraphState:
+    invoke_fine_tuner(state)
+    return {**state, "node": "seed", "state": "explore"}
+
+
+# def stagnation_node(state: GraphState) -> GraphState:
+#     """
+#     Decide what happens next. 
+# - If plateaued: return {"mode": "explore"} +# - If hard-stop (budget / Sharpe ceiling reached): {"mode": "stop"} +# - Else continue refining the same alpha +# """ +# if plateau_condition(state): +# return {"mode": "explore"} +# if hard_stop_condition(state): +# return {"mode": "stop"} +# return {"mode": "continue"} + + +builder = StateGraph(GraphState) builder.add_node("seed_finder", seed_finder_node) builder.add_node("planner", planner_node) builder.add_node("executor", executor_node) -builder.add_node("stagnation_chk", stagnation_node) +builder.add_node("fine_tuner", fine_tuner_node) +# builder.add_node("stagnation_chk", stagnation_node) # Static flow builder.add_edge(START, "seed_finder") -builder.add_edge("seed_finder", "planner") -builder.add_edge("planner", "executor") -builder.add_edge("executor", "stagnation_chk") # Conditional branching (no more …then= kwarg in 0.4.8) -builder.add_conditional_edges( - "stagnation_chk", - # Branch selector can be a lambda OR a runnable; here we read state. - lambda state: state["mode"], - path_map={ - "continue": "planner", - "explore": "seed_finder", - "stop": END, - }, -) +for node in ["seed_finder", "planner", "executor", "fine_tuner"]: + builder.add_conditional_edges( + node, + lambda state: state["node"], + path_map={ + "plan": "planner", + "execute": "executor", + "seed": "seed_finder", + "fine_tune": "fine_tuner", + "stop": END, + }, + ) graph = builder.compile() # returns a CompiledStateGraph -initial = {"mode": "explore"} # minimal keys; others filled in later -out = graph.invoke(initial) -print(out.get("best_metrics")) +out = graph.invoke({}) diff --git a/brain/agents/__init__.py b/brain/agents/__init__.py index e69de29..efb4afa 100644 --- a/brain/agents/__init__.py +++ b/brain/agents/__init__.py @@ -0,0 +1,5 @@ +__all__ = ["invoke_executor", "invoke_fine_tuner", "invoke_planner"] + +from .executor import invoke as invoke_executor +from .fine_tuner import invoke as invoke_fine_tuner +from .planner import invoke as invoke_planner diff --git a/brain/agents/executor.py b/brain/agents/executor.py index fafb879..732f864 100644 --- a/brain/agents/executor.py +++ b/brain/agents/executor.py @@ -1,15 +1,22 @@ +from functools import partial + from langchain_core.messages import AnyMessage from langchain_core.runnables import RunnableConfig from langchain_openai import ChatOpenAI from langgraph.prebuilt import create_react_agent from langgraph.prebuilt.chat_agent_executor import AgentState +from requests import Response -from brain.agent_config import get_universe_config +from brain.agent_config import DEFAULT_CONFIG, get_universe_config +from brain.alpha_class import Alpha +from brain.alpha_storage import Storage +from brain.genetic_algorithm import genetic_algorithm +from brain.graph_state import GraphState from brain.model import MODEL from brain.tools.datafields import get_random_datafields, search_datafields from brain.tools.ideas import get_random_idea from brain.tools.operators import describe_operators, operators -from brain.tools.simulation import submit_alpha +from brain.tools.simulation import StopException, submit_alpha def prompt( @@ -29,25 +36,10 @@ def prompt( Allowed operators are: {operators.all()} Don't use named arguments in the alpha expression unless there is '=' in the definition. -Steps: -1. Think about the alphas you created so far and the PROS and CONS -2. If there is some good alpha (passing tests and seft-correlation < 0.4), try to improve it first -3. 
Reason about possible improvements -4 From time to time try to create a new alpha from scratch -5. Check the list of available operators and data fields -6. Always try uncommon data fields first before using close, volumn, open, close, etc. -7. Propose the alpha and run the alpha simulation. - -- Try applying different functions and changing parameter values. -- For d parameter use one of the following values: 2, 5, 10, 20, 30, 50, 100, 120, 250 -- Create simple alphas that are easy to understand and explain, use basic operations like +, -, *, / -- Start by combining two data fields and add more if alpha is good. -- Apply vec_avg(x) or vec_sum(x) if data field is a type VECTOR. -- Change alpha completely if you think it is not good enough. -- Self-correlation must be less than 0.4 - -Working with equity in universe: {conf['universe']}, region: {conf['region']} -and delay {conf['delay']}. +You HAVE TO follow the instructions to create a new alpha or improve an existing one. +Propose the alpha and run the alpha simulation. + +Working with equity in universe: {conf['universe']}, region: {conf['region']} and delay {conf['delay']}. """ return [{"role": "system", "content": prompt}] + state["messages"] @@ -74,3 +66,84 @@ class CustomState(AgentState): prompt=prompt, state_schema=CustomState, ) + + +def create_alpha_simulation( + storage: Storage, plan: list[str], config: dict = DEFAULT_CONFIG +) -> tuple[Response, Alpha]: + """Create a new alpha based on the given ID.""" + top_k = 5 + + alphas = { + cat: [alpha.prompt_format() for alpha in storage.get_top_k(cat, top_k)] + for cat in storage.categories + } + + formatted_alphas = { + "passing": "\n".join(alphas["passing"]), + "failing": "\n".join(alphas["failing"][: top_k - len(alphas["passing"])]), + "pending": "\n".join(alphas["pending"]), + } + + best_alpha = "" if storage.best_alpha is None else storage.best_alpha.prompt_format() + + prompt = f""" +Create a new alpha based on the best alpha according to the following instructions: +{plan[storage.counter % len(plan)]} + + +BEST ALPHA +---------- +{best_alpha} + +PASSING +------- +{formatted_alphas['passing']} + +FAILING +------- +{formatted_alphas['failing']} + +PENDING +------- +{formatted_alphas['pending']} +""" + + print(f"Prompt:\n{prompt}") + + alphas_store = [] + while not alphas_store: + try: + agent.invoke( + { + "messages": [ + { + "role": "user", + "content": prompt, + } + ] + }, + config={ + "recursion_limit": 50, + "configurable": { + **config, + "alphas": alphas_store, + }, + }, + ) + except StopException: + continue + + print(f"Alphas store: {alphas_store}") + return alphas_store[-1] + + +def invoke(state: GraphState) -> GraphState: + """Invoke executor agent following the plan.""" + plan = state.get("plan", []) + create_alpha = partial(create_alpha_simulation, plan=plan) + + storage = state.get("storage") + storage.reset_counter() + + genetic_algorithm(storage, create_alpha, len(plan) + 10) diff --git a/brain/agents/executor_old.py b/brain/agents/executor_old.py new file mode 100644 index 0000000..fafb879 --- /dev/null +++ b/brain/agents/executor_old.py @@ -0,0 +1,76 @@ +from langchain_core.messages import AnyMessage +from langchain_core.runnables import RunnableConfig +from langchain_openai import ChatOpenAI +from langgraph.prebuilt import create_react_agent +from langgraph.prebuilt.chat_agent_executor import AgentState + +from brain.agent_config import get_universe_config +from brain.model import MODEL +from brain.tools.datafields import get_random_datafields, search_datafields 
+from brain.tools.ideas import get_random_idea
+from brain.tools.operators import describe_operators, operators
+from brain.tools.simulation import submit_alpha
+
+
+def prompt(
+    state: AgentState,
+    config: RunnableConfig,
+) -> list[AnyMessage]:
+    """Configure the agent prompt, including all the messages."""
+    conf = get_universe_config(config)
+
+    if state.get("end", False):
+        state["is_last_step"] = True
+        return "END (do not continue)"
+
+    prompt = f"""
+You are a quant researcher using World Quant Brain platform writing alphas.
+
+Allowed operators are: {operators.all()}
+Don't use named arguments in the alpha expression unless there is '=' in the definition.
+
+Steps:
+1. Think about the alphas you created so far and the PROS and CONS
+2. If there is some good alpha (passing tests and self-correlation < 0.4), try to improve it first
+3. Reason about possible improvements
+4. From time to time try to create a new alpha from scratch
+5. Check the list of available operators and data fields
+6. Always try uncommon data fields first before using close, volume, open, etc.
+7. Propose the alpha and run the alpha simulation.
+
+- Try applying different functions and changing parameter values.
+- For d parameter use one of the following values: 2, 5, 10, 20, 30, 50, 100, 120, 250
+- Create simple alphas that are easy to understand and explain, use basic operations like +, -, *, /
+- Start by combining two data fields and add more if alpha is good.
+- Apply vec_avg(x) or vec_sum(x) if data field is a type VECTOR.
+- Change alpha completely if you think it is not good enough.
+- Self-correlation must be less than 0.4
+
+Working with equity in universe: {conf['universe']}, region: {conf['region']}
+and delay {conf['delay']}.
+"""
+    return [{"role": "system", "content": prompt}] + state["messages"]
+
+
+class CustomState(AgentState):
+    end: bool = False
+
+
+tools = [
+    submit_alpha,
+    describe_operators,
+    search_datafields,
+    get_random_datafields,
+    get_random_idea,
+]
+
+agent = create_react_agent(
+    model=(
+        MODEL.bind_tools(tools, parallel_tool_calls=False)
+        if isinstance(MODEL, ChatOpenAI)
+        else MODEL
+    ),
+    tools=tools,
+    prompt=prompt,
+    state_schema=CustomState,
+)
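The fine_tuner module added next sweeps one simulation parameter at a time through `Alpha.replace`. A self-contained toy sketch of that fan-out pattern, using a hypothetical `AlphaStub` instead of the real Alpha dataclass:

    from dataclasses import dataclass, replace

    @dataclass(frozen=True)
    class AlphaStub:
        regular: str = "close / open - 1"
        decay: int = 4
        truncation: float = 0.01

    param_options = {"decay": [2, 8], "truncation": [0.05, 0.1]}
    base = AlphaStub()
    for param, options in param_options.items():
        # One batch per parameter; every variant changes exactly one field.
        variants = [replace(base, **{param: option}) for option in options]
        print(param, variants)

Varying a single parameter per batch keeps the sweep interpretable: any change in the metrics can be attributed to that one setting.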
state.get("plan", []) + create_alpha = partial(create_alpha_simulation, plan=plan, config=congig) + + storage.reset_counter() + genetic_algorithm(storage, create_alpha, len(plan) + 10) diff --git a/brain/agents/planner.py b/brain/agents/planner.py index 21a9f3e..968a1d7 100644 --- a/brain/agents/planner.py +++ b/brain/agents/planner.py @@ -1,34 +1,42 @@ -from langchain_core.messages import AnyMessage from langgraph.prebuilt import create_react_agent -from langgraph.prebuilt.chat_agent_executor import AgentState from pydantic import BaseModel, Field +from brain.graph_state import GraphState from brain.model import MODEL -def prompt( - state: AgentState, -) -> list[AnyMessage]: - """Configure the agent prompt, including all the messages.""" - prompt = """ -You are a senior quant researcher using World Quant Brain platform to create alphas. - -Based on the provided seed alpha, create a plan for exploring new alphas. -""" - return [{"role": "system", "content": prompt}] + state["messages"] - - class ResponseFormat(BaseModel): """Structured format of alpha edits.""" - edits: list[str] = Field(description="List of edits to the alpha") + edits: list[str] = Field(description="List of edits to test with the alpha") -agent = create_react_agent(model=MODEL, response_format=ResponseFormat, prompt=prompt) +agent = create_react_agent( + model=MODEL, + tools=[], + response_format=ResponseFormat, + prompt="You are a senior quant researcher using World Quant Brain platform to create alphas.", +) -def invoke(message: str) -> ResponseFormat: +def invoke(state: GraphState) -> ResponseFormat: """Invoke the agent with the given messages and configuration.""" + message = "" + if state.get("state", "explore") == "explore": + message = ( + "You are exploring a new alphas based on the new idea.\n" + f"{state.get('alpha_idea', '')}\n" + "Create a plan to explore this idea, start with simple alpha expression.\n" + ) + elif state.get("state", "explore") == "fine_tune" and state["storage"].best_alpha is not None: + message = ( + "You are fine-tuning an existing alpha.\n" + f"Current alpha: {state['storage'].best_alpha.prompt_format()}\n" + "Create a plan to improve this alpha, start with simple alpha expression.\n" + ) + else: + return None + while True: response = agent.invoke( { @@ -42,7 +50,8 @@ def invoke(message: str) -> ResponseFormat: ) try: - return ResponseFormat.model_validate(response["structured_response"]) + result = ResponseFormat.model_validate(response["structured_response"]) + return result.edits except Exception as e: print(f"Error: parsing response: {e}") continue diff --git a/brain/alpha_storage.py b/brain/alpha_storage.py index 3235f62..c8ce203 100644 --- a/brain/alpha_storage.py +++ b/brain/alpha_storage.py @@ -13,7 +13,9 @@ def __init__(self, score_func: Callable[[Alpha], float], max_size: int = 50): """ self.score_func = score_func self.max_size = max_size + self.counter = 0 self.data = {} + self.best_alpha = None self.categories = { "passing": [], "failing": [], @@ -23,6 +25,10 @@ def __init__(self, score_func: Callable[[Alpha], float], max_size: int = 50): def __getitem__(self, alpha_id: str) -> Alpha: return self.data.get(alpha_id) + def reset_counter(self) -> None: + """Reset the counter for the number of alphas added.""" + self.counter = 0 + def get_top_k(self, category: str, k: int = 10) -> list[Alpha]: """Get the top k alphas from a specific category.""" if category not in self.categories: @@ -35,6 +41,17 @@ def add_alpha(self, alpha: Alpha, category: str) -> None: if category not in 
self.categories: raise ValueError(f"Invalid category: {category}") + self.counter += 1 + + # Keep tract of the best alpha + if category != "pending": + if self.best_alpha is None: + self.best_alpha = alpha + elif self._score(alpha.alpha_id, False) > self._score( + self.best_alpha.alpha_id, False + ) or len(self.best_alpha.failing_tests) > len(alpha.failing_tests): + self.best_alpha = alpha + self.data[alpha.alpha_id] = alpha if alpha.alpha_id not in self.categories[category]: self._append_to_category(alpha.alpha_id, category) @@ -45,8 +62,8 @@ def remove_pending_alpha(self, alpha_id: str) -> None: self.categories["pending"].remove(alpha_id) self.data.pop(alpha_id, None) - def _score(self, alpha_id: str) -> float: - return self.score_func(self.data[alpha_id]) + def _score(self, alpha_id: str, discount: bool = True) -> float: + return self.score_func(self.data[alpha_id], discount) def _append_to_category(self, alpha_id: str, category: str) -> None: """Append an alpha to a specific category.""" diff --git a/brain/genetic_algorithm.py b/brain/genetic_algorithm.py index 1024619..beacc42 100644 --- a/brain/genetic_algorithm.py +++ b/brain/genetic_algorithm.py @@ -8,11 +8,53 @@ from brain.api import DEFAULT_CONFIG as API_DEFAULT_CONFIG from brain.api import BrainAPI from brain.database import Database +from brain.helpers import negate_expression MAX_WORKERS = 3 -def genetic_algorithm(storage: Storage, create_alpha: Callable[[Storage], tuple[Response, Alpha]]): +def execute_alphas( + alphas: list[Alpha], + storage: Storage, +): + """Execute a list of alphas and update the storage.""" + iterator = iter(alphas) + with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool: + live_jobs = {} + + for _ in range(MAX_WORKERS): + alpha = next(iterator, None) + + if alpha is None: + break + + response = BrainAPI.start_simulation(alpha.get_simulation_data(test_period="P1Y0M0D")) + live_jobs[pool.submit(_monitor_alpha, response, alpha)] = alpha + + while live_jobs: + for job in as_completed(live_jobs): + alpha = live_jobs.pop(job) + stats = job.result() + print(f"Stats: {stats}") + alpha = _update_alphas_storage(storage, stats, alpha.alpha_id) + + new_alpha = next(iterator, None) + if new_alpha is None: + continue + + response = BrainAPI.start_simulation( + new_alpha.get_simulation_data(test_period="P1Y0M0D") + ) + storage.add_alpha(new_alpha, "pending") + live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha + + +def genetic_algorithm( + storage: Storage, + create_alpha: Callable[[Storage], tuple[Response, Alpha]], + max_steps: int = None, +): + """Run the genetic algorithm to evolve alphas.""" with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool: live_jobs = {} @@ -33,8 +75,7 @@ def genetic_algorithm(storage: Storage, create_alpha: Callable[[Storage], tuple[ # Start a new alpha simulation if alpha is not None and alpha.alpha_id is not None and alpha.fitness < -0.5: - split = alpha.regular.split(";") - regular = f'{";".join(split[:-1])}{";" if len(split) > 1 else ""}-({split[-1]})' + regular = negate_expression(alpha.regular) new_alpha = alpha.replace(regular=regular) response = BrainAPI.start_simulation( @@ -46,6 +87,10 @@ def genetic_algorithm(storage: Storage, create_alpha: Callable[[Storage], tuple[ storage.add_alpha(new_alpha, "pending") live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha + if max_steps is not None and storage.counter >= max_steps: + print(f"Reached maximum steps: {max_steps}") + return + def _monitor_alpha(response, alpha): """Monitor the alpha 
simulation.""" diff --git a/brain/graph_state.py b/brain/graph_state.py index dcbcd1d..909d2b0 100644 --- a/brain/graph_state.py +++ b/brain/graph_state.py @@ -1,12 +1,15 @@ from typing import TypedDict +from brain.alpha_storage import Storage + class GraphState(TypedDict, total=False): - alpha: str - metrics: dict[str, float] # latest eval (sharpe, etc.) - plan: dict[str, list[str]] # Planner-generated JSON + alpha_idea: str + storage: Storage + # Plan + plan: list[str] + # Graph state properties + node: str # "plan", "seed", "execute", "test", "stop" + state: str # "explore", "fine-tune" # Book-keeping - best_alpha: str - best_metrics: dict[str, float] - mode: str # "continue" | "explore" | "stop" - history: list[str] # optional trace for debugging + history: list[str] # list of alpha IDs diff --git a/brain/helpers.py b/brain/helpers.py new file mode 100644 index 0000000..8c466c7 --- /dev/null +++ b/brain/helpers.py @@ -0,0 +1,17 @@ +def negate_expression(regular: str) -> str: + """Negate the alpha expression.""" + split = regular.strip().split(";") + + last_index = len(split) - 1 + while last_index > 0 and not split[last_index].strip(): + last_index -= 1 + + if "=" in split[last_index]: + assignment = split[last_index].strip().split("=", 1) + # Check if it is really a variable assignment + if "(" in assignment[0] or "+" in assignment[0] or "-" in assignment[0]: + split[last_index] = f"-({split[last_index]})" + else: + split[last_index] = f"{assignment[0]}=-({assignment[1]})" + + return ";".join(split) diff --git a/brain/main.py b/brain/main.py index 5dbf5d5..460fbe1 100644 --- a/brain/main.py +++ b/brain/main.py @@ -1,17 +1,13 @@ """Main module of the project.""" -# from brain.alphas import alpha_registry -# from brain.fine_tune import fine_tune_alpha -from brain import search_algorithm +# from brain import search_algorithm +from brain import agentic def main(): - # alpha = alpha_registry["alpha_1"] - # print(f"Alpha name: {alpha.name}") - # print(f"Alpha code: {alpha.code}") - # fine_tune_alpha(alpha.code) + agentic.graph.invoke({}) - search_algorithm.main() + # search_algorithm.main() if __name__ == "__main__": diff --git a/brain/score.py b/brain/score.py index f493b07..43ce5ba 100644 --- a/brain/score.py +++ b/brain/score.py @@ -6,12 +6,21 @@ def decay_hyperbolic(x, gamma=0.2, delta=0.1): return (gamma * x) / (1 + delta * x) -def get_score(alpha: Alpha): +def get_score(alpha: Alpha, discount: bool = True) -> float: + """Default alpha scoring for alpha seaarch.""" if not alpha.visible: return float("-inf") return ( alpha.fitness + 1.5 * alpha.sharpe - - decay_hyperbolic(alpha.print_counter, gamma=0.01, delta=0.02) + - int(discount) * decay_hyperbolic(alpha.print_counter, gamma=0.01, delta=0.02) ) + + +def get_fine_tune_score(alpha: Alpha, discount: bool = True) -> float: + """Alpha scoring for fine-tuning.""" + if not alpha.visible: + return float("-inf") + + return alpha.fitness + 1.3 * alpha.sharpe diff --git a/brain/search_algorithm.py b/brain/search_algorithm.py index 79483de..62bef81 100644 --- a/brain/search_algorithm.py +++ b/brain/search_algorithm.py @@ -1,7 +1,7 @@ import random from brain.agent_config import DEFAULT_CONFIG -from brain.agents.executor import agent +from brain.agents.executor_old import agent from brain.alpha_storage import Storage from brain.database import Database from brain.genetic_algorithm import genetic_algorithm From 7d4c44d06b25dbf5ed962664d0e00629bcc4fa31 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Fri, 13 Jun 2025 11:14:20 +0800 Subject: [PATCH 
04/13] Adding more agents --- brain/agentic.py | 45 ++++++++++++++++++++++++++++++++++-- brain/agents/alpha_tester.py | 43 ++++++++++++++++++++++++++++++++++ brain/agents/executor.py | 6 +++-- brain/agents/planner.py | 31 +++++++++++++++++++++---- brain/alpha_storage.py | 9 ++++---- brain/genetic_algorithm.py | 8 +++---- brain/graph_state.py | 2 ++ 7 files changed, 128 insertions(+), 16 deletions(-) create mode 100644 brain/agents/alpha_tester.py diff --git a/brain/agentic.py b/brain/agentic.py index 11ac2ef..de28c55 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -5,6 +5,8 @@ from brain.graph_state import GraphState from brain.score import get_score +MAX_EXPLORATION_COUNT = 3 + def seed_finder_node(state: GraphState) -> GraphState: """Find a seed alpha to start the exploration.""" @@ -15,12 +17,15 @@ def seed_finder_node(state: GraphState) -> GraphState: "alpha_idea": seed_alpha, "node": "plan", "state": "explore", + "explore_count": 0, "storage": Storage(score_func=get_score, max_size=50), } def planner_node(state: GraphState) -> GraphState: """Plan the next steps based on the current alpha and state.""" + state["explore_count"] += 1 + plan = invoke_planner(state) if not plan: return {**state, "node": "seed", "state": "explore"} @@ -29,7 +34,8 @@ def planner_node(state: GraphState) -> GraphState: def executor_node(state: GraphState) -> GraphState: invoke_executor(state) - return {**state, "node": "plan", "state": "fine-tune"} + # TODO: Pass some summary of results from the executor to planner + return {**state, "node": "explore_test", "state": "explore"} def fine_tuner_node(state: GraphState) -> GraphState: @@ -37,6 +43,37 @@ def fine_tuner_node(state: GraphState) -> GraphState: return {**state, "node": "seed", "state": "explore"} +def explore_test_node(state: GraphState) -> GraphState: + """Decide what happens next after exploring a new alpha idea.""" + best_alpha = state["storage"].best_alpha + + if best_alpha is not None and len(best_alpha.failing_tests) == 0: + # TODO: rank, sign tests + return {**state, "node": "fine_tune", "state": "fine-tune", "explore_count": 0} + + # TODO: Test plateau condition, compare previous best, with current best alpha + if state["explore_count"] < MAX_EXPLORATION_COUNT: + return {**state, "node": "plan", "state": "explore"} + + return {**state, "node": "seed", "state": "explore"} + + +def submit_test_node(state: GraphState) -> GraphState: + """Decide what happens next after fine-tuning a new alpha idea.""" + best_alpha = state["storage"].best_alpha + + if best_alpha is not None and len(best_alpha.failing_tests) == 0: + # TODO: additional test, check score, etc. + # TODO: Mark alpha as "submitted" or "ready for production" + return {**state, "node": "seed", "state": "explore", "explore_count": 0} + + # TODO: Test plateau condition, compare previous best, with current best alpha + if state["explore_count"] < MAX_EXPLORATION_COUNT: + return {**state, "node": "plan", "state": "fine-tune"} + + return {**state, "node": "seed", "state": "explore"} + + # def stagnation_node(state: GraphState) -> GraphState: # """ # Decide what happens next. 
@@ -57,13 +94,15 @@ def fine_tuner_node(state: GraphState) -> GraphState: builder.add_node("planner", planner_node) builder.add_node("executor", executor_node) builder.add_node("fine_tuner", fine_tuner_node) +builder.add_node("explore_test", explore_test_node) +builder.add_node("submit_test", submit_test_node) # builder.add_node("stagnation_chk", stagnation_node) # Static flow builder.add_edge(START, "seed_finder") # Conditional branching (no more …then= kwarg in 0.4.8) -for node in ["seed_finder", "planner", "executor", "fine_tuner"]: +for node in ["seed_finder", "planner", "executor", "fine_tuner", "explore_test", "submit_test"]: builder.add_conditional_edges( node, lambda state: state["node"], @@ -72,6 +111,8 @@ def fine_tuner_node(state: GraphState) -> GraphState: "execute": "executor", "seed": "seed_finder", "fine_tune": "fine_tuner", + "explore_test": "explore_test", + "submit_test": "submit_test", "stop": END, }, ) diff --git a/brain/agents/alpha_tester.py b/brain/agents/alpha_tester.py new file mode 100644 index 0000000..c65060f --- /dev/null +++ b/brain/agents/alpha_tester.py @@ -0,0 +1,43 @@ +from functools import partial + +from brain.agents.executor import create_alpha_simulation +from brain.alpha_class import Alpha +from brain.genetic_algorithm import execute_alphas, genetic_algorithm +from brain.graph_state import GraphState + +param_options = { + "universe": ["TOP3000", "TOP1000", "TOP500", "TOP200"], + "neutralization": ["INDUSTRY", "SECTOR", "MARKET", "NONE", "SUBINDUSTRY"], + "decay": [2, 4, 6, 8, 10, 12, 14, 16, 20], + "truncation": [0.005, 0.01, 0.05, 0.1], + # "pasteurization": ["ON", "OFF"], +} + + +def get_config(alpha: Alpha) -> dict: + """Get the configuration for the alpha.""" + return { + "region": alpha.region, + "universe": alpha.universe, + "neutralization": alpha.neutralization, + "truncation": alpha.truncation, + "decay": alpha.decay, + "delay": alpha.delay, + } + + +def invoke(state: GraphState) -> GraphState: + """Invoke fine-tuning agent.""" + storage = state.get("storage") + # Rank, sign, change delay +-1 (check for 0), truncation +-0.01 + + for param, options in param_options.items(): + alphas = [storage.best_alpha.replace(**{param: option}) for option in options] + execute_alphas(alphas, storage) + + congig = get_config(storage.best_alpha) + plan = state.get("plan", []) + create_alpha = partial(create_alpha_simulation, plan=plan, config=congig) + + storage.reset_counter() + genetic_algorithm(storage, create_alpha, len(plan) + 10) diff --git a/brain/agents/executor.py b/brain/agents/executor.py index 732f864..2adc008 100644 --- a/brain/agents/executor.py +++ b/brain/agents/executor.py @@ -18,6 +18,8 @@ from brain.tools.operators import describe_operators, operators from brain.tools.simulation import StopException, submit_alpha +STEPS_PER_INSTRUCTION = 4 + def prompt( state: AgentState, @@ -89,7 +91,7 @@ def create_alpha_simulation( prompt = f""" Create a new alpha based on the best alpha according to the following instructions: -{plan[storage.counter % len(plan)]} +{plan[(storage.counter // STEPS_PER_INSTRUCTION) % len(plan)]} BEST ALPHA @@ -146,4 +148,4 @@ def invoke(state: GraphState) -> GraphState: storage = state.get("storage") storage.reset_counter() - genetic_algorithm(storage, create_alpha, len(plan) + 10) + genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION) diff --git a/brain/agents/planner.py b/brain/agents/planner.py index 968a1d7..3183d62 100644 --- a/brain/agents/planner.py +++ b/brain/agents/planner.py @@ -3,6 +3,7 
@@ from brain.graph_state import GraphState from brain.model import MODEL +from brain.tools.operators import operators class ResponseFormat(BaseModel): @@ -15,24 +16,46 @@ class ResponseFormat(BaseModel): model=MODEL, tools=[], response_format=ResponseFormat, - prompt="You are a senior quant researcher using World Quant Brain platform to create alphas.", + prompt=f""" +You are a senior quantitative researcher working with the World Quant Brain platform to create alphas. Based on an initial alpha idea provided (either in written form or as an expression), outline specific steps to implement concrete changes to the alpha using the Fast Expression Language, which includes various operators. + +Allowed operators are: {operators.all()} + +1. Analyze the initial alpha idea to identify key components such as variables, conditions, and expected outcomes. +2. Propose specific changes to the alpha expression, detailing how each change can enhance its performance. For example: + + - Change the parameter value of 'X' from 0.5 to 0.7 to test sensitivity. + - Introduce a new operator, such as 'rank', to refine the conditions under which trades are executed. + - Suggest using a different data field, like 'volume', instead of 'price' to capture market dynamics more accurately. + - Suggest type of data field to search for, e.g. 'momentum', 'trend', 'news' etc. + - Add a group neutralization to the alpha expression to reduce sector bias. + +3. For each proposed change, provide a rationale explaining why it may improve the alpha's effectiveness. +4. Ensure that each change is concrete and directly related to the alpha expression, focusing on parameter adjustments, operator modifications, or data field substitutions. + +List only the steps related to changing the alpha expression. Do NOT include steps such as evaluation, backtesting, documentation, or communication. + +These steps should facilitate a targeted approach to optimizing the current alpha idea. 
+""".strip(), ) def invoke(state: GraphState) -> ResponseFormat: """Invoke the agent with the given messages and configuration.""" message = "" + best_alpha = state["storage"].best_alpha if state.get("state", "explore") == "explore": message = ( "You are exploring a new alphas based on the new idea.\n" f"{state.get('alpha_idea', '')}\n" + f"Best alpha so far:\n {best_alpha.prompt_format() if best_alpha else 'None'}\n" "Create a plan to explore this idea, start with simple alpha expression.\n" ) - elif state.get("state", "explore") == "fine_tune" and state["storage"].best_alpha is not None: + elif state.get("state", "explore") == "fine_tune" and best_alpha is not None: message = ( "You are fine-tuning an existing alpha.\n" - f"Current alpha: {state['storage'].best_alpha.prompt_format()}\n" - "Create a plan to improve this alpha, start with simple alpha expression.\n" + f"Current alpha: {best_alpha.prompt_format()}\n" + "Create a plan to fine-tune and improve this alpha.\n" ) else: return None diff --git a/brain/alpha_storage.py b/brain/alpha_storage.py index c8ce203..8d9bbbe 100644 --- a/brain/alpha_storage.py +++ b/brain/alpha_storage.py @@ -45,11 +45,12 @@ def add_alpha(self, alpha: Alpha, category: str) -> None: # Keep tract of the best alpha if category != "pending": - if self.best_alpha is None: + if self.best_alpha is None and alpha.long_count + alpha.short_count > 300: self.best_alpha = alpha - elif self._score(alpha.alpha_id, False) > self._score( - self.best_alpha.alpha_id, False - ) or len(self.best_alpha.failing_tests) > len(alpha.failing_tests): + elif alpha.long_count + alpha.short_count > 300 and ( + self._score(alpha.alpha_id, False) > self._score(self.best_alpha.alpha_id, False) + or len(self.best_alpha.failing_tests) > len(alpha.failing_tests) + ): self.best_alpha = alpha self.data[alpha.alpha_id] = alpha diff --git a/brain/genetic_algorithm.py b/brain/genetic_algorithm.py index beacc42..2187c62 100644 --- a/brain/genetic_algorithm.py +++ b/brain/genetic_algorithm.py @@ -73,6 +73,10 @@ def genetic_algorithm( print(f"Stats: {stats}") alpha = _update_alphas_storage(storage, stats, alpha.alpha_id) + if max_steps is not None and storage.counter >= max_steps: + print(f"Reached maximum steps: {max_steps}") + continue + # Start a new alpha simulation if alpha is not None and alpha.alpha_id is not None and alpha.fitness < -0.5: regular = negate_expression(alpha.regular) @@ -87,10 +91,6 @@ def genetic_algorithm( storage.add_alpha(new_alpha, "pending") live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha - if max_steps is not None and storage.counter >= max_steps: - print(f"Reached maximum steps: {max_steps}") - return - def _monitor_alpha(response, alpha): """Monitor the alpha simulation.""" diff --git a/brain/graph_state.py b/brain/graph_state.py index 909d2b0..a29f97b 100644 --- a/brain/graph_state.py +++ b/brain/graph_state.py @@ -6,6 +6,8 @@ class GraphState(TypedDict, total=False): alpha_idea: str storage: Storage + # Number of times the alpha idea has been explored + explore_count: int # Plan plan: list[str] # Graph state properties From 48c2342173f7ce789c7ef6ce17e2040562711add Mon Sep 17 00:00:00 2001 From: Breta01 Date: Tue, 17 Jun 2025 15:42:09 +0800 Subject: [PATCH 05/13] Update agentic flow --- brain/agent_config.py | 14 +++++++++++++ brain/agentic.py | 31 +++++++++++++++++---------- brain/agents/__init__.py | 3 ++- brain/agents/executor.py | 27 ++++++++++++++++++++---- brain/agents/fine_tuner.py | 25 +++++++--------------- brain/agents/tester.py | 43 
++++++++++++++++++++++++++++++++++++++ brain/api.py | 2 +- brain/genetic_algorithm.py | 6 +++++- brain/graph_state.py | 4 ++++ brain/helpers.py | 13 ++++++++---- 10 files changed, 128 insertions(+), 40 deletions(-) create mode 100644 brain/agents/tester.py diff --git a/brain/agent_config.py b/brain/agent_config.py index bb2a81e..9949892 100644 --- a/brain/agent_config.py +++ b/brain/agent_config.py @@ -1,5 +1,7 @@ from langchain_core.runnables import RunnableConfig +from brain.alpha_class import Alpha + DEFAULT_CONFIG = { "region": "USA", "universe": "TOP3000", @@ -17,3 +19,15 @@ def get_universe_config(config: RunnableConfig) -> dict: **DEFAULT_CONFIG, **{k: conf[k] for k in DEFAULT_CONFIG.keys() if k in conf}, } + + +def get_config(alpha: Alpha) -> dict: + """Get the configuration for the alpha.""" + return { + "region": alpha.region, + "universe": alpha.universe, + "neutralization": alpha.neutralization, + "truncation": alpha.truncation, + "decay": alpha.decay, + "delay": alpha.delay, + } diff --git a/brain/agentic.py b/brain/agentic.py index de28c55..b01ae15 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -1,6 +1,6 @@ from langgraph.graph import END, START, StateGraph -from brain.agents import invoke_executor, invoke_fine_tuner, invoke_planner +from brain.agents import invoke_executor, invoke_fine_tuner, invoke_planner, invoke_tester from brain.alpha_storage import Storage from brain.graph_state import GraphState from brain.score import get_score @@ -18,6 +18,7 @@ def seed_finder_node(state: GraphState) -> GraphState: "node": "plan", "state": "explore", "explore_count": 0, + "static_finetune": True, "storage": Storage(score_func=get_score, max_size=50), } @@ -35,12 +36,12 @@ def planner_node(state: GraphState) -> GraphState: def executor_node(state: GraphState) -> GraphState: invoke_executor(state) # TODO: Pass some summary of results from the executor to planner - return {**state, "node": "explore_test", "state": "explore"} + return {**state, "node": "explore_test", "state": "explore", "static_finetune": False} def fine_tuner_node(state: GraphState) -> GraphState: invoke_fine_tuner(state) - return {**state, "node": "seed", "state": "explore"} + return {**state, "node": "submit_test", "state": "fine-tune", "static_finetune": False} def explore_test_node(state: GraphState) -> GraphState: @@ -48,8 +49,15 @@ def explore_test_node(state: GraphState) -> GraphState: best_alpha = state["storage"].best_alpha if best_alpha is not None and len(best_alpha.failing_tests) == 0: - # TODO: rank, sign tests - return {**state, "node": "fine_tune", "state": "fine-tune", "explore_count": 0} + # if tester is passing + if invoke_tester(state): + return { + **state, + "node": "fine_tuner", + "state": "fine-tune", + "static_finetune": True, + "explore_count": 0, + } # TODO: Test plateau condition, compare previous best, with current best alpha if state["explore_count"] < MAX_EXPLORATION_COUNT: @@ -63,8 +71,9 @@ def submit_test_node(state: GraphState) -> GraphState: best_alpha = state["storage"].best_alpha if best_alpha is not None and len(best_alpha.failing_tests) == 0: - # TODO: additional test, check score, etc. 
- # TODO: Mark alpha as "submitted" or "ready for production" + if invoke_tester(state): + # TODO: Mark alpha as "submitted" or "ready for production" + pass return {**state, "node": "seed", "state": "explore", "explore_count": 0} # TODO: Test plateau condition, compare previous best, with current best alpha @@ -98,11 +107,14 @@ def submit_test_node(state: GraphState) -> GraphState: builder.add_node("submit_test", submit_test_node) # builder.add_node("stagnation_chk", stagnation_node) + # Static flow builder.add_edge(START, "seed_finder") +builder.add_edge("executor", "explore_test") +builder.add_edge("fine_tuner", "submit_test") # Conditional branching (no more …then= kwarg in 0.4.8) -for node in ["seed_finder", "planner", "executor", "fine_tuner", "explore_test", "submit_test"]: +for node in ["seed_finder", "planner", "explore_test", "submit_test"]: builder.add_conditional_edges( node, lambda state: state["node"], @@ -118,6 +130,3 @@ def submit_test_node(state: GraphState) -> GraphState: ) graph = builder.compile() # returns a CompiledStateGraph - - -out = graph.invoke({}) diff --git a/brain/agents/__init__.py b/brain/agents/__init__.py index efb4afa..d8b62b7 100644 --- a/brain/agents/__init__.py +++ b/brain/agents/__init__.py @@ -1,5 +1,6 @@ -__all__ = ["invoke_executor", "invoke_fine_tuner", "invoke_planner"] +__all__ = ["invoke_executor", "invoke_fine_tuner", "invoke_planner", "invoke_tester"] from .executor import invoke as invoke_executor from .fine_tuner import invoke as invoke_fine_tuner from .planner import invoke as invoke_planner +from .tester import invoke as invoke_tester diff --git a/brain/agents/executor.py b/brain/agents/executor.py index 2adc008..3f7af3b 100644 --- a/brain/agents/executor.py +++ b/brain/agents/executor.py @@ -7,10 +7,10 @@ from langgraph.prebuilt.chat_agent_executor import AgentState from requests import Response -from brain.agent_config import DEFAULT_CONFIG, get_universe_config +from brain.agent_config import DEFAULT_CONFIG, get_config, get_universe_config from brain.alpha_class import Alpha from brain.alpha_storage import Storage -from brain.genetic_algorithm import genetic_algorithm +from brain.genetic_algorithm import execute_alphas, genetic_algorithm from brain.graph_state import GraphState from brain.model import MODEL from brain.tools.datafields import get_random_datafields, search_datafields @@ -140,12 +140,31 @@ def create_alpha_simulation( return alphas_store[-1] +param_options = { + "universe": ["TOP3000", "TOP500"], + "neutralization": ["INDUSTRY", "SECTOR", "MARKET", "SUBINDUSTRY"], + "decay": [1, 5, 10, 15], + "truncation": [0.01, 0.1], + # "pasteurization": ["ON", "OFF"], +} + + def invoke(state: GraphState) -> GraphState: """Invoke executor agent following the plan.""" plan = state.get("plan", []) - create_alpha = partial(create_alpha_simulation, plan=plan) - storage = state.get("storage") + + if storage.best_alpha is not None: + config = get_config(storage.best_alpha) + else: + config = state.get("default_config", DEFAULT_CONFIG) + create_alpha = partial(create_alpha_simulation, plan=plan, config=config) + storage.reset_counter() genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION) + + if state.get("static_finetune", True): + for param, options in param_options.items(): + alphas = [storage.best_alpha.replace(**{param: option}) for option in options] + execute_alphas(alphas, storage) diff --git a/brain/agents/fine_tuner.py b/brain/agents/fine_tuner.py index 3dfd5fd..a6949b5 100644 --- a/brain/agents/fine_tuner.py 
+++ b/brain/agents/fine_tuner.py
@@ -1,7 +1,7 @@
 from functools import partial
 
-from brain.agents.executor import create_alpha_simulation
-from brain.alpha_class import Alpha
+from brain.agent_config import get_config
+from brain.agents.executor import STEPS_PER_INSTRUCTION, create_alpha_simulation
 from brain.genetic_algorithm import execute_alphas, genetic_algorithm
 from brain.graph_state import GraphState
 
@@ -14,29 +14,18 @@
 }
 
 
-def get_config(alpha: Alpha) -> dict:
-    """Get the configuration for the alpha."""
-    return {
-        "region": alpha.region,
-        "universe": alpha.universe,
-        "neutralization": alpha.neutralization,
-        "truncation": alpha.truncation,
-        "decay": alpha.decay,
-        "delay": alpha.delay,
-    }
-
-
 def invoke(state: GraphState) -> GraphState:
     """Invoke fine-tuning agent."""
     storage = state.get("storage")
 
-    for param, options in param_options.items():
-        alphas = [storage.best_alpha.replace(**{param: option}) for option in options]
-        execute_alphas(alphas, storage)
+    if state.get("static_finetune", True):
+        for param, options in param_options.items():
+            alphas = [storage.best_alpha.replace(**{param: option}) for option in options]
+            execute_alphas(alphas, storage)
 
     congig = get_config(storage.best_alpha)
     plan = state.get("plan", [])
     create_alpha = partial(create_alpha_simulation, plan=plan, config=congig)
 
     storage.reset_counter()
-    genetic_algorithm(storage, create_alpha, len(plan) + 10)
+    genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION)
diff --git a/brain/agents/tester.py b/brain/agents/tester.py
new file mode 100644
index 0000000..b6f1826
--- /dev/null
+++ b/brain/agents/tester.py
@@ -0,0 +1,43 @@
+from functools import partial
+
+from brain.agent_config import get_config
+from brain.agents.executor import STEPS_PER_INSTRUCTION, create_alpha_simulation
+from brain.genetic_algorithm import execute_alphas, genetic_algorithm
+from brain.graph_state import GraphState
+from brain.helpers import add_final_operator
+
+param_options = {
+    "universe": ["TOP3000", "TOP1000", "TOP500", "TOP200"],
+    "neutralization": ["INDUSTRY", "SECTOR", "MARKET", "NONE", "SUBINDUSTRY"],
+    "decay": [2, 4, 6, 8, 10, 12, 14, 16, 20],
+    "truncation": [0.005, 0.01, 0.05, 0.1],
+    # "pasteurization": ["ON", "OFF"],
+}
+
+
+def invoke(state: GraphState) -> GraphState:
+    """Invoke fine-tuning agent."""
+    storage = state.get("storage")
+
+    best_alpha = storage.best_alpha
+    config = get_config(best_alpha)
+    alphas = []
+
+    for decay in [-2, -1, 1, 2]:
+        if config["decay"] + decay >= 0:
+            alphas.append(best_alpha.replace(decay=config["decay"] + decay))
+
+    for op in ["rank", "sign"]:
+        regular = add_final_operator(op, best_alpha.regular)
+        alphas.append(best_alpha.replace(regular=regular))
+
+    results = execute_alphas(alphas, storage)
+
+    for alpha in results:
+        if alpha is None:
+            continue
+
+        if alpha.fitness < best_alpha.fitness * 0.5 and alpha.sharpe < best_alpha.sharpe * 0.5:
+            return False
+
+    return True
diff --git a/brain/api.py b/brain/api.py
index 1f1b22c..1817dc3 100644
--- a/brain/api.py
+++ b/brain/api.py
@@ -186,7 +186,7 @@ def get_self_corr(self, alpha_id: str):
 
         return self_corr_df
 
-    def check_self_corr_test(self, alpha_id: str, threshold: float = 0.5):
+    def check_self_corr_test(self, alpha_id: str, threshold: float = 0.6):
         """Check if alpha's self_corr test passed and saves result to dataframe"""
         self_corr_df = self.get_self_corr(alpha_id)
         if self_corr_df.empty:
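The budget `len(plan) * STEPS_PER_INSTRUCTION` used above pairs with the instruction selector `plan[(storage.counter // STEPS_PER_INSTRUCTION) % len(plan)]` in brain/agents/executor.py, so each plan step receives a fixed block of simulations. A standalone illustration with toy plan strings (STEPS_PER_INSTRUCTION = 4 as in the executor):

    STEPS_PER_INSTRUCTION = 4
    plan = ["step A", "step B", "step C"]
    schedule = [plan[(counter // STEPS_PER_INSTRUCTION) % len(plan)] for counter in range(12)]
    # schedule: four tries of "step A", then four of "step B", then four of "step C"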
a/brain/genetic_algorithm.py +++ b/brain/genetic_algorithm.py @@ -16,9 +16,10 @@ def execute_alphas( alphas: list[Alpha], storage: Storage, -): +) -> list[Alpha]: """Execute a list of alphas and update the storage.""" iterator = iter(alphas) + results = [] with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool: live_jobs = {} @@ -37,6 +38,7 @@ def execute_alphas( stats = job.result() print(f"Stats: {stats}") alpha = _update_alphas_storage(storage, stats, alpha.alpha_id) + results.append(alpha) new_alpha = next(iterator, None) if new_alpha is None: @@ -48,6 +50,8 @@ def execute_alphas( storage.add_alpha(new_alpha, "pending") live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha + return results + def genetic_algorithm( storage: Storage, diff --git a/brain/graph_state.py b/brain/graph_state.py index a29f97b..dfd08f4 100644 --- a/brain/graph_state.py +++ b/brain/graph_state.py @@ -8,6 +8,10 @@ class GraphState(TypedDict, total=False): storage: Storage # Number of times the alpha idea has been explored explore_count: int + # Static fine-tuning + static_finetune: bool + # Alpha config + default_config: dict # Plan plan: list[str] # Graph state properties diff --git a/brain/helpers.py b/brain/helpers.py index 8c466c7..986389e 100644 --- a/brain/helpers.py +++ b/brain/helpers.py @@ -1,5 +1,5 @@ -def negate_expression(regular: str) -> str: - """Negate the alpha expression.""" +def add_final_operator(operator: str, regular: str) -> str: + """Change alpha expression by adding one more final operator.""" split = regular.strip().split(";") last_index = len(split) - 1 @@ -10,8 +10,13 @@ def negate_expression(regular: str) -> str: assignment = split[last_index].strip().split("=", 1) # Check if it is really a variable assignment if "(" in assignment[0] or "+" in assignment[0] or "-" in assignment[0]: - split[last_index] = f"-({split[last_index]})" + split[last_index] = f"{operator}({split[last_index]})" else: - split[last_index] = f"{assignment[0]}=-({assignment[1]})" + split[last_index] = f"{assignment[0]}={operator}({assignment[1]})" return ";".join(split) + + +def negate_expression(regular: str) -> str: + """Negate the alpha expression.""" + return add_final_operator("-", regular) From 5b303eee0b0bb9619c2c8c743e0e171a0af02f77 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Wed, 18 Jun 2025 06:41:25 +0800 Subject: [PATCH 06/13] Working seeder --- brain/agentic.py | 26 +++++++++++++++++++------- brain/agents/__init__.py | 9 ++++++++- brain/agents/seeder.py | 23 +++++++++++++++++++++++ brain/agents/tester.py | 14 +++++--------- brain/alpha_class.py | 13 ++++++++++--- brain/database.py | 36 ++++++++++++++++++++++++++++++++++++ 6 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 brain/agents/seeder.py diff --git a/brain/agentic.py b/brain/agentic.py index b01ae15..ea3571b 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -1,7 +1,15 @@ from langgraph.graph import END, START, StateGraph -from brain.agents import invoke_executor, invoke_fine_tuner, invoke_planner, invoke_tester +from brain.agent_config import get_config +from brain.agents import ( + invoke_executor, + invoke_fine_tuner, + invoke_planner, + invoke_seeder, + invoke_tester, +) from brain.alpha_storage import Storage +from brain.api import BrainAPI from brain.graph_state import GraphState from brain.score import get_score @@ -11,10 +19,11 @@ def seed_finder_node(state: GraphState) -> GraphState: """Find a seed alpha to start the exploration.""" # Iterate databse till we find some decent alpha, or some 
other seed idea - seed_alpha = "ts_sum(close, 5) / ts_sum(open, 5) - 1" - print(f"Seed alpha: {seed_alpha}") + alpha = invoke_seeder(state) + print(f"Seed alpha: {alpha.prompt_format()}") return { - "alpha_idea": seed_alpha, + "alpha_idea": alpha.regular, + "default_config": get_config(alpha), "node": "plan", "state": "explore", "explore_count": 0, @@ -49,8 +58,9 @@ def explore_test_node(state: GraphState) -> GraphState: best_alpha = state["storage"].best_alpha if best_alpha is not None and len(best_alpha.failing_tests) == 0: + score = best_alpha.update_score() # if tester is passing - if invoke_tester(state): + if invoke_tester(state) and score > -100: return { **state, "node": "fine_tuner", @@ -71,9 +81,11 @@ def submit_test_node(state: GraphState) -> GraphState: best_alpha = state["storage"].best_alpha if best_alpha is not None and len(best_alpha.failing_tests) == 0: - if invoke_tester(state): + score = best_alpha.update_score() + if invoke_tester(state) and score > 200: # TODO: Mark alpha as "submitted" or "ready for production" - pass + print("Submitting alpha! Score:", score) + BrainAPI.submit_alpha(best_alpha.alpha_id) return {**state, "node": "seed", "state": "explore", "explore_count": 0} # TODO: Test plateau condition, compare previous best, with current best alpha diff --git a/brain/agents/__init__.py b/brain/agents/__init__.py index d8b62b7..0075c71 100644 --- a/brain/agents/__init__.py +++ b/brain/agents/__init__.py @@ -1,6 +1,13 @@ -__all__ = ["invoke_executor", "invoke_fine_tuner", "invoke_planner", "invoke_tester"] +__all__ = [ + "invoke_executor", + "invoke_fine_tuner", + "invoke_planner", + "invoke_tester", + "invoke_seeder", +] from .executor import invoke as invoke_executor from .fine_tuner import invoke as invoke_fine_tuner from .planner import invoke as invoke_planner +from .seeder import invoke as invoke_seeder from .tester import invoke as invoke_tester diff --git a/brain/agents/seeder.py b/brain/agents/seeder.py new file mode 100644 index 0000000..b305225 --- /dev/null +++ b/brain/agents/seeder.py @@ -0,0 +1,23 @@ +import random + +from brain.alpha_class import Alpha +from brain.database import Database +from brain.graph_state import GraphState + + +def invoke(state: GraphState) -> Alpha: + """Invoke seeder agent to generate alphas.""" + page = random.randint(1, 100) + while True: + database = Database() + alphas = database.get_alphas_page(page) + + for alpha in alphas: + score = alpha.update_score() + corr = alpha.update_self_correlation() + database.upsert_alpha(alpha) + + if score > 0 and corr < 0.5: + return alpha + + page += 1 diff --git a/brain/agents/tester.py b/brain/agents/tester.py index b6f1826..15451d2 100644 --- a/brain/agents/tester.py +++ b/brain/agents/tester.py @@ -6,14 +6,6 @@ from brain.graph_state import GraphState from brain.helpers import add_final_operator -param_options = { - "universe": ["TOP3000", "TOP1000", "TOP500", "TOP200"], - "neutralization": ["INDUSTRY", "SECTOR", "MARKET", "NONE", "SUBINDUSTRY"], - "decay": [2, 4, 6, 8, 10, 12, 14, 16, 20], - "truncation": [0.005, 0.01, 0.05, 0.1], - # "pasteurization": ["ON", "OFF"], -} - def invoke(state: GraphState) -> GraphState: """Invoke fine-tuning agent.""" @@ -37,7 +29,11 @@ def invoke(state: GraphState) -> GraphState: if alpha is None: continue - if alpha.fitness < best_alpha.fitness * 0.5 and alpha.sharpe < best_alpha.sharpe * 0.5: + if ( + alpha.fitness < best_alpha.fitness * 0.5 + and alpha.sharpe < best_alpha.sharpe * 0.5 + and alpha.long_count + alpha.short_count < 400 + ): return 
False

     return True
diff --git a/brain/alpha_class.py b/brain/alpha_class.py
index 8603dc1..595a8a2 100644
--- a/brain/alpha_class.py
+++ b/brain/alpha_class.py
@@ -133,11 +133,18 @@ def update_score(self) -> float:
         if "score" not in results or "after" not in results["score"]:
             return None

-        self.score = results["score"].get("after", 0) - results["score"].get(
-            "before",
-        )
+        self.score = results["score"].get("after", 0) - results["score"].get("before", 100)
         return self.score

+    def update_self_correlation(self) -> float:
+        """Check and return the self-correlation of the Alpha."""
+        result = BrainAPI.get_self_corr(self.alpha_id)
+        if result.empty:
+            return self.self_correlation
+        value = result["correlation"].max()
+        self.self_correlation = value
+        return self.self_correlation
+
     @classmethod
     def create_alpha(
         cls,
diff --git a/brain/database.py b/brain/database.py
index 197f622..ecfad03 100644
--- a/brain/database.py
+++ b/brain/database.py
@@ -70,3 +70,39 @@ def k_best_alphas(
     def close(self):
         self.cursor.close()
         self.conn.close()
+
+    def get_alphas_page(
+        self,
+        page: int = 0,
+        page_size: int = 100,
+        max_self_corr: float = 0.7,
+        min_score: float = 0.0,
+    ) -> list[Alpha]:
+        """Return one 'page' of Alphas (as a list) matching the criteria."""
+        count = self.count_alphas(max_self_corr, min_score)
+        offset = (page * page_size) % max(count, 1)  # guard against an empty result set
+        sql = """
+            SELECT *
+            FROM alphas
+            WHERE self_correlation < %s
+              AND (score IS NULL OR score > %s)
+              AND (long_count IS NULL OR long_count > 100)
+            ORDER BY sharpe DESC, fitness DESC
+            LIMIT %s
+            OFFSET %s
+        """
+        params = (max_self_corr, min_score, page_size, offset)
+        self.cursor.execute(sql, params)
+        rows = self.cursor.fetchall()
+        return [Alpha.from_row(r) for r in rows]
+
+    def count_alphas(self, max_self_corr=0.7, min_score=0.0) -> int:
+        sql = """
+            SELECT COUNT(*)
+            FROM alphas
+            WHERE self_correlation < %s
+              AND (score IS NULL OR score > %s)
+              AND (long_count IS NULL OR long_count > 100)
+        """
+        self.cursor.execute(sql, (max_self_corr, min_score))
+        return self.cursor.fetchone()["count"]

From 2cdfd9c4f4d5ee00cc42838b378a58b0951afd57 Mon Sep 17 00:00:00 2001
From: Breta01
Date: Wed, 18 Jun 2025 06:47:26 +0800
Subject: [PATCH 07/13] Fix

---
 brain/alpha_storage.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/brain/alpha_storage.py b/brain/alpha_storage.py
index 8d9bbbe..553bc9f 100644
--- a/brain/alpha_storage.py
+++ b/brain/alpha_storage.py
@@ -43,6 +43,8 @@ def add_alpha(self, alpha: Alpha, category: str) -> None:

         self.counter += 1

+        self.data[alpha.alpha_id] = alpha
+
         # Keep track of the best alpha
         if category != "pending":
             if self.best_alpha is None and alpha.long_count + alpha.short_count > 300:
@@ -53,7 +55,6 @@ def add_alpha(self, alpha: Alpha, category: str) -> None:
             ):
                 self.best_alpha = alpha

-        self.data[alpha.alpha_id] = alpha
         if alpha.alpha_id not in self.categories[category]:
             self._append_to_category(alpha.alpha_id, category)

From 5965df3d1b1da08409096e23d5f6a540b65f2c1a Mon Sep 17 00:00:00 2001
From: Breta01
Date: Thu, 19 Jun 2025 07:10:07 +0800
Subject: [PATCH 08/13] More updates to agentic framework

---
 brain/agentic.py                      | 31 ++++--
 brain/agents/executor.py              | 16 +--
 brain/agents/planner.py               | 15 +--
 brain/agents/seeder.py                | 24 ++++-
 brain/agents/tester.py                |  6 +-
 brain/database.py                     |  4 +-
 brain/graph_state.py                  |  2 +
 brain/tools/data/alpha_templates.json | 134 ++++++++++++++++++++++++++
 brain/tools/simulation.py             |  5 +-
 9 files changed, 209 insertions(+), 28 deletions(-)
 create mode 100644 brain/tools/data/alpha_templates.json

diff 
--git a/brain/agentic.py b/brain/agentic.py index ea3571b..006ec1e 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -16,14 +16,30 @@ MAX_EXPLORATION_COUNT = 3 +def plateau_condition(state: GraphState) -> bool: + """Check if the exploration has plateaued.""" + storage = state["storage"] + best_alpha = storage.best_alpha + old_best_alpha = state.get("old_best_alpha") + + if best_alpha is None or old_best_alpha is None: + return False + + # Check if the score has not improved significantly + score_diff = ( + best_alpha.fitness - old_best_alpha.fitness + best_alpha.sharpe - old_best_alpha.sharpe + ) + return score_diff < 0.1 + + def seed_finder_node(state: GraphState) -> GraphState: """Find a seed alpha to start the exploration.""" # Iterate databse till we find some decent alpha, or some other seed idea - alpha = invoke_seeder(state) - print(f"Seed alpha: {alpha.prompt_format()}") + alpha_idea, config = invoke_seeder(state) + print(f"Seed alpha: {alpha_idea}") return { - "alpha_idea": alpha.regular, - "default_config": get_config(alpha), + "alpha_idea": alpha_idea, + "default_config": config, "node": "plan", "state": "explore", "explore_count": 0, @@ -35,10 +51,11 @@ def seed_finder_node(state: GraphState) -> GraphState: def planner_node(state: GraphState) -> GraphState: """Plan the next steps based on the current alpha and state.""" state["explore_count"] += 1 + state["old_best_alpha"] = state["storage"].best_alpha plan = invoke_planner(state) if not plan: - return {**state, "node": "seed", "state": "explore"} + return {**state, "node": "seed"} return {**state, "node": "execute", "plan": plan} @@ -70,7 +87,7 @@ def explore_test_node(state: GraphState) -> GraphState: } # TODO: Test plateau condition, compare previous best, with current best alpha - if state["explore_count"] < MAX_EXPLORATION_COUNT: + if state["explore_count"] < MAX_EXPLORATION_COUNT and plateau_condition(state): return {**state, "node": "plan", "state": "explore"} return {**state, "node": "seed", "state": "explore"} @@ -89,7 +106,7 @@ def submit_test_node(state: GraphState) -> GraphState: return {**state, "node": "seed", "state": "explore", "explore_count": 0} # TODO: Test plateau condition, compare previous best, with current best alpha - if state["explore_count"] < MAX_EXPLORATION_COUNT: + if state["explore_count"] < MAX_EXPLORATION_COUNT and plateau_condition(state): return {**state, "node": "plan", "state": "fine-tune"} return {**state, "node": "seed", "state": "explore"} diff --git a/brain/agents/executor.py b/brain/agents/executor.py index 3f7af3b..a5f72d9 100644 --- a/brain/agents/executor.py +++ b/brain/agents/executor.py @@ -18,7 +18,7 @@ from brain.tools.operators import describe_operators, operators from brain.tools.simulation import StopException, submit_alpha -STEPS_PER_INSTRUCTION = 4 +STEPS_PER_INSTRUCTION = 2 def prompt( @@ -70,10 +70,11 @@ class CustomState(AgentState): ) -def create_alpha_simulation( - storage: Storage, plan: list[str], config: dict = DEFAULT_CONFIG -) -> tuple[Response, Alpha]: +def create_alpha_simulation(storage: Storage, state: GraphState) -> tuple[Response, Alpha]: """Create a new alpha based on the given ID.""" + plan = state.get("plan", []) + config = state.get("default_config", DEFAULT_CONFIG) + top_k = 5 alphas = { @@ -93,6 +94,9 @@ def create_alpha_simulation( Create a new alpha based on the best alpha according to the following instructions: {plan[(storage.counter // STEPS_PER_INSTRUCTION) % len(plan)]} +ALPHA IDEA +---------- +{state.get("alpha_idea", "None")} BEST ALPHA 
---------- @@ -126,7 +130,7 @@ def create_alpha_simulation( ] }, config={ - "recursion_limit": 50, + "recursion_limit": 30, "configurable": { **config, "alphas": alphas_store, @@ -158,7 +162,7 @@ def invoke(state: GraphState) -> GraphState: config = get_config(storage.best_alpha) else: config = state.get("default_config", DEFAULT_CONFIG) - create_alpha = partial(create_alpha_simulation, plan=plan, config=config) + create_alpha = partial(create_alpha_simulation, state=state) storage.reset_counter() diff --git a/brain/agents/planner.py b/brain/agents/planner.py index 3183d62..b44a59f 100644 --- a/brain/agents/planner.py +++ b/brain/agents/planner.py @@ -3,6 +3,8 @@ from brain.graph_state import GraphState from brain.model import MODEL +from brain.tools.datafields import get_random_datafields, search_datafields +from brain.tools.ideas import get_random_idea from brain.tools.operators import operators @@ -14,15 +16,16 @@ class ResponseFormat(BaseModel): agent = create_react_agent( model=MODEL, - tools=[], + tools=[get_random_idea, search_datafields, get_random_datafields], response_format=ResponseFormat, prompt=f""" You are a senior quantitative researcher working with the World Quant Brain platform to create alphas. Based on an initial alpha idea provided (either in written form or as an expression), outline specific steps to implement concrete changes to the alpha using the Fast Expression Language, which includes various operators. Allowed operators are: {operators.all()} -1. Analyze the initial alpha idea to identify key components such as variables, conditions, and expected outcomes. -2. Propose specific changes to the alpha expression, detailing how each change can enhance its performance. For example: +1. Execute the original alpha to obtain a baseline performance. +2. Analyze the initial alpha idea to identify key components such as variables, conditions, and expected outcomes. +3. Propose specific changes to the alpha expression, detailing how each change can enhance its performance. For example: - Change the parameter value of 'X' from 0.5 to 0.7 to test sensitivity. - Introduce a new operator, such as 'rank', to refine the conditions under which trades are executed. @@ -30,8 +33,8 @@ class ResponseFormat(BaseModel): - Suggest type of data field to search for, e.g. 'momentum', 'trend', 'news' etc. - Add a group neutralization to the alpha expression to reduce sector bias. -3. For each proposed change, provide a rationale explaining why it may improve the alpha's effectiveness. -4. Ensure that each change is concrete and directly related to the alpha expression, focusing on parameter adjustments, operator modifications, or data field substitutions. +4. For each proposed change, provide a rationale explaining why it may improve the alpha's effectiveness. +5. Ensure that each change is concrete and directly related to the alpha expression, focusing on parameter adjustments, operator modifications, or data field substitutions. List only the steps related to changing the alpha expression. Do NOT include steps such as evaluation, backtesting, documentation, or communication. 
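(Illustrative sketch, not part of the patch: the prompt above asks the planner for concrete expression edits, which the ResponseFormat model returns as a plain list of strings. Assuming the seed ts_sum(close, 5) / ts_sum(open, 5) - 1, a returned plan might look like the sketch below; the concrete steps are invented for illustration.)

# Hypothetical ResponseFormat.edits produced by the planner agent.
plan = [
    "Change the lookback parameter from 5 to 10 to test sensitivity",
    "Wrap the whole expression in rank(...) to dampen outliers",
    "Search datafields for a 'momentum' field and substitute it for close",
    "Add group_neutralize(alpha, sector) to reduce sector bias",
]
# The executor then consumes one instruction per STEPS_PER_INSTRUCTION attempts:
# plan[(storage.counter // STEPS_PER_INSTRUCTION) % len(plan)]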
@@ -49,7 +52,7 @@ def invoke(state: GraphState) -> ResponseFormat:
             "You are exploring new alphas based on the new idea.\n"
             f"{state.get('alpha_idea', '')}\n"
             f"Best alpha so far:\n {best_alpha.prompt_format() if best_alpha else 'None'}\n"
-            "Create a plan to explore this idea, start with simple alpha expression.\n"
+            "Create a plan to explore this idea.\n"
         )
     elif state.get("state", "explore") == "fine_tune" and best_alpha is not None:
         message = (
diff --git a/brain/agents/seeder.py b/brain/agents/seeder.py
index b305225..55cc62b 100644
--- a/brain/agents/seeder.py
+++ b/brain/agents/seeder.py
@@ -1,13 +1,26 @@
+import json
 import random
+from pathlib import Path

+from brain.agent_config import get_config
 from brain.alpha_class import Alpha
 from brain.database import Database
 from brain.graph_state import GraphState

+TEMPLATES = json.load((Path(__file__).parent.parent / "tools/data/alpha_templates.json").open())

-def invoke(state: GraphState) -> Alpha:
+
+def invoke(state: GraphState) -> tuple[str, dict]:
     """Invoke seeder agent to generate alphas."""
-    page = random.randint(1, 100)
+    if random.random() < 0.3:
+        template = random.choice(TEMPLATES)["template"]
+        # TODO: Add random config
+        return (
+            f"Use the following template, fix the syntax, and try replacing different datafields\n```\n{template}\n```",
+            {},
+        )
+
+    page = 0
     while True:
         database = Database()
         alphas = database.get_alphas_page(page)
@@ -17,7 +30,10 @@ def invoke(state: GraphState) -> tuple[str, dict]:
             corr = alpha.update_self_correlation()
             database.upsert_alpha(alpha)

-            if score > 0 and corr < 0.5:
-                return alpha
+            if score > 0 and corr < 0.7 and alpha.sharpe > 1:
+                print(
+                    f"Found alpha: {alpha.regular} with score {score} and self-correlation {corr}"
+                )
+                return f"Use the following alpha\n```\n{alpha.regular}\n```", get_config(alpha)

         page += 1
diff --git a/brain/agents/tester.py b/brain/agents/tester.py
index 15451d2..0bb00d5 100644
--- a/brain/agents/tester.py
+++ b/brain/agents/tester.py
@@ -8,7 +8,7 @@

 def invoke(state: GraphState) -> GraphState:
-    """Invoke fine-tuning agent."""
+    """Invoke tester agent, returns False if any test fails."""
     storage = state.get("storage")

@@ -31,8 +31,8 @@ def invoke(state: GraphState) -> GraphState:

         if (
             alpha.fitness < best_alpha.fitness * 0.5
-            and alpha.sharpe < best_alpha.sharpe * 0.5
-            and alpha.long_count + alpha.short_count < 400
+            or alpha.sharpe < best_alpha.sharpe * 0.5
+            or alpha.long_count + alpha.short_count < 400
         ):
             return False
diff --git a/brain/database.py b/brain/database.py
index ecfad03..b577cba 100644
--- a/brain/database.py
+++ b/brain/database.py
@@ -86,8 +86,10 @@ def get_alphas_page(
             FROM alphas
             WHERE self_correlation < %s
               AND (score IS NULL OR score > %s)
+              AND sharpe > 1.0
+              AND fitness > 0.9
               AND (long_count IS NULL OR long_count > 100)
-            ORDER BY sharpe DESC, fitness DESC
+            ORDER BY score DESC NULLS LAST, sharpe DESC, fitness DESC
             LIMIT %s
             OFFSET %s
diff --git a/brain/graph_state.py b/brain/graph_state.py
index dfd08f4..b221d9c 100644
--- a/brain/graph_state.py
+++ b/brain/graph_state.py
@@ -1,11 +1,13 @@
 from typing import TypedDict

+from brain.alpha_class import Alpha
 from brain.alpha_storage import Storage


 class GraphState(TypedDict, total=False):
     alpha_idea: str
     storage: Storage
+    old_best_alpha: Alpha | None
     # Number of times the alpha idea has been explored
     explore_count: int
     # Static fine-tuning
diff --git a/brain/tools/data/alpha_templates.json b/brain/tools/data/alpha_templates.json
new file mode 100644
index 0000000..4a02be9
--- 
/dev/null
+++ b/brain/tools/data/alpha_templates.json
@@ -0,0 +1,134 @@
+[
+  {
+    "template": "Note: this post only collects and summarizes templates; quality varies.\nSome entries carry serious overfitting risk; join more consultant events and courses to unlock insight.\nregression_neut(regression_neut(group_neutralize(group_zscore(\\\nvec_avg({data}),sector),bucket(rank(cap),range=\"0.1,1,0.1\")),\\\ngroup_neutralize(group_zscore(cap,sector),bucket(rank(cap),range=\"0.1,1,0.1\"))),\\\nts_ir(returns-group_median(returns,sector),126))"
+  },
+  {
+    "template": "fear = ts_mean(abs(returns - group_mean(returns,1,market))/(abs(returns)+abs(group_mean(returns,1,market))+0.1),20);\\\n-group_neutralize(fear*group_normalize(ts_decay_exp_window(ts_percentage(vec_count(rsk82_raw_m3g_tni_p_su_fte),60,percentage=0.9)\\\n-ts_percentage(vec_count({data}),60,percentage=0.1),20, factor=0.8),market)*inverse(abs(ts_entropy(volume,20)))\\\n,bucket(rank(cap),range=\"0.1,1,0.1\"))"
+  },
+  {
+    "template": "d1_level=ts_max(vec_stddev({data}),20);\\\nd1_stability=ts_kurtosis(vec_stddev({data}),20);\\\nmkt_level=group_min(d1_stability,industry);\\\n-group_neutralize(d1_stability<=mkt_level?-d1_level:d1_level,bucket(rank(cap),range=\"0.1,1,0.1\"))"
+  },
+  {
+    "template": "group = bucket(rank(cap),range='0.1,1,0.1');\n\nrisk = rank(-ts_av_diff(vec_min({Analyst Std}),360));\n\nalpha=rank((1-risk)*group_rank(ts_scale(vec_max({OptionHighPrice})/close,120),industry));\n\ngroup_neutralize(ts_mean(alpha,2),group)\nSet Decay to 10 and Neutralize to industry"
+  },
+  {
+    "template": "my_group = market;\n\nmy_group2 = bucket(rank(cap),range='0,1,0.1');\n\nalpha=rank(group_rank(ts_decay_linear(volume/ts_sum(volume,252),10),my_group)*group_rank(ts_rank(vec_avg({Fundamental})),my_group)*group_rank(-ts_delta(close,5),my_group));\n\ntrade_when(volume>adv20,group_neutralize(alpha,my_group2),-1)"
+  },
+  {
+    "template": "market_return = group_mean(returns,1,market);\n\nfear = ts_mean(abs(returns - market_return)/(abs(returns)+abs(market_return)),20);\n\nvhat = ts_regression(volume,ts_mean(vec_avg({Sentiment}),5),120);\n\nehat = ts_regression(returns-market_return,vhat,120);\n\nalpha = group_neutralize(-ehat*rank(fear),bucket(rank(cap),range='0,1,0.1'));\n\ntrade_when(abs(returns)<0.075,regression_neut(alpha,volume),abs(returns)>0.1)\nSet Decay to 20 and Neutralize to industry"
+  },
+  {
+    "template": "vector_neut(group_neutralize(group_neutralize(ts_arg_max(vec_norm({datafield}), 220),bucket(rank(assets), range=\"0.1,1,0.1\")),subindustry), group_normalize(ts_delay(cap, 220),subindustry))"
+  },
+  {
+    "template": "sentiment = ts_backfill(ts_delay( vec_avg(SENTIMENT FROME OTHER),1),20)\n\nvhat=ts_regression(volume,sentiment,250); \n\nehat=-ts_regression(returns,vhat,750); 1\n\nalpha=group_rank(ehat,bucket(rank(cap),range='0,0.1,0.1'))"
+  },
+  {
+    "template": "IR = abs(ts_mean(returns,252)/ts_std_dev(returns,252));     \n \nregression_neut ( vector_neut (ts_zscore( vec_max (ANALYST)/close, 126),ts_median(cap, 126) ),IR)"
+  },
+  {
+    "template": "small_sell = vec_sum(SPECIAL SELL ORDER);\n\nsmall_buy = vec_sum(SPECIAL BUY ORDER);\n\nfac = - small_sell - small_buy;\n\nfac_diff_mean = power(rank(fac - group_mean(fac, 1, subindustry)),D);\n\nIR = abs(ts_mean(returns,126)/ts_std_dev(returns,126));\n\ngroup_neutralize(regression_neut(group_neutralize(fac_diff_mean,bucket(rank(cap), range='-0.1,1,0.1')),IR),sta1_top3000c10)"
+  },
+  {
+    "template": "trade_when(ts_rank(ts_std_dev(returns,10),252)<0.9,-regression_neut(group_neutralize(ts_std_dev(vec_avg(volatility),20)/ ts_mean(vec_avg(volatility),20),bucket(rank(assets),range = 
'0,1,0.1')),ts_std_dev(returns,30))+group_neutralize(-ts_std_dev(vec_avg(volume)/sharesout,30)/ ts_mean(vec_avg(volume)/sharesout,30),bucket(rank(cap), range = '0,1,0.1')),-1)" + }, + { + "template": "e = power(group_rank(-ts_decay_exp_window(ts_sum(if_else(vwap-group_mean(vwap,1,industry)-0.01>0,1,0)*ts_corr((log(volume/sharesout)),cap,5),5),20),industry),3);\ntrade_when(ts_rank(ts_std_dev(returns,10),252)<0.9,e,-1)" + }, + { + "template": "vector_neut(power(rank(group_neutralize(-ts_decay_exp_window(ts_sum(if_else((alpha)-group_mean((alpha),1,bucket(rank(assets),range = '0,1,0.1'))-0.02>0,1,0)*ts_co_kurtosis(vec_sum(turnover),cap,5),3)/3,50),industry)),2),assets)" + }, + { + "template": "signal = ts_rank(vec_stddev{fnd}, 60);\nsignal_str = group_rank(signal, bucket(rank(cap), range='0.1,1,0.2'));\npv_info = ts_rank(close, 60);\npv_info_str = group_neutralize(close, bucket(rank(cap), range='0.1,1,0.2'));\nIR = abs(ts_mean(returns,126)/ts_std_dev(returns,126));;\nrank(vector_neut(vector_neut(signal_str, pv_info_str),IR))" + }, + { + "template": "market_pv = group_mean(adv20,1,market);\nmodified = vec_avg(anl);\nshort_term_excess_return = ts_mean(pv-market_pv,5);\nlong_term_excess_return = ts_delay(ts_mean(pv-market_pv,120),120);\nparf = regression_neut(regression_neut(modified,short_term_excess_return),long_term_excess_return);\ngroup_zscore(parf,subindustry)" + }, + { + "template": "piece_1 = group_mean(vec_stddev(anl) , 1 , subindustry) - vec_stddev(anl)\ntime_mean(piece_1, 60)" + }, + { + "template": "{ts_opr_1}({group_opr}(ts_opr_2(rank({vector_opr}({pv_field})),rank({vector_opr}({vol_field})),{days1}),{grouping}){,days2})" + }, + { + "template": "market_returns = group_mean(returns,cap,market);\nmodified = vec_sum(Analyst);\nshort_term_excess_return = ts_mean(returns-market_returns,5);\nlong_term_excess_return = ts_delay(ts_mean(returns-market_returns,20),20);\nparf = regression_neut(regression_neut(modified,short_term_excess_return),long_term_excess_return);\ntrade_when(ts_rank(ts_std_dev(market_returns,10),252)<0.9,group_neutralize(parf,bucket(rank(cap),range='0,1,0.1')),-1)" + }, + { + "template": "my_group=bucket(rank(cap),range='0,1,0.1');\nAlpha=group_rank(ts_decay_linear(volume/ts_sum(volume,252),20),my_group)*group_rank(ts_co_kurtosis(news_data,returns,252),my_group)*group_rank(-ts_delta(close,5),my_group)" + }, + { + "template": "a = ts_zscore({datafield, 252);a1 = group_neutralize(a, bucket(rank(cap), range='0.1,1,0.1'));a2 = group_neutralize(a1, industry);b = ts_zscore(cap, 252);b1 = group_neutralize(b, industry);c = regression_neut(a2,b1);c" + }, + { + "template": "group_neutralize(ts_co_skewness(rp_nip_inverstor,ts_co_skewness(vec_max(nws18_qep),rp_css_ratings,225),225),bucket(rank(beta_last_30_days_spy), range=\"0,1,0.1\"))" + }, + { + "template": "regression_neut(vector_neut(ts_rank(vec_max({ANALYST})/close,120),ts_median(cap, 120) ),abs(ts_mean({RETURNS},252)/ts_std_dev({RETURNS},252)))" + }, + { + "template": "small_sell = vec_sum({sell_order});\nsmall_buy = vec_sum({buy_order});\nfac = small_sell - small_buy;\nfac_diff_mean = power(rank(fac - group_mean(fac, 1, subindustry)),{days});\nIR = abs(ts_mean(returns,126)/ts_std_dev(returns,126));\ngroup_neutralize(regression_neut(group_neutralize(fac_diff_mean,bucket(rank(cap), range='-0.1,1,0.1')),IR),sta1_top3000c10)" + }, + { + "template": "roa = group_zscore(fnd72_s_pit_or_cf_q_cf_net_inc*2/(assets+last_diff_value(assets,300)),sector);\n\npb = group_zscore(mdl175_bp,sector);\n\nITR = 
group_zscore(inventory_turnover,sector);\n\nDtA = group_zscore(mdl175_debtsassetratio,sector);\n\nWAtA = group_zscore(mdl175_workingcapital/assets,sector);\n\nNAYOY = group_zscore(mdl175_netassetgrowrate,sector);\n\nint2A = group_zscore(mdl175_intangibleassetratio,sector);\n\nrank(regression_neut(regression_neut(regression_neut(regression_neut(regression_neut(regression_neut(regression_neut(roa,pb),ITR),DtA),WAtA),NAYOY),int2A),log(cap)))"
+  },
+  {
+    "template": "Series 4\nresidual = ts_regression (ts_zscore(A,500), ts_zscore(B,500),500);\n\nresidual/ts_std_dev(residual ,500)"
+  },
+  {
+    "template": "Series 5\nts_regression (ts_zscore(A,500), timestep(500),500);\nFor the five series above you can later try combining cross-sectional and time-series regression: use cross-sectional regression to handle the distribution of the financial data, and time-series regression to find a robust signal\nand shrink the search space. The financial data above is not restricted, so the space is very large; it is recommended to search first along the key dimensions of profitability growth, cost, operations and safety, and to initially skip datafields with low coverage\nAnalyst data can stand in for financial data, e.g. net income\nFundamental quant work should start from economic logic, and very complex formulas are not recommended since they overfit easily (with few data points it is easy to fit noise)\nMind the 500-day window setting; in practice it causes the variance of the variables to be underestimated in the regression\nMind the reporting-alignment problem versus the timely-data-use problem. Reporting alignment means that within an earnings month, say April, company A reports before company B, so B still carries period t-1 financials while A already carries period t; since Brain rebalances daily, the two may not be comparable. But if the data is not used promptly, the return gets captured by other traders first, so there is a trade-off. In most cases the alignment problem matters far less than timely data use, so it should be noted rather than avoided\nEconometrics also has the 'reverse causality' problem; my understanding of it is not that deep, but the intuition is simple. The financial data we receive is static, i.e. we do not know how it came about. Say higher costs go with higher revenue; reverse causality would be: higher costs should raise my revenue, and with higher revenue I invest more aggressively in production and raise costs again, which also biases the estimator. The traditional remedy is to introduce extra instrumental variables; when building alphas we might instead blend in alternative data, such as the tone of footnotes in the financial statements, to judge whether a company is expanding aggressively or playing it safe, and thus decide where a variable sits in the regression\nshout out to YW93864"
+  },
+  {
+    "template": "(\n  (\n    (\n      greater(\n        ts_zscore((, ), ),\n        \n      ),\n      ts_zscore(, ),\n      \n    )\n  ),\n  \n), \n)\n\ne.g\ngroup_rank(\n  filter(\n    sigmoid(\n      if_else(\n        greater(ts_zscore(news_sentiment, 30), 1),\n        ts_zscore(news_sentiment, 30),\n        0\n      )\n    ),\n    h=\"1 2 3 4\",\n    t=\"0.5\"\n  ),\n  industry\n)\n\n"
+  },
+  {
+    "template": "tmp = (group_rank(fnd72_s_pit_or_cf_q_cf_cash_from_inv_act, sector) > 0.5) * 4 + (group_rank(fnd72_s_pit_or_cf_q_cf_cash_from_fnc_act, sector) > 0.5) * 2 + (group_rank(fnd72_s_pit_or_cf_q_cf_cash_from_oper, sector) > 0.5) * 1;\n\n2 * (tmp == 1) - (tmp == 2) - (tmp == 6)"
+  },
+  {
+    "template": "(((ts_rank(ts_backfill({i}, 30), 504) < 0.5) && (ts_rank(ts_backfill({j}, 30), 504) > 0.5)) ? 
ts_rank(ts_backfill({i}, 30), 504) : -ts_rank(ts_backfill({i}, 30), 504))\nSo far the case where {i} and {j} are both fundamental72 fields has been tested on GLB."
+  },
+  {
+    "template": "power(ts_std_dev(abs(returns),30),2)-power(ts_std_dev(returns,30),2) "
+  },
+  {
+    "template": "IR = abs(ts_mean(returns,252)/ts_std_dev(returns,252));\nr=returns;\na=power(ts_std_dev(abs(r)+r,30),2);\nb=power(ts_std_dev((abs(r)-r),30),2);\nc=regression_neut(b-a,IR);\ngroup_neutralize(group_neutralize(c,bucket(rank(cap),range='0.2,1,0.2')),country)"
+  },
+  {
+    "template": "a = -ts_delta(datafield,3);\n\nb=abs(ts_mean(returns,252)/ts_std_dev(returns,252));\n\ngroup_neutralize(vector_neut(a,b),subindustry)"
+  },
+  {
+    "template": "- A * ts_std_dev(A, 30), which I call 'small and steady' (the time parameter can be adjusted per Dataset/Datafield).\n\na = - A * ts_std_dev(A, 20);\n\nb=abs(ts_mean(returns,252)/ts_std_dev(returns,252));\n\nvector_neut(a,b)"
+  },
+  {
+    "template": "# day1\nd1_mean = ts_mean(close/ts_delay(close, 1)-1,20);\nd1_std = ts_std_dev(close/ts_delay(close, 1)-1,20);\nd1_mkt_mean = group_mean(d1_std, 1, market);\nd1_std_rev = d1_std 3) || (volume >= ts_sum(volume, 5) / 5);\nalphaexp = rank(rank((high + low) / 2 - close) * rank((mdl175_roediluted*mdl175_cashrateofsales)));\ntradeExitexp = -1;  \ntrade_when(triggerTradeexp, alphaexp, tradeExitexp)"
+  },
+  {
+    "template": "overnight_ret = (open - ts_delay(close,1))/ts_delay(close,1);\nabs_ovn_ret = abs (overnight_ret);\nturn = volume/sharesout;\nturn_d1 = ts_delay(turn, 1);\ncorr = ts_corr (abs_ovn_ret, turn_d1,7);\n-(corr)"
+  },
+  {
+    "template": "Turn20_ = ts_mean(volume/sharesout, 20);\nTurn20 = group_neutralize(Turn20_, bucket(rank(cap), range=\"0.1,1,0.1\"));\n\nSTR_ = ts_std_dev(volume/sharesout, 20);\nSTR = group_neutralize(STR_, bucket(rank(cap), range=\"0.1,1,0.1\"));\n\nscore2 = rank(- nan_mask(Turn20, if_else(rank(STR) < 0.5, 1, -1))) * 0.5;\nscore3 = rank(nan_mask(Turn20, if_else(rank(STR) >= 0.5, 1, -1))) * 0.5;\n\nsignal_ = add(rank(STR), score2, score3, filter = true);\nsignal = left_tail(rank(signal_), maximum=0.98);\n- group_rank(signal, bucket(rank(cap), range=\"0.1,1,0.1\"))"
+  },
+  {
+    "template": "my_group=bucket(rank(cap), range=\"0,1,0.1\");\nshock=(high-ts_delay(low, 1))/ts_delay(low, 1);\ntalor_shock=(shock-log(shock+1))*2-log(shock+1)**2;\nalpha=-group_rank(ts_mean(talor_shock, 24), my_group);\ngroup_neutralize(alpha,my_group)"
+  },
+  {
+    "template": "turnover_rank = ts_mean(rank(volume / (sharesout * 1000000)), 22);\nspe = rank(vec_avg(anl17_d1_spe_tse));\nbp = rank(vec_avg(anl17_d1_bp_tse));\nalpha = spe - bp;\n# alpha\nturnover_rank > 0.1 ? 
alpha : 0\n# CHN template"
+  },
+  {
+    "template": "turn = volume/sharesout ;\nturn20 = rank(regression_neut(-ts_mean(turn,20),densify(cap)));\nSTR = regression_neut(-ts_std_dev(turn,20),densify(cap));\nUTR = STR+ turn20 * (STR/(1+abs(STR)));\nregression_neut(regression_neut(regression_neut(sign(UTR) * power(abs(UTR),0.5),turn20),vwap),ts_delta(retained_earnings / sharesout, 120))"
+  },
+  {
+    "template": "internal=ts_delay(ts_percentage(returns, 60, percentage=0.9)-ts_percentage(returns, 60, percentage=0.1),40);\nCV=ts_std_dev((close/open - 1), 20)/ts_mean((close/open - 1),20);\nalpha=ts_sum(-returns,20)*rank(internal)*abs (1/CV);\ngroup_neutralize (alpha, bucket(rank(cap), range='0.1,1,0.1'))"
+  },
+  {
+    "template": "industry_open = group_mean(open, cap, subindustry);\nindustry_close = group_mean(close, cap, subindustry);\nindustry_high = group_mean(high, cap, subindustry);\nindustry_low = group_mean(low, cap, subindustry);\n\nTrends = if_else(industry_close > ts_delay(industry_close, 40),\nindustry_close/ts_max(industry_high, 100), rank(industry_close/ts_min(industry_low, 500))-1);\nOTSM = ts_sum((industry_high-ts_delay(industry_close, 1)) / (ts_delay(industry_close, 1)-industry_low+1), 90);\nDTSM = ts_sum((industry_high-industry_open) / (industry_open-industry_low+1), 5);\n\nTSM = rank(OTSM) + rank(DTSM);\n\nrank(Trends) + rank(TSM)"
+  },
+  {
+    "template": "small_sell = vec_avg(pv27_sell_value_small_order);\nsmall_buy = vec_avg(pv27_buy_value_small_order);\nlarge_sell = vec_avg(pv27_sell_value_exlarge_order);\nlarge_buy = vec_avg(pv27_buy_value_exlarge_order);\n\nfac_small = small_sell + small_buy;\nfac_large = large_sell + large_buy;\n\nfac_small_diff_mean = fac_small - group_mean(fac_small, 1, subindustry);\nfac_large_diff_mean = fac_large - group_mean(fac_large, 1, subindustry);\n\nfactor = if_else(rank(cap)<0.05, fac_small_diff_mean, fac_large_diff_mean);\nif_else(rank(factor) <0.45, rank(factor)*0.55, factor, -1)"
+  }
+]
diff --git a/brain/tools/simulation.py b/brain/tools/simulation.py
index 4c5fd3f..868b345 100644
--- a/brain/tools/simulation.py
+++ b/brain/tools/simulation.py
@@ -31,7 +31,10 @@ def submit_alpha(alpha: str, config: RunnableConfig) -> Command:
         if len(old_alphas) > 0:
             alphas_formatted = "\n".join(a.prompt_format() for a in old_alphas)
             print(f"Alpha already exists in the database: {alphas_formatted}")
-            return f"Alpha was already tested with following results: {alphas_formatted}"
+            return (
+                f"Alpha was already tested with following results: {alphas_formatted}\n"
+                "Create a completely different alpha"
+            )
     except Exception as e:
         print(f"Error during database search: {e}")

From be5a39a574d773ff650a12639366320e6a2a38cb Mon Sep 17 00:00:00 2001
From: Breta01
Date: Thu, 19 Jun 2025 14:51:32 +0800
Subject: [PATCH 09/13] Marking exploration of alphas

---
 brain/agentic.py                              | 29 ++++++++-----------
 brain/agents/executor.py                      | 26 +++++++++++------
 brain/agents/fine_tuner.py                    |  7 +++--
 brain/agents/seeder.py                        |  9 ++++--
 brain/alpha_class.py                          |  2 ++
 brain/database.py                             |  6 +++-
 brain/tools/simulation.py                     |  4 +++
 .../20250619141054_add_explored_indicator.sql |  3 ++
 8 files changed, 53 insertions(+), 33 deletions(-)
 create mode 100644 migrations/20250619141054_add_explored_indicator.sql

diff --git a/brain/agentic.py b/brain/agentic.py
index 006ec1e..3f22648 100644
--- a/brain/agentic.py
+++ b/brain/agentic.py
@@ -29,7 +29,7 @@ def plateau_condition(state: GraphState) -> bool:
     score_diff = (
         best_alpha.fitness - old_best_alpha.fitness + best_alpha.sharpe - old_best_alpha.sharpe
     )
-    
return score_diff < 0.1 + return score_diff < 0.01 def seed_finder_node(state: GraphState) -> GraphState: @@ -56,13 +56,22 @@ def planner_node(state: GraphState) -> GraphState: plan = invoke_planner(state) if not plan: return {**state, "node": "seed"} + + if state["storage"].best_alpha is None: + plan.insert(0, "Execute initial alpha idea to obtain baseline") + + plan.append( + "Think about previous changes and how they affected the alpha." + " Propose new alphas based on the most successful changes." + ) + return {**state, "node": "execute", "plan": plan} def executor_node(state: GraphState) -> GraphState: - invoke_executor(state) + state = invoke_executor(state) # TODO: Pass some summary of results from the executor to planner - return {**state, "node": "explore_test", "state": "explore", "static_finetune": False} + return {**state, "node": "explore_test", "state": "explore"} def fine_tuner_node(state: GraphState) -> GraphState: @@ -112,20 +121,6 @@ def submit_test_node(state: GraphState) -> GraphState: return {**state, "node": "seed", "state": "explore"} -# def stagnation_node(state: GraphState) -> GraphState: -# """ -# Decide what happens next. -# - If plateaued: return {"mode": "explore"} -# - If hard-stop (budget / Sharpe ceiling reached): {"mode": "stop"} -# - Else continue refining the same alpha -# """ -# if plateau_condition(state): -# return {"mode": "explore"} -# if hard_stop_condition(state): -# return {"mode": "stop"} -# return {"mode": "continue"} - - builder = StateGraph(GraphState) builder.add_node("seed_finder", seed_finder_node) diff --git a/brain/agents/executor.py b/brain/agents/executor.py index a5f72d9..69ef4bc 100644 --- a/brain/agents/executor.py +++ b/brain/agents/executor.py @@ -18,7 +18,7 @@ from brain.tools.operators import describe_operators, operators from brain.tools.simulation import StopException, submit_alpha -STEPS_PER_INSTRUCTION = 2 +STEPS_PER_INSTRUCTION = 1 def prompt( @@ -73,7 +73,11 @@ class CustomState(AgentState): def create_alpha_simulation(storage: Storage, state: GraphState) -> tuple[Response, Alpha]: """Create a new alpha based on the given ID.""" plan = state.get("plan", []) - config = state.get("default_config", DEFAULT_CONFIG) + + if storage.best_alpha is not None: + config = get_config(storage.best_alpha) + else: + config = state.get("default_config", DEFAULT_CONFIG) top_k = 5 @@ -92,7 +96,7 @@ def create_alpha_simulation(storage: Storage, state: GraphState) -> tuple[Respon prompt = f""" Create a new alpha based on the best alpha according to the following instructions: -{plan[(storage.counter // STEPS_PER_INSTRUCTION) % len(plan)]} +{plan[min(len(plan) - 1, storage.counter // STEPS_PER_INSTRUCTION)]} ALPHA IDEA ---------- @@ -158,17 +162,21 @@ def invoke(state: GraphState) -> GraphState: plan = state.get("plan", []) storage = state.get("storage") - if storage.best_alpha is not None: - config = get_config(storage.best_alpha) - else: - config = state.get("default_config", DEFAULT_CONFIG) create_alpha = partial(create_alpha_simulation, state=state) storage.reset_counter() - genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION) + genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION + 5) - if state.get("static_finetune", True): + if ( + state.get("static_finetune", True) + and storage.best_alpha is not None + and storage.best_alpha.fitness > 0.8 + ): for param, options in param_options.items(): alphas = [storage.best_alpha.replace(**{param: option}) for option in options] execute_alphas(alphas, 
storage) + + return {**state, "static_finetune": False} + + return {**state, "static_finetune": True} diff --git a/brain/agents/fine_tuner.py b/brain/agents/fine_tuner.py index a6949b5..967598c 100644 --- a/brain/agents/fine_tuner.py +++ b/brain/agents/fine_tuner.py @@ -23,9 +23,10 @@ def invoke(state: GraphState) -> GraphState: alphas = [storage.best_alpha.replace(**{param: option}) for option in options] execute_alphas(alphas, storage) - congig = get_config(storage.best_alpha) plan = state.get("plan", []) - create_alpha = partial(create_alpha_simulation, plan=plan, config=congig) + create_alpha = partial(create_alpha_simulation, state=state) storage.reset_counter() - genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION) + genetic_algorithm(storage, create_alpha, len(plan) * STEPS_PER_INSTRUCTION + 10) + + return {**state, "static_finetune": False} diff --git a/brain/agents/seeder.py b/brain/agents/seeder.py index 55cc62b..452b900 100644 --- a/brain/agents/seeder.py +++ b/brain/agents/seeder.py @@ -20,20 +20,23 @@ def invoke(state: GraphState) -> tuple[str, dict]: {}, ) - page = 0 + page = random.randint(1, 10) while True: database = Database() - alphas = database.get_alphas_page(page) + alphas = database.get_alphas_page(page, page_size=20) for alpha in alphas: score = alpha.update_score() corr = alpha.update_self_correlation() - database.upsert_alpha(alpha) if score > 0 and corr < 0.7 and alpha.sharpe > 1: + alpha.explored = True + database.upsert_alpha(alpha) print( f"Found alpha: {alpha.regular} with score {score} and self-correlation {corr}" ) return f"Use following alpha\n```\n{alpha.regular}\n```", get_config(alpha) + database.upsert_alpha(alpha) + page += 1 diff --git a/brain/alpha_class.py b/brain/alpha_class.py index 595a8a2..adea6c1 100644 --- a/brain/alpha_class.py +++ b/brain/alpha_class.py @@ -44,6 +44,8 @@ class Alpha: short_count: Optional[int] = None # Score change score: Optional[float] = None + # Exploration of seed algorithm + explored: bool = False # timestamp (let DB default if you don’t set it) created_at: Optional[str] = field( diff --git a/brain/database.py b/brain/database.py index b577cba..372fb79 100644 --- a/brain/database.py +++ b/brain/database.py @@ -86,9 +86,10 @@ def get_alphas_page( FROM alphas WHERE self_correlation < %s AND (score IS NULL OR score > %s) + AND (long_count IS NULL OR long_count > 100) AND sharpe > 1.0 AND fitness > 0.9 - AND (long_count IS NULL OR long_count > 100) + AND explored = FALSE ORDER BY score DESC NULLS LAST, sharpe DESC, fitness DESC LIMIT %s OFFSET %s @@ -105,6 +106,9 @@ def count_alphas(self, max_self_corr=0.7, min_score=0.0) -> int: WHERE self_correlation < %s AND (score IS NULL OR score > %s) AND (long_count IS NULL OR long_count > 100) + AND explored = FALSE + AND sharpe > 1.0 + AND fitness > 0.9 """ self.cursor.execute(sql, (max_self_corr, min_score)) return self.cursor.fetchone()["count"] diff --git a/brain/tools/simulation.py b/brain/tools/simulation.py index 868b345..a11ac63 100644 --- a/brain/tools/simulation.py +++ b/brain/tools/simulation.py @@ -68,6 +68,10 @@ def submit_alpha(alpha: str, config: RunnableConfig) -> Command: ) elif "1e-" in error: instructions = "Numbers like 1e-05 are not allowed. Use 0.00001 instead." + elif "Attempted to use unknown variable" in error: + instructions = "Use search datafields tool or get random datafields to find available data fields." + elif "Unexpected character" in error: + instructions = "Check the alpha expression for syntax errors and correct them." 
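(Aside, illustrative only and not part of the patch: the substring-to-instruction branches above could become table-driven as hints accumulate. A sketch under that assumption; ERROR_HINTS and hint_for are invented names.)

# Hypothetical table-driven version of the error-hint elif chain above.
ERROR_HINTS = {
    "1e-": "Numbers like 1e-05 are not allowed. Use 0.00001 instead.",
    "Attempted to use unknown variable": (
        "Use search datafields tool or get random datafields to find available data fields."
    ),
    "Unexpected character": "Check the alpha expression for syntax errors and correct them.",
}

def hint_for(error: str) -> str | None:
    # First matching substring wins, mirroring the elif order above.
    for needle, hint in ERROR_HINTS.items():
        if needle in error:
            return hint
    return None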
print(f"Error occured: {error}") # TODO: More instructions on how to fix (operators, etc.) return ( diff --git a/migrations/20250619141054_add_explored_indicator.sql b/migrations/20250619141054_add_explored_indicator.sql new file mode 100644 index 0000000..c958fb3 --- /dev/null +++ b/migrations/20250619141054_add_explored_indicator.sql @@ -0,0 +1,3 @@ +-- add an integer "score" column (allows negative/positive values) +ALTER TABLE alphas +ADD COLUMN IF NOT EXISTS explored BOOLEAN NOT NULL DEFAULT FALSE; From 4b7f11de1deb2347980c0867e156f532856385e7 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Thu, 19 Jun 2025 15:24:39 +0800 Subject: [PATCH 10/13] Update checks order --- brain/agentic.py | 7 ++++--- brain/agents/tester.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/brain/agentic.py b/brain/agentic.py index 3f22648..b34478f 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -85,8 +85,8 @@ def explore_test_node(state: GraphState) -> GraphState: if best_alpha is not None and len(best_alpha.failing_tests) == 0: score = best_alpha.update_score() - # if tester is passing - if invoke_tester(state) and score > -100: + trade_count = best_alpha.long_count + best_alpha.short_count + if score > -50 and trade_count > 400 and invoke_tester(state): return { **state, "node": "fine_tuner", @@ -108,7 +108,8 @@ def submit_test_node(state: GraphState) -> GraphState: if best_alpha is not None and len(best_alpha.failing_tests) == 0: score = best_alpha.update_score() - if invoke_tester(state) and score > 200: + trade_count = best_alpha.long_count + best_alpha.short_count + if score > 200 and trade_count > 400 and invoke_tester(state): # TODO: Mark alpha as "submitted" or "ready for production" print("Submitting alpha! Score:", score) BrainAPI.submit_alpha(best_alpha.alpha_id) diff --git a/brain/agents/tester.py b/brain/agents/tester.py index 0bb00d5..4649fe9 100644 --- a/brain/agents/tester.py +++ b/brain/agents/tester.py @@ -20,7 +20,7 @@ def invoke(state: GraphState) -> GraphState: alphas.append(best_alpha.replace(decay=config["decay"] + decay)) for op in ["rank", "sign"]: - regular = add_final_operator("rank", best_alpha.regular) + regular = add_final_operator(op, best_alpha.regular) alphas.append(best_alpha.replace(regular=regular)) results = execute_alphas(alphas, storage) From 64aca9fe43621613476829460323b623b3ef9496 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Thu, 19 Jun 2025 15:29:00 +0800 Subject: [PATCH 11/13] Fix node naming --- brain/agentic.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/brain/agentic.py b/brain/agentic.py index b34478f..19b5650 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -1,6 +1,5 @@ from langgraph.graph import END, START, StateGraph -from brain.agent_config import get_config from brain.agents import ( invoke_executor, invoke_fine_tuner, @@ -76,7 +75,7 @@ def executor_node(state: GraphState) -> GraphState: def fine_tuner_node(state: GraphState) -> GraphState: invoke_fine_tuner(state) - return {**state, "node": "submit_test", "state": "fine-tune", "static_finetune": False} + return {**state, "node": "submit_test", "state": "fine_tune", "static_finetune": False} def explore_test_node(state: GraphState) -> GraphState: @@ -90,7 +89,7 @@ def explore_test_node(state: GraphState) -> GraphState: return { **state, "node": "fine_tuner", - "state": "fine-tune", + "state": "fine_tune", "static_finetune": True, "explore_count": 0, } @@ -117,7 +116,7 @@ def submit_test_node(state: GraphState) -> GraphState: # TODO: Test 
plateau condition, compare previous best, with current best alpha if state["explore_count"] < MAX_EXPLORATION_COUNT and plateau_condition(state): - return {**state, "node": "plan", "state": "fine-tune"} + return {**state, "node": "plan", "state": "fine_tune"} return {**state, "node": "seed", "state": "explore"} @@ -147,7 +146,7 @@ def submit_test_node(state: GraphState) -> GraphState: "plan": "planner", "execute": "executor", "seed": "seed_finder", - "fine_tune": "fine_tuner", + "fine_tuner": "fine_tuner", "explore_test": "explore_test", "submit_test": "submit_test", "stop": END, From 10f40b186db2c99aa7452fc5d5183ca7fc364255 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Fri, 20 Jun 2025 05:36:03 +0800 Subject: [PATCH 12/13] Updating testing generator --- brain/agentic.py | 16 ++++++++-- brain/agents/alpha_tester.py | 2 +- brain/agents/executor.py | 2 +- brain/agents/fine_tuner.py | 2 +- brain/agents/tester.py | 6 ++-- brain/genetic_algorithm.py | 62 +++++++++++++++++++++++------------- brain/main.py | 2 +- 7 files changed, 60 insertions(+), 32 deletions(-) diff --git a/brain/agentic.py b/brain/agentic.py index 19b5650..82d66de 100644 --- a/brain/agentic.py +++ b/brain/agentic.py @@ -81,10 +81,16 @@ def fine_tuner_node(state: GraphState) -> GraphState: def explore_test_node(state: GraphState) -> GraphState: """Decide what happens next after exploring a new alpha idea.""" best_alpha = state["storage"].best_alpha + old_best_alpha = state.get("old_best_alpha") - if best_alpha is not None and len(best_alpha.failing_tests) == 0: + if ( + best_alpha is not None + and (old_best_alpha is None or old_best_alpha.alpha_id != best_alpha.alpha_id) + and len(best_alpha.failing_tests) == 0 + ): score = best_alpha.update_score() trade_count = best_alpha.long_count + best_alpha.short_count + print("Best alpha score:", score, "Trade count:", trade_count) if score > -50 and trade_count > 400 and invoke_tester(state): return { **state, @@ -104,10 +110,16 @@ def explore_test_node(state: GraphState) -> GraphState: def submit_test_node(state: GraphState) -> GraphState: """Decide what happens next after fine-tuning a new alpha idea.""" best_alpha = state["storage"].best_alpha + old_best_alpha = state.get("old_best_alpha") - if best_alpha is not None and len(best_alpha.failing_tests) == 0: + if ( + best_alpha is not None + and (old_best_alpha is None or old_best_alpha.alpha_id != best_alpha.alpha_id) + and len(best_alpha.failing_tests) == 0 + ): score = best_alpha.update_score() trade_count = best_alpha.long_count + best_alpha.short_count + print("Best alpha score:", score, "Trade count:", trade_count) if score > 200 and trade_count > 400 and invoke_tester(state): # TODO: Mark alpha as "submitted" or "ready for production" print("Submitting alpha! 
Score:", score) diff --git a/brain/agents/alpha_tester.py b/brain/agents/alpha_tester.py index c65060f..9c361a7 100644 --- a/brain/agents/alpha_tester.py +++ b/brain/agents/alpha_tester.py @@ -33,7 +33,7 @@ def invoke(state: GraphState) -> GraphState: for param, options in param_options.items(): alphas = [storage.best_alpha.replace(**{param: option}) for option in options] - execute_alphas(alphas, storage) + list(execute_alphas(alphas, storage)) congig = get_config(storage.best_alpha) plan = state.get("plan", []) diff --git a/brain/agents/executor.py b/brain/agents/executor.py index 69ef4bc..5535367 100644 --- a/brain/agents/executor.py +++ b/brain/agents/executor.py @@ -175,7 +175,7 @@ def invoke(state: GraphState) -> GraphState: ): for param, options in param_options.items(): alphas = [storage.best_alpha.replace(**{param: option}) for option in options] - execute_alphas(alphas, storage) + list(execute_alphas(alphas, storage)) return {**state, "static_finetune": False} diff --git a/brain/agents/fine_tuner.py b/brain/agents/fine_tuner.py index 967598c..d3fbf99 100644 --- a/brain/agents/fine_tuner.py +++ b/brain/agents/fine_tuner.py @@ -21,7 +21,7 @@ def invoke(state: GraphState) -> GraphState: if state.get("static_finetune", True): for param, options in param_options.items(): alphas = [storage.best_alpha.replace(**{param: option}) for option in options] - execute_alphas(alphas, storage) + list(execute_alphas(alphas, storage)) plan = state.get("plan", []) create_alpha = partial(create_alpha_simulation, state=state) diff --git a/brain/agents/tester.py b/brain/agents/tester.py index 4649fe9..b20e084 100644 --- a/brain/agents/tester.py +++ b/brain/agents/tester.py @@ -23,9 +23,7 @@ def invoke(state: GraphState) -> GraphState: regular = add_final_operator(op, best_alpha.regular) alphas.append(best_alpha.replace(regular=regular)) - results = execute_alphas(alphas, storage) - - for alpha in results: + for alpha in (generator := execute_alphas(alphas, storage)): if alpha is None: continue @@ -34,6 +32,8 @@ def invoke(state: GraphState) -> GraphState: or alpha.sharpe < best_alpha.sharpe * 0.5 or alpha.long_count + alpha.short_count < 400 ): + print(f"Alpha {alpha.prompt_format()} failed tests.") + generator.close() return False return True diff --git a/brain/genetic_algorithm.py b/brain/genetic_algorithm.py index 05639e0..89cd372 100644 --- a/brain/genetic_algorithm.py +++ b/brain/genetic_algorithm.py @@ -1,5 +1,6 @@ +import time from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Callable +from typing import Callable, Generator from requests import Response @@ -16,10 +17,9 @@ def execute_alphas( alphas: list[Alpha], storage: Storage, -) -> list[Alpha]: +) -> Generator[Alpha, None, None]: """Execute a list of alphas and update the storage.""" iterator = iter(alphas) - results = [] with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool: live_jobs = {} @@ -29,28 +29,31 @@ def execute_alphas( if alpha is None: break - response = BrainAPI.start_simulation(alpha.get_simulation_data(test_period="P1Y0M0D")) + response = _manual_start_alpha(alpha) live_jobs[pool.submit(_monitor_alpha, response, alpha)] = alpha + time.sleep(3) - while live_jobs: - for job in as_completed(live_jobs): - alpha = live_jobs.pop(job) - stats = job.result() - print(f"Stats: {stats}") - alpha = _update_alphas_storage(storage, stats, alpha.alpha_id) - results.append(alpha) + try: + while live_jobs: + for job in as_completed(live_jobs): + alpha = live_jobs.pop(job) + stats = job.result() + 
print(f"Stats: {stats}") + alpha = _update_alphas_storage(storage, stats, alpha.alpha_id) - new_alpha = next(iterator, None) - if new_alpha is None: - continue + yield alpha - response = BrainAPI.start_simulation( - new_alpha.get_simulation_data(test_period="P1Y0M0D") - ) - storage.add_alpha(new_alpha, "pending") - live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha + new_alpha = next(iterator, None) + if new_alpha is None: + continue - return results + response = _manual_start_alpha(new_alpha) + storage.add_alpha(new_alpha, "pending") + live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha + finally: + # Clean up if generator is closed + for pending in live_jobs: + pending.cancel() def genetic_algorithm( @@ -86,9 +89,7 @@ def genetic_algorithm( regular = negate_expression(alpha.regular) new_alpha = alpha.replace(regular=regular) - response = BrainAPI.start_simulation( - new_alpha.get_simulation_data(test_period="P1Y0M0D") - ) + response = _manual_start_alpha(new_alpha) else: response, new_alpha = create_alpha(storage) @@ -96,6 +97,21 @@ def genetic_algorithm( live_jobs[pool.submit(_monitor_alpha, response, new_alpha)] = new_alpha +def _manual_start_alpha(alpha: Alpha) -> Response: + for _ in range(3): + try: + response = BrainAPI.start_simulation(alpha.get_simulation_data(test_period="P1Y0M0D")) + return response + except Exception as e: + print(f"Error during simulation: {e}") + data = response.json() + if data["detail"] == "SIMULATION_LIMIT_EXCEEDED": + time.sleep(30) + continue + + return None + + def _monitor_alpha(response, alpha): """Monitor the alpha simulation.""" try: diff --git a/brain/main.py b/brain/main.py index 460fbe1..f07bd4f 100644 --- a/brain/main.py +++ b/brain/main.py @@ -5,7 +5,7 @@ def main(): - agentic.graph.invoke({}) + agentic.graph.invoke({}, {"recursion_limit": 1e9}) # search_algorithm.main() From a64c1c1fdf9bca7fd242eba460769c60dac607c0 Mon Sep 17 00:00:00 2001 From: Breta01 Date: Wed, 25 Jun 2025 17:27:00 +0800 Subject: [PATCH 13/13] Adding more alphas --- brain/agents/fine_tuner.py | 1 - brain/agents/seeder.py | 3 +-- brain/agents/tester.py | 5 +---- brain/alpha_class.py | 8 +++++--- brain/alphas/alpha_42.py | 20 ++++++++++++++++++++ brain/alphas/alpha_43.py | 19 +++++++++++++++++++ brain/alphas/alpha_44.py | 28 ++++++++++++++++++++++++++++ brain/alphas/alpha_45.py | 23 +++++++++++++++++++++++ brain/alphas/alpha_46.py | 35 +++++++++++++++++++++++++++++++++++ brain/alphas/alpha_47.py | 27 +++++++++++++++++++++++++++ brain/alphas/alpha_48.py | 28 ++++++++++++++++++++++++++++ brain/alphas/alpha_49.py | 24 ++++++++++++++++++++++++ brain/alphas/alpha_50.py | 20 ++++++++++++++++++++ brain/alphas/alpha_51.py | 20 ++++++++++++++++++++ brain/database.py | 1 + brain/fine_tune.py | 4 +--- 16 files changed, 253 insertions(+), 13 deletions(-) create mode 100644 brain/alphas/alpha_42.py create mode 100644 brain/alphas/alpha_43.py create mode 100644 brain/alphas/alpha_44.py create mode 100644 brain/alphas/alpha_45.py create mode 100644 brain/alphas/alpha_46.py create mode 100644 brain/alphas/alpha_47.py create mode 100644 brain/alphas/alpha_48.py create mode 100644 brain/alphas/alpha_49.py create mode 100644 brain/alphas/alpha_50.py create mode 100644 brain/alphas/alpha_51.py diff --git a/brain/agents/fine_tuner.py b/brain/agents/fine_tuner.py index d3fbf99..d11ada0 100644 --- a/brain/agents/fine_tuner.py +++ b/brain/agents/fine_tuner.py @@ -1,6 +1,5 @@ from functools import partial -from brain.agent_config import get_config from 
brain.agents.executor import STEPS_PER_INSTRUCTION, create_alpha_simulation from brain.genetic_algorithm import execute_alphas, genetic_algorithm from brain.graph_state import GraphState diff --git a/brain/agents/seeder.py b/brain/agents/seeder.py index 452b900..b0ab921 100644 --- a/brain/agents/seeder.py +++ b/brain/agents/seeder.py @@ -3,7 +3,6 @@ from pathlib import Path from brain.agent_config import get_config -from brain.alpha_class import Alpha from brain.database import Database from brain.graph_state import GraphState @@ -20,7 +19,7 @@ def invoke(state: GraphState) -> tuple[str, dict]: {}, ) - page = random.randint(1, 10) + page = random.randint(0, 10) while True: database = Database() alphas = database.get_alphas_page(page, page_size=20) diff --git a/brain/agents/tester.py b/brain/agents/tester.py index b20e084..91869fb 100644 --- a/brain/agents/tester.py +++ b/brain/agents/tester.py @@ -1,8 +1,5 @@ -from functools import partial - from brain.agent_config import get_config -from brain.agents.executor import STEPS_PER_INSTRUCTION, create_alpha_simulation -from brain.genetic_algorithm import execute_alphas, genetic_algorithm +from brain.genetic_algorithm import execute_alphas from brain.graph_state import GraphState from brain.helpers import add_final_operator diff --git a/brain/alpha_class.py b/brain/alpha_class.py index adea6c1..f4388b1 100644 --- a/brain/alpha_class.py +++ b/brain/alpha_class.py @@ -39,7 +39,7 @@ class Alpha: turnover: Optional[float] = None margin: Optional[float] = None self_correlation: Optional[float] = None - failing_tests: Optional[list[str]] = field(default_factory=list) + failing_tests: Optional[set[str]] = field(default_factory=set) long_count: Optional[int] = None short_count: Optional[int] = None # Score change @@ -145,6 +145,8 @@ def update_self_correlation(self) -> float: return self.self_correlation value = result["correlation"].max() self.self_correlation = value + if value > 0.7: + self.failing_tests.add("SELF_CORRELATION") return self.self_correlation @classmethod @@ -219,7 +221,7 @@ def from_stats(cls, stats: dict) -> "Alpha": self_corr = is_tests_df[is_tests_df["name"] == "SELF_CORRELATION"].iloc[0]["value"] is_tests_df = stats["is_tests"] - failing_tests = is_tests_df[is_tests_df["result"] == "FAIL"]["name"].to_list() + failing_tests = set(is_tests_df[is_tests_df["result"] == "FAIL"]["name"].to_list()) data = { "alpha_id": stats.get("alpha_id"), @@ -244,5 +246,5 @@ def from_stats(cls, stats: dict) -> "Alpha": def from_row(cls, row: sqlite3.Row) -> "Alpha": """Build an Alpha from a sqlite3.Row.""" alpha = cls(**{k: row[k] for k in cls.__dataclass_fields__ if k in row.keys()}) - alpha.failing_tests = alpha.failing_tests.split(",") + alpha.failing_tests = set(alpha.failing_tests.split(",")) return alpha diff --git a/brain/alphas/alpha_42.py b/brain/alphas/alpha_42.py new file mode 100644 index 0000000..4d90731 --- /dev/null +++ b/brain/alphas/alpha_42.py @@ -0,0 +1,20 @@ +# Code +code = """ +a = -mdl77_earningsqualityfactor_dpcapex * ts_std_dev(mdl77_earningsqualityfactor_dpcapex, 40); +b = -mdl77_2historicalgrowthfactor_chg3yepsast * ts_std_dev(mdl77_2historicalgrowthfactor_chg3yepsast, 40); +c = abs(ts_mean(returns, 252) / ts_std_dev(returns, 252)); +final_alpha = group_neutralize(vector_neut(a + b, c), bucket(rank(debt/liabilities), range="0,1,0.1")); +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP3000", + "decay": 1, + "delay": 1, + "truncation": 0.1, + "neutralization": "SUBINDUSTRY", + "pasteurization": 
"ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_43.py b/brain/alphas/alpha_43.py new file mode 100644 index 0000000..4cf8484 --- /dev/null +++ b/brain/alphas/alpha_43.py @@ -0,0 +1,19 @@ +# Code +code = """ +a = -mdl77_earningmomentumfactor_lagegp * ts_std_dev(mdl77_earningmomentumfactor_lagegp, 30); +b = abs(ts_mean(returns, 252) / ts_std_dev(returns, 252)); +final_alpha = group_neutralize(vector_neut(a, b), sector); +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP3000", + "decay": 1, + "delay": 1, + "truncation": 0.01, + "neutralization": "SUBINDUSTRY", + "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_44.py b/brain/alphas/alpha_44.py new file mode 100644 index 0000000..9b4d991 --- /dev/null +++ b/brain/alphas/alpha_44.py @@ -0,0 +1,28 @@ +# Code +code = """ +cash_flow_rank = mdl77_valueanalystmodelqva_cashflow; +current_ratio_metric = ts_mean(mdl77_2liquidityriskfactor_curratio, 10); +liquidity_momentum = ts_delta(current_ratio_metric, 5); +cash_flow_quality = mdl77_earningsqualityfactor_ttmaccu; +operating_cash_flow_ratio = mdl77_deepvaluefactor_pfcomtt; +net_cash_to_equity = mdl77_liquidityriskfactor_netcashp; +debt_equity_ratio = mdl77_liquidityriskfactor_ed; +cash_flow_momentum = ts_mean(mdl77_valueanalystmodelqva_cashflow, 10); +implied_volatility = implied_volatility_mean_360; +volatility_signal = -zscore(implied_volatility) * 0.2; +combined_signal = (0.30 * cash_flow_rank) + (0.25 * current_ratio_metric) + (0.15 * liquidity_momentum) + (0.15 * cash_flow_quality) + (0.05 * net_cash_to_equity) + (0.05 * debt_equity_ratio) + (0.05 * cash_flow_momentum) + volatility_signal; +final_alpha = group_neutralize(zscore(combined_signal), sector); +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP3000", + "decay": 16, + "delay": 1, + "truncation": 0.01, + "neutralization": "INDUSTRY", + "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_45.py b/brain/alphas/alpha_45.py new file mode 100644 index 0000000..301e16e --- /dev/null +++ b/brain/alphas/alpha_45.py @@ -0,0 +1,23 @@ +# Code +code = """ +cash_flow_rank = mdl77_valueanalystmodelqva_cashflow; +current_ratio_metric = ts_mean(mdl77_2liquidityriskfactor_curratio, 10); +liquidity_momentum = ts_delta(current_ratio_metric, 5); +earnings_quality = mdl77_earningsqualityfactor_ttmaccu; +operating_cash_flow_ratio = mdl77_deepvaluefactor_pfcomtt; +combined_signal = add(multiply(cash_flow_rank, 0.5), multiply(current_ratio_metric, 0.2), multiply(liquidity_momentum, 0.2), multiply(earnings_quality, 0.1), multiply(operating_cash_flow_ratio, -0.2)); +final_alpha = zscore(combined_signal); +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP3000", + "decay": 20, + "delay": 1, + "truncation": 0.1, + "neutralization": "INDUSTRY", + "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_46.py b/brain/alphas/alpha_46.py new file mode 100644 index 0000000..5703d85 --- /dev/null +++ b/brain/alphas/alpha_46.py @@ -0,0 +1,35 @@ +# Code +code = """ +std_dev_condition = ts_rank(ts_std_dev(returns, 10), 252) < 0.1; + +cash_flow_rank = mdl77_valueanalystmodelqva_cashflow; +current_ratio_metric = ts_mean(mdl77_2liquidityriskfactor_curratio, 10); +liquidity_momentum = ts_delta(current_ratio_metric, 5); +cash_flow_quality = 
diff --git a/brain/alphas/alpha_46.py b/brain/alphas/alpha_46.py
new file mode 100644
index 0000000..5703d85
--- /dev/null
+++ b/brain/alphas/alpha_46.py
@@ -0,0 +1,35 @@
+# Code
+code = """
+std_dev_condition = ts_rank(ts_std_dev(returns, 10), 252) < 0.1;
+
+cash_flow_rank = mdl77_valueanalystmodelqva_cashflow;
+current_ratio_metric = ts_mean(mdl77_2liquidityriskfactor_curratio, 10);
+liquidity_momentum = ts_delta(current_ratio_metric, 5);
+cash_flow_quality = ts_mean(mdl77_earningsqualityfactor_ttmaccu, 10);
+operating_cash_flow_ratio = mdl77_deepvaluefactor_pfcomtt;
+net_cash_to_equity = mdl77_liquidityriskfactor_netcashp;
+debt_equity_ratio = mdl77_liquidityriskfactor_ed;
+cash_flow_momentum = ts_delta(cash_flow_rank, 10);
+momentum_indicator = ts_mean(mdl77_earningmomentumfactor_salesurp, 10);
+
+combined_signal = (0.30 * cash_flow_rank) + (0.25 * current_ratio_metric) +
+                  (0.2 * liquidity_momentum) + (0.15 * cash_flow_quality) +
+                  (0.05 * net_cash_to_equity) + (0.05 * debt_equity_ratio) -
+                  (0.1 * operating_cash_flow_ratio) + (0.1 * mdl77_2sensitivityfactor400_xiv) +
+                  (0.1 * cash_flow_momentum) + (0.1 * momentum_indicator);
+final_alpha = -(trade_when(std_dev_condition, -group_neutralize(zscore(combined_signal), sector), -1));
+final_alpha_weighted = final_alpha * rank(ts_mean(volume, 10));
+"""
+
+# Simulation Settings
+config = {
+    "region": "USA",
+    "universe": "TOP3000",
+    "decay": 1,
+    "delay": 1,
+    "truncation": 0.005,
+    "neutralization": "INDUSTRY",
+    "pasteurization": "ON",
+    "nan_handling": "OFF",
+    "unit_handling": "VERIFY",
+}
diff --git a/brain/alphas/alpha_47.py b/brain/alphas/alpha_47.py
new file mode 100644
index 0000000..6d35138
--- /dev/null
+++ b/brain/alphas/alpha_47.py
@@ -0,0 +1,27 @@
+# Code
+code = """
+vhat1 = ts_regression(volume, ts_delay(adv20, 15), 120);
+ehat1 = ts_regression(returns, vhat1, 120);
+alpha1 = group_neutralize(-ehat1 * ts_rank(ts_mean(volume, 60), 5), bucket(rank(cap), range='0.1,1,0.05'));
+vhat2 = ts_regression(volume, ts_delay(mdl77_ohistoricalgrowthfactor_pctchgqtrast, 10), 120);
+ehat2 = ts_regression(returns, vhat2, 120);
+alpha2 = group_neutralize(-ehat2 * ts_rank(ts_mean(volume, 60), 5), exchange);
+liquidity_condition = ts_mean(volume, 60);
+trade_alpha1 = trade_when(and(abs(returns) < 0.08, liquidity_condition > 30000), alpha1, abs(returns) > 0.1);
+trade_alpha2 = trade_when(and(abs(returns) < 0.08, liquidity_condition > 30000), alpha2, abs(returns) > 0.1);
+final_alpha = group_neutralize(add(trade_alpha1, trade_alpha2), bucket(rank(assets/debt), range='0.1,1,0.05')) + rank(ts_mean(mdl77_garpanalystmodel_qgp_relgrowth, 5)) * 0.05;
+final_alpha
+"""
+
+# Simulation Settings
+config = {
+    "region": "USA",
+    "universe": "TOP500",
+    "decay": 12,
+    "delay": 1,
+    "truncation": 0.01,
+    "neutralization": "INDUSTRY",
+    "pasteurization": "ON",
+    "nan_handling": "OFF",
+    "unit_handling": "VERIFY",
+}
diff --git a/brain/alphas/alpha_48.py b/brain/alphas/alpha_48.py
new file mode 100644
index 0000000..c7e802a
--- /dev/null
+++ b/brain/alphas/alpha_48.py
@@ -0,0 +1,28 @@
+# Code
+code = """
+vhat1 = ts_regression(volume, ts_delay(adv20, 10), 60);
+ehat1 = ts_regression(returns, vhat1, 60);
+alpha1 = -ehat1 * ts_rank(ts_mean(volume, 30), 5);
+vhat2 = ts_regression(volume, ts_delay(snt_buzz_ret, 10), 60);
+ehat2 = ts_regression(returns, vhat2, 60);
+alpha2 = -ehat2 * ts_rank(ts_mean(volume, 30), 5);
+liquidity_condition = ts_mean(volume, 30);
+liquidity_condition_strict = liquidity_condition > 50000;
+trade_alpha1 = trade_when(and(abs(returns) < 0.08, liquidity_condition_strict), alpha1, abs(returns) > 0.1);
+trade_alpha2 = trade_when(and(abs(returns) < 0.08, liquidity_condition_strict), alpha2, abs(returns) > 0.1);
+final_alpha = add(trade_alpha1, trade_alpha2) + rank(ts_mean(mdl77_garpanalystmodel_qgp_relgrowth, 5)) * 0.1;
+final_alpha
+"""
+
+# Simulation Settings
+config = {
+    "region": "USA",
+    "universe": "TOP500",
+    "decay": 15,
+    "delay": 1,
+    "truncation": 0.005,
+    "neutralization": "INDUSTRY",
+    "pasteurization": "ON",
+    "nan_handling": "OFF",
+    "unit_handling": "VERIFY",
+}
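# ---------------------------------------------------------------------------
# Editor's note: alpha_46 through alpha_48 lean on trade_when(enter, alpha,
# exit). As I understand the Brain operator, it takes today's alpha value
# when `enter` is true, flattens the position when `exit` is true, and
# otherwise carries yesterday's value forward; an exit of -1 therefore never
# fires. A rough per-instrument emulation for intuition only, not the
# platform's implementation:
# ---------------------------------------------------------------------------
import math

def trade_when(enter: list[bool], alpha: list[float], exit_: list[bool]) -> list[float]:
    out: list[float] = []
    prev = math.nan
    for en, a, ex in zip(enter, alpha, exit_):
        if ex:
            prev = math.nan  # exit condition: close the position
        elif en:
            prev = a         # entry condition: take the fresh alpha value
        out.append(prev)     # otherwise hold yesterday's value
    return out
# ---------------------------------------------------------------------------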
+ "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_49.py b/brain/alphas/alpha_49.py new file mode 100644 index 0000000..07e1cf6 --- /dev/null +++ b/brain/alphas/alpha_49.py @@ -0,0 +1,24 @@ +# Code +code = """ +cash_flow_rank = mdl77_valueanalystmodelqva_cashflow; +current_ratio_metric = ts_mean(mdl77_2liquidityriskfactor_curratio, 10); +liquidity_momentum = ts_delta(current_ratio_metric, 5); +trend_current_ratio = ts_mean(ts_delta(current_ratio_metric, 10), 5); +earnings_quality = mdl77_earningsqualityfactor_ttmaccu; +operating_cash_flow_ratio = mdl77_deepvaluefactor_pfcomtt; +combined_signal = add(multiply(cash_flow_rank, 0.45), multiply(current_ratio_metric, 0.25), multiply(liquidity_momentum, 0.2), multiply(trend_current_ratio, 0.1), multiply(earnings_quality, 0.05), multiply(operating_cash_flow_ratio, -0.1)); +final_alpha = group_neutralize(zscore(combined_signal), pv13_r2_min10_1000_sector); +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP500", + "decay": 15, + "delay": 1, + "truncation": 0.01, + "neutralization": "INDUSTRY", + "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_50.py b/brain/alphas/alpha_50.py new file mode 100644 index 0000000..9ae2bd1 --- /dev/null +++ b/brain/alphas/alpha_50.py @@ -0,0 +1,20 @@ +# Code +code = """ +group_neutralize( + -ts_backfill(zscore(goodwill / sales), 65) * (rank(fn_accrued_liab_a) * rank(capex) + rank(debt_st)), + bucket(rank(debt / liabilities), range="0,1,0.1") +) +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP500", + "decay": 10, + "delay": 1, + "truncation": 0.01, + "neutralization": "INDUSTRY", + "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/alphas/alpha_51.py b/brain/alphas/alpha_51.py new file mode 100644 index 0000000..b7fff5f --- /dev/null +++ b/brain/alphas/alpha_51.py @@ -0,0 +1,20 @@ +# Code +code = """ +a = -mdl77_historicalgrowthfactor_pctchgeps * ts_std_dev(mdl77_historicalgrowthfactor_pctchgeps, 30); +b = abs(ts_mean(returns, 100) / ts_std_dev(returns, 100)); +final_alpha = group_neutralize(vector_neut(a, b), sector); +winsorize(final_alpha, std=3) +""" + +# Simulation Settings +config = { + "region": "USA", + "universe": "TOP500", + "decay": 5, + "delay": 1, + "truncation": 0.01, + "neutralization": "SUBINDUSTRY", + "pasteurization": "ON", + "nan_handling": "OFF", + "unit_handling": "VERIFY", +} diff --git a/brain/database.py b/brain/database.py index 372fb79..df1b9ca 100644 --- a/brain/database.py +++ b/brain/database.py @@ -85,6 +85,7 @@ def get_alphas_page( SELECT * FROM alphas WHERE self_correlation < %s + AND failing_tests = '' AND (score IS NULL OR score > %s) AND (long_count IS NULL OR long_count > 100) AND sharpe > 1.0 diff --git a/brain/fine_tune.py b/brain/fine_tune.py index ee9632c..d670cf6 100644 --- a/brain/fine_tune.py +++ b/brain/fine_tune.py @@ -1,9 +1,7 @@ """Methods for fine-tuning parameters of alpha.""" -import random - -from brain.agent import agent from brain.agent_config import DEFAULT_CONFIG +from brain.agents.executor_old import agent from brain.alpha_class import Alpha from brain.alpha_storage import Storage from brain.genetic_algorithm import genetic_algorithm