From 86e7d6b017781d66ff03b73c02166c493324dfec Mon Sep 17 00:00:00 2001 From: Young Date: Mon, 23 Sep 2024 03:57:02 +0000 Subject: [PATCH 1/3] Add bo template --- rdagent/app/kaggle/bo_loop.py | 74 +++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 rdagent/app/kaggle/bo_loop.py diff --git a/rdagent/app/kaggle/bo_loop.py b/rdagent/app/kaggle/bo_loop.py new file mode 100644 index 000000000..f13159267 --- /dev/null +++ b/rdagent/app/kaggle/bo_loop.py @@ -0,0 +1,74 @@ +""" +Differences from kaggle loop +- focused on a specific component(we must simplify it to align the grainularity of the idea and experiment). +- replace the idea proposal to another component. +- (trick) we don't want to develop again. we want to reused the code in the BO-process + - cached Developer(input is same idea, return the cached solution) + - the cache can be disabled. + - evaluation results. + + +- Align the nouns: + - e: Workspace + - h: tasks or hypothesis? +""" + + +from rdagent.core.developer import Developer +from rdagent.core.experiment import Workspace +from rdagent.core.proposal import HypothesisGen + + +class BODev(Developer): + """ + Differences: + - save results. + - self evaluate a solution based + - directly query previous based on e. + """ + def __init__(self): + self.hypo2exp # + self.dev # normal dev + ... # knowledge storage + + def evaluate(self, ws: Workspace): + ... + + def udpate_feedback(self, e,s): + ... + + +class BOHypothesisGen(HypothesisGen): + + def __init__(self, scen: Scenario, bodev: BODev) -> None: + self.bodev = bodev + super().__init__(scen) + + def gen(self, ...): + # 1) exploration : propose idea + ideas = ... + # 2) evaluate ideas with self.bodev + # ..... scors distribution + # 3) sample idea based on ideas & scores(as weight) + return selected_idea + + +# - interface: +# - implemenation: use RepoAnalyzer + key code => score + + +class BOLoop: + @measure_time + def __init__(self, PROP_SETTING: BasePropSetting): + with logger.tag("init"): + ... + self.bodev = BODev + self.bohypogen = BOHypothesisGen(..., self.bodev) + ... + + ... + def running(self): # feedback + # collect + # or feeback + e, s = self.trace ... + self.bodev.update_feedack(e, s) # From e7b6a7dc06ad0a5bc10b9488b2b14f90906b512d Mon Sep 17 00:00:00 2001 From: SH-Src Date: Mon, 21 Oct 2024 02:29:00 +0000 Subject: [PATCH 2/3] bo update --- rdagent/app/kaggle/bo_loop.py | 277 +++++++++++++++++++++++------ rdagent/app/kaggle/bo_loop_main.py | 5 + rdagent/app/kaggle/prompts.yaml | 13 ++ 3 files changed, 236 insertions(+), 59 deletions(-) create mode 100644 rdagent/app/kaggle/bo_loop_main.py create mode 100644 rdagent/app/kaggle/prompts.yaml diff --git a/rdagent/app/kaggle/bo_loop.py b/rdagent/app/kaggle/bo_loop.py index f13159267..bfeebea0a 100644 --- a/rdagent/app/kaggle/bo_loop.py +++ b/rdagent/app/kaggle/bo_loop.py @@ -1,74 +1,233 @@ -""" -Differences from kaggle loop -- focused on a specific component(we must simplify it to align the grainularity of the idea and experiment). -- replace the idea proposal to another component. -- (trick) we don't want to develop again. we want to reused the code in the BO-process - - cached Developer(input is same idea, return the cached solution) - - the cache can be disabled. - - evaluation results. +import subprocess +from collections import defaultdict +from typing import Any +import pickle +import fire -- Align the nouns: - - e: Workspace - - h: tasks or hypothesis? -""" - +from rdagent.oai.llm_utils import APIBackend +from rdagent.app.kaggle.conf import KAGGLE_IMPLEMENT_SETTING +from rdagent.components.workflow.conf import BasePropSetting +from rdagent.components.workflow.rd_loop import RDLoop from rdagent.core.developer import Developer -from rdagent.core.experiment import Workspace -from rdagent.core.proposal import HypothesisGen - +from rdagent.core.exception import FactorEmptyError, ModelEmptyError +from rdagent.core.proposal import ( + Hypothesis2Experiment, + HypothesisExperiment2Feedback, + HypothesisGen, + Trace, +) +from rdagent.core.scenario import Scenario +from rdagent.core.utils import import_class +from rdagent.log import rdagent_logger as logger +from rdagent.log.time import measure_time +from rdagent.scenarios.kaggle.experiment.utils import python_files_to_notebook +from rdagent.scenarios.kaggle.kaggle_crawler import download_data +from rdagent.scenarios.kaggle.proposal.proposal import ( + KG_ACTION_FEATURE_ENGINEERING, + KG_ACTION_FEATURE_PROCESSING, + KG_ACTION_MODEL_FEATURE_SELECTION, + KGTrace, +) +from pathlib import Path +from jinja2 import Environment, StrictUndefined +from rdagent.utils.workflow import LoopBase, LoopMeta +from rdagent.core.prompts import Prompts + +prompt_dict = Prompts(file_path=Path("./rdagent/app/kaggle/prompts.yaml")) + +class KaggleBOLoop(LoopBase, metaclass=LoopMeta): + @measure_time + def __init__(self, PROP_SETTING: BasePropSetting): + with logger.tag("init"): + scen: Scenario = import_class(PROP_SETTING.scen)(PROP_SETTING.competition) + logger.log_object(scen, tag="scenario") + + knowledge_base = ( + import_class(PROP_SETTING.knowledge_base)(PROP_SETTING.knowledge_base_path, scen) + if PROP_SETTING.knowledge_base != "" + else None + ) + logger.log_object(knowledge_base, tag="knowledge_base") + + self.hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen) + logger.log_object(self.hypothesis_gen, tag="hypothesis generator") + + self.hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)() + logger.log_object(self.hypothesis2experiment, tag="hypothesis2experiment") + + self.feature_coder: Developer = import_class(PROP_SETTING.feature_coder)(scen) + logger.log_object(self.feature_coder, tag="feature coder") + self.model_feature_selection_coder: Developer = import_class(PROP_SETTING.model_feature_selection_coder)( + scen + ) + logger.log_object(self.model_feature_selection_coder, tag="model feature selection coder") + self.model_coder: Developer = import_class(PROP_SETTING.model_coder)(scen) + logger.log_object(self.model_coder, tag="model coder") + + self.feature_runner: Developer = import_class(PROP_SETTING.feature_runner)(scen) + logger.log_object(self.feature_runner, tag="feature runner") + self.model_runner: Developer = import_class(PROP_SETTING.model_runner)(scen) + logger.log_object(self.model_runner, tag="model runner") + + self.summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.summarizer)(scen) + logger.log_object(self.summarizer, tag="summarizer") + self.trace = KGTrace(scen=scen, knowledge_base=knowledge_base) + super().__init__() -class BODev(Developer): - """ - Differences: - - save results. - - self evaluate a solution based - - directly query previous based on e. - """ - def __init__(self): - self.hypo2exp # - self.dev # normal dev - ... # knowledge storage + @measure_time + def propose(self, prev_out: dict[str, Any]): + hypothesis_list = [] + for _ in range(2): + hypothesis = self.hypothesis_gen.gen(self.trace) + hypothesis_list.append(hypothesis) + return hypothesis_list + + def _develop(self, hypothesis): + exp = self.hypothesis2experiment.convert(hypothesis, self.trace) + if hypothesis.action in [KG_ACTION_FEATURE_ENGINEERING, KG_ACTION_FEATURE_PROCESSING]: + code = self.feature_coder.develop(exp) + elif hypothesis.action == KG_ACTION_MODEL_FEATURE_SELECTION: + code = self.model_feature_selection_coder.develop(exp) + else: + code = self.model_coder.develop(exp) + return code - def evaluate(self, ws: Workspace): - ... + def _estimate(self, code): + system_prompt = prompt_dict["System"] + "Here is the trace: " + Environment(undefined=StrictUndefined).from_string(prompt_dict["Trace Convert"]).render(trace=self.trace) + user_prompt = "Here is the new implementation:" + str(code.sub_workspace_list[0].code) + \ + " Please evaluate its performance. Output a score between 0 and 1. Do not include anything else in your response." + resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt) + return float(resp) + + @measure_time + def sample(self, prev_out): + codes = [] + for h in prev_out["propose"]: + print(type(h)) + code = [] + for _ in range(2): + c = self._develop(h) + code.append(c) + codes.append(code) + return codes - def udpate_feedback(self, e,s): - ... + @measure_time + def select(self, prev_out): + results = [] + codes = [] + hs = [] + hypotheses = prev_out["propose"] + exps = prev_out["sample"] + for i in range(len(exps)): + for j in range(len(exps[i])): + codes.append(exps[i][j]) + r = self._estimate(exps[i][j]) + results.append(r) + hs.append(hypotheses[i]) + m = max(results) + index = results.index(m) + logger.log_object(codes[index].sub_workspace_list, tag="d.coder result") + logger.log_object(hs[index], tag="r.hypothesis generation") + + return codes[index], hs[index] + + # @measure_time + # def exp_gen(self, prev_out: dict[str, Any]): + # with logger.tag("r"): # research + # exp = self.hypothesis2experiment.convert(prev_out["propose"], self.trace) + # logger.log_object(exp.sub_tasks, tag="experiment generation") + # return exp + + # @measure_time + # def coding(self, prev_out: dict[str, Any]): + # with logger.tag("d"): # develop + # if prev_out["propose"].action in [KG_ACTION_FEATURE_ENGINEERING, KG_ACTION_FEATURE_PROCESSING]: + # exp = self.feature_coder.develop(prev_out["exp_gen"]) + # elif prev_out["propose"].action == KG_ACTION_MODEL_FEATURE_SELECTION: + # exp = self.model_feature_selection_coder.develop(prev_out["exp_gen"]) + # else: + # exp = self.model_coder.develop(prev_out["exp_gen"]) + # logger.log_object(exp.sub_workspace_list, tag="coder result") + # return exp + @measure_time + def running(self, prev_out: dict[str, Any]): + with logger.tag("ef"): # evaluate and feedback + if prev_out["propose"][0].action in [KG_ACTION_FEATURE_ENGINEERING, KG_ACTION_FEATURE_PROCESSING]: + exp = self.feature_runner.develop(prev_out["select"][0]) + else: + exp = self.model_runner.develop(prev_out["select"][0]) + logger.log_object(exp, tag="runner result") + + if KAGGLE_IMPLEMENT_SETTING.competition in ["optiver-realized-volatility-prediction"]: + try: + python_files_to_notebook( + KAGGLE_IMPLEMENT_SETTING.competition, exp.experiment_workspace.workspace_path + ) + except Exception as e: + logger.error(f"Merge python files to one file failed: {e}") + + if KAGGLE_IMPLEMENT_SETTING.auto_submit: + csv_path = exp.experiment_workspace.workspace_path / "submission.csv" + try: + subprocess.run( + [ + "kaggle", + "competitions", + "submit", + "-f", + str(csv_path.absolute()), + "-m", + str(csv_path.parent.absolute()), + KAGGLE_IMPLEMENT_SETTING.competition, + ], + check=True, + ) + except subprocess.CalledProcessError as e: + logger.error(f"Auto submission failed: \n{e}") + except Exception as e: + logger.error(f"Other exception when use kaggle api:\n{e}") + + return exp -class BOHypothesisGen(HypothesisGen): + @measure_time + def feedback(self, prev_out: dict[str, Any]): + feedback = self.summarizer.generate_feedback(prev_out["running"], prev_out["select"][1], self.trace) + with logger.tag("ef"): # evaluate and feedback + logger.log_object(feedback, tag="feedback") + self.trace.hist.append((prev_out["select"][1], prev_out["running"], feedback)) + # with open('trace.pkl', 'wb') as file: + # pickle.dump(self.trace, file) - def __init__(self, scen: Scenario, bodev: BODev) -> None: - self.bodev = bodev - super().__init__(scen) + skip_loop_error = (ModelEmptyError, FactorEmptyError) - def gen(self, ...): - # 1) exploration : propose idea - ideas = ... - # 2) evaluate ideas with self.bodev - # ..... scors distribution - # 3) sample idea based on ideas & scores(as weight) - return selected_idea +def main(path=None, step_n=None, competition=None): + """ + Auto R&D Evolving loop for models in a kaggle{} scenario. -# - interface: -# - implemenation: use RepoAnalyzer + key code => score + You can continue running session by + .. code-block:: bash + dotenv run -- python rdagent/app/kaggle/bo_loop.py --competition playground-series-s4e8 [--competition titanic] $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional parameter + rdagent kaggle --competition playground-series-s4e8 # You are encouraged to use this one. -class BOLoop: - @measure_time - def __init__(self, PROP_SETTING: BasePropSetting): - with logger.tag("init"): - ... - self.bodev = BODev - self.bohypogen = BOHypothesisGen(..., self.bodev) - ... - - ... - def running(self): # feedback - # collect - # or feeback - e, s = self.trace ... - self.bodev.update_feedack(e, s) # + """ + if competition: + KAGGLE_IMPLEMENT_SETTING.competition = competition + download_data(competition=competition, local_path=KAGGLE_IMPLEMENT_SETTING.local_data_path) + else: + logger.error("Please specify competition name.") + + if path is None: + kaggle_loop = KaggleBOLoop(KAGGLE_IMPLEMENT_SETTING) + else: + kaggle_loop = KaggleBOLoop.load(path) + + kaggle_loop.run(step_n=step_n) + + +if __name__ == "__main__": + fire.Fire(main) \ No newline at end of file diff --git a/rdagent/app/kaggle/bo_loop_main.py b/rdagent/app/kaggle/bo_loop_main.py new file mode 100644 index 000000000..df4a1d498 --- /dev/null +++ b/rdagent/app/kaggle/bo_loop_main.py @@ -0,0 +1,5 @@ +from rdagent.app.kaggle.bo_loop import main +import fire + +if __name__ == "__main__": + fire.Fire(main) \ No newline at end of file diff --git a/rdagent/app/kaggle/prompts.yaml b/rdagent/app/kaggle/prompts.yaml new file mode 100644 index 000000000..52b36d5ef --- /dev/null +++ b/rdagent/app/kaggle/prompts.yaml @@ -0,0 +1,13 @@ +System: |- + Given the trace of previous implementations and corresponding performances, please estimate what the performance is for the new implementation. + Considering all informations from the trace, conduct an overal evaluation for the new implementation. Output a score between 0 and 1. Do not include anything else in your response. + +Trace Convert: |- + {% for hypothesis, experiment, feedback in trace.hist %} + Hypothesis {{ loop.index }}: {{ hypothesis }} + Observation on the result with the hypothesis: {{ feedback.observations }} + Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }} + New Feedback for Context (For your reference): {{ feedback.new_hypothesis }} + Reasoning for new hypothesis: {{ feedback.reason }} + Did changing to this hypothesis work? (focus on the change): {{ feedback.decision }} + {% endfor %} \ No newline at end of file From 0595a498c3a83c3636d1ae74352c81f6d67fd464 Mon Sep 17 00:00:00 2001 From: Bowen Xian Date: Tue, 22 Oct 2024 02:27:01 +0000 Subject: [PATCH 3/3] show coder result in webapp (not show evolving code) --- rdagent/app/kaggle/bo_loop.py | 17 +++++++++-------- rdagent/log/ui/app.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/rdagent/app/kaggle/bo_loop.py b/rdagent/app/kaggle/bo_loop.py index bfeebea0a..702bd4ed8 100644 --- a/rdagent/app/kaggle/bo_loop.py +++ b/rdagent/app/kaggle/bo_loop.py @@ -85,13 +85,14 @@ def propose(self, prev_out: dict[str, Any]): return hypothesis_list def _develop(self, hypothesis): - exp = self.hypothesis2experiment.convert(hypothesis, self.trace) - if hypothesis.action in [KG_ACTION_FEATURE_ENGINEERING, KG_ACTION_FEATURE_PROCESSING]: - code = self.feature_coder.develop(exp) - elif hypothesis.action == KG_ACTION_MODEL_FEATURE_SELECTION: - code = self.model_feature_selection_coder.develop(exp) - else: - code = self.model_coder.develop(exp) + with logger.tag("d"): # develop + exp = self.hypothesis2experiment.convert(hypothesis, self.trace) + if hypothesis.action in [KG_ACTION_FEATURE_ENGINEERING, KG_ACTION_FEATURE_PROCESSING]: + code = self.feature_coder.develop(exp) + elif hypothesis.action == KG_ACTION_MODEL_FEATURE_SELECTION: + code = self.model_feature_selection_coder.develop(exp) + else: + code = self.model_coder.develop(exp) return code def _estimate(self, code): @@ -128,8 +129,8 @@ def select(self, prev_out): hs.append(hypotheses[i]) m = max(results) index = results.index(m) - logger.log_object(codes[index].sub_workspace_list, tag="d.coder result") logger.log_object(hs[index], tag="r.hypothesis generation") + logger.log_object(codes[index].sub_workspace_list, tag="d.coder result") return codes[index], hs[index] diff --git a/rdagent/log/ui/app.py b/rdagent/log/ui/app.py index 8020e30ca..b9d9df1d7 100644 --- a/rdagent/log/ui/app.py +++ b/rdagent/log/ui/app.py @@ -551,6 +551,22 @@ def feedback_window(): def evolving_window(): title = "Development🛠️" if isinstance(state.scenario, SIMILAR_SCENARIOS) else "Development🛠️ (evolving coder)" st.subheader(title, divider="green", anchor="_development") + + # TODO: only for suhan, add a check before merging to main + if len(state.msgs[round]["d.coder result"]) != 1: + st.toast(":red[**Coder result Length Error!**]", icon="!") + ws: list[FactorFBWorkspace | ModelFBWorkspace] = state.msgs[round]["d.coder result"][-1].content + tab_names = [ + w.target_task.factor_name if isinstance(w.target_task, FactorTask) else w.target_task.name for w in ws + ] + wtabs = st.tabs(tab_names) + for j, w in enumerate(ws): + with wtabs[j]: + # Evolving Code + for k, v in w.code_dict.items(): + with st.expander(f":green[`{k}`]", expanded=True): + st.code(v, language="python") + return # Evolving Status if state.erounds[round] > 0: