Skip to content

Commit 2fcf5ab

Browse files
authored
Merge pull request #157 from microsoft/pre-release
refactor experience summarizer
2 parents 6c9fde9 + 75bf4b3 commit 2fcf5ab

File tree

11 files changed

+252
-278
lines changed

11 files changed

+252
-278
lines changed

documents/docs/configurations/developer_configuration.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ The following parameters are included in the system configuration of the UFO age
1010
|-------------------------|---------------------------------------------------------------------------------------------------------|----------|---------------|
1111
| `CONTROL_BACKEND` | The backend for control action, currently supporting `uia` and `win32`. | String | "uia" |
1212
| `MAX_STEP` | The maximum step limit for completing the user request in a session. | Integer | 100 |
13+
| `MAX_ROUND` | The maximum round limit for completing the user request in a session. | Integer | 10 |
1314
| `SLEEP_TIME` | The sleep time in seconds between each step to wait for the window to be ready. | Integer | 5 |
1415
| `RECTANGLE_TIME` | The time in seconds for the rectangle display around the selected control. | Integer | 1 |
1516
| `SAFE_GUARD` | Whether to use the safe guard to ask for user confirmation before performing sensitive operations. | Boolean | True |
@@ -25,6 +26,9 @@ The following parameters are included in the system configuration of the UFO age
2526
| `LOG_XML` | Whether to log the XML file at every step. | Boolean | False |
2627
| `SCREENSHOT_TO_MEMORY` | Whether to allow the screenshot to [`Blackboard`](../agents/design/blackboard.md) for the agent's decision making. | Boolean | True |
2728
| `SAVE_UI_TREE` | Whether to save the UI tree in the log. | Boolean | False |
29+
| `SAVE_EXPERIENCE` | Whether to save the experience. Use "always" to always save, "always_not" to never save, or "ask" to prompt the user to decide. The default is "always_not". | String | "always_not" |
30+
| `TASK_STATUS` | Whether to record the status of the tasks in batch execution mode. | Boolean | True |
31+
2832

2933
## Main Prompt Configuration
3034

ufo/agents/agent/evaluation_agent.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
33

4-
import sys
5-
6-
sys.path.append("..")
7-
sys.path.append("../..")
8-
sys.path.append("./")
9-
104
from typing import Any, Dict, Optional, Tuple
115

126
from ufo.agents.agent.basic import BasicAgent

ufo/config/config_dev.yaml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
CONTROL_BACKEND: "uia" # The backend for control action, currently we support uia and win32
22
MAX_STEP: 100 # The max step limit for completing the user request
3+
MAX_ROUND: 10 # The max round limit for completing the user request
34
SLEEP_TIME: 1 # The sleep time between each step to wait for the window to be ready
45
RECTANGLE_TIME: 1
56

@@ -96,7 +97,7 @@ EVA_ROUND: FALSE
9697
EVA_ALL_SCREENSHOTS: True # Whether to include all the screenshots in the evaluation
9798

9899
# Image saving performance
99-
DEFAULT_PNG_COMPRESS_LEVEL: 9 # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress
100+
DEFAULT_PNG_COMPRESS_LEVEL: 1 # The compress level for the PNG image, 0-9, 0 is no compress, 1 is the fastest, 9 is the best compress
100101

101102

102103
# Save UI tree
@@ -105,5 +106,7 @@ SAVE_UI_TREE: False # Whether to save the UI tree
105106

106107
# Record the status of the tasks
107108
TASK_STATUS: True # Whether to record the status of the tasks in batch execution mode.
108-
# TASK_STATUS_FILE # The path for the task status file.
109+
110+
# Experience saving
111+
SAVE_EXPERIENCE: "ask" # Whether to save the experience: "always" to always save, "always_not" to never save, or "ask" to prompt the user to decide. The documented default is "always_not"; this file sets it to "ask".
109112

ufo/experience/experience_parser.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
from typing import Any, Dict, List
5+
from collections import defaultdict
6+
7+
from ufo.trajectory import parser
8+
from ufo.automator.ui_control.screenshot import PhotographerFacade
9+
10+
11+
class ExperienceLogLoader:
    """
    Load the logs from a previous run and partition them by subtask.
    """

    # Keys read from / written to each step log.
    _subtask_key = "Subtask"
    _application_key = "Application"
    _image_url_key = "ScreenshotURLs"

    def __init__(self, log_path: str):
        """
        Initialize the ExperienceLogLoader.
        :param log_path: The path of the log file, passed to parser.Trajectory.
        """
        self._log_path = log_path
        trajectory = parser.Trajectory(log_path)
        self._subtask_partition = self.group_by_subtask(trajectory.app_agent_log)

    @classmethod
    def group_by_subtask(
        cls, step_log: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Group the logs by the value of the "Subtask" field.

        Each step log is updated in place with a "ScreenshotURLs" entry that
        maps every key in parser.Trajectory._screenshot_keys to the result of
        PhotographerFacade.encode_image for that screenshot. Step logs that
        have no "Subtask" field are grouped together under None.

        :param step_log: The step log.
        :return: One dictionary per subtask, in first-seen order, with the
            keys "subtask_index", "subtask", "logs" and "application" (the
            "Application" value of the first log in the group).
        """

        grouped = defaultdict(list)
        for log in step_log:
            # Looked up per step (not hoisted) so that an empty step log
            # never touches the trajectory parser at all.
            screenshots = log.get(parser.Trajectory._step_screenshot_key, {})
            log[cls._image_url_key] = {
                key: PhotographerFacade.encode_image(screenshots.get(key))
                for key in parser.Trajectory._screenshot_keys
            }

            # Group by the value of the "Subtask" field
            grouped[log.get(cls._subtask_key)].append(log)

        # Build the desired output structure
        return [
            {
                "subtask_index": index,
                "subtask": subtask,
                "logs": logs,
                "application": logs[0][cls._application_key],
            }
            for index, (subtask, logs) in enumerate(grouped.items())
        ]

    @property
    def subtask_partition(self) -> List[Dict[str, Any]]:
        """
        :return: The subtask partition.
        """
        return self._subtask_partition

    @property
    def log_path(self) -> str:
        """
        :return: The log path.
        """
        return self._log_path

ufo/experience/parser.py

Lines changed: 0 additions & 200 deletions
This file was deleted.

ufo/experience/summarizer.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,14 @@
22
# Licensed under the MIT License.
33

44
import os
5+
import sys
56
from typing import Tuple
67

78
import yaml
89
from langchain.docstore.document import Document
910
from langchain_community.vectorstores import FAISS
1011

11-
from ufo.experience.parser import ExperienceLogLoader
12+
from ufo.experience.experience_parser import ExperienceLogLoader
1213
from ufo.llm.llm_call import get_completion
1314
from ufo.prompter.experience_prompter import ExperiencePrompter
1415
from ufo.utils import get_hugginface_embedding, json_parser
@@ -107,8 +108,8 @@ def get_summary_list(self, logs: list) -> Tuple[list, float]:
107108
for log_partition in logs:
108109
prompt = self.build_prompt(log_partition)
109110
summary, cost = self.get_summary(prompt)
110-
summary["request"] = ExperienceLogLoader.get_user_request(log_partition)
111-
summary["app_list"] = ExperienceLogLoader.get_app_list(log_partition)
111+
summary["request"] = log_partition.get("subtask")
112+
summary["app_list"] = [log_partition.get("application")]
112113
summaries.append(summary)
113114
total_cost += cost
114115

@@ -121,8 +122,7 @@ def read_logs(log_path: str) -> list:
121122
:param log_path: The path of the log file.
122123
"""
123124
replay_loader = ExperienceLogLoader(log_path)
124-
logs = replay_loader.create_logs()
125-
return logs
125+
return replay_loader.subtask_partition
126126

127127
@staticmethod
128128
def create_or_update_yaml(summaries: list, yaml_path: str):
@@ -184,3 +184,25 @@ def create_or_update_vector_db(summaries: list, db_path: str):
184184
db.save_local(db_path)
185185

186186
print(f"Updated vector DB successfully: {db_path}")
187+
188+
189+
if __name__ == "__main__":
    # Manual entry point: summarize the experience found in one log path and
    # print the summaries together with the accumulated cost.

    from ufo.config.config import Config

    configs = Config.get_instance().config_data

    # Initialize the ExperienceSummarizer
    summarizer = ExperienceSummarizer(
        configs["APP_AGENT"]["VISUAL_MODE"],
        configs["EXPERIENCE_PROMPT"],
        configs["APPAGENT_EXAMPLE_PROMPT"],
        configs["API_PROMPT"],
    )

    # An optional first command-line argument overrides the default log path,
    # so a different run can be summarized without editing this file.
    log_path = sys.argv[1] if len(sys.argv) > 1 else "logs/test_exp"

    experience = summarizer.read_logs(log_path)
    summaries, cost = summarizer.get_summary_list(experience)
    print(summaries, cost)

0 commit comments

Comments
 (0)