Skip to content

Commit 3d2fe58

Browse files
committed
Version to 1.15.3. Various bug fixes in agents and evals. Expanded command agent tool. Update cyclopts.
1 parent 164cfb8 commit 3d2fe58

File tree

21 files changed

+624
-315
lines changed

21 files changed

+624
-315
lines changed

docs/sdk/agent.mdx

Lines changed: 136 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,20 @@ The maximum number of steps (generation + tool calls).
9191
### model
9292

9393
```python
94-
model: str | None = Config(default=None)
94+
model: str | Generator | None = Config(
95+
default=None, expose_as=str | None
96+
)
9597
```
9698

97-
Inference model (rigging generator identifier).
99+
Inference model (rigging generator or identifier).
100+
101+
### model\_name
102+
103+
```python
104+
model_name: str | None
105+
```
106+
107+
The model name when `model` was specified as a string; otherwise `None`.
98108

99109
### name
100110

@@ -160,6 +170,34 @@ tools: Annotated[
160170

161171
Tools the agent can use.
162172

173+
### clone
174+
175+
```python
176+
clone() -> te.Self
177+
```
178+
179+
Clone the agent.
180+
181+
**Returns:**
182+
183+
* `Self`
184+
–A new Agent instance with the same attributes as this one.
185+
186+
<Accordion title="Source code in dreadnode/agent/agent.py" icon="code">
187+
```python
188+
def clone(self) -> te.Self:
189+
"""
190+
Clone the agent.
191+
192+
Returns:
193+
A new Agent instance with the same attributes as this one.
194+
"""
195+
return self.model_copy(deep=True)
196+
```
197+
198+
199+
</Accordion>
200+
163201
### get\_prompt
164202

165203
```python
@@ -203,6 +241,102 @@ def reset(self) -> Thread:
203241
```
204242

205243

244+
</Accordion>
245+
246+
### with\_
247+
248+
```python
249+
with_(
250+
*,
251+
name: str | None = None,
252+
description: str | None = None,
253+
tags: list[str] | None = None,
254+
label: str | None = None,
255+
model: str | Generator | None = None,
256+
instructions: str | None = None,
257+
max_steps: int | None = None,
258+
caching: CacheMode | None = None,
259+
tools: list[AnyTool | Toolset] | None = None,
260+
tool_mode: ToolMode | None = None,
261+
hooks: list[Hook] | None = None,
262+
stop_conditions: list[StopCondition] | None = None,
263+
scorers: ScorersLike[AgentResult] | None = None,
264+
assert_scores: list[str] | Literal[True] | None = None,
265+
append: bool = False,
266+
) -> te.Self
267+
```
268+
269+
Clone the agent and modify its attributes.
270+
271+
**Returns:**
272+
273+
* `Self`
274+
–A new Agent instance with the modified attributes.
275+
276+
<Accordion title="Source code in dreadnode/agent/agent.py" icon="code">
277+
```python
278+
def with_(
279+
self,
280+
*,
281+
name: str | None = None,
282+
description: str | None = None,
283+
tags: list[str] | None = None,
284+
label: str | None = None,
285+
model: str | rg.Generator | None = None,
286+
instructions: str | None = None,
287+
max_steps: int | None = None,
288+
caching: rg.caching.CacheMode | None = None,
289+
tools: list[AnyTool | Toolset] | None = None,
290+
tool_mode: ToolMode | None = None,
291+
hooks: list[Hook] | None = None,
292+
stop_conditions: list[StopCondition] | None = None,
293+
scorers: ScorersLike[AgentResult] | None = None,
294+
assert_scores: list[str] | t.Literal[True] | None = None,
295+
append: bool = False,
296+
) -> te.Self:
297+
"""
298+
Clone the agent and modify its attributes.
299+
300+
Returns:
301+
A new Agent instance with the modified attributes.
302+
"""
303+
new = self.clone()
304+
305+
new.name = name or new.name
306+
new.description = description or new.description
307+
new.label = label or new.label
308+
new.model = model or new.model
309+
new.instructions = instructions or new.instructions
310+
new.max_steps = max_steps or new.max_steps
311+
new.caching = caching or new.caching
312+
new.tool_mode = tool_mode or new.tool_mode
313+
314+
if append:
315+
new.tags = [*new.tags, *(tags or [])]
316+
new.tools = [*new.tools, *(tools or [])]
317+
new.hooks = [*new.hooks, *(hooks or [])]
318+
new.stop_conditions = [*new.stop_conditions, *(stop_conditions or [])]
319+
new.scorers = [*new.scorers, *(scorers or [])]
320+
if isinstance(assert_scores, bool):
321+
new.assert_scores = assert_scores
322+
elif isinstance(new.assert_scores, list):
323+
new.assert_scores = [*new.assert_scores, *(assert_scores or [])]
324+
else:
325+
new.assert_scores = assert_scores or new.assert_scores
326+
else:
327+
new.tags = tags if tags is not None else new.tags
328+
new.tools = tools if tools is not None else new.tools
329+
new.hooks = hooks if hooks is not None else new.hooks
330+
new.stop_conditions = (
331+
stop_conditions if stop_conditions is not None else new.stop_conditions
332+
)
333+
new.scorers = scorers if scorers is not None else new.scorers
334+
new.assert_scores = assert_scores if assert_scores is not None else new.assert_scores
335+
336+
return new
337+
```
338+
339+
206340
</Accordion>
207341

208342
AgentWarning

docs/sdk/agent_tools.mdx

Lines changed: 77 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -330,39 +330,44 @@ command(
330330
timeout: int = 120,
331331
cwd: str | None = None,
332332
env: dict[str, str] | None = None,
333+
input: str | None = None,
333334
) -> str
334335
```
335336

336337
Execute a shell command.
337338

338-
Use this tool to run system utilities and command-line programs (e.g., `ls`, `cat`, `grep`). It is designed for straightforward, single-shot operations and returns the combined output and error streams.
339-
340339
**Best Practices**
341340

342-
* Argument Format: The command and its arguments *must* be provided as a list of strings (e.g., `["ls", "-la", "/tmp"]`), not as a single string.
343-
* No Shell Syntax: Does not use a shell. Features like pipes (`|`), redirection (`>`), and variable expansion (`$VAR`) are not supported.
344-
* Error on Failure: The tool will raise a `RuntimeError` if the command returns a non-zero exit code.
341+
* Argument Format: Command and arguments must be a list of strings.
342+
* No Shell Syntax: Does not use a shell (no pipes, redirection, var expansion, etc.).
343+
* Error on Failure: Raises RuntimeError for non-zero exit codes.
344+
* Use input Parameter: Pass any data the command expects on standard input via `input`; a command that waits on stdin may otherwise hang until the timeout.
345345

346346
**Parameters:**
347347

348348
* **`cmd`**
349349
(`list[str]`)
350-
–The command to execute, provided as a list of strings.
350+
–The command to execute as a list of strings.
351351
* **`timeout`**
352352
(`int`, default:
353353
`120`
354354
)
355-
–Maximum time in seconds to allow for command execution.
355+
–Maximum execution time in seconds.
356356
* **`cwd`**
357357
(`str | None`, default:
358358
`None`
359359
)
360-
–The working directory in which to execute the command.
360+
–The working directory for the command.
361361
* **`env`**
362362
(`dict[str, str] | None`, default:
363363
`None`
364364
)
365-
–Optional environment variables to set for the command.
365+
–Environment variables for the command.
366+
* **`input`**
367+
(`str | None`, default:
368+
`None`
369+
)
370+
–Optional string to send to the command's standard input.
366371

367372
<Accordion title="Source code in dreadnode/agent/tools/execute.py" icon="code">
368373
```python
@@ -373,52 +378,85 @@ async def command(
373378
timeout: int = 120,
374379
cwd: str | None = None,
375380
env: dict[str, str] | None = None,
381+
input: str | None = None,
376382
) -> str:
377383
"""
378384
Execute a shell command.
379385
380-
Use this tool to run system utilities and command-line programs (e.g., `ls`, `cat`, `grep`). \
381-
It is designed for straightforward, single-shot operations and returns the combined output and error streams.
382-
383386
## Best Practices
384-
- Argument Format: The command and its arguments *must* be provided as a \
385-
list of strings (e.g., `["ls", "-la", "/tmp"]`), not as a single string.
386-
- No Shell Syntax: Does not use a shell. Features like pipes (`|`), \
387-
redirection (`>`), and variable expansion (`$VAR`) are not supported.
388-
- Error on Failure: The tool will raise a `RuntimeError` if the command returns a non-zero exit code.
387+
- Argument Format: Command and arguments must be a list of strings.
388+
- No Shell Syntax: Does not use a shell (no pipes, redirection, var expansion, etc.).
389+
- Error on Failure: Raises RuntimeError for non-zero exit codes.
390+
- Use input Parameter: Send data to the command's standard input to avoid hanging.
389391
390392
Args:
391-
cmd: The command to execute, provided as a list of strings.
392-
timeout: Maximum time in seconds to allow for command execution.
393-
cwd: The working directory in which to execute the command.
394-
env: Optional environment variables to set for the command.
393+
cmd: The command to execute as a list of strings.
394+
timeout: Maximum execution time in seconds.
395+
cwd: The working directory for the command.
396+
env: Environment variables for the command.
397+
input: Optional string to send to the command's standard input.
395398
"""
399+
command_str = " ".join(cmd)
400+
logger.debug(f"Executing '{command_str}'")
401+
402+
process_env = os.environ.copy()
403+
if env:
404+
process_env.update(env)
405+
406+
proc = await asyncio.create_subprocess_exec(
407+
*cmd,
408+
stdout=asyncio.subprocess.PIPE,
409+
stderr=asyncio.subprocess.STDOUT,
410+
stdin=asyncio.subprocess.PIPE if input is not None else None,
411+
env=process_env,
412+
cwd=cwd,
413+
)
414+
415+
output = ""
416+
417+
async def read_stdout() -> None:
418+
nonlocal output
419+
420+
if not proc.stdout:
421+
return
422+
423+
while True:
424+
chunk = await proc.stdout.read(1024)
425+
if not chunk:
426+
break
427+
output += chunk.decode(errors="replace")
428+
429+
async def write_and_close_stdin() -> None:
430+
if proc.stdin and input:
431+
proc.stdin.write(input.encode())
432+
await proc.stdin.drain()
433+
proc.stdin.close()
434+
396435
try:
397-
command_str = " ".join(cmd)
398-
logger.debug(f"Executing '{command_str}'")
399-
proc = await asyncio.create_subprocess_exec(
400-
*cmd,
401-
stdout=asyncio.subprocess.PIPE,
402-
stderr=asyncio.subprocess.PIPE,
403-
env=env,
404-
cwd=cwd,
436+
await asyncio.wait_for(
437+
asyncio.gather(read_stdout(), write_and_close_stdin()), timeout=timeout
405438
)
406-
stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
407-
output = stdout.decode() + stderr.decode()
439+
await proc.wait()
440+
408441
except asyncio.TimeoutError as e:
409-
logger.warning(f"Command '{command_str}' timed out after {timeout} seconds.")
442+
error_message = f"Command '{command_str}' timed out after {timeout} seconds."
443+
if output:
444+
error_message += f"\n\nPartial Output:\n{output}"
445+
logger.warning(error_message)
446+
410447
with contextlib.suppress(OSError):
411448
proc.kill()
412-
raise TimeoutError(f"Command timed out after {timeout} seconds") from e
413-
except Exception as e:
414-
logger.error(f"Error executing '{command_str}': {e}")
415-
raise
449+
await proc.wait()
450+
451+
raise TimeoutError(error_message) from e
416452

417453
if proc.returncode != 0:
418-
logger.error(f"Command '{command_str}' failed with return code {proc.returncode}: {output}")
419-
raise RuntimeError(f"Command failed ({proc.returncode}): {output}")
454+
logger.error(
455+
f"Command '{command_str}' failed with return code {proc.returncode}:\n{output}"
456+
)
457+
raise RuntimeError(f"Command failed ({proc.returncode}):\n{output}")
420458

421-
logger.debug(f"Command '{command_str}':\n{output}")
459+
logger.debug(f"Command '{command_str}' completed:\n{output}")
422460
return output
423461
```
424462

docs/sdk/eval.mdx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,11 @@ The name of the evaluation.
122122

123123
```python
124124
parameters: dict[str, list[Any]] | None = Config(
125-
default=None
125+
default=None, expose_as=str | None
126126
)
127127
```
128128

129-
A dictionary defining a parameter space to run experiments against.
129+
A dictionary (or JSON string) defining a parameter space to run experiments against.
130130
A scenario will be created for every combination of the parameters defined here.
131131
Key names should align with arguments on the task assigned with a `Config` context.
132132

@@ -157,7 +157,7 @@ A list of tags to associate during tracing.
157157
### task
158158

159159
```python
160-
task: Task[[In], Out] | str
160+
task: Task[..., Out] | str
161161
```
162162

163163
The task to evaluate. Can be a Task object or a string representing the qualified task name.

docs/sdk/main.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2582,7 +2582,7 @@ def task_and_run(
25822582
_tracer=_tracer,
25832583
)
25842584
)
2585-
self.log_inputs(**(inputs or {}))
2585+
self.log_inputs(**(inputs or {}), to="run")
25862586

25872587
task_span = stack.enter_context(
25882588
self.task_span(name, label=label, tags=tags, _tracer=_tracer)

0 commit comments

Comments
 (0)