7 changes: 7 additions & 0 deletions docs/source/garak.generators.llm.rst
@@ -0,0 +1,7 @@
garak.generators.llm
==========================

.. automodule:: garak.generators.llm
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/source/generators.rst
@@ -20,6 +20,7 @@ For a detailed oversight into how a generator operates, see :doc:`garak.generato
garak.generators.langchain
garak.generators.langchain_serve
garak.generators.litellm
garak.generators.llm
garak.generators.mistral
garak.generators.ollama
garak.generators.openai
94 changes: 94 additions & 0 deletions garak/generators/llm.py
Collaborator comment:
It's worth implementing the _load_client() / _clear_client() pattern here to support parallelisation - see openai.OpenAICompatible for an example.
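For reference, a minimal sketch (not part of the PR diff below) of the shape this pattern could take for `llm`; the method names come from the comment above, while the bodies are assumptions rather than the actual openai.OpenAICompatible implementation:

# Hypothetical sketch only: resolve the llm model lazily so generator instances
# can be pickled for parallel workers and re-hydrated on demand.
from garak.generators.base import Generator

class LLMGenerator(Generator):
    def _load_client(self):
        import llm  # deferred import, resolved only when a model handle is needed

        self.model = llm.get_model(self.name) if self.name else llm.get_model()

    def _clear_client(self):
        # drop the live handle before the generator is serialised
        self.model = None

    def _call_model(self, prompt, generations_this_call=1):
        if getattr(self, "model", None) is None:
            self._load_client()
        # ... existing prompt handling follows ...

The deferred import inside _load_client() would also be a step toward the deferred loading pattern mentioned in the comments on pyproject.toml and requirements.txt below.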
@@ -0,0 +1,94 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""LLM (simonw/llm) generator support"""

import logging
from typing import List, Union

import llm

from garak import _config
from garak.attempt import Message, Conversation
from garak.generators.base import Generator


class LLMGenerator(Generator):
    """Class supporting simonw/llm-managed models

    See https://pypi.org/project/llm/ and its provider plugins.

    Calls model.prompt() with the prompt text and relays the response. Per-provider
    options and API keys are all handled by `llm` (e.g., `llm keys set openai`).

    Set --model_name to the `llm` model id or alias (e.g., "gpt-4o-mini",
Collaborator comment:
Suggested change: replace
    Set --model_name to the `llm` model id or alias (e.g., "gpt-4o-mini",
with
    Set --target_name to the `llm` model id or alias (e.g., "gpt-4o-mini",

    "claude-3.5-haiku", or a local alias configured in `llm models`).

    Explicitly, garak delegates the majority of responsibility here:

    * the generator calls prompt() on the resolved `llm` model
    * provider setup, auth, and model-specific options live in `llm`
    * there's no support for chains; this is a direct LLM interface

    Notes:
    * Not all providers support all parameters (e.g., temperature, max_tokens).
      We pass only non-None params; providers ignore what they don't support.
    """

    DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
        "temperature": None,
        "max_tokens": None,
Collaborator comment:
temperature and max_tokens are already in Generator.DEFAULT_PARAMS; is there a reason to include them here?
Suggested change: remove
        "temperature": None,
        "max_tokens": None,

        "top_p": None,
        "stop": [],
        "system": None,
Collaborator comment:
Remove; the system prompt is set via the run configuration and passed to generators as part of the prompt conversation.
Suggested change: remove
        "system": None,

    }

    generator_family_name = "LLM"
Collaborator comment:
this might be better as lower case - that's how the tool is described


    def __init__(self, name: str = "", config_root=_config):
        self.name = name
        self._load_config(config_root)
        self.fullname = f"LLM (simonw/llm) {self.name or '(default)'}"

        super().__init__(self.name, config_root=config_root)

        try:
            # Resolve the llm model; fall back to llm's default if no name given
            self.model = llm.get_model(self.name) if self.name else llm.get_model()
Collaborator comment:
Can we rename self.model to self.target to be consistent with overall garak nomenclature? llm supports both systems and models so the change fits this use-case fine too.

        except Exception as e:
            logging.error("Failed to resolve `llm` model '%s': %s", self.name, repr(e))
            raise e

    def _call_model(
        self, prompt: Conversation, generations_this_call: int = 1
    ) -> List[Union[Message, None]]:
        """
        Continuation generation method for LLM integrations via `llm`.

        This calls model.prompt() once per generation and materializes the text().
        """
        text_prompt = prompt.last_message().text
Collaborator comment:
This does not extract the full conversation. There is an existing helper in the base class, Generator._conversation_to_list(), that formats the garak Conversation object as a list of dictionaries matching the HuggingFace and OpenAI conversation format. Looking at how the llm library handles what it considers a conversation, I don't know if there is a way to load a prefilled history in a pattern similar to how the chat completions APIs of other generators work.

For best adoption, this generator should at least validate that the conversation has at most one user and one system message, so it is clear whether the prompt passed will be fully processed during inference.
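A minimal sketch of the validation suggested above (illustrative only, not part of the diff); the Conversation/Turn attribute names used here, turns and role, are assumptions inferred from how the tests construct Conversation([Turn("user", ...)]) and are not verified against the garak API:

# Hypothetical helper a _call_model() implementation could invoke before flattening
# the prompt; attribute names are assumptions.
import logging

from garak.attempt import Conversation

def _warn_on_unsupported_history(conversation: Conversation) -> None:
    roles = [turn.role for turn in conversation.turns]
    if roles.count("user") > 1 or roles.count("system") > 1:
        logging.warning(
            "llm generator received a multi-turn conversation; only the last user "
            "message (and any system message) would reach model.prompt()"
        )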


        # Build kwargs only for parameters explicitly set (non-None / non-empty)
        prompt_kwargs = {}
        if self.system:
            prompt_kwargs["system"] = self.system
Collaborator comment:
Current system prompt support in garak is tied to the conversation passed as part of the prompt. The DEFAULT_PARAMS entry here should likely be removed in favor of extracting the system prompt from the prompt via prompt.last_message("system"), assuming that passing a conversation which includes the system message would not otherwise apply it.
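A minimal sketch of what that could look like (illustrative only, not part of the diff), taking the reviewer's prompt.last_message("system") call at face value; that signature is not verified against the garak API:

# Hypothetical replacement for the `if self.system:` branch below;
# last_message("system") is used exactly as the review comment describes.
system_msg = prompt.last_message("system")
if system_msg is not None:
    prompt_kwargs["system"] = system_msg.text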

        if self.max_tokens is not None:
            prompt_kwargs["max_tokens"] = self.max_tokens
        if self.temperature is not None:
            prompt_kwargs["temperature"] = self.temperature
        if self.top_p is not None:
            prompt_kwargs["top_p"] = self.top_p
        if self.stop:
            prompt_kwargs["stop"] = self.stop
Collaborator comment:
None is falsy, and all keys defined in DEFAULT_PARAMS will exist on self.
Suggested change: replace
        if self.max_tokens is not None:
            prompt_kwargs["max_tokens"] = self.max_tokens
        if self.temperature is not None:
            prompt_kwargs["temperature"] = self.temperature
        if self.top_p is not None:
            prompt_kwargs["top_p"] = self.top_p
        if self.stop:
            prompt_kwargs["stop"] = self.stop
with
        if self.max_tokens:
            prompt_kwargs["max_tokens"] = self.max_tokens
        if self.temperature:
            prompt_kwargs["temperature"] = self.temperature
        if self.top_p:
            prompt_kwargs["top_p"] = self.top_p
        if self.stop:
            prompt_kwargs["stop"] = self.stop


        try:
            response = self.model.prompt(text_prompt, **prompt_kwargs)
            out = response.text()
            return [Message(out)]
        except Exception as e:
            logging.error("`llm` generation failed: %s", repr(e))
            return [None]


DEFAULT_CLASS = "LLMGenerator"
1 change: 1 addition & 0 deletions pyproject.toml
Collaborator comment:
we should land #1199 before landing this, and then move this PR to the deferred loading pattern
@@ -111,6 +111,7 @@ dependencies = [
"mistralai==1.5.2",
"pillow>=10.4.0",
"ftfy>=6.3.1",
"llm>=0.11",
]

[project.optional-dependencies]
1 change: 1 addition & 0 deletions requirements.txt
Collaborator comment:
we should land #1199 before landing this, and then move this PR to the deferred loading pattern
@@ -11,6 +11,7 @@ backoff>=2.1.1
rapidfuzz>=3.0.0
jinja2>=3.1.6
nltk>=3.9.1
llm>=0.11
accelerate>=0.23.0
avidtools==0.1.2
stdlibs>=2022.10.9
128 changes: 128 additions & 0 deletions tests/generators/test_llm.py
@@ -0,0 +1,128 @@
# SPDX-FileCopyrightText: Portions Copyright (c) 2025 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved.
Collaborator comment on lines +1 to +2:
Suggested change: replace
# SPDX-FileCopyrightText: Portions Copyright (c) 2025 NVIDIA CORPORATION &
# AFFILIATES. All rights reserved.
with
# SPDX-FileCopyrightText: Portions Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Tests for simonw/llm-backed garak generator"""

import pytest
from unittest.mock import MagicMock

from garak.attempt import Conversation, Turn, Message
from garak._config import GarakSubConfig

# Adjust import path/module name to where you placed the wrapper
from garak.generators.llm import LLMGenerator


# ─── Helpers & Fixtures ─────────────────────────────────────────────────

class FakeResponse:
    """Minimal `llm` Response shim with .text()"""
    def __init__(self, txt: str):
        self._txt = txt
    def text(self) -> str:
        return self._txt


class FakeModel:
    """Minimal `llm` model shim with .prompt()"""
    def __init__(self):
        self.calls = []
    def prompt(self, prompt_text: str, **kwargs):
        self.calls.append((prompt_text, kwargs))
        return FakeResponse("OK_FAKE")


@pytest.fixture
def cfg():
    """Minimal garak sub-config; extend if you wire defaults via config."""
    c = GarakSubConfig()
    c.generators = {}
    return c


@pytest.fixture
def fake_llm(monkeypatch):
    """
    Patch llm.get_model to return a fresh FakeModel for each test.
    Return the FakeModel so tests can inspect call args.
    """
    import llm
    model = FakeModel()
    monkeypatch.setattr(llm, "get_model", lambda *a, **k: model)
    return model


# ─── Tests ──────────────────────────────────────────────────────────────

def test_instantiation_resolves_model(cfg, fake_llm):
    gen = LLMGenerator(name="my-alias", config_root=cfg)
    assert gen.name == "my-alias"
Collaborator comment on lines +60 to +61:
Suggested change: replace
    gen = LLMGenerator(name="my-alias", config_root=cfg)
    assert gen.name == "my-alias"
with
    test_name = "my-alias"
    gen = LLMGenerator(name=test_name, config_root=cfg)
    assert gen.name == test_name
    assert hasattr(gen, "model")
Collaborator comment:
Consider adding an isinstance check here
    assert "LLM (simonw/llm)" in gen.fullname


def test_generate_returns_message(cfg, fake_llm):
    gen = LLMGenerator(name="alias", config_root=cfg)

    conv = Conversation([Turn("user", Message(text="ping"))])
Collaborator comment:
Suggested change: replace
    conv = Conversation([Turn("user", Message(text="ping"))])
with
    test_txt = "ping"
    conv = Conversation([Turn("user", Message(text=test_txt))])

    out = gen._call_model(conv)

    assert isinstance(out, list) and len(out) == 1
    assert isinstance(out[0], Message)
    assert out[0].text == "OK_FAKE"

    prompt_text, kwargs = fake_llm.calls[0]
    assert prompt_text == "ping"
Collaborator comment:
Suggested change: replace
    assert prompt_text == "ping"
with
    assert prompt_text == test_txt

    assert kwargs == {}


def test_param_passthrough(cfg, fake_llm):
    gen = LLMGenerator(name="alias", config_root=cfg)
    gen.temperature = 0.2
    gen.max_tokens = 64
    gen.top_p = 0.9
    gen.stop = ["\n\n"]
    gen.system = "you are testy"
Collaborator comment:
use vars for these values (and the checks later)


    conv = Conversation([Turn("user", Message(text="hello"))])
    _ = gen._call_model(conv)

    _, kwargs = fake_llm.calls[0]
    assert kwargs["temperature"] == 0.2
    assert kwargs["max_tokens"] == 64
    assert kwargs["top_p"] == 0.9
    assert kwargs["stop"] == ["\n\n"]
    assert kwargs["system"] == "you are testy"
Collaborator comment:
vars here. Could do a list assignment / check like x, y = 1, 2 for brevity.


def test_wrapper_handles_llm_exception(cfg, monkeypatch):
    """If the underlying `llm` call explodes, wrapper returns [None]."""
    import llm
    class BoomModel:
        def prompt(self, *a, **k):
            raise RuntimeError("boom")
    monkeypatch.setattr(llm, "get_model", lambda *a, **k: BoomModel())
Collaborator comment:
nice


    gen = LLMGenerator(name="alias", config_root=cfg)
    conv = Conversation([Turn("user", Message(text="ping"))])
    out = gen._call_model(conv)
    assert out == [None]


def test_default_model_when_name_empty(cfg, fake_llm, monkeypatch):
    """
    If name is empty, wrapper should call llm.get_model() with no args,
    i.e., use llm's configured default model.
    """
    import llm
    spy = MagicMock(side_effect=lambda *a, **k: fake_llm)
    monkeypatch.setattr(llm, "get_model", spy)

    gen = LLMGenerator(name="", config_root=cfg)
    conv = Conversation([Turn("user", Message(text="x"))])
    _ = gen._call_model(conv)

    spy.assert_called_once()
    assert spy.call_args.args == ()
    assert spy.call_args.kwargs == {}