Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 26 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,22 @@ This repository contains experiments with AI-driven parsers for analyzing vulner

## Usage

You can interact with all parsers through the VulnerabilityAgent class, which provides a single entry point for:
All parsers can be accessed through the VulnerabilityAgent class, which provides a unified interface for extracting structured vulnerability data.

1. **Create an instance of the `VulnerabilityAgent`:**
**Create an instance of the `VulnerabilityAgent`:**
```bash
instance = VulnerabilityAgent()
```

2. **Get the Package URL (PURL) for the given summary:**
## Parsing a PackageURL

**Get the Package URL (PURL) for the given summary:**
```bash
purl = instance.get_purl_from_summary(summary) # Output: pkg:pypi/django-helpdesk
```
Ensure the `summary` variable contains the relevant information to extract the PURL.

3. **Get the version ranges (affected and fixed versions) from the summary:**
**Get the version ranges (affected and fixed versions) from the summary:**
```bash
version_ranges = instance.get_version_ranges(summary, purl.type)
```
Expand All @@ -27,17 +29,12 @@ You can interact with all parsers through the VulnerabilityAgent class, which pr

Example output:
```bash
print(version_ranges) # Output: ([affected_versions], [fixed_versions])
print(version_ranges) # Output: ([affected_version_range], [fixed_version_range])
```

## Parsing a CPE

1. Create an instance of the VulnerabilityAgent:
```bash
instance = VulnerabilityAgent()
```

2. **Get the Package URL (PURL) for the given cpe:**
**Get the Package URL (PURL) for the given cpe:**
```bash
cpe = "cpe:2.3:a:django-helpdesk_project:django-helpdesk:-:*:*:*:*:*:*:*"
pkg_type = "pypi"
Expand All @@ -46,6 +43,24 @@ You can interact with all parsers through the VulnerabilityAgent class, which pr
```
Ensure the `cpe` variable contains the relevant information to extract the PURL.

## Parsing a Vulnerability

**Get the Severity for the given summary:**
```bash
summary = "..."
severity = instance.get_severity_from_summary(summary)
print(severity) # Output: one of "low", "medium", "high", "critical"
```
Ensure the `summary` variable contains the relevant information to determine the severity.

**Get the CWE for the given summary:**
```bash
summary = "Deserialization of untrusted data in Microsoft Office SharePoint allows an authorized attacker to execute code over a network."
cwes = instance.get_cwe_from_summary(summary)
print(cwes) # Output: ['CWE-502']
```
Ensure the `summary` variable contains the relevant information to identify the CWE(s).

---
## Configuration

Expand Down
104 changes: 81 additions & 23 deletions agent/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,26 @@
#

import os
from enum import Enum
from typing import List, Optional

from aboutcode.hashid import get_core_purl
from cwe2.database import Database
from dotenv import load_dotenv
from packageurl import PackageURL
from pydantic import BaseModel
from pydantic.functional_validators import field_validator
from pydantic_ai import Agent
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.models.openai import OpenAIChatModel
from pydantic_ai.providers.openai import OpenAIProvider
from pydantic_ai.settings import ModelSettings
from univers.version_range import RANGE_CLASS_BY_SCHEMES

from prompts import (
PROMPT_CWE_FROM_SUMMARY,
PROMPT_PURL_FROM_CPE,
PROMPT_PURL_FROM_SUMMARY,
PROMPT_SEVERITY_FROM_SUMMARY,
PROMPT_VERSION_FROM_SUMMARY,
)

Expand All @@ -39,12 +43,45 @@ class Purl(BaseModel):
string: str

@field_validator("string")
def check_valid_purl(cls, v: str) -> str:
try:
PackageURL.from_string(v)
except Exception as e:
raise ValueError(f"Invalid PURL '{v}': {e}")
return v
def check_valid_purl(cls, purl: str) -> str:
PackageURL.from_string(purl)
return purl


CWE_DATABASE = Database()


class CWE(BaseModel):
    """A single CWE identifier, normalized to the canonical ``CWE-<number>`` form."""

    string: str

    @field_validator("string")
    @classmethod
    def check_valid_cwe(cls, v: str) -> str:
        """Normalize and validate a CWE identifier.

        Accepts either ``"CWE-79"`` or ``"79"`` (case-insensitive, surrounding
        whitespace ignored) and returns ``"CWE-79"``.

        Raises:
            ValueError: if the identifier is not numeric or the lookup in
                ``CWE_DATABASE`` fails.
        """
        norm = v.strip().upper()
        if norm.startswith("CWE-"):
            norm = norm[4:].strip()

        if not norm.isdigit():
            raise ValueError("CWE must be a numeric identifier, e.g., 'CWE-79' or '79'")

        # Pydantic only turns ValueError/AssertionError raised inside a
        # validator into a validation error; any other exception type would
        # escape as a raw exception. The exact exception raised by the CWE
        # database for an unknown ID is not visible here (TODO confirm), so
        # convert whatever it raises into a ValueError and keep the cause.
        try:
            CWE_DATABASE.get(norm)
        except Exception as exc:
            raise ValueError(f"Unknown CWE identifier 'CWE-{norm}': {exc}") from exc

        return f"CWE-{norm}"


# Container model holding a list of validated CWE entries; passed as the
# output type by CWEFromSummaryParser.
class CWEList(BaseModel):
cwes: List[CWE]


class SeverityEnum(str, Enum):
    """Closed set of severity levels; values are lowercase strings."""

    low = "low"
    medium = "medium"
    high = "high"
    critical = "critical"

    @classmethod
    def _missing_(cls, value):
        """Resolve lookups that differ only by case or surrounding whitespace.

        The severity prompt spells the levels capitalized ("Low", "High", ...)
        while the member values are lowercase, so ``SeverityEnum("High")``
        would otherwise raise ``ValueError``. Returning ``None`` lets the
        standard ``ValueError`` be raised for anything still unrecognized.
        """
        if isinstance(value, str):
            normalized = value.strip().lower()
            for member in cls:
                if member.value == normalized:
                    return member
        return None


# Model wrapping a single SeverityEnum value; passed as the output type by
# SeverityFromSummaryParser.
class Severity(BaseModel):
severity: SeverityEnum


class Versions(BaseModel):
Expand All @@ -65,12 +102,7 @@ def __init__(self, system_prompt: str, output_type):
@staticmethod
def _init_model():
"""Initialize the LLM model depending on environment variables."""
if OLLAMA_MODEL_NAME and OLLAMA_BASE_URL:
return OpenAIModel(
model_name=OLLAMA_MODEL_NAME,
provider=OpenAIProvider(openai_client=OLLAMA_BASE_URL),
)
return OpenAIModel(
return OpenAIChatModel(
model_name=OPENAI_MODEL_NAME,
provider=OpenAIProvider(api_key=OPENAI_API_KEY),
)
Expand All @@ -90,6 +122,18 @@ def get_purl(self, summary: str) -> Optional[PackageURL]:
return get_core_purl(purl)


class PurlFromCPEParser(BaseParser):
    """Parser that derives a Package URL from a CPE string and a package type."""

    def __init__(self):
        super().__init__(PROMPT_PURL_FROM_CPE, Purl)

    def get_purl(self, cpe: str, pkg_type) -> Optional[PackageURL]:
        """Run the agent on ``cpe`` and ``pkg_type`` and return the resulting PURL.

        The agent's ``output.string`` is parsed with ``PackageURL.from_string``
        and the parsed object is passed through ``get_core_purl``.
        """
        user_prompt = (
            f"**Vulnerability Known Affected Software Configurations CPE:**\n{cpe}\n"
            f" **Package Type:**\n{pkg_type}"
        )
        agent_result = self.run_agent(user_prompt)
        parsed_purl = PackageURL.from_string(agent_result.output.string)
        return get_core_purl(parsed_purl)


class VersionsFromSummaryParser(BaseParser):
def __init__(self):
super().__init__(PROMPT_VERSION_FROM_SUMMARY, Versions)
Expand All @@ -111,16 +155,22 @@ def get_version_ranges(self, summary: str, supported_ecosystem: str):
return affected_objs, fixed_objs


class PurlFromCPEParser(BaseParser):
class SeverityFromSummaryParser(BaseParser):
def __init__(self):
super().__init__(PROMPT_PURL_FROM_CPE, Purl)
super().__init__(PROMPT_SEVERITY_FROM_SUMMARY, Severity)

def get_severity(self, summary: str) -> str:
    """Return the severity level for ``summary`` as a plain string.

    The agent output is a ``Severity`` model; this method unwraps it and
    returns the ``.value`` of its ``SeverityEnum`` member, so the result is
    a ``str`` rather than a ``Severity`` instance.
    """
    result = self.run_agent(f"**Vulnerability Description:**\n{summary}")
    return result.output.severity.value

def get_purl(self, cpe: str, pkg_type) -> Optional[PackageURL]:
result = self.run_agent(
f"**Vulnerability Known Affected Software Configurations CPE:**\n{cpe}\n **Package Type:**\n{pkg_type}"
)
purl = PackageURL.from_string(result.output.string)
return get_core_purl(purl)

class CWEFromSummaryParser(BaseParser):
    """Parser that extracts CWE identifiers from a vulnerability summary."""

    def __init__(self):
        super().__init__(PROMPT_CWE_FROM_SUMMARY, CWEList)

    def get_cwes(self, summary: str) -> List[str]:
        """Return the CWE identifier strings found for ``summary``.

        The agent output is a ``CWEList``; each entry's ``string`` field is
        extracted, so the result is a list of ``str``, not of ``CWEList``.
        """
        result = self.run_agent(f"**Vulnerability Description:**\n{summary}")
        return [cwe.string for cwe in result.output.cwes]


class VulnerabilityAgent:
Expand All @@ -130,12 +180,20 @@ def __init__(self):
self.purl_parser = PurlFromSummaryParser()
self.versions_parser = VersionsFromSummaryParser()
self.cpe_parser = PurlFromCPEParser()
self.severity_parser = SeverityFromSummaryParser()
self.cwe_parser = CWEFromSummaryParser()

def get_purl_from_summary(self, summary: str):
return self.purl_parser.get_purl(summary)

def get_version_ranges(self, summary: str, ecosystem: str):
return self.versions_parser.get_version_ranges(summary, ecosystem)

def get_purl_from_cpe(self, cpe: str, purl_with_no_version: str):
return self.cpe_parser.get_purl(cpe, purl_with_no_version)
def get_purl_from_cpe(self, cpe: str, pkg_type: str):
return self.cpe_parser.get_purl(cpe, pkg_type)

def get_severity_from_summary(self, summary: str):
return self.severity_parser.get_severity(summary)

def get_cwe_from_summary(self, summary: str):
return self.cwe_parser.get_cwes(summary)
16 changes: 16 additions & 0 deletions prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,19 @@
- Do not assume or hallucinate any values.

"""

# System prompt for severity classification. The levels are spelled in
# lowercase so that they match the SeverityEnum values ("low", "medium",
# "high", "critical") the output is validated against.
PROMPT_SEVERITY_FROM_SUMMARY = """You are a cybersecurity expert. Based on the following vulnerability description, determine its severity level as one of: low, medium, high, or critical.

Consider the impact on confidentiality, integrity, availability, and whether user interaction is required.

Return **only** the severity level (low, medium, high, or critical).
"""

# System prompt used by CWEFromSummaryParser: asks for every CWE ID relevant
# to a vulnerability description, returned as a list such as ["CWE-79"].
PROMPT_CWE_FROM_SUMMARY = """You are a Vulnerability Management Expert.
Based on the following vulnerability description, identify all relevant CWE (Common Weakness Enumeration) IDs that categorize the underlying weaknesses.

Use only valid CWE entries from the official MITRE CWE list (https://cwe.mitre.org/), such as CWE-79, CWE-89, CWE-287, etc.

Return **CWE IDs**, for example:
["CWE-79", "CWE-89"]
"""
Loading