aboutcode-org · ziadhany · Oct 30, 2025
diff --git a/README.md b/README.md
@@ -4,20 +4,22 @@ This repository contains experiments with AI-driven parsers for analyzing vulner
 
 ## Usage
 
-You can interact with all parsers through the VulnerabilityAgent class, which provides a single entry point for:
+All parsers can be accessed through the VulnerabilityAgent class, which provides a unified interface for extracting structured vulnerability data.
 
-1. **Create an instance of the `VulnerabilityAgent`:**
+**Create an instance of the `VulnerabilityAgent`:**
     ```bash
     instance = VulnerabilityAgent()
     ```
 
-2. **Get the Package URL (PURL) for the given summary:**
+## Parsing a PackageURL
+
+**Get the Package URL (PURL) for the given summary:**
     ```bash
     purl = instance.get_purl_from_summary(summary) # Output: pkg:pypi/django-helpdesk
     ```
     Ensure the `summary` variable contains the relevant information to extract the PURL.
 
-3. **Get the version ranges (affected and fixed versions) from the summary:**
+**Get the version ranges (affected and fixed versions) from the summary:**
     ```bash
     version_ranges = instance.get_version_ranges(summary, purl.type)
     ```
@@ -27,17 +29,12 @@ You can interact with all parsers through the VulnerabilityAgent class, which pr
 
     Example output:
     ```bash
-    print(version_ranges)  # Output: ([affected_versions], [fixed_versions])
+    print(version_ranges)  # Output: ([affected_version_range], [fixed_version_range])
     ```
 
 ## Parsing a CPE
 
-1. Create an instance of the VulnerabilityAgent:
-    ```bash
-    instance = VulnerabilityAgent()
-    ```
-
-2. **Get the Package URL (PURL) for the given cpe:**
+**Get the Package URL (PURL) for the given cpe:**
     ```bash
     cpe = "cpe:2.3:a:django-helpdesk_project:django-helpdesk:-:*:*:*:*:*:*:*"
     pkg_type = "pypi"
@@ -46,6 +43,24 @@ You can interact with all parsers through the VulnerabilityAgent class, which pr
     ```
     Ensure the `cpe` variable contains the relevant information to extract the PURL.
 
+## Parsing a Vulnerability
+
+**Get the Severity for the given summary:**
+    ```bash
+    summary = "..."
+    severity = instance.get_severity_from_summary(summary)
+    print(severity)  # Output: one of "low", "medium", "high", "critical"
+    ```
+    Ensure the `summary` variable contains the relevant information to determine the severity.
+
+**Get the CWE for the given summary:**
+    ```bash
+    summary = "Deserialization of untrusted data in Microsoft Office SharePoint allows an authorized attacker to execute code over a network."
+    cwes = instance.get_cwe_from_summary(summary)
+    print(cwes)  # Output: ['CWE-502']
+    ```
+    Ensure the `summary` variable contains the relevant information to identify the CWEs.
+
 ---
 ## Configuration
 

diff --git a/agent/__init__.py b/agent/__init__.py
@@ -8,22 +8,26 @@
 #
 
 import os
+from enum import Enum
 from typing import List, Optional
 
 from aboutcode.hashid import get_core_purl
+from cwe2.database import Database
 from dotenv import load_dotenv
 from packageurl import PackageURL
 from pydantic import BaseModel
 from pydantic.functional_validators import field_validator
 from pydantic_ai import Agent
-from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.models.openai import OpenAIChatModel
 from pydantic_ai.providers.openai import OpenAIProvider
 from pydantic_ai.settings import ModelSettings
 from univers.version_range import RANGE_CLASS_BY_SCHEMES
 
 from prompts import (
+    PROMPT_CWE_FROM_SUMMARY,
     PROMPT_PURL_FROM_CPE,
     PROMPT_PURL_FROM_SUMMARY,
+    PROMPT_SEVERITY_FROM_SUMMARY,
     PROMPT_VERSION_FROM_SUMMARY,
 )
 
@@ -39,12 +43,45 @@ class Purl(BaseModel):
     string: str
 
     @field_validator("string")
-    def check_valid_purl(cls, v: str) -> str:
-        try:
-            PackageURL.from_string(v)
-        except Exception as e:
-            raise ValueError(f"Invalid PURL '{v}': {e}")
-        return v
+    def check_valid_purl(cls, purl: str) -> str:
+        PackageURL.from_string(purl)  # parsed only to validate; the result is discarded and any parse error propagates as-is
+        return purl
+
+
+CWE_DATABASE = Database()  # built once at import time; shared by every CWE validation in this module
+
+
+class CWE(BaseModel):  # a single CWE identifier, stored in canonical "CWE-<digits>" form
+    string: str  # accepts e.g. "CWE-79", "cwe-79" or "79"; the validator below normalizes it
+
+    @field_validator("string")
+    @classmethod
+    def check_valid_cwe(cls, v: str) -> str:
+        norm = v.strip().upper()  # make the prefix check case- and whitespace-insensitive
+        if norm.startswith("CWE-"):
+            norm = norm[4:].strip()  # drop the "CWE-" prefix, keep only the number part
+
+        if not norm.isdigit():
+            raise ValueError("CWE must be a numeric identifier, e.g., 'CWE-79' or '79'")
+
+        CWE_DATABASE.get(norm)  # result is discarded; NOTE(review): presumably raises for IDs not in the database — confirm the exception type
+
+        return f"CWE-{norm}"  # always re-emit with the canonical prefix
+
+
+class CWEList(BaseModel):  # output type passed to BaseParser by CWEFromSummaryParser
+    cwes: List[CWE]  # each item is validated and normalized by CWE.check_valid_cwe
+
+
+class SeverityEnum(str, Enum):  # closed set of severity levels; members are also plain str instances
+    low = "low"
+    medium = "medium"
+    high = "high"
+    critical = "critical"  # values are lowercase; validation by value is case-sensitive
+
+
+class Severity(BaseModel):  # output type passed to BaseParser by SeverityFromSummaryParser
+    severity: SeverityEnum  # must be one of the SeverityEnum values
 
 
 class Versions(BaseModel):
@@ -65,12 +102,7 @@ def __init__(self, system_prompt: str, output_type):
     @staticmethod
     def _init_model():
         """Initialize the LLM model depending on environment variables."""
-        if OLLAMA_MODEL_NAME and OLLAMA_BASE_URL:
-            return OpenAIModel(
-                model_name=OLLAMA_MODEL_NAME,
-                provider=OpenAIProvider(openai_client=OLLAMA_BASE_URL),
-            )
-        return OpenAIModel(
+        return OpenAIChatModel(  # only remaining model path: the Ollama branch is removed by this hunk
             model_name=OPENAI_MODEL_NAME,
             provider=OpenAIProvider(api_key=OPENAI_API_KEY),
         )
@@ -90,6 +122,18 @@ def get_purl(self, summary: str) -> Optional[PackageURL]:
         return get_core_purl(purl)
 
 
+class PurlFromCPEParser(BaseParser):  # derives a PackageURL from a CPE string plus a package type
+    def __init__(self):
+        super().__init__(PROMPT_PURL_FROM_CPE, Purl)  # (system prompt, structured output type)
+
+    def get_purl(self, cpe: str, pkg_type: str) -> Optional[PackageURL]:
+        result = self.run_agent(
+            f"**Vulnerability Known Affected Software Configurations CPE:**\n{cpe}\n **Package Type:**\n{pkg_type}"
+        )
+        purl = PackageURL.from_string(result.output.string)  # the agent's validated PURL string, parsed into a PackageURL
+        return get_core_purl(purl)  # NOTE(review): presumably reduces the PURL to its core fields — confirm against aboutcode.hashid
+
+
 class VersionsFromSummaryParser(BaseParser):
     def __init__(self):
         super().__init__(PROMPT_VERSION_FROM_SUMMARY, Versions)
@@ -111,16 +155,22 @@ def get_version_ranges(self, summary: str, supported_ecosystem: str):
         return affected_objs, fixed_objs
 
 
-class PurlFromCPEParser(BaseParser):
+class SeverityFromSummaryParser(BaseParser):
     def __init__(self):
-        super().__init__(PROMPT_PURL_FROM_CPE, Purl)
+        super().__init__(PROMPT_SEVERITY_FROM_SUMMARY, Severity)
+
+    def get_severity(self, summary: str) -> str:  # returns the enum's string value (e.g. "high"), not a Severity model
+        result = self.run_agent(f"**Vulnerability Description:**\n{summary}")
+        return result.output.severity.value
 
-    def get_purl(self, cpe: str, pkg_type) -> Optional[PackageURL]:
-        result = self.run_agent(
-            f"**Vulnerability Known Affected Software Configurations CPE:**\n{cpe}\n **Package Type:**\n{pkg_type}"
-        )
-        purl = PackageURL.from_string(result.output.string)
-        return get_core_purl(purl)
+
+class CWEFromSummaryParser(BaseParser):  # extracts CWE identifiers from a vulnerability summary
+    def __init__(self):
+        super().__init__(PROMPT_CWE_FROM_SUMMARY, CWEList)  # (system prompt, structured output type)
+
+    def get_cwes(self, summary: str) -> List[str]:
+        result = self.run_agent(f"**Vulnerability Description:**\n{summary}")
+        return [cwe.string for cwe in result.output.cwes]  # unwrap each CWE model to its "CWE-<n>" string
 
 
 class VulnerabilityAgent:
@@ -130,12 +180,20 @@ def __init__(self):
         self.purl_parser = PurlFromSummaryParser()
         self.versions_parser = VersionsFromSummaryParser()
         self.cpe_parser = PurlFromCPEParser()
+        self.severity_parser = SeverityFromSummaryParser()
+        self.cwe_parser = CWEFromSummaryParser()
 
     def get_purl_from_summary(self, summary: str):
         return self.purl_parser.get_purl(summary)
 
     def get_version_ranges(self, summary: str, ecosystem: str):
         return self.versions_parser.get_version_ranges(summary, ecosystem)
 
-    def get_purl_from_cpe(self, cpe: str, purl_with_no_version: str):
-        return self.cpe_parser.get_purl(cpe, purl_with_no_version)
+    def get_purl_from_cpe(self, cpe: str, pkg_type: str):
+        return self.cpe_parser.get_purl(cpe, pkg_type)  # delegates to the PurlFromCPEParser created in __init__
+
+    def get_severity_from_summary(self, summary: str):
+        return self.severity_parser.get_severity(summary)  # delegates to SeverityFromSummaryParser; yields the severity's string value
+
+    def get_cwe_from_summary(self, summary: str):
+        return self.cwe_parser.get_cwes(summary)  # delegates to CWEFromSummaryParser; yields a list of "CWE-<n>" strings
diff --git a/prompts.py b/prompts.py
@@ -113,3 +113,19 @@
 - Do not assume or hallucinate any values.
 
 """
+
+PROMPT_SEVERITY_FROM_SUMMARY = """You are a cybersecurity expert. Based on the following vulnerability description, determine its severity level as one of: Low, Medium, High, or Critical. 
+
+Consider the impact on confidentiality, integrity, availability, and whether user interaction is required. 
+
+Return **only** the severity level (Low, Medium, High, or Critical).
+"""  # NOTE(review): levels are capitalized here but SeverityEnum values are lowercase — confirm model output still validates
+
+PROMPT_CWE_FROM_SUMMARY = """You are a Vulnerability Management Expert. 
+Based on the following vulnerability description, identify all relevant CWE (Common Weakness Enumeration) IDs that categorize the underlying weaknesses.
+
+Use only valid CWE entries from the official MITRE CWE list (https://cwe.mitre.org/), such as CWE-79, CWE-89, CWE-287, etc.
+
+Return **CWE IDs**, for example:
+["CWE-79", "CWE-89"]
+"""  # system prompt for agent.CWEFromSummaryParser, whose output type is CWEList