Skip to content

Modify NPM importer to support package-first mode #1941

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 49 additions & 5 deletions vulnerabilities/pipelines/npm_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@

# Author: Navonil Das (@NavonilDas)

import json
import os
import tempfile
from pathlib import Path
from typing import Iterable

import pytz
import requests
from dateutil.parser import parse
from fetchcode.vcs import fetch_via_vcs
from packageurl import PackageURL
from univers.version_range import NpmVersionRange
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
Expand All @@ -39,14 +44,24 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipeline):
repo_url = "git+https://github.com/nodejs/security-wg"
importer_name = "Npm Importer"

is_batch_run = True

def __init__(self, *args, purl=None, **kwargs):
super().__init__(*args, **kwargs)
self.purl = purl
if self.purl:
NpmImporterPipeline.is_batch_run = False
if self.purl.type != "npm":
print(f"Warning: This importer handles NPM packages. Current PURL: {self.purl!s}")

@classmethod
def steps(cls):
return (
return [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be a tuple as well to maintain code consistency. I will change this.

cls.clone,
cls.collect_and_store_advisories,
cls.import_new_advisories,
cls.clean_downloads,
)
]

def clone(self):
self.log(f"Cloning `{self.repo_url}`")
Expand All @@ -58,9 +73,26 @@ def advisories_count(self):

def collect_advisories(self) -> Iterable[AdvisoryData]:
vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"

for advisory in vuln_directory.glob("*.json"):
yield from self.to_advisory_data(advisory)
advisory_files = list(vuln_directory.glob("*.json"))

if not self.is_batch_run:
package_name = self.purl.name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why we not actually check in our DB if the purl exists, before even checking all files ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does that guarantee that we won't find new advisories if we have the purl in DB?
As far as I understand, in order to find out whether we have new advisories, we have to check all files.

filtered_files = []
for advisory_file in advisory_files:
try:
data = load_json(advisory_file)
if data.get("module_name") == package_name:
affected_package = self.get_affected_package(data, package_name)
if not self.purl.version or self._version_is_affected(affected_package):
filtered_files.append(advisory_file)
except Exception as e:
self.log(f"Error processing advisory file {advisory_file}: {str(e)}")
advisory_files = filtered_files

for advisory in list(advisory_files):
for result in self.to_advisory_data(advisory):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why we are not using yield from here ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean?

for advisory in list(advisory_files):
   yield from self.to_advisory_data(advisory)

if result:
yield result

def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
data = load_json(file)
Expand Down Expand Up @@ -112,6 +144,11 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
affected_packages.append(self.get_affected_package(data, package_name))
advsisory_aliases = data.get("cves") or []

if self.purl and self.purl.version:
affected_package = affected_packages[0] if affected_packages else None
if affected_package and not self._version_is_affected(affected_package):
return

for alias in advsisory_aliases:
yield AdvisoryData(
summary=build_description(summary=summary, description=description),
Expand All @@ -122,6 +159,13 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
)

def _version_is_affected(self, affected_package):
if not self.purl.version or not affected_package.affected_version_range:
return True

purl_version = SemverVersion(self.purl.version)
return purl_version in affected_package.affected_version_range

def get_affected_package(self, data, package_name):
affected_version_range = None
unaffected_version_range = None
Expand Down
56 changes: 53 additions & 3 deletions vulnerabilities/pipelines/v2_importers/npm_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,19 @@

# Author: Navonil Das (@NavonilDas)

import json
import os
import tempfile
from pathlib import Path
from typing import Iterable

import pytz
import requests
from dateutil.parser import parse
from fetchcode.vcs import fetch_via_vcs
from packageurl import PackageURL
from univers.version_range import NpmVersionRange
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
Expand All @@ -42,6 +47,16 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
repo_url = "git+https://github.com/nodejs/security-wg"
unfurl_version_ranges = True

is_batch_run = True

def __init__(self, *args, purl=None, **kwargs):
super().__init__(*args, **kwargs)
self.purl = purl
if self.purl:
NpmImporterPipeline.is_batch_run = False
if self.purl.type != "npm":
print(f"Warning: This importer handles NPM packages. Current PURL: {self.purl!s}")

@classmethod
def steps(cls):
return (
Expand All @@ -60,9 +75,26 @@ def advisories_count(self):

def collect_advisories(self) -> Iterable[AdvisoryData]:
vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"

for advisory in vuln_directory.glob("*.json"):
yield self.to_advisory_data(advisory)
advisory_files = list(vuln_directory.glob("*.json"))

if not self.is_batch_run:
package_name = self.purl.name
filtered_files = []
for advisory_file in advisory_files:
try:
data = load_json(advisory_file)
if data.get("module_name") == package_name:
affected_package = self.get_affected_package(data, package_name)
if not self.purl.version or self._version_is_affected(affected_package):
filtered_files.append(advisory_file)
except Exception as e:
self.log(f"Error processing advisory file {advisory_file}: {str(e)}")
advisory_files = filtered_files

for advisory in list(advisory_files):
result = self.to_advisory_data(advisory)
if result:
yield result

def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
if file.name == "index.json":
Expand Down Expand Up @@ -121,6 +153,11 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
affected_packages.append(self.get_affected_package(data, package_name))
advsisory_aliases = data.get("cves") or []

if self.purl and self.purl.version:
affected_package = affected_packages[0] if affected_packages else None
if affected_package and not self._version_is_affected(affected_package):
return

return AdvisoryData(
advisory_id=f"npm-{id}",
aliases=advsisory_aliases,
Expand All @@ -132,6 +169,13 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]:
url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
)

def _version_is_affected(self, affected_package):
if not self.purl.version or not affected_package.affected_version_range:
return True

purl_version = SemverVersion(self.purl.version)
return purl_version in affected_package.affected_version_range

def get_affected_package(self, data, package_name):
affected_version_range = None
unaffected_version_range = None
Expand Down Expand Up @@ -174,5 +218,11 @@ def clean_downloads(self):
self.log(f"Removing cloned repository")
self.vcs_response.delete()

if hasattr(self, "temp_dir") and os.path.exists(self.temp_dir):
import shutil

self.log(f"Removing temporary directory")
shutil.rmtree(self.temp_dir)

def on_failure(self):
self.clean_downloads()
132 changes: 132 additions & 0 deletions vulnerabilities/tests/pipelines/test_npm_importer_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import json
import os
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch

from packageurl import PackageURL
Expand Down Expand Up @@ -77,3 +78,134 @@ def test_npm_improver(mock_response):
result.extend(inference)
expected_file = os.path.join(TEST_DATA, f"npm-improver-expected.json")
util_tests.check_results_against_json(result, expected_file)


def test_package_first_mode_valid_npm_package(tmp_path):
vuln_dir = tmp_path / "vuln" / "npm"
vuln_dir.mkdir(parents=True)

npm_sample_file = os.path.join(TEST_DATA, "npm_sample.json")
with open(npm_sample_file) as f:
sample_data = json.load(f)

advisory_file = vuln_dir / "152.json"
advisory_file.write_text(json.dumps(sample_data))

mock_vcs_response = SimpleNamespace(dest_dir=str(tmp_path), delete=lambda: None)

purl = PackageURL(type="npm", name="npm", version="1.2.0")
pipeline = NpmImporterPipeline(purl=purl)
pipeline.vcs_response = mock_vcs_response

advisories = list(pipeline.collect_advisories())

assert len(advisories) == 1
assert advisories[0].aliases == ["CVE-2013-4116"]
assert len(advisories[0].affected_packages) == 1
assert advisories[0].affected_packages[0].package.name == "npm"


def test_package_first_mode_unaffected_version(tmp_path):
vuln_dir = tmp_path / "vuln" / "npm"
vuln_dir.mkdir(parents=True)

npm_sample_file = os.path.join(TEST_DATA, "npm_sample.json")
with open(npm_sample_file) as f:
sample_data = json.load(f)

advisory_file = vuln_dir / "152.json"
advisory_file.write_text(json.dumps(sample_data))

mock_vcs_response = SimpleNamespace(dest_dir=str(tmp_path), delete=lambda: None)

purl = PackageURL(type="npm", name="npm", version="1.4.0")
pipeline = NpmImporterPipeline(purl=purl)
pipeline.vcs_response = mock_vcs_response

advisories = list(pipeline.collect_advisories())

assert len(advisories) == 0


def test_package_first_mode_invalid_package_type(tmp_path):
vuln_dir = tmp_path / "vuln" / "npm"
vuln_dir.mkdir(parents=True)

mock_vcs_response = SimpleNamespace(dest_dir=str(tmp_path), delete=lambda: None)

purl = PackageURL(type="pypi", name="django", version="3.0.0")
pipeline = NpmImporterPipeline(purl=purl)
pipeline.vcs_response = mock_vcs_response

advisories = list(pipeline.collect_advisories())

assert len(advisories) == 0


def test_package_first_mode_package_not_found(tmp_path):
vuln_dir = tmp_path / "vuln" / "npm"
vuln_dir.mkdir(parents=True)

npm_sample_file = os.path.join(TEST_DATA, "npm_sample.json")
with open(npm_sample_file) as f:
sample_data = json.load(f)

sample_data["module_name"] = "some-other-package"

advisory_file = vuln_dir / "152.json"
advisory_file.write_text(json.dumps(sample_data))

mock_vcs_response = SimpleNamespace(dest_dir=str(tmp_path), delete=lambda: None)

purl = PackageURL(type="npm", name="nonexistent-package", version="1.0.0")
pipeline = NpmImporterPipeline(purl=purl)
pipeline.vcs_response = mock_vcs_response

advisories = list(pipeline.collect_advisories())

assert len(advisories) == 0


def test_package_first_mode_missing_vuln_directory(tmp_path):
mock_vcs_response = SimpleNamespace(dest_dir=str(tmp_path), delete=lambda: None)

purl = PackageURL(type="npm", name="npm", version="1.0.0")
pipeline = NpmImporterPipeline(purl=purl)
pipeline.vcs_response = mock_vcs_response

advisories = list(pipeline.collect_advisories())

assert len(advisories) == 0


def test_version_is_affected():
purl = PackageURL(type="npm", name="npm", version="1.2.0")
pipeline = NpmImporterPipeline(purl=purl)

affected_package = AffectedPackage(
package=PackageURL(type="npm", name="npm"),
affected_version_range=NpmVersionRange(
constraints=(VersionConstraint(comparator="<", version=SemverVersion(string="1.3.3")),)
),
)

assert pipeline._version_is_affected(affected_package) == True

pipeline.purl = PackageURL(type="npm", name="npm", version="1.4.0")
assert pipeline._version_is_affected(affected_package) == False

pipeline.purl = PackageURL(type="npm", name="npm")
assert pipeline._version_is_affected(affected_package) == True

affected_package_no_range = AffectedPackage(
package=PackageURL(type="npm", name="npm"),
affected_version_range=None,
fixed_version=SemverVersion(string="1.3.3"),
)
assert pipeline._version_is_affected(affected_package_no_range) == True
affected_package_no_range = AffectedPackage(
package=PackageURL(type="npm", name="npm"),
affected_version_range=None,
fixed_version=SemverVersion(string="1.3.3"),
)
assert pipeline._version_is_affected(affected_package_no_range) == True
Loading