Skip to content

Commit a33f85b

Browse files
committed
Modify Gitlab v2 importer to support package-first mode #1918
* Update the GitLab v2 importer to filter and process only the advisories relevant to the purl passed to the constructor.

Signed-off-by: Michael Ehab Mikhail <[email protected]>
1 parent f690ad7 commit a33f85b

File tree

2 files changed

+172
-30
lines changed

2 files changed

+172
-30
lines changed

vulnerabilities/pipelines/v2_importers/gitlab_importer.py

Lines changed: 172 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
from vulnerabilities.utils import build_description
3232
from vulnerabilities.utils import get_advisory_url
3333
from vulnerabilities.utils import get_cwe_id
34+
from vulntotal.datasources.gitlab import get_casesensitive_slug
35+
from vulntotal.datasources.gitlab_api import fetch_gitlab_advisories_for_purl
36+
from vulntotal.datasources.gitlab_api import get_estimated_advisories_count
3437

3538

3639
class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
@@ -45,9 +48,22 @@ class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
4548
license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
4649
repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"
4750
unfurl_version_ranges = True
51+
is_batch_run = True
52+
53+
def __init__(self, *args, purl=None, **kwargs):
    """
    Initialize the pipeline, optionally in package-first mode.

    ``purl`` is the package URL to restrict the import to. When a purl is
    provided, the ``is_batch_run`` flag is switched off; ``steps()`` reads
    that flag to decide whether the repository clone step is included.
    """
    super().__init__(*args, **kwargs)
    self.purl = purl
    # If a purl is provided, we are running in package-first mode.
    if self.purl:
        # ``steps`` is a classmethod that reads ``cls.is_batch_run``, so the
        # flag is set on the class itself. The class name must be spelled
        # ``GitLabImporterPipeline``: any other spelling is an undefined
        # name and raises NameError as soon as a purl is passed.
        # NOTE(review): this changes the flag for every instance of the
        # class and nothing here resets it to True; confirm that is intended.
        GitLabImporterPipeline.is_batch_run = False
4859

4960
@classmethod
5061
def steps(cls):
62+
if not cls.is_batch_run:
63+
return (
64+
cls.collect_and_store_advisories,
65+
cls.clean_downloads,
66+
)
5167
return (
5268
cls.clone,
5369
cls.collect_and_store_advisories,
@@ -69,14 +85,50 @@ def steps(cls):
6985
gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()}
7086

7187
def clone(self):
    """
    Fetch the advisories repository; does nothing outside batch mode.
    """
    if not self.is_batch_run:
        return

    self.log(f"Cloning `{self.repo_url}`")
    self.vcs_response = fetch_via_vcs(self.repo_url)
7491

7592
def advisories_count(self):
    """
    Return the number of advisories to process.

    In batch mode this counts the ``*.yml`` files found recursively under
    ``self.vcs_response.dest_dir``. Otherwise it returns the value of
    ``get_estimated_advisories_count`` for ``self.purl``.
    """
    if not self.is_batch_run:
        return get_estimated_advisories_count(
            self.purl, self.supported_ecosystem(), get_casesensitive_slug
        )

    checkout_dir = Path(self.vcs_response.dest_dir)
    yml_files = checkout_dir.rglob("*.yml")
    return sum(1 for _ in yml_files)
78100

79101
def collect_advisories(self) -> Iterable[AdvisoryData]:
102+
if not self.is_batch_run:
103+
advisories = fetch_gitlab_advisories_for_purl(
104+
self.purl, self.supported_ecosystem(), get_casesensitive_slug
105+
)
106+
107+
input_version = self.purl.version
108+
vrc = RANGE_CLASS_BY_SCHEMES[self.purl.type]
109+
version_obj = vrc.version_class(input_version) if input_version else None
110+
111+
for advisory in advisories:
112+
advisory_data = self._advisory_dict_to_advisory_data(advisory)
113+
# If purl has version, we need to check if advisory affects the version
114+
if input_version:
115+
affected = False
116+
for affected_package in advisory_data.affected_packages:
117+
vrange = affected_package.affected_version_range
118+
fixed_version = affected_package.fixed_version
119+
if vrange and version_obj in vrange:
120+
if fixed_version:
121+
fixed_version_obj = vrc.version_class(str(fixed_version))
122+
if version_obj >= fixed_version_obj:
123+
continue
124+
affected = True
125+
break
126+
if affected:
127+
yield advisory_data
128+
else:
129+
yield advisory_data
130+
return
131+
80132
base_path = Path(self.vcs_response.dest_dir)
81133

82134
for file_path in base_path.rglob("*.yml"):
@@ -113,13 +165,22 @@ def collect_advisories(self) -> Iterable[AdvisoryData]:
113165
yield advisory
114166

115167
def clean_downloads(self):
    """
    Delete the cloned repository, if one was fetched during a batch run.
    """
    if not self.is_batch_run:
        return

    # ``vcs_response`` only exists once ``clone`` has run.
    vcs_response = getattr(self, "vcs_response", None)
    if vcs_response:
        self.log("Removing cloned repository")
        vcs_response.delete()
119171

120172
def on_failure(self):
    """
    Failure hook: delegate to ``clean_downloads``.
    """
    self.clean_downloads()
122174

175+
def _advisory_dict_to_advisory_data(self, advisory):
    """
    Convert a GitLab advisory dict to AdvisoryData using this pipeline's
    scheme mappings, logger and purl.
    """
    conversion_context = dict(
        purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme,
        gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type,
        logger=self.log,
        purl=self.purl,
    )
    return advisory_dict_to_advisory_data(advisory=advisory, **conversion_context)
183+
123184

124185
def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]:
125186
"""
@@ -326,3 +387,109 @@ def parse_gitlab_advisory(
326387
weaknesses=cwe_list,
327388
url=advisory_url,
328389
)
390+
391+
392+
def advisory_dict_to_advisory_data(
    advisory: dict,
    purl_type_by_gitlab_scheme,
    gitlab_scheme_by_purl_type,
    logger,
    purl=None,
    advisory_url=None,
):
    """
    Convert a GitLab advisory dict to AdvisoryData.

    ``advisory`` is read through the keys "identifiers", "title",
    "description", "urls", "cwe_ids", "pubdate", "package_slug",
    "fixed_versions" and "affected_range". When ``purl`` is not provided it
    is derived from "package_slug" with ``get_purl``. If no purl can be
    determined, an error is logged and an AdvisoryData without affected
    packages is returned.

    ``logger`` is called as ``logger(message, level=...)``. An unparsable
    "affected_range" or "fixed_versions" entry is logged and skipped rather
    than raised. A missing "pubdate" leaves ``date_published`` unset.
    """
    # ``or []`` also covers keys that are present with a null value, which
    # matches how "cwe_ids" is handled.
    aliases = advisory.get("identifiers") or []
    summary = build_description(advisory.get("title"), advisory.get("description"))
    references = [ReferenceV2.from_url(url) for url in advisory.get("urls") or []]
    cwe_list = [get_cwe_id(cwe_id) for cwe_id in advisory.get("cwe_ids") or []]
    # NOTE(review): the advisory "identifier" value is not propagated to the
    # result; confirm whether AdvisoryData should receive it.

    # Without a "pubdate" there is nothing to parse: keep the advisory and
    # leave the publication date unset instead of failing.
    date_published = None
    pubdate = advisory.get("pubdate")
    if pubdate:
        # The parsed value is stamped as UTC as-is, without any conversion.
        date_published = dateparser.parse(pubdate).replace(tzinfo=pytz.UTC)

    package_slug = advisory.get("package_slug")

    # Determine purl if not provided
    if not purl:
        purl = get_purl(
            package_slug=package_slug,
            purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
            logger=logger,
        )

    if not purl:
        logger(
            f"advisory_dict_to_advisory_data: purl is not valid: {package_slug!r}",
            level=logging.ERROR,
        )
        return AdvisoryData(
            aliases=aliases,
            summary=summary,
            references_v2=references,
            date_published=date_published,
            url=advisory_url,
        )

    vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type]
    gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
    # Schemes whose ranges are parsed with ``from_gitlab_native``; every other
    # scheme goes through the range class ``from_native``.
    gitlab_native_schemes = {"pypi", "gem", "npm", "go", "packagist", "conan"}

    affected_version_range = None
    affected_range = advisory.get("affected_range")
    if affected_range:
        try:
            if gitlab_scheme in gitlab_native_schemes:
                affected_version_range = from_gitlab_native(
                    gitlab_scheme=gitlab_scheme, string=affected_range
                )
            else:
                affected_version_range = vrc.from_native(affected_range)
        except Exception as e:
            # Best effort: an unparsable range is reported and the advisory
            # is still returned, just without that range.
            logger(
                f"advisory_dict_to_advisory_data: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
                level=logging.ERROR,
            )

    parsed_fixed_versions = []
    for fixed_version in advisory.get("fixed_versions") or []:
        try:
            parsed_fixed_versions.append(vrc.version_class(fixed_version))
        except Exception as e:
            # Best effort: skip this version and keep the ones that parse.
            logger(
                f"advisory_dict_to_advisory_data: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
                level=logging.ERROR,
            )

    if parsed_fixed_versions:
        affected_packages = list(
            extract_affected_packages(
                affected_version_range=affected_version_range,
                fixed_versions=parsed_fixed_versions,
                purl=purl,
            )
        )
    elif affected_version_range:
        affected_packages = [
            AffectedPackage(
                package=purl,
                affected_version_range=affected_version_range,
            )
        ]
    else:
        affected_packages = []

    return AdvisoryData(
        aliases=aliases,
        summary=summary,
        references_v2=references,
        date_published=date_published,
        affected_packages=affected_packages,
        weaknesses=cwe_list,
        url=advisory_url,
    )

vulntotal/datasources/gitlab.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
from packageurl import PackageURL
2121

2222
from vulntotal.datasources.gitlab_api import fetch_gitlab_advisories_for_purl
23-
from vulntotal.datasources.gitlab_api import fetch_yaml
2423
from vulntotal.validator import DataSource
2524
from vulntotal.validator import VendorData
2625
from vulntotal.vulntotal_utils import gitlab_constraints_satisfied
@@ -63,30 +62,6 @@ def supported_ecosystem(cls):
6362
}
6463

6564

66-
def get_package_slug(purl):
67-
"""
68-
Constructs a package slug from a given purl.
69-
70-
Parameters:
71-
purl: A PackageURL instance representing the package to query.
72-
73-
Returns:
74-
A string representing the package slug, or None if the purl type is not supported by GitLab.
75-
"""
76-
supported_ecosystem = GitlabDataSource.supported_ecosystem()
77-
78-
if purl.type not in supported_ecosystem:
79-
return
80-
81-
ecosystem = supported_ecosystem[purl.type]
82-
package_name = purl.name
83-
84-
if purl.type in ("maven", "composer", "golang"):
85-
package_name = f"{purl.namespace}/{purl.name}"
86-
87-
return f"{ecosystem}/{package_name}"
88-
89-
9065
def get_casesensitive_slug(path, package_slug):
9166
payload = [
9267
{

0 commit comments

Comments
 (0)