diff --git a/README.md b/README.md index add02a1..a9773a3 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,8 @@ In all cases, Sonatype IQ Server versions 150 and newer have been confirmed as s - [Known Security Issues](#known-security-issues) - [Open Source License(s)](#open-source-licenses) - [Additional Feature Support](#additional-feature-support) +- [Caveats](#caveats) + - [PyPi Packages with No Source Distribution](#pypi-packages-with-no-source-distribution) - [Development](#development) - [Uninstallation](#uninstallation) - [Version History](#version-history) @@ -67,7 +69,7 @@ In all cases, Sonatype IQ Server versions 150 and newer have been confirmed as s | NPM JS | Javascript | ✅ | `https://www.npmjs.com/` | ✅ | | NuGet Gallery | .NET | ✅ | `https://www.nuget.org/` | ✅ | | Packagist | PHP | ✅ | `https://packagist.org/` | ✅ | -| PyPI | Python | ✅ | `https://pypi.org/` | ✅ | +| PyPI | Python | ✅ ^3 | `https://pypi.org/` | ✅ | | RubGems | Ruby | ✅ | `https://rubygems.org/` | ✅ | | Spring.io | Java | ❌ ^2 | `https://repo.spring.io/list/` | N/A | @@ -75,7 +77,7 @@ _Notes:_ 1. See issue [#36](https://github.com/sonatype-nexus-community/sonatype-platform-browser-extension/issues/36) 2. Run on a public instance of jFrog Artifactory - support coming soon -3. Where the Public Registry maintains pages for all versions, navigation to specific versions can be supported +3. By default we look up the Source Distribution. Where no Source Distribution is published we look up the first Built Distribution - this can lead to an incomplete view of risk - [read more](#pypi-packages-with-no-source-distribution) ### Private Hosted Registries @@ -223,6 +225,18 @@ Current and future additional features are available based on the additional cap - [Extended Observed License Detections](https://help.sonatype.com/iqserver/configuring/advanced-legal-pack-extended-observed-license-detections) - When enabled, the browser extenstion shows the observed licenses detected for that component. 
+## Caveats + +### PyPi Packages with No Source Distribution + +There are a few examples of projects published to PyPi (such as [mediapipe](https://pypi.org/project/mediapipe/)) that have not published a Source Distribution. + +By default, when the Sonatype Platform Browser Extension looks up data on PyPi packages, we default to looking up information based on its Source Distribution - +this has no consideration as to your Python Version or Architecture. + +When looking up data based on a Built Distribution, this can include the Python Version and/or Architecture, and this may not provide an accurate representation +of the risks associated with your use of the Package if your Python Version and/or Architecture differ from the first Built Distribution in the list. + ## Development We use Node 18 and Yarn 1.22.x. diff --git a/src/utils/PageParsing/PyPI.test.ts b/src/utils/PageParsing/PyPI.test.ts index 859d44b..424b888 100644 --- a/src/utils/PageParsing/PyPI.test.ts +++ b/src/utils/PageParsing/PyPI.test.ts @@ -102,4 +102,21 @@ describe('PyPI Page Parsing', () => { expect(packageURL?.version).toBe('19.2.0') expect(packageURL?.qualifiers).toEqual({ extension: 'tar.bz2' }) }) + + test('should parse valid PyPI page where there is no SOURCE distribution', () => { + const html = readFileSync(join(__dirname, 'testdata/pypi-mediapipe-0.10.14.html')) + + window.document.body.innerHTML = html.toString() + + const packageURL = getArtifactDetailsFromDOM( + ensure(repoType), + 'https://pypi.org/project/mediapipe/0.10.14/' + ) + + expect(packageURL).toBeDefined() + expect(packageURL?.type).toBe(FORMATS.pypi) + expect(packageURL?.name).toBe('mediapipe') + expect(packageURL?.version).toBe('0.10.14') + expect(packageURL?.qualifiers).toEqual({ extension: 'whl', qualifier: 'cp312-cp312-win_amd64' }) + }) }) diff --git a/src/utils/PageParsing/PyPI.ts b/src/utils/PageParsing/PyPI.ts index e30cc4c..3173c8c 100644 --- a/src/utils/PageParsing/PyPI.ts +++ b/src/utils/PageParsing/PyPI.ts @@ 
-19,11 +19,8 @@ import { FORMATS, REPOS, REPO_TYPES } from '../Constants' import { generatePackageURL } from './PurlUtils' const PYPI_DEFAULT_EXTENSION = 'tar.gz' -const PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS = [ - PYPI_DEFAULT_EXTENSION, - 'tar.bz2' -] -const PYPI_EXTENSION_SELECTOR = '#files > div.file div.card A:nth-child(1)' +const PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS = [PYPI_DEFAULT_EXTENSION, 'tar.bz2'] +const PYPI_EXTENSION_SELECTOR = '#files > div.file div.file__card A:nth-child(1)' const parsePyPIURL = (url: string): PackageURL | undefined => { const repoType = REPO_TYPES.find((e) => e.repoID == REPOS.pypiOrg) @@ -36,28 +33,39 @@ const parsePyPIURL = (url: string): PackageURL | undefined => { const pageVersion = $(repoType.versionDomPath).text().trim().split(' ')[1] console.debug(`URL Version: ${pathResult.groups.version}, Page Version: ${pageVersion}`) const firstDistributionFilename = $(PYPI_EXTENSION_SELECTOR).first().text().trim() - let extension = '' - if (firstDistributionFilename !== undefined) { - // Loop all known source distribution extensions checking if the first Source Distribution matches - // If it does, use that known extension - for (const i in PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS) { - if (firstDistributionFilename.endsWith(PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS[i])) { - extension = PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS[i] - break - } + let candidateExtension: string | undefined = undefined + for (const i in PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS) { + if (firstDistributionFilename.endsWith(PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS[i])) { + candidateExtension = PYPI_KNOWN_SOURCE_DISTRIBUTION_EXTENSIONS[i] + break } + } + let extension + if (candidateExtension === undefined) { + extension = firstDistributionFilename.split('.').pop() as string + } else { + extension = candidateExtension + } - // If we still haven't identified an extension for the Source Distribution, pop the last part of the filename - // as the 
extension - if (extension === '' && !firstDistributionFilename.endsWith(PYPI_DEFAULT_EXTENSION)) { - extension = firstDistributionFilename.split('.').pop() as string - } + console.debug( + `Parsing ${firstDistributionFilename} - given: Artifact ID = ${pathResult.groups.artifactId}, Version = ${pageVersion}, Extension = ${extension}` + ) + const start = pathResult.groups.artifactId.length + pageVersion.length + 2 + const end = firstDistributionFilename.length - extension.length - 1 + const qualifier = firstDistributionFilename.substring(start, end) + + const qualifiers = { + extension: extension, + } + if (qualifier.length > 1) { + qualifiers['qualifier'] = qualifier } + return generatePackageURL( FORMATS.pypi, pathResult.groups.artifactId, pathResult.groups.version !== undefined ? pathResult.groups.version : pageVersion, - { extension: extension } + qualifiers ) } } else { diff --git a/src/utils/PageParsing/testdata/CentralSonatypeCom.html b/src/utils/PageParsing/testdata/CentralSonatypeCom.html index b7fa3e2..c33d9a3 100644 --- a/src/utils/PageParsing/testdata/CentralSonatypeCom.html +++ b/src/utils/PageParsing/testdata/CentralSonatypeCom.html @@ -1,16 +1,16 @@ -