Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions minecode_pipelines/miners/composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
from minecode_pipelines.utils import get_temp_file
import requests
from packageurl import PackageURL


def get_composer_packages():
    """
    Fetch the list of all Composer package names from Packagist and save it
    to a temporary JSON file.

    Return the path of that file, or None when the list cannot be downloaded
    or the response body is not valid JSON.
    """
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get("https://packagist.org/packages/list.json", timeout=30)
    except requests.RequestException:
        # Connection-level failures are reported the same way as HTTP errors.
        return None

    if not response.ok:
        return None

    try:
        packages = response.json()
    except ValueError:
        # The body was not JSON, for instance an HTML error page.
        return None

    temp_file = get_temp_file("ComposerPackages", "json")
    with open(temp_file, "w", encoding="utf-8") as f:
        json.dump(packages, f, indent=4)

    return temp_file


def get_composer_purl(vendor, package):
    """
    Return a list of Package URL (purl) strings, one for each release of the
    Composer package ``vendor/package`` listed in its Packagist metadata.

    Return an empty list when the metadata cannot be fetched, is not valid
    JSON, or does not have the expected structure.

    For example::

        get_composer_purl("monolog", "monolog")
        -> ["pkg:composer/monolog/monolog@3.9.0", "pkg:composer/monolog/monolog@3.8.1", ...]
    """
    url = f"https://repo.packagist.org/p2/{vendor}/{package}.json"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        # Decoding is kept inside the try block: a non-JSON body raises a
        # ValueError subclass that must not escape to the caller.
        data = response.json()
    except (requests.RequestException, ValueError):
        return []

    if not isinstance(data, dict):
        return []

    # ``or`` also covers keys that are present but set to null.
    packages = data.get("packages") or {}
    if not isinstance(packages, dict):
        return []

    releases = packages.get(f"{vendor}/{package}") or []

    purls = []
    for release in releases:
        if not isinstance(release, dict):
            continue

        version = release.get("version")
        if not version:
            continue

        purl = PackageURL(
            type="composer",
            namespace=vendor,
            name=package,
            version=version,
        )
        purls.append(purl.to_string())

    return purls


def load_composer_packages(packages_file):
    """
    Read the JSON document stored at ``packages_file`` and return the entries
    of its "packageNames" list as (vendor, package) tuples.

    Names are split on their first "/" only. Names without a "/" are skipped.
    """
    with open(packages_file, encoding="utf-8") as stream:
        names = json.load(stream).get("packageNames", [])

    return [tuple(name.split("/", 1)) for name in names if "/" in name]
69 changes: 69 additions & 0 deletions minecode_pipelines/pipelines/mine_composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.

import os
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import federatedcode
from minecode_pipelines.pipes.composer import mine_composer_packages
from minecode_pipelines.pipes.composer import mine_and_publish_composer_purls

# Git URL of the FederatedCode repository cloned by the pipeline below.
# It can be overridden with the environment variable of the same name.
FEDERATEDCODE_COMPOSER_GIT_URL = os.getenv(
    "FEDERATEDCODE_COMPOSER_GIT_URL",
    "https://github.com/ziadhany/composer-test",
)


class MineandPublishComposerPURLs(Pipeline):
    """
    Collect the packageURLs of the packages found in a composer index and
    publish them to a FederatedCode git repository.
    """

    @classmethod
    def steps(cls):
        pipeline_steps = (
            cls.check_federatedcode_eligibility,
            cls.clone_fed_repo,
            cls.mine_composer_packages,
            cls.mine_and_publish_composer_purls,
        )
        return pipeline_steps

    def check_federatedcode_eligibility(self):
        """
        Run the FederatedCode eligibility check on this pipeline's project
        before any result is pushed.
        """
        project = self.project
        federatedcode.check_federatedcode_eligibility(project=project)

    def clone_fed_repo(self):
        """
        Clone the FederatedCode composer repository and keep the resulting
        repo object on ``self.fed_repo`` for the following steps.
        """
        repo = federatedcode.clone_repository(FEDERATEDCODE_COMPOSER_GIT_URL)
        self.fed_repo = repo

    def mine_composer_packages(self):
        """Collect composer package names and keep them on ``self.composer_packages``."""
        packages = mine_composer_packages(logger=self.log)
        self.composer_packages = packages

    def mine_and_publish_composer_purls(self):
        """Mine the packageURLs of every collected package and publish them."""
        mine_and_publish_composer_purls(
            packages=self.composer_packages,
            fed_repo=self.fed_repo,
            logger=self.log,
        )
39 changes: 37 additions & 2 deletions minecode_pipelines/pipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,17 @@
#

import os
import textwrap
import saneyaml

from pathlib import Path

from aboutcode.hashid import PURLS_FILENAME

# Settings read from the environment and used when committing and pushing.
# Tool version and reference host quoted in the commit message.
VERSION = os.getenv("VERSION", "")
PURLDB_ALLOWED_HOST = os.getenv("FEDERATEDCODE_GIT_ALLOWED_HOST", "")
# Identity used on the "Signed-off-by" line of the commit message.
author_name = os.getenv("FEDERATEDCODE_GIT_SERVICE_NAME", "")
author_email = os.getenv("FEDERATEDCODE_GIT_SERVICE_EMAIL", "")
# Name of the git remote that commits are pushed to.
remote_name = os.getenv("FEDERATEDCODE_GIT_REMOTE_NAME", "origin")


def write_packageurls_to_file(repo, base_dir, packageurls):
purl_file_rel_path = os.path.join(base_dir, PURLS_FILENAME)
Expand All @@ -26,3 +31,33 @@ def write_data_to_file(path, data):
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, encoding="utf-8", mode="w") as f:
f.write(saneyaml.dump(data))


def git_stage_purls(purls, repo, purls_file):
    """
    Save ``purls`` to ``purls_file`` under the working directory of ``repo``
    and add that file to the repository index.

    Return ``purls_file`` as a ``Path``.
    """
    staged_path = Path(purls_file)
    destination = Path(repo.working_dir).joinpath(staged_path)

    write_data_to_file(path=destination, data=purls)
    repo.index.add([staged_path])

    return staged_path


def commit_and_push_changes(repo):
    """
    Commit the changes staged in the index of ``repo`` and push them to the
    configured remote on the currently active branch.

    The commit message names the tool version and reference host and ends with
    a ``Signed-off-by`` line built from the configured service identity.
    """
    # Git expects a one-line subject, a blank line, then the body. The
    # "Signed-off-by" line gets its own final paragraph: git only recognises
    # trailers in a group of lines preceded by a blank line.
    commit_message = "\n".join(
        [
            "Add/Update list of available package versions",
            "",
            f"Tool: pkg:github/aboutcode-org/purldb@v{VERSION}",
            f"Reference: https://{PURLDB_ALLOWED_HOST}/",
            "",
            f"Signed-off-by: {author_name} <{author_email}>",
            "",
        ]
    )

    default_branch = repo.active_branch.name
    repo.index.commit(commit_message)
    # "--no-verify" skips the pre-push hook.
    repo.git.push(remote_name, default_branch, "--no-verify")
70 changes: 70 additions & 0 deletions minecode_pipelines/pipes/composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/aboutcode-org/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/aboutcode-org/scancode.io for support and download.

from aboutcode.hashid import get_package_base_dir
from minecode_pipelines.miners import write_packageurls_to_file
from minecode_pipelines.miners.composer import get_composer_packages
from minecode_pipelines.miners.composer import load_composer_packages
from minecode_pipelines.miners.composer import get_composer_purl
from minecode_pipelines.pipes import commit_and_push_changes
from minecode_pipelines.pipes import git_stage_purls


def mine_composer_packages(logger=None):
    """
    Mine Composer package names from Packagist and return a list of
    (vendor, package) tuples.

    Return an empty list when the package list cannot be fetched. The failure
    is reported through ``logger`` when one is provided.
    """
    packages_file = get_composer_packages()
    if not packages_file:
        # get_composer_packages() returns None on failure; passing that on
        # would fail inside open() with an opaque TypeError.
        if logger:
            logger("Unable to fetch the list of Composer packages from Packagist")
        return []

    return load_composer_packages(packages_file)


def mine_and_publish_composer_purls(packages, fed_repo, logger=None, batch_size=1000):
    """
    Mine the packageURLs of each (vendor, package) pair in ``packages`` and
    publish them to the FederatedCode repository ``fed_repo``.

    The purls of each package are written to a file in the repository and
    staged. Staged files are committed and pushed every ``batch_size``
    packages, and once more at the end when anything is still pending.
    Packages for which no purl is found are skipped. Progress is reported
    through ``logger`` when one is provided.
    """
    pending = 0
    for vendor, package in packages:
        if logger:
            logger(f"getting packageURLs for package: {vendor}/{package}")

        purls = get_composer_purl(vendor, package)
        if not purls:
            continue

        # Any purl of the package can be used to compute its base directory.
        base_purl = purls[0]
        package_base_dir = get_package_base_dir(purl=base_purl)

        if logger:
            logger(f"writing packageURLs for package: {base_purl} at: {package_base_dir}")
            purls_string = " ".join(purls)
            logger(f"packageURLs: {purls_string}")

        purl_file = write_packageurls_to_file(
            repo=fed_repo,
            base_dir=package_base_dir,
            packageurls=purls,
        )
        git_stage_purls(repo=fed_repo, purls_file=purl_file, purls=purls)

        pending += 1
        if pending >= batch_size:
            commit_and_push_changes(repo=fed_repo)
            pending = 0

    # Commit only when something is staged since the last push. Otherwise an
    # empty commit would be created and pushed.
    if pending:
        commit_and_push_changes(repo=fed_repo)
60 changes: 60 additions & 0 deletions minecode_pipelines/tests/pipes/test_composer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
from pathlib import Path
from unittest.mock import patch, MagicMock
from django.test import SimpleTestCase

from minecode_pipelines.miners.composer import (
get_composer_packages,
load_composer_packages,
get_composer_purl,
)

DATA_DIR = Path(__file__).parent.parent / "test_data" / "composer"


class ComposerPipelineTests(SimpleTestCase):
    @patch("requests.get")
    def test_generate_purls_from_composer(self, mock_get):
        """
        Mine composer packages and generate their PURLs while Packagist requests
        are mocked with the JSON fixtures stored in test_data/composer.
        """

        def read_fixture(filename):
            # Load one JSON fixture from the composer test data directory.
            with open(DATA_DIR / filename, encoding="utf-8") as stream:
                return json.load(stream)

        def fake_response(payload):
            # Build a successful mocked HTTP response returning ``payload``.
            response = MagicMock()
            response.ok = True
            response.json.return_value = payload
            return response

        expected_output = read_fixture("expected_output.json")

        # First call: the package list. Second call: the package details.
        mock_get.side_effect = [
            fake_response(read_fixture("packages_list.json")),
            fake_response(read_fixture("package_details.json")),
        ]

        packages = load_composer_packages(get_composer_packages())

        all_purls = [
            purl
            for vendor, package in packages
            for purl in get_composer_purl(vendor, package)
        ]

        assert len(all_purls) == 85
        assert all_purls == expected_output
Loading