Skip to content

Fix and enhance support for different bazel metadata versions #4194

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ The following organizations or individuals have contributed to ScanCode:
- Abhigyan Kumar Singh @Abhigyankrsingh
- Abhishek Kumar @Abhishek-Dev09
- Aditya Viki @adityaviki
- Adrian Braemer @abraemer
- Agni Bhattacharyya @PyAgni
- Akanksha Garg @akugarg
- Alex Blekhman @a-tinsmith
Expand Down
94 changes: 46 additions & 48 deletions src/packagedcode/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from collections import defaultdict

from commoncode import fileutils
from packageurl import PackageURL

from licensedcode.cache import build_spdx_license_expression
from licensedcode.cache import get_cache
Expand Down Expand Up @@ -374,54 +375,51 @@ def parse(cls, location, package_only=True):
)
)

if (
'upstream_type'
and 'name'
and 'version'
and 'licenses'
and 'upstream_address'
in metadata_fields
):
# TODO: Create function that determines package type from download URL,
# then create a package of that package type from the metadata info
package_data = dict(
datasource_id=cls.datasource_id,
type=metadata_fields.get('upstream_type', cls.default_package_type),
name=metadata_fields.get('name'),
version=metadata_fields.get('version'),
extracted_license_statement=metadata_fields.get('licenses', []),
parties=parties,
homepage_url=metadata_fields.get('upstream_address', ''),
# TODO: Store 'upstream_hash` somewhere
)
yield models.PackageData.from_data(package_data, package_only=True)

if (
'package_type'
and 'name'
and 'version'
and 'license_expression'
and 'homepage_url'
and 'download_url'
and 'vcs_url'
and 'download_archive_sha1'
and 'vcs_commit_hash'
in metadata_fields
):
package_data = dict(
datasource_id=cls.datasource_id,
type=metadata_fields.get('package_type', cls.default_package_type),
name=metadata_fields.get('name'),
version=metadata_fields.get('version'),
extracted_license_statement=metadata_fields.get('license_expression', ''),
parties=parties,
homepage_url=metadata_fields.get('homepage_url', ''),
download_url=metadata_fields.get('download_url', ''),
vcs_url=metadata_fields.get('vcs_url', ''),
sha1=metadata_fields.get('download_archive_sha1', ''),
extra_data=dict(vcs_commit_hash=metadata_fields.get('vcs_commit_hash', ''))
)
yield models.PackageData.from_data(package_data, package_only=True)
# TODO: Create function that determines package type from download URL,
# then create a package of that package type from the metadata info

if 'upstream_type' in metadata_fields:
package_type = metadata_fields['upstream_type']
elif 'package_type' in metadata_fields:
package_type = metadata_fields['package_type']
else:
package_type = cls.default_package_type

if 'licenses' in metadata_fields:
extracted_license_statement = metadata_fields['licenses']
else:
extracted_license_statement = metadata_fields.get('license_expression')

if 'upstream_address' in metadata_fields:
homepage_url = metadata_fields['upstream_address']
else:
homepage_url = metadata_fields.get('homepage_url')


extra_data = {}
if 'vcs_commit_hash' in metadata_fields:
extra_data['vcs_commit_hash'] = metadata_fields['vcs_commit_hash']
if 'upstream_hash' in metadata_fields:
extra_data['upstream_hash'] = metadata_fields['upstream_hash']

package_data = dict(
datasource_id=cls.datasource_id,
type=package_type,
name=metadata_fields.get('name'),
version=metadata_fields.get('version'),
extracted_license_statement=extracted_license_statement,
parties=parties,
homepage_url=homepage_url,
download_url=metadata_fields.get('download_url'),
vcs_url=metadata_fields.get('vcs_url'),
sha1=metadata_fields.get('download_archive_sha1'),
extra_data=extra_data
)
if 'package_url' in metadata_fields:
package_data.update(PackageURL.from_string(metadata_fields['package_url']).to_dict())

yield models.PackageData.from_data(package_data, package_only=True)


@classmethod
def assign_package_to_resources(cls, package, resource, codebase, package_adder):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
METADATA = {
"licenses": [
"BSD-3-Clause",
],
"maintainers": [
"oss_foundation",
],
"name": "androidx.compose.animation:animation",
"upstream_address": "https://developer.android.com/jetpack/androidx/releases/compose-animation#0.0.1",
"version": "0.0.1",
"package_url" : "pkg:maven/androidx.compose.animation/animation@0.0.1"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you link to some examples of this type of manifest with package_url fields, and maybe add one of those as a test? It's probably best to use real-world examples. Or, if you got this from a real example, you can also link that file here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, I cannot link you one of our internal files. However, my example file is very close to an actual file (the version number is wrong, but that's all).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, I cannot link you one of our internal files.

No worries on that, obviously, but can you find some other real examples with package_url anywhere on GitHub or elsewhere, or is this relatively new?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I searched on GitHub but did not find any other files that use that field; I also did not find many examples of bzl files at all. So perhaps it is not very common (yet?).
As this doesn't interfere with other things, would you be okay with merging this anyway? After all, it just adds another possible data source for bzl files that has the potential to be useful for others :)

}
23 changes: 23 additions & 0 deletions tests/packagedcode/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,33 @@ def test_MetadataBzl_parse(self):
role='maintainer'
)
],
extra_data=dict(upstream_hash='deadbeef'),
homepage_url='https://github.com/example/example',
)
expected_packages = [models.PackageData.from_data(package_data=package_data, package_only=True)]
compare_package_results(expected_packages, result_packages)

def test_MetadataBzl_parse_with_package_url(self):
    """
    Parse the ``with-package-url`` METADATA.bzl fixture and compare the
    result against the expected maven-typed package data.
    """
    handler = build.BuckMetadataBzlHandler
    location = self.get_test_loc('metadatabzl/with-package-url/METADATA.bzl')
    parsed = handler.parse(location, package_only=True)

    # The single maintainer declared by the fixture.
    maintainer = models.Party(
        type=models.party_org,
        name='oss_foundation',
        role='maintainer'
    )
    expected_data = {
        'datasource_id': handler.datasource_id,
        'name': 'animation',
        'namespace': 'androidx.compose.animation',
        'type': 'maven',
        'version': '0.0.1',
        'extracted_license_statement': ['BSD-3-Clause'],
        'parties': [maintainer],
        'homepage_url': 'https://developer.android.com/jetpack/androidx/releases/compose-animation#0.0.1',
    }
    expected = models.PackageData.from_data(package_data=expected_data, package_only=True)
    compare_package_results([expected], parsed)

def test_MetadataBzl_recognize_new_format(self):
test_file = self.get_test_loc('metadatabzl/new-format/METADATA.bzl')
Expand Down