Skip to content

feat: allow manylinux 2.28 and 2.34 on python 3.12+ when compiled on a different architecture #762

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ lint:
ruff check aws_lambda_builders

lint-fix:
ruff aws_lambda_builders --fix
ruff check aws_lambda_builders --fix

# Command to run every time you make changes to verify everything works
dev: lint test
Expand Down
88 changes: 77 additions & 11 deletions aws_lambda_builders/workflows/python_pip/packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
Installs packages using PIP
"""

import itertools
import logging
import re
import subprocess
from email.parser import FeedParser
from typing import Tuple
from typing import List, Tuple

from aws_lambda_builders.architecture import ARM64, X86_64
from aws_lambda_builders.utils import extract_tarfile
Expand Down Expand Up @@ -171,19 +172,25 @@ class DependencyBuilder(object):
packager.
"""

_COMPATIBLE_PLATFORM_ARM64 = {
_COMPATIBLE_PLATFORM_ARM64 = [
"any",
"linux_aarch64",
"manylinux2014_aarch64",
}
"manylinux_2_17_aarch64",
"manylinux_2_28_aarch64",
"manylinux_2_34_aarch64",
]
Comment on lines +175 to +182

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a codeowner here, just an interested party, but is there a reason to make this a list instead of a set? afaict order doesn't matter and set operations are very useful here

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TL;DR: I need the platforms to be sorted by release time.

Good question! The problem with the --platform option in pip download is that it tries to match exactly the platform you pass as an argument, excluding potentially compatible wheels compiled with an earlier version of glibc.

According to pip's documentation, it is up to the caller to pass the full list of platforms compatible with the target, because pip makes no a priori assumptions.

Let's take an example: suppose we want to install numpy on Amazon Linux 2023, which runs glibc version 2.34, but we are building on a different operating system or architecture (e.g., a Mac), so let's try downloading numpy by adding the platform tag:

$ pip download numpy --platform manylinux_2_34_x86_64 --only-binary=:all:
ERROR: Could not find a version that satisfies the requirement numpy (from versions: none)
ERROR: No matching distribution found for numpy

This is strange but expected, because there is no version of numpy on pypi.org with that platform tag. You need to pass all possible compatible tags to the command, so a working command would be:

$ pip download numpy --platform manylinux_2_17_x86_64 --platform manylinux_2_28_x86_64 --platform manylinux_2_34_x86_64 --only-binary=:all:

With this command you will get numpy version 2.3.0, which comes with the manylinux_2_28_x86_64 platform tag.

This means that for each pair of glibc versions and architectures there is a different set of compatible platform tags. Unfortunately, I could not find a clever way to implement this in python, but I used this simple logic:

  1. For each architecture, list all possible platform tags ordered from oldest to newest;
  2. At runtime, determine the most recent platform tag compatible with our target (in the example it is manylinux_2_34_x86_64);
  3. Pass the platform determined in step 2 and all previously released platforms to the command.

This is why I turned the set into a list: I need the platforms to be sorted by release time.

If you have any ideas to improve the code feel free to share them here, this is the simplest approach I could find.


_COMPATIBLE_PLATFORM_X86_64 = {
_COMPATIBLE_PLATFORM_X86_64 = [
"any",
"linux_x86_64",
"manylinux1_x86_64",
"manylinux2010_x86_64",
"manylinux2014_x86_64",
}
"manylinux_2_17_x86_64",
"manylinux_2_28_x86_64",
"manylinux_2_34_x86_64",
]

_COMPATIBLE_PLATFORMS = {
ARM64: _COMPATIBLE_PLATFORM_ARM64,
Expand Down Expand Up @@ -214,6 +221,14 @@ class DependencyBuilder(object):
# Unlikely to hit this case.
_DEFAULT_GLIBC = (2, 17)

# Mapping of glibc version to the most recent manylinux version compatible.
# The officially supported manylinux versions are 2_17, 2_28 and 2_34 as per https://github.com/pypa/manylinux
_GLIBC_TO_LATEST_MANYLINUX = {
(2, 17): "manylinux_2_17",
(2, 26): "manylinux_2_17",
(2, 34): "manylinux_2_34",
}

def __init__(self, osutils, runtime, python_exe, pip_runner=None, architecture=X86_64):
"""Initialize a DependencyBuilder.

Expand Down Expand Up @@ -379,8 +394,60 @@ def _download_binary_wheels(self, packages, directory):
# Try to get binary wheels for each package that isn't compatible.
LOG.debug("Downloading missing wheels: %s", packages)
lambda_abi = get_lambda_abi(self.runtime)
platform = "manylinux2014_aarch64" if self.architecture == ARM64 else "manylinux2014_x86_64"
self._pip.download_manylinux_wheels([pkg.identifier for pkg in packages], directory, lambda_abi, platform)
self._pip.download_manylinux_wheels(
[pkg.identifier for pkg in packages], directory, lambda_abi, self.compatible_platforms
)

@property
def compatible_platforms(self) -> List[str]:
    """Return the platform tags compatible with the target runtime and architecture.

    The runtime's glibc version (looked up in ``_RUNTIME_GLIBC``, defaulting to
    ``_DEFAULT_GLIBC``) is mapped to a manylinux tag through
    ``_GLIBC_TO_LATEST_MANYLINUX``. The result is the slice of the
    per-architecture platform list from its first entry up to and including
    that tag, so it relies on those lists being ordered from oldest to newest.

    If the runtime's glibc version has no entry in ``_GLIBC_TO_LATEST_MANYLINUX``,
    the newest mapped glibc version that does not exceed it is used instead
    (or ``_DEFAULT_GLIBC`` when there is none), rather than building an
    invalid ``"None_<arch>"`` tag.

    Examples:
        python 3.11 on x86_64::

            ['any', 'linux_x86_64', 'manylinux1_x86_64', 'manylinux2010_x86_64',
             'manylinux2014_x86_64', 'manylinux_2_17_x86_64']

        python 3.12 on x86_64::

            ['any', 'linux_x86_64', 'manylinux1_x86_64', 'manylinux2010_x86_64',
             'manylinux2014_x86_64', 'manylinux_2_17_x86_64',
             'manylinux_2_28_x86_64', 'manylinux_2_34_x86_64']

        python 3.13 on ARM64::

            ['any', 'linux_aarch64', 'manylinux2014_aarch64', 'manylinux_2_17_aarch64',
             'manylinux_2_28_aarch64', 'manylinux_2_34_aarch64']

    Raises:
        KeyError: if ``self.architecture`` has no entry in ``_COMPATIBLE_PLATFORMS``.
        ValueError: if the resolved tag is missing from the architecture's
            platform list.
    """
    lambda_abi = get_lambda_abi(self.runtime)
    glibc_version = self._RUNTIME_GLIBC.get(lambda_abi, self._DEFAULT_GLIBC)

    manylinux_prefix = self._GLIBC_TO_LATEST_MANYLINUX.get(glibc_version)
    if manylinux_prefix is None:
        # No exact entry for this glibc version: pick the newest mapped glibc
        # that is not newer than the runtime's, so we never advertise a tag
        # requiring a newer glibc than the target provides.
        not_newer = [known for known in self._GLIBC_TO_LATEST_MANYLINUX if known <= glibc_version]
        fallback_glibc = max(not_newer) if not_newer else self._DEFAULT_GLIBC
        manylinux_prefix = self._GLIBC_TO_LATEST_MANYLINUX[fallback_glibc]

    architecture = "aarch64" if self.architecture == ARM64 else "x86_64"

    # Get the latest compatible platform tag for the current architecture,
    # all the previous ones are also compatible.
    latest_compatible_platform = f"{manylinux_prefix}_{architecture}"

    all_platforms = self._COMPATIBLE_PLATFORMS[self.architecture]
    max_index = all_platforms.index(latest_compatible_platform)
    return all_platforms[: max_index + 1]

def _build_sdists(self, sdists, directory, compile_c=True):
LOG.debug("Build missing wheels from sdists " "(C compiling %s): %s", compile_c, sdists)
Expand Down Expand Up @@ -432,7 +499,7 @@ def _is_compatible_platform_tag(self, expected_abi, platform):

In addition to checking the tag pattern, we also need to verify the glibc version
"""
if platform in self._COMPATIBLE_PLATFORMS[self.architecture]:
if platform in self.compatible_platforms:
return True

arch = "aarch64" if self.architecture == ARM64 else "x86_64"
Expand Down Expand Up @@ -832,7 +899,7 @@ def download_all_dependencies(self, requirements_filename, directory):
# complain at deployment time.
self.build_wheel(wheel_package_path, directory)

def download_manylinux_wheels(self, packages, directory, lambda_abi, platform="manylinux2014_x86_64"):
def download_manylinux_wheels(self, packages, directory, lambda_abi, platforms):
"""Download wheel files for manylinux for all the given packages."""
# If any one of these dependencies fails pip will bail out. Since we
# are only interested in all the ones we can download, we need to feed
Expand All @@ -846,8 +913,7 @@ def download_manylinux_wheels(self, packages, directory, lambda_abi, platform="m
arguments = [
"--only-binary=:all:",
"--no-deps",
"--platform",
platform,
*list(itertools.chain.from_iterable(["--platform", element] for element in platforms)),
"--implementation",
"cp",
"--abi",
Expand Down
102 changes: 100 additions & 2 deletions tests/functional/workflows/python_pip/test_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,10 +202,10 @@ def _write_requirements_txt(self, packages, directory):
with open(filepath, "w") as f:
f.write(contents)

def _make_appdir_and_dependency_builder(self, reqs, tmpdir, runner, **kwargs):
def _make_appdir_and_dependency_builder(self, reqs, tmpdir, runner, runtime="python3.9", **kwargs):
appdir = str(_create_app_structure(tmpdir))
self._write_requirements_txt(reqs, appdir)
builder = DependencyBuilder(OSUtils(), "python3.9", sys.executable, runner, **kwargs)
builder = DependencyBuilder(OSUtils(), runtime, sys.executable, runner, **kwargs)
return appdir, builder

def test_can_build_local_dir_as_whl(self, tmpdir, pip_runner, osutils):
Expand Down Expand Up @@ -516,6 +516,74 @@ def test_can_get_arm64_whls(self, tmpdir, osutils, pip_runner):
for req in reqs:
assert req in installed_packages

def test_can_get_newer_platforms(self, tmpdir, osutils, pip_runner):
    """Build for python3.12 when pip returns a manylinux_2_28_x86_64 wheel.

    Only the initial requirements download is registered with the fake pip;
    both requested packages must end up in the site-packages directory.
    """
    pip, runner = pip_runner
    requested = ["foo", "bar"]
    appdir, builder = self._make_appdir_and_dependency_builder(
        requested, tmpdir, runner, runtime="python3.12"
    )
    requirements_file = os.path.join(appdir, "requirements.txt")
    target_dir = os.path.join(appdir, ".chalice.", "site-packages")

    downloaded_wheels = [
        "foo-1.0-cp312-none-any.whl",
        "bar-1.2-cp312-cp312-manylinux_2_28_x86_64.whl",
    ]
    pip.packages_to_download(
        expected_args=["-r", requirements_file, "--dest", mock.ANY, "--exists-action", "i"],
        packages=downloaded_wheels,
    )

    with osutils.tempdir() as scratch_dir:
        builder.build_site_packages(requirements_file, target_dir, scratch_dir)
        installed = os.listdir(target_dir)

    pip.validate()
    for name in requested:
        assert name in installed

def test_can_get_newer_platforms_cross_compile(self, tmpdir, osutils, pip_runner):
    """Build for python3.12 on ARM64 when the first download yields an x86_64 wheel.

    The fake pip is primed with two calls: the initial requirements download
    (which returns an x86_64 wheel for ``bar``) and a follow-up binary-wheel
    download that passes one ``--platform`` option per aarch64 tag.
    """
    pip, runner = pip_runner
    requested = ["foo", "bar"]
    appdir, builder = self._make_appdir_and_dependency_builder(
        requested, tmpdir, runner, runtime="python3.12", architecture=ARM64
    )
    requirements_file = os.path.join(appdir, "requirements.txt")
    target_dir = os.path.join(appdir, ".chalice.", "site-packages")

    pip.packages_to_download(
        expected_args=["-r", requirements_file, "--dest", mock.ANY, "--exists-action", "i"],
        packages=["foo-1.0-cp312-none-any.whl", "bar-1.2-cp312-cp312-manylinux_2_28_x86_64.whl"],
    )

    # First call returned x86_64 wheels, fallback to the second call
    aarch64_tags = [
        "any",
        "linux_aarch64",
        "manylinux2014_aarch64",
        "manylinux_2_17_aarch64",
        # It's python 3.12, so we can use newer platforms.
        "manylinux_2_28_aarch64",
        "manylinux_2_34_aarch64",
    ]
    platform_options = [option for tag in aarch64_tags for option in ("--platform", tag)]
    pip.packages_to_download(
        expected_args=[
            "--only-binary=:all:",
            "--no-deps",
            *platform_options,
            "--implementation",
            "cp",
            "--abi",
            get_lambda_abi(builder.runtime),
            "--dest",
            mock.ANY,
            "bar==1.2",
        ],
        packages=["bar-1.2-cp312-cp312-manylinux_2_28_aarch64.whl"],
    )

    with osutils.tempdir() as scratch_dir:
        builder.build_site_packages(requirements_file, target_dir, scratch_dir)
        installed = os.listdir(target_dir)

    pip.validate()
    for name in requested:
        assert name in installed

def test_does_fail_on_invalid_local_package(self, tmpdir, osutils, pip_runner):
reqs = ["../foo"]
pip, runner = pip_runner
Expand Down Expand Up @@ -629,7 +697,17 @@ def test_can_replace_incompat_whl(self, tmpdir, osutils, pip_runner):
"--only-binary=:all:",
"--no-deps",
"--platform",
"any",
"--platform",
"linux_x86_64",
"--platform",
"manylinux1_x86_64",
"--platform",
"manylinux2010_x86_64",
"--platform",
"manylinux2014_x86_64",
"--platform",
"manylinux_2_17_x86_64",
"--implementation",
"cp",
"--abi",
Expand Down Expand Up @@ -663,7 +741,17 @@ def test_allowlist_sqlalchemy(self, tmpdir, osutils, pip_runner):
"--only-binary=:all:",
"--no-deps",
"--platform",
"any",
"--platform",
"linux_x86_64",
"--platform",
"manylinux1_x86_64",
"--platform",
"manylinux2010_x86_64",
"--platform",
"manylinux2014_x86_64",
"--platform",
"manylinux_2_17_x86_64",
"--implementation",
"cp",
"--abi",
Expand Down Expand Up @@ -798,7 +886,17 @@ def test_build_into_existing_dir_with_preinstalled_packages(self, tmpdir, osutil
"--only-binary=:all:",
"--no-deps",
"--platform",
"any",
"--platform",
"linux_x86_64",
"--platform",
"manylinux1_x86_64",
"--platform",
"manylinux2010_x86_64",
"--platform",
"manylinux2014_x86_64",
"--platform",
"manylinux_2_17_x86_64",
"--implementation",
"cp",
"--abi",
Expand Down
26 changes: 24 additions & 2 deletions tests/unit/workflows/python_pip/test_packager.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,35 @@ def test_download_wheels(self, pip_factory):
# for getting lambda compatible wheels.
pip, runner = pip_factory()
packages = ["foo", "bar", "baz"]
runner.download_manylinux_wheels(packages, "directory", "abi")
runner.download_manylinux_wheels(
packages,
"directory",
"abi",
[
"any",
"linux_x86_64",
"manylinux1_x86_64",
"manylinux2010_x86_64",
"manylinux2014_x86_64",
"manylinux_2_17_x86_64",
],
)
expected_prefix = [
"download",
"--only-binary=:all:",
"--no-deps",
"--platform",
"any",
"--platform",
"linux_x86_64",
"--platform",
"manylinux1_x86_64",
"--platform",
"manylinux2010_x86_64",
"--platform",
"manylinux2014_x86_64",
"--platform",
"manylinux_2_17_x86_64",
"--implementation",
"cp",
"--abi",
Expand All @@ -270,7 +292,7 @@ def test_download_wheels(self, pip_factory):

def test_download_wheels_no_wheels(self, pip_factory):
pip, runner = pip_factory()
runner.download_manylinux_wheels([], "directory", "abi")
runner.download_manylinux_wheels([], "directory", "abi", [])
assert len(pip.calls) == 0

def test_does_find_local_directory(self, pip_factory):
Expand Down
Loading