Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4cd2811
Proof of concept
perseoGI May 16, 2025
156e036
Simplify
perseoGI May 16, 2025
c7b5dff
Added tar_compressor
perseoGI May 16, 2025
be30ba2
Extra simplify
perseoGI May 16, 2025
1295d4f
Cache the plugin load
perseoGI May 19, 2025
cc4d3ad
WIP
perseoGI May 19, 2025
e245a93
Merge with develop2
perseoGI May 20, 2025
f094d9a
Moved plugin load to ConfigAPI
perseoGI May 20, 2025
fee5fab
Pass config_api to remote_manager and local_recipe_index
perseoGI May 20, 2025
452f774
Restore previous tar_extract fixing tests
perseoGI May 20, 2025
c1a7320
Removed compression.py module and minimize diff
perseoGI May 22, 2025
a240c8c
Remove debug print
perseoGI May 22, 2025
cf7de74
Applied thread suggestions
perseoGI May 23, 2025
557f8e0
Remove created pkglist.json on cache restore after usage
perseoGI May 23, 2025
f82d764
Remove unused and avoid rechecking FS
perseoGI May 26, 2025
549e086
Merge branch 'develop2' into pgi/plugin/compression
perseoGI May 26, 2025
ac1fc45
Added test to check extract failure and issue #18259 test
perseoGI May 26, 2025
181a736
Added config to compression.py interface and renamed parameters
perseoGI May 27, 2025
3b32ec2
Fix invokation
perseoGI May 27, 2025
b52bce2
Rename config for conf
perseoGI May 27, 2025
91da7a4
Added ref on test
perseoGI May 27, 2025
e96bd3c
Merge branch 'develop2' into pgi/plugin/compression
perseoGI May 27, 2025
a024baf
Merge branch 'develop2' into pgi/plugin/compression
perseoGI Jun 19, 2025
989fde9
Move to different approach: tar encapsulation respecting extensions
perseoGI Jun 20, 2025
f986651
Fix condition error
perseoGI Jun 23, 2025
8009bed
Addressed some issues
perseoGI Jun 25, 2025
0db1a7b
Make plugin return compressed extension
perseoGI Jun 25, 2025
dcbb29e
Adapt changes to support metadata in wrapped files
perseoGI Jul 14, 2025
28e9148
Merged with develop2 and moved compression_plugin to CacheAPI
perseoGI Aug 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 32 additions & 11 deletions conan/api/subapi/cache.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import os
import shutil
import tarfile
import tempfile

from conan.api.model import PackagesList
Expand All @@ -12,19 +11,21 @@
METADATA, DOWNLOAD_EXPORT_FOLDER
from conan.internal.cache.home_paths import HomePaths
from conan.internal.cache.integrity_check import IntegrityChecker
from conan.internal.loader import load_python_file
from conan.internal.rest.download_cache import DownloadCache
from conan.errors import ConanException
from conan.api.model import PkgReference
from conan.api.model import RecipeReference
from conan.internal.util.dates import revision_timestamp_now
from conan.internal.util.files import rmdir, mkdir, remove, save
from conan.internal.util.files import rmdir, mkdir, remove, save, tar_extract


class CacheAPI:

def __init__(self, conan_api, api_helpers):
self._conan_api = conan_api
self._api_helpers = api_helpers
self._compression_plugin = None

def export_path(self, ref: RecipeReference):
cache = PkgCache(self._conan_api.cache_folder, self._api_helpers.global_conf)
Expand Down Expand Up @@ -132,7 +133,6 @@ def save(self, package_list, tgz_path, no_source=False):
cache_folder = cache.store # Note, this is not the home, but the actual package cache
out = ConanOutput()
mkdir(os.path.dirname(tgz_path))
compresslevel = global_conf.get("core.gzip:compresslevel", check_type=int)
tar_files: dict[str, str] = {} # {path_in_tar: abs_path}

for ref, ref_bundle in package_list.refs().items():
Expand Down Expand Up @@ -173,7 +173,8 @@ def save(self, package_list, tgz_path, no_source=False):
pkglist_path = os.path.join(tempfile.gettempdir(), "pkglist.json")
save(pkglist_path, serialized)
tar_files["pkglist.json"] = pkglist_path
compress_files(tar_files, os.path.basename(tgz_path), os.path.dirname(tgz_path), compresslevel, recursive=True)
compress_files(tar_files, os.path.basename(tgz_path), os.path.dirname(tgz_path), conf=self._conan_api.config,
recursive=True, ref=None, compression_plugin=self.compression_plugin)
remove(pkglist_path)

def restore(self, path):
Expand All @@ -183,13 +184,20 @@ def restore(self, path):
cache = PkgCache(self._conan_api.cache_folder, self._api_helpers.global_conf)
cache_folder = cache.store # Note, this is not the home, but the actual package cache

with open(path, mode='rb') as file_handler:
the_tar = tarfile.open(fileobj=file_handler)
fileobj = the_tar.extractfile("pkglist.json")
pkglist = fileobj.read()
the_tar.extraction_filter = (lambda member, _: member) # fully_trusted (Py 3.14)
the_tar.extractall(path=cache_folder)
the_tar.close()
with open(path, mode="rb") as file_handler:
tar_extract(
fileobj=file_handler,
destination_dir=cache_folder,
compression_plugin=self.compression_plugin,
conf=self._conan_api.config._helpers.global_conf,
)

# Retrieve the package list from the already extracted archive
pkglist_path = os.path.join(cache_folder, "pkglist.json")
with open(pkglist_path) as file_handler:
pkglist = file_handler.read()
# Delete the pkglist.json file to keep cache clean
remove(pkglist_path)

# After unzipping the files, we need to update the DB that references these files
out = ConanOutput()
Expand Down Expand Up @@ -268,6 +276,19 @@ def path_to_ref(self, path):
result = cache.path_to_ref(base)
return result

@property
def compression_plugin(self):
    """Lazily load and cache the optional ``compression.py`` plugin module.

    :return: the loaded plugin module, or ``None`` when the plugin file does not exist.
    :raises ConanException: if the plugin file exists but does not define both
        ``tar_extract`` and ``tar_compress``.
    """
    if self._compression_plugin is not None:
        # Already resolved: either the module, or ``False`` meaning "no plugin file found"
        return self._compression_plugin or None

    plugin_file = HomePaths(self._conan_api.home_folder).compression_plugin_path
    if not os.path.exists(plugin_file):
        self._compression_plugin = False  # Avoid FS re-check
        return None

    plugin_module, _ = load_python_file(plugin_file)
    is_complete = (hasattr(plugin_module, "tar_extract")
                   and hasattr(plugin_module, "tar_compress"))
    if not is_complete:
        raise ConanException("The 'compression.py' plugin does not contain required "
                             "`tar_extract` or `tar_compress` functions")
    self._compression_plugin = plugin_module
    return self._compression_plugin or None


def _resolve_latest_ref(cache, ref):
if ref.revision is None or ref.revision == "latest":
Expand Down
47 changes: 38 additions & 9 deletions conan/internal/api/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from conan.errors import ConanException
from conan.internal.paths import (CONAN_MANIFEST, CONANFILE, EXPORT_SOURCES_TGZ_NAME,
EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, CONANINFO)
from conan.internal.util.files import (clean_dirty, is_dirty, gather_files,
from conan.internal.util.files import (COMPRESSED_PLUGIN_TAR_NAME, clean_dirty, is_dirty, gather_files, remove,
set_dirty_context_manager, mkdir, human_size)

UPLOAD_POLICY_FORCE = "force-upload"
Expand Down Expand Up @@ -155,9 +155,9 @@ def add_tgz(tgz_name, tgz_files):
if os.path.isfile(tgz):
result[tgz_name] = tgz
elif tgz_files:
compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int)
tgz = compress_files(tgz_files, tgz_name, download_export_folder,
compresslevel=compresslevel, ref=ref)
conf=self._global_conf, ref=ref,
compression_plugin=self._app.conan_api.cache.compression_plugin)
result[tgz_name] = tgz

add_tgz(EXPORT_TGZ_NAME, files)
Expand Down Expand Up @@ -202,9 +202,9 @@ def _compress_package_files(self, layout, pref):

if not os.path.isfile(package_tgz):
tgz_files = {f: path for f, path in files.items()}
compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int)
tgz_path = compress_files(tgz_files, PACKAGE_TGZ_NAME, download_pkg_folder,
compresslevel=compresslevel, ref=pref)
conf=self._global_conf, ref=pref,
compression_plugin=self._app.conan_api.cache.compression_plugin)
assert tgz_path == package_tgz
assert os.path.exists(package_tgz)

Expand Down Expand Up @@ -271,13 +271,16 @@ def gzopen_without_timestamps(name, fileobj, compresslevel=None):
return t


def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursive=False):
def compress_files(files, name, dest_dir, conf=None, ref=None, recursive=False, compression_plugin=None):
if compression_plugin:
return _compress_files_with_plugin(files, name, dest_dir, conf, ref, recursive, compression_plugin)

tgz_path = os.path.join(dest_dir, name)
t1 = time.time()
# FIXME, better write to disk sequentially and not keep tgz contents in memory
tgz_path = os.path.join(dest_dir, name)
if ref:
ConanOutput(scope=str(ref) if ref else None).info(f"Compressing {name}")
ConanOutput(scope=str(ref or "")).info(f"Compressing {name}")
with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle:
compresslevel = conf.get("core.gzip:compresslevel", check_type=int) if conf else None
tgz = gzopen_without_timestamps(name, fileobj=tgz_handle, compresslevel=compresslevel)
for filename, abs_path in sorted(files.items()):
# recursive is False by default in case it is a symlink to a folder
Expand All @@ -288,6 +291,32 @@ def compress_files(files, name, dest_dir, compresslevel=None, ref=None, recursiv
ConanOutput().debug(f"{name} compressed in {duration} time")
return tgz_path

def _compress_files_with_plugin(files, name, dest_dir, conf, ref, recursive, compression_plugin):
    """Compress ``files`` through the compression plugin and wrap the result in ``name``.

    The plugin's ``tar_compress`` is asked to write an archive at
    ``dest_dir/COMPRESSED_PLUGIN_TAR_NAME`` and must return the extension it appended.
    That archive is then added as the only member of a tar named ``name`` (gzip level 0),
    and the intermediate archive is deleted.

    :param files: forwarded unchanged to the plugin's ``tar_compress``.
    :param name: file name of the final wrapper archive created in ``dest_dir``.
    :param dest_dir: folder where both the intermediate and the final archive are written.
    :param conf: forwarded unchanged to the plugin.
    :param ref: forwarded to the plugin and used as the output scope.
    :param recursive: forwarded to the plugin and to the wrapper's ``add`` call.
    :param compression_plugin: object exposing ``tar_compress``.
    :return: path of the wrapper archive (``dest_dir/name``).
    :raises ConanException: if the plugin does not return an extension starting with ".".
    """
    t1 = time.time()
    abs_path_without_extension = os.path.join(dest_dir, COMPRESSED_PLUGIN_TAR_NAME)
    ConanOutput(scope=str(ref or "")).info(f"Compressing {name} using compression plugin")
    compressed_extension = compression_plugin.tar_compress(
        archive_path=abs_path_without_extension,
        files=files,
        recursive=recursive,
        conf=conf,
        ref=ref,
    )
    ConanOutput().debug(f"Compressed in {time.time() - t1} time")
    # A non-string return (None, True, ...) is reported as a plugin error instead of
    # surfacing as an AttributeError on ``startswith``
    if not isinstance(compressed_extension, str) or not compressed_extension.startswith("."):
        raise ConanException("The 'compression.py' did not return the compressed extension.")

    compressed_path = abs_path_without_extension + compressed_extension
    t1 = time.time()
    tgz_path = os.path.join(dest_dir, name)
    try:
        with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle:
            tgz = gzopen_without_timestamps(name, fileobj=tgz_handle, compresslevel=0)
            try:
                tgz.add(compressed_path, arcname=os.path.basename(compressed_path),
                        recursive=recursive)
            finally:
                # Close the wrapper even if adding the member failed
                tgz.close()
        ConanOutput().debug(f"{name} wrapped in {time.time() - t1} time")
    finally:
        # Do not leave the intermediate plugin archive behind, also when wrapping fails
        if os.path.isfile(compressed_path):
            remove(compressed_path)
    return tgz_path

def _total_size(cache_files):
total_size = 0
Expand Down
4 changes: 4 additions & 0 deletions conan/internal/cache/home_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,7 @@ def settings_path_user(self):
@property
def config_version_path(self):
    """Path of ``config_version.json`` joined onto ``self._home``."""
    file_name = "config_version.json"
    return os.path.join(self._home, file_name)

@property
def compression_plugin_path(self):
    """Path of the ``compression.py`` plugin file inside the plugins folder of ``self._home``."""
    plugins_folder = os.path.join(self._home, _EXTENSIONS_FOLDER, _PLUGINS)
    return os.path.join(plugins_folder, "compression.py")
6 changes: 3 additions & 3 deletions conan/internal/conan_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, conan_api):
auth_manager = ConanApiAuthManager(conan_api.remotes.requester, cache_folder, localdb,
global_conf)
# Handle remote connections
self.remote_manager = RemoteManager(self.cache, auth_manager, cache_folder)
self.remote_manager = RemoteManager(self.cache, auth_manager, cache_folder, self.conan_api)
global_editables = conan_api.local.editable_packages
ws_editables = conan_api.workspace.packages()
self.editable_packages = global_editables.update_copy(ws_editables)
Expand Down Expand Up @@ -84,10 +84,10 @@ class LocalRecipesIndexApp:
- loader (for the export phase of local-recipes-index)
The others are internally use by other collaborators
"""
def __init__(self, cache_folder):
def __init__(self, cache_folder, conan_api):
self.global_conf = ConfDefinition()
self.cache = PkgCache(cache_folder, self.global_conf)
self.remote_manager = RemoteManager(self.cache, auth_manager=None, home_folder=cache_folder)
self.remote_manager = RemoteManager(self.cache, auth_manager=None, home_folder=cache_folder, conan_api=conan_api)
editable_packages = EditablePackages()
self.proxy = ConanProxy(self, editable_packages)
self.range_resolver = RangeResolver(self, self.global_conf, editable_packages)
Expand Down
18 changes: 11 additions & 7 deletions conan/internal/rest/remote_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@ class RemoteManager:

_ErrorMsg = namedtuple("ErrorMsg", ["message"])

def __init__(self, cache, auth_manager, home_folder):
def __init__(self, cache, auth_manager, home_folder, conan_api):
self._cache = cache
self._auth_manager = auth_manager
self._signer = PkgSignaturesPlugin(cache, home_folder)
self._home_folder = home_folder
self._conan_api = conan_api

def _local_folder_remote(self, remote):
if remote.remote_type == LOCAL_RECIPES_INDEX:
return RestApiClientLocalRecipesIndex(remote, self._home_folder)
return RestApiClientLocalRecipesIndex(remote, self._home_folder, self._conan_api)

def check_credentials(self, remote, force_auth=False):
self._call_remote(remote, "check_credentials", force_auth)
Expand Down Expand Up @@ -87,7 +88,7 @@ def get_recipe(self, ref, remote, metadata=None):
tgz_file = zipped_files.pop(EXPORT_TGZ_NAME, None)

if tgz_file:
uncompress_file(tgz_file, export_folder, scope=str(ref))
uncompress_file(tgz_file, export_folder, scope=str(ref), conan_api=self._conan_api)
mkdir(export_folder)
for file_name, file_path in zipped_files.items(): # copy CONANFILE
shutil.move(file_path, os.path.join(export_folder, file_name))
Expand Down Expand Up @@ -129,7 +130,7 @@ def get_recipe_sources(self, ref, layout, remote):

self._signer.verify(ref, download_folder, files=zipped_files)
tgz_file = zipped_files[EXPORT_SOURCES_TGZ_NAME]
uncompress_file(tgz_file, export_sources_folder, scope=str(ref))
uncompress_file(tgz_file, export_sources_folder, scope=str(ref), conan_api=self._conan_api)

def get_package(self, pref, remote, metadata=None):
output = ConanOutput(scope=str(pref.ref))
Expand Down Expand Up @@ -177,7 +178,7 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata):

tgz_file = zipped_files.pop(PACKAGE_TGZ_NAME, None)
package_folder = layout.package()
uncompress_file(tgz_file, package_folder, scope=str(pref.ref))
uncompress_file(tgz_file, package_folder, scope=str(pref.ref), conan_api=self._conan_api)
mkdir(package_folder) # Just in case it doesn't exist, because uncompress did nothing
for file_name, file_path in zipped_files.items(): # copy CONANINFO and CONANMANIFEST
shutil.move(file_path, os.path.join(package_folder, file_name))
Expand Down Expand Up @@ -303,15 +304,18 @@ def _call_remote(self, remote, method, *args, **kwargs):
raise ConanException(exc, remote=remote)


def uncompress_file(src_path, dest_folder, scope=None):
def uncompress_file(src_path, dest_folder, scope="", conan_api=None):
try:
filesize = os.path.getsize(src_path)
big_file = filesize > 10000000 # 10 MB
if big_file:
hs = human_size(filesize)
ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}")

compression_plugin=conan_api.cache.compression_plugin if conan_api and conan_api.cache.compression_plugin else None
conf=conan_api.config._helpers.global_conf if conan_api else None
with open(src_path, mode='rb') as file_handler:
tar_extract(file_handler, dest_folder)
tar_extract(fileobj=file_handler, destination_dir=dest_folder, compression_plugin=compression_plugin, conf=conf)
except Exception as e:
error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\
% (src_path, dest_folder, str(e))
Expand Down
4 changes: 2 additions & 2 deletions conan/internal/rest/rest_client_local_recipe_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,14 @@ class RestApiClientLocalRecipesIndex:
a local folder assuming the conan-center-index repo layout
"""

def __init__(self, remote, home_folder):
def __init__(self, remote, home_folder, conan_api):
self._remote = remote
local_recipes_index_path = HomePaths(home_folder).local_recipes_index_path
local_recipes_index_path = os.path.join(local_recipes_index_path, remote.name, ".conan")
repo_folder = self._remote.url

from conan.internal.conan_app import LocalRecipesIndexApp
self._app = LocalRecipesIndexApp(local_recipes_index_path)
self._app = LocalRecipesIndexApp(local_recipes_index_path, conan_api)
self._hook_manager = HookManager(HomePaths(local_recipes_index_path).hooks_path)
self._layout = _LocalRecipesIndexLayout(repo_folder)

Expand Down
42 changes: 41 additions & 1 deletion conan/internal/util/files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import errno
from pathlib import Path
import tempfile
import gzip
import hashlib
import os
Expand All @@ -11,10 +13,13 @@

from contextlib import contextmanager

from conan.api.output import ConanOutput
from conan.errors import ConanException

_DIRTY_FOLDER = ".dirty"

# Name (without extension) of the tar file to be created by the compression plugin
COMPRESSED_PLUGIN_TAR_NAME = "__conan_plugin_compressed_contents__"

def set_dirty(folder):
dirty_file = os.path.normpath(folder) + _DIRTY_FOLDER
Expand Down Expand Up @@ -256,16 +261,51 @@ def mkdir(path):
os.makedirs(path)


def tar_extract(fileobj, destination_dir):
def tar_extract(fileobj, destination_dir, compression_plugin=None, conf=None):
    """Extract the tar archive read from ``fileobj`` into ``destination_dir``.

    When ``compression_plugin`` is given, extraction is delegated to
    ``_tar_extract_with_plugin``. Otherwise the archive is extracted with ``tarfile`` and,
    if a file named ``COMPRESSED_PLUGIN_TAR_NAME.*`` shows up in ``destination_dir``, an error
    is raised because the archive was produced by a plugin that is not available here.

    :param fileobj: binary file object positioned at the start of the archive.
    :param destination_dir: folder where the contents are extracted.
    :param compression_plugin: optional plugin object; falsy means default extraction.
    :param conf: forwarded to the plugin-based extraction.
    :raises ConanException: if the archive contains plugin-compressed contents and no
        plugin was provided.
    """
    if compression_plugin:
        _tar_extract_with_plugin(fileobj, destination_dir, compression_plugin, conf)
        return

    # The context manager closes the tar also when extraction raises
    with tarfile.open(fileobj=fileobj) as the_tar:
        # NOTE: errorlevel=2 has been removed because it was failing in Win10 with
        # "could not change modification time", with time=0
        # the_tar.errorlevel = 2  # raise exception if any error
        the_tar.extraction_filter = (lambda member, path: member)  # fully_trusted, avoid Py3.14 break
        the_tar.extractall(path=destination_dir)
    if any(Path(destination_dir).glob(f"{COMPRESSED_PLUGIN_TAR_NAME}.*")):
        # Not every file object has a usable ``name`` (e.g. in-memory buffers); do not let
        # that mask the real error
        source_name = getattr(fileobj, "name", None)
        archive_name = os.path.basename(source_name) if isinstance(source_name, str) else "file"
        raise ConanException(f"Error while extracting {archive_name}.\n"
                             "This file has been compressed using a `compression` plugin.\n"
                             "If your organization uses this plugin, ensure it is correctly installed on your environment.")


def _tar_extract_with_plugin(fileobj, destination_dir, compression_plugin, conf):
    """First remove the tar.gz wrapper and then call the plugin to extract.

    The wrapper archive is extracted into ``destination_dir``. If one of its members has a
    base name starting with ``COMPRESSED_PLUGIN_TAR_NAME``, that member is handed to the
    plugin's ``tar_extract`` and afterwards every member of the wrapper is removed from
    ``destination_dir``. If no such member exists, the wrapper's contents are left as
    extracted.

    :param fileobj: binary file object positioned at the start of the wrapper archive.
    :param destination_dir: folder where the contents are extracted.
    :param compression_plugin: object exposing ``tar_extract``.
    :param conf: forwarded unchanged to the plugin.
    """
    t1 = time.time()
    # The context manager closes the tar also when extraction raises
    with tarfile.open(fileobj=fileobj) as the_tar:
        the_tar.extraction_filter = (lambda member, path: member)  # fully_trusted, avoid Py3.14 break
        the_tar.extractall(path=destination_dir)
        extracted_files = the_tar.getnames()
    # Check if the tar was compressed with the compression plugin by checking the existence of
    # our constant COMPRESSED_PLUGIN_TAR_NAME (without extension as extension is added by the plugin)
    for path in extracted_files:
        if os.path.basename(path).startswith(COMPRESSED_PLUGIN_TAR_NAME):
            # Extract the actual contents from the plugin tar (ignore other files present).
            ConanOutput().debug(f"Unwrapped in {time.time() - t1}")
            t1 = time.time()
            compression_plugin.tar_extract(
                archive_path=os.path.join(destination_dir, path),
                dest_dir=destination_dir,
                conf=conf,
            )
            # Remove the files that came from the wrapper tar
            # NOTE(review): this runs after the plugin extraction, so it presumably relies on
            # the plugin not producing files with the same names as wrapper members - confirm
            for f in extracted_files:
                remove(os.path.join(destination_dir, f))
            break
    ConanOutput().debug(f"Extracted in {time.time() - t1}")

def merge_directories(src, dst):
from conan.tools.files import copy
copy(None, pattern="*", src=src, dst=dst)
Expand Down
4 changes: 4 additions & 0 deletions test/integration/command/cache/test_cache_save_restore.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,13 @@ def test_cache_save_excluded_folders():

# exclude source
c.run("cache save * --no-source")
# Check default compression function is being used and not compression.py plugin one
assert "Compressing conan_cache_save.tgz\n" in c.out
Copy link

Copilot AI May 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] Instead of matching the newline, assert on "Compressing conan_cache_save.tgz" in c.out to make the test more robust across platforms and output formats.

Suggested change
assert "Compressing conan_cache_save.tgz\n" in c.out
assert "Compressing conan_cache_save.tgz" in c.out

Copilot uses AI. Check for mistakes.
c3 = TestClient()
shutil.copy2(cache_path, c3.current_folder)
c3.run("cache restore conan_cache_save.tgz")
# Default decompress does not have any output
assert "Decompressing conan_cache_save.tgz" not in c3.out
ref_layout = c3.get_latest_ref_layout(ref)
assert not os.path.exists(os.path.join(ref_layout.source(), "mysrc.c"))

Expand Down
Loading
Loading