Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
4cd2811
Proof of concept
perseoGI May 16, 2025
156e036
Simplify
perseoGI May 16, 2025
c7b5dff
Added tar_compressor
perseoGI May 16, 2025
be30ba2
Extra simplify
perseoGI May 16, 2025
1295d4f
Cache the plugin load
perseoGI May 19, 2025
cc4d3ad
WIP
perseoGI May 19, 2025
e245a93
Merge with develop2
perseoGI May 20, 2025
f094d9a
Moved plugin load to ConfigAPI
perseoGI May 20, 2025
fee5fab
Pass config_api to remote_manager and local_recipe_index
perseoGI May 20, 2025
452f774
Restore previous tar_extract fixing tests
perseoGI May 20, 2025
c1a7320
Removed compression.py module and minimize diff
perseoGI May 22, 2025
a240c8c
Remove debug print
perseoGI May 22, 2025
cf7de74
Applied thread suggestions
perseoGI May 23, 2025
557f8e0
Remove created pkglist.json on cache restore after usage
perseoGI May 23, 2025
f82d764
Remove unused and avoid rechecking FS
perseoGI May 26, 2025
549e086
Merge branch 'develop2' into pgi/plugin/compression
perseoGI May 26, 2025
ac1fc45
Added test to check extract failure and issue #18259 test
perseoGI May 26, 2025
181a736
Added config to compression.py interface and renamed parameters
perseoGI May 27, 2025
3b32ec2
Fix invokation
perseoGI May 27, 2025
b52bce2
Rename config for conf
perseoGI May 27, 2025
91da7a4
Added ref on test
perseoGI May 27, 2025
e96bd3c
Merge branch 'develop2' into pgi/plugin/compression
perseoGI May 27, 2025
a024baf
Merge branch 'develop2' into pgi/plugin/compression
perseoGI Jun 19, 2025
989fde9
Move to different approach: tar encapsulation respecting extensions
perseoGI Jun 20, 2025
f986651
Fix condition error
perseoGI Jun 23, 2025
8009bed
Addressed some issues
perseoGI Jun 25, 2025
0db1a7b
Make plugin return compressed extension
perseoGI Jun 25, 2025
dcbb29e
Adapt changes to support metadata in wrapped files
perseoGI Jul 14, 2025
28e9148
Merged with develop2 and moved compression_plugin to CacheAPI
perseoGI Aug 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 13 additions & 17 deletions conan/api/subapi/cache.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import json
import os
import shutil
import tarfile
from io import BytesIO

from conan.api.model import PackagesList
from conan.api.output import ConanOutput
from conan.internal.api.uploader import gzopen_without_timestamps
from conan.internal.cache.cache import PkgCache
from conan.internal.cache.conan_reference_layout import EXPORT_SRC_FOLDER, EXPORT_FOLDER, SRC_FOLDER, \
METADATA, DOWNLOAD_EXPORT_FOLDER
Expand All @@ -18,6 +15,7 @@
from conan.api.model import RecipeReference
from conan.internal.util.dates import revision_timestamp_now
from conan.internal.util.files import rmdir, mkdir, remove
from conan.internal.util.compression import tar_compressor, tar_extract


class CacheAPI:
Expand Down Expand Up @@ -133,9 +131,10 @@ def save(self, package_list, tgz_path, no_source=False):
mkdir(os.path.dirname(tgz_path))
name = os.path.basename(tgz_path)
compresslevel = global_conf.get("core.gzip:compresslevel", check_type=int)

with open(tgz_path, "wb") as tgz_handle:
tgz = gzopen_without_timestamps(name, fileobj=tgz_handle,
compresslevel=compresslevel)
tgz = tar_compressor(name, fileobj=tgz_handle, compresslevel=compresslevel,
cache_path=self.conan_api.cache_folder)
for ref, ref_bundle in package_list.refs().items():
ref_layout = cache.recipe_layout(ref)
recipe_folder = os.path.relpath(ref_layout.base_folder, cache_folder)
Expand Down Expand Up @@ -169,11 +168,12 @@ def save(self, package_list, tgz_path, no_source=False):
out.info(f"Saving {pref} metadata: {metadata_folder}")
tgz.add(os.path.join(cache_folder, metadata_folder), metadata_folder,
recursive=True)
# Create pgklist.json to add it to the tgz
serialized = json.dumps(package_list.serialize(), indent=2)
info = tarfile.TarInfo(name="pkglist.json")
data = serialized.encode('utf-8')
info.size = len(data)
tgz.addfile(tarinfo=info, fileobj=BytesIO(data))
pkglist_path = os.path.join(cache_folder, "pkglist.json")
with open(pkglist_path, "w") as file_handler:
file_handler.write(serialized)
tgz.add(pkglist_path, "pkglist.json", recursive=False)
tgz.close()

def restore(self, path):
Expand All @@ -182,14 +182,10 @@ def restore(self, path):

cache = PkgCache(self.conan_api.cache_folder, self.conan_api.config.global_conf)
cache_folder = cache.store # Note, this is not the home, but the actual package cache

with open(path, mode='rb') as file_handler:
the_tar = tarfile.open(fileobj=file_handler)
fileobj = the_tar.extractfile("pkglist.json")
pkglist = fileobj.read()
the_tar.extraction_filter = (lambda member, _: member) # fully_trusted (Py 3.14)
the_tar.extractall(path=cache_folder)
the_tar.close()
tar_extract(path, cache_folder, cache_folder=self.conan_api.cache_folder)
# Retrieve the package list from the already extracted archive
with open(os.path.join(cache_folder, "pkglist.json")) as file_handler:
pkglist = file_handler.read()

# After unzipping the files, we need to update the DB that references these files
out = ConanOutput()
Expand Down
11 changes: 8 additions & 3 deletions conan/internal/api/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from conan.errors import ConanException
from conan.internal.paths import (CONAN_MANIFEST, CONANFILE, EXPORT_SOURCES_TGZ_NAME,
EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME, CONANINFO)
from conan.internal.util.compression import load_compress_plugin
from conan.internal.util.files import (clean_dirty, is_dirty, gather_files,
set_dirty_context_manager, mkdir, human_size)

Expand Down Expand Up @@ -157,7 +158,7 @@ def add_tgz(tgz_name, tgz_files):
elif tgz_files:
compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int)
tgz = compress_files(tgz_files, tgz_name, download_export_folder,
compresslevel=compresslevel, ref=ref)
compresslevel=compresslevel, ref=ref, cache_folder=self._app.cache_folder)
result[tgz_name] = tgz

add_tgz(EXPORT_TGZ_NAME, files)
Expand Down Expand Up @@ -204,7 +205,7 @@ def _compress_package_files(self, layout, pref):
tgz_files = {f: path for f, path in files.items()}
compresslevel = self._global_conf.get("core.gzip:compresslevel", check_type=int)
tgz_path = compress_files(tgz_files, PACKAGE_TGZ_NAME, download_pkg_folder,
compresslevel=compresslevel, ref=pref)
compresslevel=compresslevel, ref=pref, cache_folder=self._app.cache_folder)
assert tgz_path == package_tgz
assert os.path.exists(package_tgz)

Expand Down Expand Up @@ -271,7 +272,11 @@ def gzopen_without_timestamps(name, fileobj, compresslevel=None):
return t


def compress_files(files, name, dest_dir, compresslevel=None, ref=None):
def compress_files(files, name, dest_dir, compresslevel=None, ref=None, cache_folder=None):
compress_plugin = load_compress_plugin(cache_folder)
if compress_plugin:
return compress_plugin.tar_compress(files, name, dest_dir, compresslevel, ref)

t1 = time.time()
# FIXME, better write to disk sequentially and not keep tgz contents in memory
tgz_path = os.path.join(dest_dir, name)
Expand Down
4 changes: 4 additions & 0 deletions conan/internal/cache/home_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,7 @@ def settings_path_user(self):
@property
def config_version_path(self):
return os.path.join(self._home, "config_version.json")

@property
def compression_plugin_path(self):
    """Location of the optional ``compression.py`` plugin inside the home folder."""
    plugins_folder = os.path.join(self._home, _EXTENSIONS_FOLDER, _PLUGINS)
    return os.path.join(plugins_folder, "compression.py")
17 changes: 11 additions & 6 deletions conan/internal/rest/remote_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from conan.internal.model.info import load_binary_info
from conan.api.model import PkgReference
from conan.api.model import RecipeReference
from conan.internal.util.compression import load_compress_plugin
from conan.internal.util.files import rmdir, human_size
from conan.internal.paths import EXPORT_SOURCES_TGZ_NAME, EXPORT_TGZ_NAME, PACKAGE_TGZ_NAME
from conan.internal.util.files import mkdir, tar_extract
Expand Down Expand Up @@ -81,7 +82,7 @@ def get_recipe(self, ref, remote, metadata=None):
tgz_file = zipped_files.pop(EXPORT_TGZ_NAME, None)

if tgz_file:
uncompress_file(tgz_file, export_folder, scope=str(ref))
uncompress_file(tgz_file, export_folder, scope=str(ref), cache_folder=self._home_folder)
mkdir(export_folder)
for file_name, file_path in zipped_files.items(): # copy CONANFILE
shutil.move(file_path, os.path.join(export_folder, file_name))
Expand Down Expand Up @@ -123,7 +124,7 @@ def get_recipe_sources(self, ref, layout, remote):

self._signer.verify(ref, download_folder, files=zipped_files)
tgz_file = zipped_files[EXPORT_SOURCES_TGZ_NAME]
uncompress_file(tgz_file, export_sources_folder, scope=str(ref))
uncompress_file(tgz_file, export_sources_folder, scope=str(ref), cache_folder=self._home_folder)

def get_package(self, pref, remote, metadata=None):
output = ConanOutput(scope=str(pref.ref))
Expand Down Expand Up @@ -171,7 +172,7 @@ def _get_package(self, layout, pref, remote, scoped_output, metadata):

tgz_file = zipped_files.pop(PACKAGE_TGZ_NAME, None)
package_folder = layout.package()
uncompress_file(tgz_file, package_folder, scope=str(pref.ref))
uncompress_file(tgz_file, package_folder, scope=str(pref.ref), cache_folder=self._home_folder)
mkdir(package_folder) # Just in case it doesn't exist, because uncompress did nothing
for file_name, file_path in zipped_files.items(): # copy CONANINFO and CONANMANIFEST
shutil.move(file_path, os.path.join(package_folder, file_name))
Expand Down Expand Up @@ -281,15 +282,19 @@ def _call_remote(self, remote, method, *args, **kwargs):
raise ConanException(exc, remote=remote)


def uncompress_file(src_path, dest_folder, scope=None):
def uncompress_file(src_path, dest_folder, scope=None, cache_folder=None):
try:
filesize = os.path.getsize(src_path)
big_file = filesize > 10000000 # 10 MB
if big_file:
hs = human_size(filesize)
ConanOutput(scope=scope).info(f"Decompressing {hs} {os.path.basename(src_path)}")
with open(src_path, mode='rb') as file_handler:
tar_extract(file_handler, dest_folder)

compression_plugin = load_compress_plugin(cache_folder)
if compression_plugin:
compression_plugin.tar_extract(src_path, dest_folder)
else:
tar_extract(src_path, dest_folder)
except Exception as e:
error_msg = "Error while extracting downloaded file '%s' to %s\n%s\n"\
% (src_path, dest_folder, str(e))
Expand Down
91 changes: 91 additions & 0 deletions conan/internal/util/compression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from conan.internal.cache.home_paths import HomePaths
from conan.internal.loader import load_python_file
from conan.internal.errors import ConanException

import os
import gzip
import time
import tarfile
from conan.api.output import ConanOutput
from conan.internal.util.files import set_dirty_context_manager

def tar_extract(src_path, destination_dir, cache_folder=None):
    """Extract the tar archive stored at ``src_path`` into ``destination_dir``.

    When a ``compression.py`` plugin is available for ``cache_folder``, the extraction is
    fully delegated to the plugin's ``tar_extract(src_path, destination_dir)``. Otherwise
    the archive is opened with ``tarfile`` and every member is extracted.

    :param src_path: path of the archive file to extract
    :param destination_dir: folder that receives the extracted contents
    :param cache_folder: folder used to locate the plugin (no plugin lookup if falsy)
    :return: the plugin's return value when delegating, ``None`` otherwise
    """
    compress_plugin = load_compress_plugin(cache_folder)
    if compress_plugin:
        return compress_plugin.tar_extract(src_path, destination_dir)

    # Both handles are managed by ``with`` so they are released even if extraction fails
    with open(src_path, mode='rb') as file_handler, \
            tarfile.open(fileobj=file_handler) as the_tar:
        # NOTE: The errorlevel=2 has been removed because it was failing in Win10, it didn't
        # allow to "could not change modification time", with time=0
        # the_tar.errorlevel = 2  # raise exception if any error
        the_tar.extraction_filter = (lambda member, path: member)  # fully_trusted, avoid Py3.14 break
        the_tar.extractall(path=destination_dir)


def tar_compress(files, name, dest_dir, compresslevel=None, ref=None, cache_folder=None):
    """Pack ``files`` into the archive ``dest_dir/name`` and return the archive path.

    When a ``compression.py`` plugin is available for ``cache_folder``, the work is fully
    delegated to the plugin's ``tar_compress(files, name, dest_dir, compresslevel, ref)``.
    Otherwise a gzip-compressed tar without timestamps is written.

    :param files: dict mapping the name inside the archive to the absolute path on disk
    :param name: file name of the archive to create
    :param dest_dir: folder in which the archive is created
    :param compresslevel: gzip compression level forwarded to the gzip writer
    :param ref: reference used only as the scope of the output messages
    :param cache_folder: folder used to locate the plugin (no plugin lookup if falsy)
    :return: path of the created archive (or the plugin's return value when delegating)
    """
    compress_plugin = load_compress_plugin(cache_folder)
    if compress_plugin:
        return compress_plugin.tar_compress(files, name, dest_dir, compresslevel, ref)

    t1 = time.time()
    # FIXME, better write to disk sequentially and not keep tgz contents in memory
    tgz_path = os.path.join(dest_dir, name)
    ConanOutput(scope=str(ref)).info(f"Compressing {name}")
    with set_dirty_context_manager(tgz_path), open(tgz_path, "wb") as tgz_handle:
        # The tar writer is a context manager: it is closed (together with its gzip stream)
        # both on success and when adding a file fails
        with gzopen_without_timestamps(name, fileobj=tgz_handle,
                                       compresslevel=compresslevel) as tgz:
            # Sorted so the archive contents do not depend on dict ordering
            for filename, abs_path in sorted(files.items()):
                # recursive is False in case it is a symlink to a folder
                tgz.add(abs_path, filename, recursive=False)

    duration = time.time() - t1
    ConanOutput().debug(f"{name} compressed in {duration} time")
    return tgz_path

def tar_compressor(name, fileobj, compresslevel, cache_path=None):
    """Return an open archive writer exposing ``add(...)`` and ``close()``.

    When a ``compression.py`` plugin is available for ``cache_path``, an instance of the
    plugin's ``TarCompressor(name, fileobj, compresslevel)`` is returned. Otherwise the
    built-in gzip tar writer without timestamps is used.

    :param name: archive name
    :param fileobj: binary file object, opened for writing, that receives the archive
    :param compresslevel: compression level forwarded to the writer
    :param cache_path: folder used to locate the plugin (no plugin lookup if falsy)
    :raises ConanException: if the plugin exists but does not define ``TarCompressor``
    """
    compress_plugin = load_compress_plugin(cache_path)
    if not compress_plugin:
        return gzopen_without_timestamps(name, fileobj, compresslevel)

    # The plugin loader only validates the two functions, so ``TarCompressor`` may be
    # missing: fail with an explicit message instead of a bare AttributeError
    compressor_cls = getattr(compress_plugin, "TarCompressor", None)
    if compressor_cls is None:
        raise ConanException("The 'compression.py' plugin does not contain the required "
                             "`TarCompressor` class")
    return compressor_cls(name, fileobj, compresslevel)


def gzopen_without_timestamps(name, fileobj, compresslevel=None):
    """Open a gzip-compressed tar writer whose gzip header carries no timestamp.

    ``tarfile.open`` does not forward arguments to the ``GzipFile`` constructor, so the gzip
    stream is created here with ``mtime=0`` (not ``None``); otherwise ``time.time()`` would
    be stored in the gzip header and the checksum of the archive would change on every run.

    :param name: archive name, passed to both the gzip stream and the tar writer
    :param fileobj: binary file object, opened for writing, that receives the archive
    :param compresslevel: gzip compression level; ``None`` means 9 (the gzip default)
    :return: a ``tarfile.TarFile`` opened for writing; closing it also closes the gzip
             stream, but not ``fileobj``
    """
    level = 9 if compresslevel is None else compresslevel  # default Gzip = 9
    gzip_stream = gzip.GzipFile(name, "w", level, fileobj, mtime=0)
    try:
        # Format is forced because in Python3.8, it changed and it generates different
        # tarfiles with different checksums, which break hashes of tgzs
        # PAX_FORMAT is the default for Py38, lets make it explicit for older Python versions
        tar = tarfile.TarFile.taropen(name, "w", gzip_stream, format=tarfile.PAX_FORMAT)
    except Exception:
        # Do not leak the gzip stream if the tar writer cannot be created
        gzip_stream.close()
        raise
    # The gzip stream was created here, so the tar writer must close it on ``close()``
    tar._extfileobj = False
    return tar


def load_compress_plugin(cache_folder):
    """Load the optional ``compression.py`` plugin located through ``cache_folder``.

    :param cache_folder: folder handed to ``HomePaths`` to resolve the plugin path
    :return: the loaded plugin module, or ``None`` when ``cache_folder`` is falsy or the
             plugin file does not exist
    :raises ConanException: if the plugin lacks ``tar_extract`` or ``tar_compress``
    """
    if not cache_folder:
        return None
    plugin_file = HomePaths(cache_folder).compression_plugin_path
    if not os.path.exists(plugin_file):
        return None

    plugin_module, _ = load_python_file(plugin_file)
    required_names = ("tar_extract", "tar_compress")
    if not all(hasattr(plugin_module, attr) for attr in required_names):
        raise ConanException("The 'compression.py' plugin does not contain required "
                             "`tar_extract` or `tar_compress` functions")
    return plugin_module


"""
Plugin `compression.py` interface:

def tar_extract(src_path, destination_dir) -> None
def tar_compress(files, name, dest_dir, compresslevel=None, ref=None) -> str
class TarCompressor(name, fileobj, compresslevel)
    def add(self, abs_path, filename, recursive=True) -> None
    def close(self) -> None
"""
Loading