-
Notifications
You must be signed in to change notification settings - Fork 56
Add zstd and xz support #2893
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Add zstd and xz support #2893
Changes from 10 commits
fc8903c
b1ad44d
31b6d0b
525a6f3
cadee34
020099a
4a4c658
7c946fd
800f646
cc009c5
fc9cca8
14660d7
a64df95
d8cb802
a0b86e5
d1a0ff6
c0b61d2
9951a38
28ffad9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -314,6 +314,18 @@ def _run_target_setup( | |
| ) | ||
| ) | ||
| target.status = target.ST_BAD | ||
|
|
||
| target.compress_cores = self._get_conf(config, t_node, | ||
| "compress-cores", default="1") | ||
| if not target.compress_cores.isdigit() or \ | ||
| int(target.compress_cores) < 0: | ||
| raise ConfigValueError( | ||
| [t_key, "compress-cores"], | ||
| target.compress_cores, | ||
| ValueError("compress-cores must be a 0 (automatic) or \ | ||
| a positive integer") | ||
| ) | ||
|
|
||
| rename_format = self._get_conf(config, t_node, "rename-format") | ||
| if rename_format: | ||
| rename_parser_str = self._get_conf(config, t_node, "rename-parser") | ||
|
|
@@ -398,7 +410,8 @@ def _run_target_update(cls, dao, app_runner, compress_manager, target): | |
| # Compress sources | ||
| if target.compress_scheme: | ||
| handler = compress_manager.get_handler(target.compress_scheme) | ||
| handler.compress_sources(target, work_dir) | ||
| compress_args = {"cores": target.compress_cores} | ||
| handler.compress_sources(target, work_dir, **compress_args) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Tried a very simple check:
mode=rose_arch
[arch]
# rose-app.conf
command-format=cp %(sources)s %(target)s
target-prefix=/home/users/tim.pillinger/cylc-src/rose-apps/arch/archive/
source-prefix=/home/users/tim.pillinger/cylc-src/rose-apps/arch/source/
[arch:world.out]
source='world.out'
[arch:gunzipme.gz]
source='gunzipme.out'
[arch:targunzipme.tar.gz]
source='targunzipme.out'
export CYLC_WORKFLOW_ID='hippo'
export CYLC_TASK_ID='task-run'
export CYLC_TASK_NAME='task-run'
export CYLC_TASK_CYCLE_POINT='task-run'
export CYLC_TASK_LOG_ROOT="${HERE}/log"
echo Running app from "${HERE}/app"
rose task-run --config="${HERE}/app"
and got an error: I think that you need to add the threads argument to |
||
| times[1] = time() # transformed time | ||
| # Run archive command | ||
| sources = [] | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -25,14 +25,20 @@ class RoseArchTarGzip: | |||||
|
|
||||||
| """Compress archive sources in tar.""" | ||||||
|
|
||||||
| SCHEMES = ["pax", "pax.gz", "tar", "tar.gz", "tgz"] | ||||||
| SCHEME_FORMATS = {"pax": tarfile.PAX_FORMAT, "pax.gz": tarfile.PAX_FORMAT} | ||||||
| SCHEMES = ["pax", "pax.gz", "pax.zst", "pax.xz", | ||||||
| "tar", "tar.gz", "tgz", "tar.zst", "tar.xz", "txz"] | ||||||
| SCHEME_FORMATS = {"pax": tarfile.PAX_FORMAT, | ||||||
| "pax.gz": tarfile.PAX_FORMAT, | ||||||
| "pax.zst": tarfile.PAX_FORMAT, | ||||||
| "pax.xz": tarfile.PAX_FORMAT} | ||||||
| GZIP_EXTS = ["pax.gz", "tar.gz", "tgz"] | ||||||
| ZSTD_EXTS = ["pax.zst", "tar.zst"] | ||||||
| XZ_EXTS = ["pax.xz", "tar.xz", "txz"] | ||||||
|
|
||||||
| def __init__(self, app_runner, *args, **kwargs): | ||||||
| self.app_runner = app_runner | ||||||
|
|
||||||
| def compress_sources(self, target, work_dir): | ||||||
| def compress_sources(self, target, work_dir, cores="1"): | ||||||
| """Create a tar archive of all files in target. | ||||||
|
|
||||||
| Use work_dir to dump results. | ||||||
|
|
@@ -70,3 +76,23 @@ def compress_sources(self, target, work_dir): | |||||
| command = "gzip -c '%s' >'%s'" % (tar_name, gz_name) | ||||||
| self.app_runner.popen.run_simple(command, shell=True) | ||||||
| self.app_runner.fs_util.delete(tar_name) | ||||||
|
|
||||||
| if target.compress_scheme in self.ZSTD_EXTS: | ||||||
|
||||||
| if target.compress_scheme in self.ZSTD_EXTS: | |
| elif target.compress_scheme in self.ZSTD_EXTS: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Due to the high likelihood of this command being run directly on Cylc servers, we will have to be careful with this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @oliver-sanders. I agree and I hope that the default being to not use this is a good trade-off between exposing useful functionality and maintaining order!
I have updated the documentation to note that this should be used with caution on shared resources.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if target.compress_scheme in self.XZ_EXTS: | |
| elif target.compress_scheme in self.XZ_EXTS: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed thanks!
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| # Copyright (C) British Crown (Met Office) & Contributors. | ||
| # This file is part of Rose, a framework for meteorological suites. | ||
| # | ||
| # Rose is free software: you can redistribute it and/or modify | ||
| # it under the terms of the GNU General Public License as published by | ||
| # the Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # Rose is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU General Public License | ||
| # along with Rose. If not, see <http://www.gnu.org/licenses/>. | ||
| # ----------------------------------------------------------------------------- | ||
| """Compress archive sources using xz.""" | ||
|
|
||
|
|
||
| import os | ||
|
|
||
|
|
||
class RoseArchXz:

    """Compress archive sources in xz."""

    SCHEMES = ["xz"]

    def __init__(self, app_runner, *args, **kwargs):
        self.app_runner = app_runner

    def compress_sources(self, target, work_dir, cores="1"):
        """xz each source in target.

        Use work_dir to dump results.

        Args:
            target: archive target. Its ``compress_scheme`` is used as the
                file extension and each item of ``target.sources`` has its
                ``path`` replaced by the path of the compressed copy.
            work_dir: directory under which compressed files are written.
            cores: thread count handed to ``xz -T``. The caller passes this
                keyword to every compress handler, so it must be accepted
                here even when left at its default.

        """
        # Local import: keeps this block self-contained.
        import shlex

        suffix = "." + target.compress_scheme
        for source in target.sources.values():
            if source.path.endswith(suffix):
                continue  # assume already done
            name_xz = source.name + suffix
            work_path_xz = os.path.join(work_dir, name_xz)
            self.app_runner.fs_util.makedirs(
                self.app_runner.fs_util.dirname(work_path_xz)
            )

            # The command goes through a shell, so quote both paths to keep
            # spaces and metacharacters in file names from being interpreted.
            command = "xz -T%s -c %s >%s" % (
                cores,
                shlex.quote(source.path),
                shlex.quote(work_path_xz),
            )
            self.app_runner.popen.run_simple(command, shell=True)
            source.path = work_path_xz
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| # Copyright (C) British Crown (Met Office) & Contributors. | ||
| # This file is part of Rose, a framework for meteorological suites. | ||
| # | ||
| # Rose is free software: you can redistribute it and/or modify | ||
| # it under the terms of the GNU General Public License as published by | ||
| # the Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # Rose is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU General Public License | ||
| # along with Rose. If not, see <http://www.gnu.org/licenses/>. | ||
| # ----------------------------------------------------------------------------- | ||
| """Compress archive sources using zstd.""" | ||
|
|
||
|
|
||
| import os | ||
|
|
||
|
|
||
class RoseArchZstd:

    """Compress archive sources in zstd."""

    SCHEMES = ["zst", "zstd"]

    def __init__(self, app_runner, *args, **kwargs):
        self.app_runner = app_runner

    def compress_sources(self, target, work_dir, cores="1"):
        """zstd each source in target.

        Use work_dir to dump results.

        Args:
            target: archive target. Its ``compress_scheme`` is used as the
                file extension and each item of ``target.sources`` has its
                ``path`` replaced by the path of the compressed copy.
            work_dir: directory under which compressed files are written.
            cores: thread count handed to ``zstd -T``.

        """
        # Local import: keeps this block self-contained.
        import shlex

        suffix = "." + target.compress_scheme
        for source in target.sources.values():
            if source.path.endswith(suffix):
                continue  # assume already done
            name_zst = source.name + suffix
            work_path_zst = os.path.join(work_dir, name_zst)
            self.app_runner.fs_util.makedirs(
                self.app_runner.fs_util.dirname(work_path_zst)
            )
            # The command goes through a shell, so quote both paths to keep
            # spaces and metacharacters in file names from being interpreted.
            # NOTE(review): the zstd manual says --rm is ignored when output
            # goes to stdout (-c) -- confirm whether the flag is wanted here.
            command = (
                f"zstd --rm -T{cores} -c {shlex.quote(source.path)}"
                f" > {shlex.quote(work_path_zst)}"
            )
            self.app_runner.popen.run_simple(command, shell=True)
            source.path = work_path_zst
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| #!/usr/bin/env bash | ||
| #------------------------------------------------------------------------------- | ||
| # Copyright (C) British Crown (Met Office) & Contributors. | ||
| # | ||
| # This file is part of Rose, a framework for meteorological suites. | ||
| # | ||
| # Rose is free software: you can redistribute it and/or modify | ||
| # it under the terms of the GNU General Public License as published by | ||
| # the Free Software Foundation, either version 3 of the License, or | ||
| # (at your option) any later version. | ||
| # | ||
| # Rose is distributed in the hope that it will be useful, | ||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| # GNU General Public License for more details. | ||
| # | ||
| # You should have received a copy of the GNU General Public License | ||
| # along with Rose. If not, see <http://www.gnu.org/licenses/>. | ||
| #------------------------------------------------------------------------------- | ||
| # Test "rose_arch" built-in application, archive with zstd compression. | ||
| #------------------------------------------------------------------------------- | ||
| . "$(dirname "$0")/test_header" | ||
|
|
||
|
|
||
| #------------------------------------------------------------------------------- | ||
| tests 4 | ||
| #------------------------------------------------------------------------------- | ||
| # Run the suite, and wait for it to complete | ||
| export CYLC_CONF_PATH= | ||
| export ROSE_CONF_PATH= | ||
|
|
||
| get_reg | ||
| run_pass "${TEST_KEY_BASE}-install" \ | ||
| cylc install \ | ||
| "${TEST_SOURCE_DIR}/${TEST_KEY_BASE}" \ | ||
| --workflow-name="${FLOW}" \ | ||
| --no-run-name | ||
| run_pass "${TEST_KEY_BASE}-play" \ | ||
| cylc play \ | ||
| "${FLOW}" \ | ||
| --abort-if-any-task-fails \ | ||
| --host=localhost \ | ||
| --no-detach \ | ||
| --debug | ||
| #------------------------------------------------------------------------------- | ||
| TEST_KEY="${TEST_KEY_BASE}-job.status" | ||
| file_grep "${TEST_KEY}-archive-01" \ | ||
| 'CYLC_JOB_EXIT=SUCCEEDED' \ | ||
| "${FLOW_RUN_DIR}/log/job/1/archive/01/job.status" | ||
| TEST_KEY="${TEST_KEY_BASE}-find" | ||
| (cd "${FLOW_RUN_DIR}/share/backup" && find . -type f) | sort >"${TEST_KEY}.out" | ||
| file_cmp "${TEST_KEY}.out" "${TEST_KEY}.out" <<'__FIND__' | ||
| ./archive.d/2016.txt.zst | ||
| ./archive.d/whatever.tar.zst | ||
| __FIND__ | ||
|
|
||
| exit 0 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| mode=rose_arch | ||
|
|
||
| [file:$ROSE_SUITE_DIR/share/backup/archive.d/] | ||
| mode=mkdir | ||
|
|
||
| [arch] | ||
| command-format=cp -pr %(sources)s %(target)s | ||
| target-prefix=$ROSE_SUITE_DIR/share/backup/ | ||
|
|
||
| [arch:archive.d/2016.txt.zst] | ||
| source-prefix=work/1/$ROSE_TASK_NAME/ | ||
| source=2016.txt.zst | ||
|
|
||
| [arch:archive.d/whatever.tar.zst] | ||
| source-prefix=work/1/$ROSE_TASK_NAME/ | ||
| source=whatever.tar.zst |
Uh oh!
There was an error while loading. Please reload this page.