Skip to content

Commit 7998a2b

Browse files
committed
do not skip workflow on cuda, fix "no space left on device"
1 parent 2f8df5b commit 7998a2b

File tree

2 files changed

+12
-6
lines changed

2 files changed

+12
-6
lines changed

.github/workflows/build.yaml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ jobs:
176176
concurrency:
177177
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
178178
cancel-in-progress: true
179-
if: needs.build-and-push.outputs.runs_on == 'amd-gpu-tgi'
179+
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
180180
container:
181181
image: ${{ needs.build-and-push.outputs.docker_image }}
182182
options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
@@ -193,7 +193,7 @@ jobs:
193193
pwd
194194
echo "ls:"
195195
ls
196-
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
196+
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }} --cache-dir /data
197197
fi
198198
199199
integration_tests:
@@ -239,6 +239,10 @@ jobs:
239239
echo "SYSTEM:"
240240
echo $SYSTEM
241241
242+
export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
243+
echo "DOCKER_VOLUME:"
244+
echo $DOCKER_VOLUME
245+
242246
pytest -s -vvvvv integration-tests
243247
244248
# - name: Tailscale Wait

integration-tests/clean_cache_and_download.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
}
3636

3737

38-
def cleanup_cache(token: str):
38+
def cleanup_cache(token: str, cache_dir: str):
3939
# Retrieve the size per model for all models used in the CI.
4040
size_per_model = {}
4141
extension_per_model = {}
@@ -74,7 +74,7 @@ def cleanup_cache(token: str):
7474
total_required_size = sum(size_per_model.values())
7575
print(f"Total required disk: {total_required_size:.2f} GB")
7676

77-
cached_dir = huggingface_hub.scan_cache_dir()
77+
cached_dir = huggingface_hub.scan_cache_dir(cache_dir)
7878

7979
cache_size_per_model = {}
8080
cached_required_size_per_model = {}
@@ -121,7 +121,7 @@ def cleanup_cache(token: str):
121121

122122
print("Removing", largest_model_id)
123123
for sha in cached_shas_per_model[largest_model_id]:
124-
huggingface_hub.scan_cache_dir().delete_revisions(sha).execute()
124+
huggingface_hub.scan_cache_dir(cache_dir).delete_revisions(sha).execute()
125125

126126
del cache_size_per_model[largest_model_id]
127127

@@ -135,10 +135,11 @@ def cleanup_cache(token: str):
135135
parser.add_argument(
136136
"--token", help="Hugging Face Hub token.", required=True, type=str
137137
)
138+
parser.add_argument("--cache-dir", help="Hub cache path.", required=True, type=str)
138139
args = parser.parse_args()
139140

140141
start = time.time()
141-
extension_per_model = cleanup_cache(args.token)
142+
extension_per_model = cleanup_cache(args.token, args.cache_dir)
142143
end = time.time()
143144

144145
print(f"Cache cleanup done in {end - start:.2f} s")
@@ -153,6 +154,7 @@ def cleanup_cache(token: str):
153154
revision=revision,
154155
token=args.token,
155156
allow_patterns=f"*{extension_per_model[model_id]}",
157+
cache_dir=args.cache_dir,
156158
)
157159
end = time.time()
158160

0 commit comments

Comments
 (0)