diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 35efbfc923..d41710ba61 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -24,7 +24,7 @@ body: id: logs attributes: label: Logs - description: "Please include the Python logs if you can." + description: "Please include the Python logs if you can. If possible, run the code with `HF_DEBUG=1` as an environment variable." render: shell - type: textarea id: system-info diff --git a/docs/source/en/package_reference/environment_variables.md b/docs/source/en/package_reference/environment_variables.md index 2930a462fa..4ab3cc3cd6 100644 --- a/docs/source/en/package_reference/environment_variables.md +++ b/docs/source/en/package_reference/environment_variables.md @@ -83,6 +83,10 @@ The following environment variables expect a boolean value. The variable will be as `True` if its value is one of `{"1", "ON", "YES", "TRUE"}` (case-insensitive). Any other value (or undefined) will be considered as `False`. +### HF_DEBUG + +If set, the log level for the `huggingface_hub` logger is set to DEBUG. Additionally, all requests made by HF libraries will be logged as equivalent cURL commands for easier debugging and reproducibility. + ### HF_HUB_OFFLINE If set, no HTTP calls will be made to the Hugging Face Hub. If you try to download files, only the cached files will be accessed. If no cache file is detected, an error is raised This is useful in case your network is slow and you don't care about having the latest version of a file. @@ -159,11 +163,11 @@ Please note that using `hf_transfer` comes with certain limitations. Since it is In order to standardize all environment variables within the Hugging Face ecosystem, some variables have been marked as deprecated. Although they remain functional, they no longer take precedence over their replacements. 
The following table outlines the deprecated variables and their corresponding alternatives: -| Deprecated Variable | Replacement | -| --- | --- | -| `HUGGINGFACE_HUB_CACHE` | `HF_HUB_CACHE` | -| `HUGGINGFACE_ASSETS_CACHE` | `HF_ASSETS_CACHE` | -| `HUGGING_FACE_HUB_TOKEN` | `HF_TOKEN` | +| Deprecated Variable | Replacement | +| --------------------------- | ------------------ | +| `HUGGINGFACE_HUB_CACHE` | `HF_HUB_CACHE` | +| `HUGGINGFACE_ASSETS_CACHE` | `HF_ASSETS_CACHE` | +| `HUGGING_FACE_HUB_TOKEN` | `HF_TOKEN` | | `HUGGINGFACE_HUB_VERBOSITY` | `HF_HUB_VERBOSITY` | ## From external tools diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index 32658d92fe..6e98e3619c 100644 --- a/src/huggingface_hub/constants.py +++ b/src/huggingface_hub/constants.py @@ -132,6 +132,10 @@ def _as_int(value: Optional[str]) -> Optional[int]: HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE")) +# If set, log level will be set to DEBUG and all requests made to the Hub will be logged +# as curl commands for reproducibility. 
def _curlify(request: "requests.PreparedRequest") -> str:
    """Convert a `requests.PreparedRequest` into a curl command (str).

    Used for debug purposes only.

    Implementation vendored from https://github.com/ofw/curlify/blob/master/curlify.py.
    MIT License Copyright (c) 2016 Egor.

    Args:
        request (`requests.PreparedRequest`):
            The prepared request to render. Only its `method`, `headers`, `body` and `url`
            attributes are read. The request is not modified and its body is never consumed.

    Returns:
        `str`: A shell-quoted `curl` command line. The value of the `authorization` header is
        blanked, text/bytes bodies longer than 1000 characters are truncated, and bodies that
        are neither text nor bytes are replaced by a placeholder.
    """
    parts: List[Tuple[Any, Any]] = [
        ("curl", None),
        ("-X", request.method),
    ]

    for k, v in sorted(request.headers.items()):
        if k.lower() == "authorization":
            v = ""  # Hide authorization header, no matter its value (can be Bearer, Key, etc.)
        parts.append(("-H", f"{k}: {v}"))

    body = request.body
    if body:
        if isinstance(body, (bytes, bytearray)):
            # Binary payloads (e.g. file uploads) are not guaranteed to be valid UTF-8.
            # This helper only feeds a debug log, so it must never raise on them.
            body = body.decode("utf-8", errors="replace")
        elif not isinstance(body, str):
            # File objects / generators: reading them here would consume the stream
            # before the request is actually sent, so only log a placeholder.
            body = "<streamed body>"
        if len(body) > 1000:
            body = body[:1000] + " ... [truncated]"
        parts.append(("-d", body))

    parts.append((None, request.url))

    # Flatten the (flag, value) pairs, skipping empty halves, and shell-quote every token.
    flat_parts = []
    for k, v in parts:
        if k:
            flat_parts.append(quote(k))
        if v:
            flat_parts.append(quote(v))

    return " ".join(flat_parts)