Skip to content

Commit

Permalink
Add HF_DEBUG environment variable for debugging/reproducibility (#2819
Browse files Browse the repository at this point in the history
)

* Add HF_DEBUG environment variable for debugging/reproducibility

* make quali
  • Loading branch information
Wauplin authored Jan 31, 2025
1 parent 63aada5 commit a733933
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug-report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ body:
id: logs
attributes:
label: Logs
description: "Please include the Python logs if you can."
description: "Please include the Python logs if you can. If possible, run the code with `HF_DEBUG=1` set as an environment variable."
render: shell
- type: textarea
id: system-info
Expand Down
14 changes: 9 additions & 5 deletions docs/source/en/package_reference/environment_variables.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ The following environment variables expect a boolean value. The variable will be
as `True` if its value is one of `{"1", "ON", "YES", "TRUE"}` (case-insensitive). Any other value
(or undefined) will be considered as `False`.

### HF_DEBUG

If set, the log level for the `huggingface_hub` logger is set to DEBUG. Additionally, all requests made by HF libraries will be logged as equivalent cURL commands for easier debugging and reproducibility.

### HF_HUB_OFFLINE

If set, no HTTP calls will be made to the Hugging Face Hub. If you try to download files, only the cached files will be accessed. If no cache file is detected, an error is raised. This is useful in case your network is slow and you don't care about having the latest version of a file.
Expand Down Expand Up @@ -159,11 +163,11 @@ Please note that using `hf_transfer` comes with certain limitations. Since it is
In order to standardize all environment variables within the Hugging Face ecosystem, some variables have been marked as deprecated. Although they remain functional, they no longer take precedence over their replacements. The following table outlines the deprecated variables and their corresponding alternatives:


| Deprecated Variable | Replacement |
| --- | --- |
| `HUGGINGFACE_HUB_CACHE` | `HF_HUB_CACHE` |
| `HUGGINGFACE_ASSETS_CACHE` | `HF_ASSETS_CACHE` |
| `HUGGING_FACE_HUB_TOKEN` | `HF_TOKEN` |
| Deprecated Variable | Replacement |
| --------------------------- | ------------------ |
| `HUGGINGFACE_HUB_CACHE` | `HF_HUB_CACHE` |
| `HUGGINGFACE_ASSETS_CACHE` | `HF_ASSETS_CACHE` |
| `HUGGING_FACE_HUB_TOKEN` | `HF_TOKEN` |
| `HUGGINGFACE_HUB_VERBOSITY` | `HF_HUB_VERBOSITY` |

## From external tools
Expand Down
4 changes: 4 additions & 0 deletions src/huggingface_hub/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ def _as_int(value: Optional[str]) -> Optional[int]:

HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))

# If set, log level will be set to DEBUG and all requests made to the Hub will be logged
# as curl commands for reproducibility.
HF_DEBUG = _is_true(os.environ.get("HF_DEBUG"))

# Opt-out from telemetry requests
HF_HUB_DISABLE_TELEMETRY = (
_is_true(os.environ.get("HF_HUB_DISABLE_TELEMETRY")) # HF-specific env variable
Expand Down
45 changes: 43 additions & 2 deletions src/huggingface_hub/utils/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import uuid
from functools import lru_cache
from http import HTTPStatus
from typing import Callable, Optional, Tuple, Type, Union
from shlex import quote
from typing import Any, Callable, List, Optional, Tuple, Type, Union

import requests
from requests import HTTPError, Response
Expand Down Expand Up @@ -82,13 +83,15 @@ def add_headers(self, request, **kwargs):
request.headers[X_AMZN_TRACE_ID] = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())

# Add debug log
has_token = str(request.headers.get("authorization", "")).startswith("Bearer hf_")
has_token = len(str(request.headers.get("authorization", ""))) > 0
logger.debug(
f"Request {request.headers[X_AMZN_TRACE_ID]}: {request.method} {request.url} (authenticated: {has_token})"
)

def send(self, request: PreparedRequest, *args, **kwargs) -> Response:
"""Catch any RequestException to append request id to the error message for debugging."""
if constants.HF_DEBUG:
logger.debug(f"Send: {_curlify(request)}")
try:
return super().send(request, *args, **kwargs)
except requests.RequestException as e:
Expand Down Expand Up @@ -549,3 +552,41 @@ def _format(error_type: Type[HfHubHTTPError], custom_message: str, response: Res

# Return
return error_type(final_error_message.strip(), response=response, server_message=server_message or None)


def _curlify(request: requests.PreparedRequest) -> str:
"""Convert a `requests.PreparedRequest` into a curl command (str).
Used for debug purposes only.
Implementation vendored from https://github.com/ofw/curlify/blob/master/curlify.py.
MIT License Copyright (c) 2016 Egor.
"""
parts: List[Tuple[Any, Any]] = [
("curl", None),
("-X", request.method),
]

for k, v in sorted(request.headers.items()):
if k.lower() == "authorization":
v = "<TOKEN>" # Hide authorization header, no matter its value (can be Bearer, Key, etc.)
parts += [("-H", "{0}: {1}".format(k, v))]

if request.body:
body = request.body
if isinstance(body, bytes):
body = body.decode("utf-8")
if len(body) > 1000:
body = body[:1000] + " ... [truncated]"
parts += [("-d", body)]

parts += [(None, request.url)]

flat_parts = []
for k, v in parts:
if k:
flat_parts.append(quote(k))
if v:
flat_parts.append(quote(v))

return " ".join(flat_parts)
6 changes: 6 additions & 0 deletions src/huggingface_hub/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
)
from typing import Optional

from .. import constants


log_levels = {
"debug": logging.DEBUG,
Expand Down Expand Up @@ -180,3 +182,7 @@ def enable_propagation() -> None:


_configure_library_root_logger()

if constants.HF_DEBUG:
# If `HF_DEBUG` environment variable is set, set the verbosity of `huggingface_hub` logger to `DEBUG`.
set_verbosity_debug()

0 comments on commit a733933

Please sign in to comment.