Skip to content

repo: try to support opening uninitialized/broken repositories #10736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 13 additions & 23 deletions dvc/repo/open_repo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import copy
import os
import tempfile
import threading
Expand Down Expand Up @@ -51,7 +50,7 @@ def open_repo(url, *args, **kwargs):
if os.path.exists(url):
url = os.path.abspath(url)
try:
config = _get_remote_config(url, *args, **kwargs)
config = _get_remote_config(url)
config.update(kwargs.get("config") or {})
kwargs["config"] = config
return Repo(url, *args, **kwargs)
Expand Down Expand Up @@ -98,24 +97,9 @@ def clean_repos():
_remove(path)


def _get_remote_config(url, *args, **kwargs):
def _get_remote_config(url):
try:
# Deepcopy to prevent modifying the original `kwargs['config']`
config = copy.deepcopy(kwargs.get("config"))

# Import operations will use this function to get the remote's cache. However,
# while the `url` sent will point to the external repo, the cache information
# in `kwargs["config"]["cache"]["dir"]` will point to the local repo,
# see `dvc/dependency/repo.py:RepoDependency._make_fs()`
#
# This breaks this function, since we'd be instructing `Repo()` to use the wrong
# cache to begin with. We need to remove the cache info from `kwargs["config"]`
# to read the actual remote repo data.
if config:
config.pop("cache", None)

repo = Repo(url, config=config)

repo = Repo(url, uninitialized=True)
except NotDvcRepoError:
return {}

Expand All @@ -125,10 +109,16 @@ def _get_remote_config(url, *args, **kwargs):
# Fill the empty upstream entry with a new remote pointing to the
# original repo's cache location.
name = "auto-generated-upstream"
return {
"core": {"remote": name},
"remote": {name: {"url": repo.cache.local_cache_dir}},
}
try:
local_cache_dir = repo.cache.local_cache_dir
except AttributeError:
# if the `.dvc` dir is missing, we get an AttributeError
return {}
else:
return {
"core": {"remote": name},
"remote": {name: {"url": local_cache_dir}},
}

# Use original remote to make sure that we are using correct url,
# credential paths, etc if they are relative to the config location.
Expand Down
8 changes: 2 additions & 6 deletions tests/func/api/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from dvc import api
from dvc.exceptions import OutputNotFoundError, PathMissingError
from dvc.scm import CloneError, SCMError
from dvc.scm import CloneError
from dvc.testing.api_tests import TestAPI # noqa: F401
from dvc.testing.tmp_dir import make_subrepo
from dvc.utils.fs import remove
Expand Down Expand Up @@ -79,11 +79,7 @@ def test_get_url_ignore_scm(tmp_dir, dvc, cloud, scm):
# Simulate gitless environment (e.g. deployed container)
(tmp_dir / ".git").rename(tmp_dir / "gitless_environment")

# Test failure mode when trying to access with git
with pytest.raises(SCMError, match="is not a git repository"):
api.get_url("foo", repo=repo_posix)

# Test successful access by ignoring git
assert api.get_url("foo", repo=repo_posix) == expected_url
assert (
api.get_url("foo", repo=repo_posix, config={"core": {"no_scm": True}})
== expected_url
Expand Down
Loading