Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert SCP-style URLs (no explicit scheme) into proper SSH URLs #1061

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 128 additions & 38 deletions internal/exec/go_getter_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ import (
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"time"

l "github.com/charmbracelet/log"
"github.com/google/uuid"
"github.com/hashicorp/go-getter"

Expand Down Expand Up @@ -56,77 +58,165 @@ func IsValidScheme(scheme string) bool {
"git": true,
"ssh": true,
"git::https": true,
"git::ssh": true,
}
return validSchemes[scheme]
}

// CustomGitHubDetector intercepts GitHub URLs and transforms them
// into something like git::https://<token>@github.com/... so we can
// do a git-based clone with a token.
type CustomGitHubDetector struct {
// CustomGitDetector intercepts Git URLs (for GitHub, Bitbucket, GitLab, etc.)
// and transforms them into a proper URL for cloning, optionally injecting tokens.
type CustomGitDetector struct {
AtmosConfig schema.AtmosConfiguration
source string
}

// Detect implements the getter.Detector interface for go-getter v1.
func (d *CustomGitHubDetector) Detect(src, _ string) (string, bool, error) {
func (d *CustomGitDetector) Detect(src, _ string) (string, bool, error) {
l.Debug("CustomGitDetector.Detect", "src", src, "source", d.source)

if len(src) == 0 {
return "", false, nil
}

// We need this block because many SCP-style URLs aren’t valid according to Go’s URL parser.
// SCP-style URLs omit an explicit scheme (like "ssh://" or "https://") and use a colon
// to separate the host from the path. Go’s URL parser expects a scheme, so without one,
// it fails to parse these URLs correctly.
// Below, we check if the URL doesn’t contain a scheme. If so, we attempt to detect an SCP-style URL:
// e.g. "git@github.com:cloudposse/terraform-null-label.git?ref=..."
// If the URL matches this pattern, we rewrite it to a proper SSH URL.
// Otherwise, we default to prepending "https://".
if !strings.Contains(src, "://") {
src = "https://" + src
// Check for SCP-style SSH URL (e.g. "git@github.com:cloudposse/terraform-null-label.git?ref=...")
// This regex supports any host with a dot (e.g. github.com, bitbucket.org, gitlab.com)
scpPattern := regexp.MustCompile(`^(([\w.-]+)@)?([\w.-]+\.[\w.-]+):([\w./-]+)(\.git)?(.*)$`)
if scpPattern.MatchString(src) {
matches := scpPattern.FindStringSubmatch(src)
// Build proper SSH URL: "ssh://[username@]host/repoPath[.git][additional]"
newSrc := "ssh://"
if matches[1] != "" {
newSrc += matches[1] // includes username and '@'
}
newSrc += matches[3] + "/" + matches[4]
if matches[5] != "" {
newSrc += matches[5]
}
if matches[6] != "" {
newSrc += matches[6]
}
l.Debug("Rewriting SCP-style SSH URL", "old_url", src, "new_url", newSrc)
src = newSrc
} else {
src = "https://" + src
l.Debug("Defaulting to https scheme", "url", src)
}
}

l.Debug(fmt.Sprintf("url = %q:", src))

parsedURL, err := url.Parse(src)
if err != nil {
u.LogDebug(fmt.Sprintf("Failed to parse URL %q: %v\n", src, err))
l.Debug("Failed to parse URL", "url", src, "error", err)
return "", false, fmt.Errorf("failed to parse URL %q: %w", src, err)
}

if strings.ToLower(parsedURL.Host) != "github.com" {
u.LogDebug(fmt.Sprintf("Host is %q, not 'github.com', skipping token injection\n", parsedURL.Host))
return "", false, nil
// Normalize Windows path separators and URL-encoded backslashes to forward slashes.
unescapedPath, err := url.PathUnescape(parsedURL.Path)
if err == nil {
parsedURL.Path = filepath.ToSlash(unescapedPath)
} else {
parsedURL.Path = filepath.ToSlash(parsedURL.Path)
}

parts := strings.SplitN(parsedURL.Path, "/", 4)
if len(parts) < 3 {
u.LogDebug(fmt.Sprintf("URL path %q doesn't look like /owner/repo\n", parsedURL.Path))
return "", false, fmt.Errorf("invalid GitHub URL %q", parsedURL.Path)
// If the URL uses the SSH scheme, check for an active SSH agent.
// Unlike HTTPS where public repos can be accessed without authentication,
// SSH requires authentication. If no SSH agent is detected, log a debug message.
if parsedURL.Scheme == "ssh" && os.Getenv("SSH_AUTH_SOCK") == "" {
l.Debug("No SSH authentication method found")
}

atmosGitHubToken := os.Getenv("ATMOS_GITHUB_TOKEN")
gitHubToken := os.Getenv("GITHUB_TOKEN")

var usedToken string
var tokenSource string
// Adjust host check to support GitHub, Bitbucket, GitLab, etc.
host := strings.ToLower(parsedURL.Host)
if host != "github.com" && host != "bitbucket.org" && host != "gitlab.com" {
l.Debug("Skipping token injection for a non-supported host", "host", parsedURL.Host)
l.Debug("Supported hosts", "supported_hosts", "github.com, bitbucket.org, gitlab.com")
}

// 1. If ATMOS_GITHUB_TOKEN is set, always use that
if atmosGitHubToken != "" {
usedToken = atmosGitHubToken
// 3 types of tokens are supported for now: GitHub, Bitbucket and GitLab
var token, tokenSource string
switch host {
case "github.com":
tokenSource = "ATMOS_GITHUB_TOKEN"
u.LogDebug("ATMOS_GITHUB_TOKEN is set\n")
} else {
// 2. Otherwise, only inject GITHUB_TOKEN if cfg.Settings.InjectGithubToken == true
if d.AtmosConfig.Settings.InjectGithubToken && gitHubToken != "" {
usedToken = gitHubToken
token = os.Getenv(tokenSource)
if token == "" && d.AtmosConfig.Settings.InjectGithubToken {
tokenSource = "GITHUB_TOKEN"
u.LogTrace("InjectGithubToken=true and GITHUB_TOKEN is set, using it\n")
} else {
u.LogTrace("No ATMOS_GITHUB_TOKEN or GITHUB_TOKEN found\n")
token = os.Getenv(tokenSource)
}
case "bitbucket.org":
tokenSource = "ATMOS_BITBUCKET_TOKEN"
token = os.Getenv(tokenSource)
if token == "" {
tokenSource = "BITBUCKET_TOKEN"
token = os.Getenv(tokenSource)
}
case "gitlab.com":
tokenSource = "ATMOS_GITLAB_TOKEN"
token = os.Getenv(tokenSource)
if token == "" {
tokenSource = "GITLAB_TOKEN"
token = os.Getenv(tokenSource)
}
}

if usedToken != "" {
user := parsedURL.User.Username()
pass, _ := parsedURL.User.Password()
if user == "" && pass == "" {
u.LogDebug(fmt.Sprintf("Injecting token from %s for %s\n", tokenSource, src))
parsedURL.User = url.UserPassword("x-access-token", usedToken)
// Note that Bitbucket uses 2 tokens (username and app password) for authentication.
if token != "" {
// Inject token only if no credentials are already provided.
if parsedURL.User == nil || parsedURL.User.Username() == "" {
l.Debug("Injecting token", "token_source", tokenSource, "url", src)
var defaultUsername string
switch host {
case "github.com":
defaultUsername = "x-access-token"
case "gitlab.com":
defaultUsername = "oauth2"
case "bitbucket.org":
defaultUsername = os.Getenv("BITBUCKET_USERNAME")
if defaultUsername == "" {
defaultUsername = "x-token-auth"
}
l.Debug("Using Bitbucket username", "username", defaultUsername)
default:
defaultUsername = "x-access-token"
}
parsedURL.User = url.UserPassword(defaultUsername, token)
} else {
u.LogDebug("Credentials found, skipping token injection\n")
l.Debug("Skipping token injection", "reason", "credentials already provided")
}
}

// Normalize d.source for Windows path separators.
normalizedSource := filepath.ToSlash(d.source)
// If d.source is provided (non‑empty), use it for subdir checking;
// otherwise, skip appending '//.' (so the user-defined subdir isn’t mistakenly processed).
if normalizedSource != "" && !strings.Contains(normalizedSource, "//") {
parts := strings.SplitN(parsedURL.Path, "/", 4)
if strings.HasSuffix(parsedURL.Path, ".git") || len(parts) == 3 {
l.Debug("Detected top-level repo with no subdir: appending '//.'", "url", src)
parsedURL.Path = parsedURL.Path + "//."
}
}

// Set "depth=1" for a shallow clone if not specified.
// In Go-Getter, "depth" controls how many revisions are cloned:
// - `depth=1` fetches only the latest commit (faster, less bandwidth).
// - `depth=` (empty) performs a full clone (default Git behavior).
// - `depth=N` clones the last N revisions.
q := parsedURL.Query()
if _, exists := q["depth"]; !exists {
q.Set("depth", "1")
}
parsedURL.RawQuery = q.Encode()

finalURL := "git::" + parsedURL.String()

return finalURL, true, nil
Expand All @@ -137,7 +227,7 @@ func (d *CustomGitHubDetector) Detect(src, _ string) (string, bool, error) {
func RegisterCustomDetectors(atmosConfig schema.AtmosConfiguration) {
getter.Detectors = append(
[]getter.Detector{
&CustomGitHubDetector{AtmosConfig: atmosConfig},
&CustomGitDetector{AtmosConfig: atmosConfig},
},
getter.Detectors...,
)
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Empty file.
18 changes: 18 additions & 0 deletions tests/test-cases/demo-vendoring.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,21 @@ tests:
stderr:
- 'No TTY detected\. Falling back to basic output\.'
exit_code: 0

- name: atmos vendor pull ssh
enabled: false
snapshot: true
tty: false
description: "Dry-run vendoring with SSH style URL; no SSH key provided, so tokens are not used"
workdir: "fixtures/scenarios/vendor-pulls-ssh"
command: "atmos"
args:
- "vendor"
- "pull"
- "--logs-level=Debug"
- "--dry-run"
expect:
diff: []
stderr:
- "No SSH authentication method found"
exit_code: 0
Loading