Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert SCP-style URLs (no explicit scheme) into proper SSH URLs #1061

Open
wants to merge 20 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 152 additions & 41 deletions internal/exec/go_getter_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@ import (
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"time"

l "github.com/charmbracelet/log"
"github.com/google/uuid"
"github.com/hashicorp/go-getter"

Expand Down Expand Up @@ -61,74 +63,183 @@ func IsValidScheme(scheme string) bool {
return validSchemes[scheme]
}

// CustomGitHubDetector intercepts GitHub URLs and transforms them
// into something like git::https://<token>@github.com/... so we can
// do a git-based clone with a token.
type CustomGitHubDetector struct {
// CustomGitDetector intercepts Git URLs (for GitHub, Bitbucket, GitLab, etc.)
// and transforms them into a proper URL for cloning, optionally injecting tokens.
type CustomGitDetector struct {
AtmosConfig schema.AtmosConfiguration
source string
}

// Detect implements the getter.Detector interface for go-getter v1.
func (d *CustomGitHubDetector) Detect(src, _ string) (string, bool, error) {
func (d *CustomGitDetector) Detect(src, _ string) (string, bool, error) {
l.Debug("CustomGitDetector.Detect called")

if len(src) == 0 {
return "", false, nil
}

// We need this block because many SCP-style URLs aren’t valid according to Go’s URL parser.
// SCP-style URLs omit an explicit scheme (like "ssh://" or "https://") and use a colon
// to separate the host from the path. Go’s URL parser expects a scheme, so without one,
// it fails to parse these URLs correctly.
// Below, we check if the URL doesn’t contain a scheme. If so, we attempt to detect an SCP-style URL:
// e.g. "git@github.com:cloudposse/terraform-null-label.git?ref=..."
// If the URL matches this pattern, we rewrite it to a proper SSH URL.
// Otherwise, we default to prepending "https://".
if !strings.Contains(src, "://") {
src = "https://" + src
// Check for SCP-style SSH URL (e.g. "git@github.com:cloudposse/terraform-null-label.git?ref=...")
// This regex supports any host with a dot (e.g. github.com, bitbucket.org, gitlab.com)
scpPattern := regexp.MustCompile(`^(([\w.-]+)@)?([\w.-]+\.[\w.-]+):([\w./-]+)(\.git)?(.*)$`)
if scpPattern.MatchString(src) {
matches := scpPattern.FindStringSubmatch(src)
// Build proper SSH URL: "ssh://[username@]host/repoPath[.git][additional]"
newSrc := "ssh://"
if matches[1] != "" {
newSrc += matches[1] // includes username and '@'
}
newSrc += matches[3] + "/" + matches[4]
if matches[5] != "" {
newSrc += matches[5]
}
if matches[6] != "" {
newSrc += matches[6]
}
maskedOld, _ := u.MaskBasicAuth(src)
maskedNew, _ := u.MaskBasicAuth(newSrc)
l.Debug("Rewriting SCP-style SSH URL", "old_url", maskedOld, "new_url", maskedNew)

src = newSrc
} else {
src = "https://" + src
maskedSrc, _ := u.MaskBasicAuth(src)
l.Debug("Defaulting to https scheme", "url", maskedSrc)

}
}

// Parse the URL to extract the host and path.
parsedURL, err := url.Parse(src)
if err != nil {
u.LogDebug(fmt.Sprintf("Failed to parse URL %q: %v\n", src, err))
return "", false, fmt.Errorf("failed to parse URL %q: %w", src, err)
maskedSrc, _ := u.MaskBasicAuth(src)
l.Debug("Failed to parse URL", "url", maskedSrc, "error", err)
return "", false, fmt.Errorf("failed to parse URL %q: %w", maskedSrc, err)
}

if strings.ToLower(parsedURL.Host) != "github.com" {
u.LogDebug(fmt.Sprintf("Host is %q, not 'github.com', skipping token injection\n", parsedURL.Host))
return "", false, nil
// Normalize Windows path separators and URL-encoded backslashes to forward slashes.
unescapedPath, err := url.PathUnescape(parsedURL.Path)
if err == nil {
parsedURL.Path = filepath.ToSlash(unescapedPath)
} else {
parsedURL.Path = filepath.ToSlash(parsedURL.Path)
}

parts := strings.SplitN(parsedURL.Path, "/", 4)
if len(parts) < 3 {
u.LogDebug(fmt.Sprintf("URL path %q doesn't look like /owner/repo\n", parsedURL.Path))
return "", false, fmt.Errorf("invalid GitHub URL %q", parsedURL.Path)
// If the URL uses the SSH scheme, check for an active SSH agent.
// Unlike HTTPS where public repos can be accessed without authentication,
// SSH requires authentication. An SSH agent is one of the most common ways to provide it, so we log a debug message when it appears to be missing (this could be a false alarm, though).
if parsedURL.Scheme == "ssh" && os.Getenv("SSH_AUTH_SOCK") == "" {
maskedSrc, _ := u.MaskBasicAuth(src)
l.Debug("SSH agent-based authentication may not work because SSH_AUTH_SOCK is not set", "url", maskedSrc)
}

atmosGitHubToken := os.Getenv("ATMOS_GITHUB_TOKEN")
gitHubToken := os.Getenv("GITHUB_TOKEN")

var usedToken string
var tokenSource string
// Adjust host check to support GitHub, Bitbucket, GitLab, etc.
host := strings.ToLower(parsedURL.Host)
if host != "github.com" && host != "bitbucket.org" && host != "gitlab.com" {
l.Debug("Skipping token injection for a unsupported host", "host", parsedURL.Host)
}

// 1. If ATMOS_GITHUB_TOKEN is set, always use that
if atmosGitHubToken != "" {
usedToken = atmosGitHubToken
tokenSource = "ATMOS_GITHUB_TOKEN"
u.LogDebug("ATMOS_GITHUB_TOKEN is set\n")
} else {
// 2. Otherwise, only inject GITHUB_TOKEN if cfg.Settings.InjectGithubToken == true
if d.AtmosConfig.Settings.InjectGithubToken && gitHubToken != "" {
usedToken = gitHubToken
tokenSource = "GITHUB_TOKEN"
u.LogTrace("InjectGithubToken=true and GITHUB_TOKEN is set, using it\n")
l.Debug("Reading config param", "InjectGithubToken", d.AtmosConfig.Settings.InjectGithubToken)

// Three types of tokens are supported for now: GitHub, Bitbucket, and GitLab.
var token, tokenSource string
switch host {
case "github.com":
// Prioritize ATMOS_GITHUB_TOKEN if InjectGithubToken is enabled; otherwise, fallback to GITHUB_TOKEN.
if d.AtmosConfig.Settings.InjectGithubToken {
tokenSource = "ATMOS_GITHUB_TOKEN"
token = os.Getenv(tokenSource)
if token == "" {
tokenSource = "GITHUB_TOKEN"
token = os.Getenv(tokenSource)
}
} else {
u.LogTrace("No ATMOS_GITHUB_TOKEN or GITHUB_TOKEN found\n")
tokenSource = "GITHUB_TOKEN"
token = os.Getenv(tokenSource)
}
case "bitbucket.org":
tokenSource = "BITBUCKET_TOKEN"
token = os.Getenv(tokenSource)
if token == "" {
tokenSource = "ATMOS_BITBUCKET_TOKEN"
token = os.Getenv(tokenSource)
}
case "gitlab.com":
tokenSource = "GITLAB_TOKEN"
token = os.Getenv(tokenSource)
if token == "" {
tokenSource = "ATMOS_GITLAB_TOKEN"
token = os.Getenv(tokenSource)
}
}

if usedToken != "" {
user := parsedURL.User.Username()
pass, _ := parsedURL.User.Password()
if user == "" && pass == "" {
u.LogDebug(fmt.Sprintf("Injecting token from %s for %s\n", tokenSource, src))
parsedURL.User = url.UserPassword("x-access-token", usedToken)
} else {
u.LogDebug("Credentials found, skipping token injection\n")
// Always inject token if available, regardless of existing credentials.
if token != "" {
var defaultUsername string
switch host {
case "github.com":
defaultUsername = "x-access-token"
case "gitlab.com":
defaultUsername = "oauth2"
case "bitbucket.org":
defaultUsername = os.Getenv("ATMOS_BITBUCKET_USERNAME")
if defaultUsername == "" {
defaultUsername = os.Getenv("BITBUCKET_USERNAME")
if defaultUsername == "" {
defaultUsername = "x-token-auth"
}
}
l.Debug("Using Bitbucket username", "username", defaultUsername)
default:
defaultUsername = "x-access-token"
}
parsedURL.User = url.UserPassword(defaultUsername, token)
maskedURL, _ := u.MaskBasicAuth(parsedURL.String())
l.Debug("Injected token in vendor URL", "env", tokenSource, "url", maskedURL)
} else {
l.Debug("No token found for injection")
}

// Normalize d.source for Windows path separators.
normalizedSource := filepath.ToSlash(d.source)
// If d.source is provided (non‑empty), use it for subdir checking;
// otherwise, skip appending '//.' (so the user-defined subdir isn’t mistakenly processed).
if normalizedSource != "" && !strings.Contains(normalizedSource, "//") {
parts := strings.SplitN(parsedURL.Path, "/", 4)
if strings.HasSuffix(parsedURL.Path, ".git") || len(parts) == 3 {
maskedSrc, _ := u.MaskBasicAuth(src)
l.Debug("Detected top-level repo with no subdir: appending '//.'", "url", maskedSrc)
parsedURL.Path = parsedURL.Path + "//."
}
}

// Set "depth=1" for a shallow clone if not specified.
// In Go-Getter, "depth" controls how many revisions are cloned:
// - depth=1 fetches only the latest commit (faster, less bandwidth).
// - depth= (empty) performs a full clone (default Git behavior).
// - depth=N clones the last N revisions.
q := parsedURL.Query()
if _, exists := q["depth"]; !exists {
q.Set("depth", "1")
}
parsedURL.RawQuery = q.Encode()

finalURL := "git::" + parsedURL.String()
urlForMasking := strings.TrimPrefix(finalURL, "git::")
maskedFinal, err := u.MaskBasicAuth(urlForMasking)
if err != nil {
l.Debug("Masking of URL failed", "error", err)
} else {
l.Debug("normalized SSH vendor URL ", "url", "git::"+maskedFinal)
}

return finalURL, true, nil
}
Expand All @@ -138,7 +249,7 @@ func (d *CustomGitHubDetector) Detect(src, _ string) (string, bool, error) {
func RegisterCustomDetectors(atmosConfig schema.AtmosConfiguration) {
getter.Detectors = append(
[]getter.Detector{
&CustomGitHubDetector{AtmosConfig: atmosConfig},
&CustomGitDetector{AtmosConfig: atmosConfig},
},
getter.Detectors...,
)
Expand Down
20 changes: 20 additions & 0 deletions pkg/utils/url_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package utils

import (
	"errors"
	"fmt"
	"net/url"
)

// MaskBasicAuth returns rawURL with any userinfo component replaced by the
// placeholder credentials "xxx:xxx", so the result can be written to logs.
//
// A URL without userinfo is returned as re-serialized by net/url. When
// userinfo is present, both the username and the password are set to "xxx",
// even if the original URL carried only a username.
//
// If rawURL cannot be parsed, an empty string and a non-nil error are
// returned. The returned error wraps only the underlying cause of the parse
// failure: the *url.Error produced by url.Parse embeds the complete input URL
// in its message, which would leak the very credentials this function exists
// to hide if the error were logged.
func MaskBasicAuth(rawURL string) (string, error) {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		// Drop the *url.Error envelope (operation + full raw URL) and keep
		// just the inner cause, e.g. `invalid URL escape "%zz"`.
		var urlErr *url.Error
		if errors.As(err, &urlErr) && urlErr.Err != nil {
			err = urlErr.Err
		}
		return "", fmt.Errorf("failed to parse URL: %w", err)
	}

	if parsedURL.User != nil {
		parsedURL.User = url.UserPassword("xxx", "xxx")
	}

	return parsedURL.String(), nil
}
29 changes: 29 additions & 0 deletions tests/fixtures/scenarios/vendor-pulls-ssh/atmos.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
base_path: "./"
settings:
inject_github_token: true


components:
terraform:
base_path: "components/terraform"
apply_auto_approve: false
deploy_run_init: true
init_run_reconfigure: true
auto_generate_backend_file: false

stacks:
base_path: "stacks"
included_paths:
- "deploy/**/*"
excluded_paths:
- "**/_defaults.yaml"
name_pattern: "{stage}"

logs:
file: "/dev/stderr"
level: Info





35 changes: 35 additions & 0 deletions tests/fixtures/scenarios/vendor-pulls-ssh/vendor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
apiVersion: atmos/v1
kind: AtmosVendorConfig
metadata:
name: demo-vendoring
description: Atmos vendoring manifest for Atmos demo component library
spec:
imports: []

sources:
# Basic HTTPS default (token injection expected)
- component: "terraform-null-label-basic"
source: "github.com/cloudposse/terraform-null-label.git?ref={{ .Version }}"
version: "0.25.0"
targets:
- "library/basic/{{ .Component }}"
tags:
- demo

# Direct credentials provided in the URL (token injection should be skipped)
- component: "terraform-null-label-direct"
source: "https://myuser:[email protected]/cloudposse/terraform-null-label.git?ref={{ .Version }}"
version: "0.25.0"
targets:
- "library/direct/{{ .Component }}"
tags:
- demo

# HTTPS with pre-existing credentials (token injection skipped)
- component: "terraform-null-label-cred"
source: "https://[email protected]/cloudposse/terraform-null-label.git?ref={{ .Version }}"
version: "0.25.0"
targets:
- "library/cred/{{ .Component }}"
tags:
- demo
Loading
Loading