// copyrightYearPattern matches the word "copyright" followed by whitespace and
// a four-digit year in already-lowercased text. It is compiled once at package
// scope instead of on every call.
var copyrightYearPattern = regexp.MustCompile(`copyright\s+\d{4}`)

// hasCopyrightNoticePattern reports whether the lowercased text s contains one
// of the recognized copyright notice forms: "copyright <year>",
// "copyright (c)", "copyright ©" or "copyright:".
func hasCopyrightNoticePattern(s string) bool {
	return copyrightYearPattern.MatchString(s) ||
		strings.Contains(s, "copyright (c)") ||
		strings.Contains(s, "copyright ©") ||
		strings.Contains(s, "copyright:")
}

// mentionsHashiCorpOrIBM reports whether the lowercased text s contains
// "hashicorp" or "ibm corp" anywhere.
func mentionsHashiCorpOrIBM(s string) bool {
	return strings.Contains(s, "hashicorp") || strings.Contains(s, "ibm corp")
}

// hasCopyrightButNotHashiCorpOrIBM reports whether b carries a copyright
// notice that belongs to neither HashiCorp nor IBM, i.e. a file whose header
// should be left untouched.
//
// Only the first 1000 bytes of b are inspected, case-insensitively. The first
// 300 bytes of that window are treated as the header area and take priority:
//
//   - If the header area contains a copyright notice pattern, the result is
//     true unless the header area also mentions "hashicorp" or "ibm corp".
//   - Otherwise, if the whole window contains a copyright notice pattern, the
//     result is true unless the window mentions "hashicorp" or "ibm corp".
//   - If no copyright notice pattern is found at all, the result is false.
func hasCopyrightButNotHashiCorpOrIBM(b []byte) bool {
	const (
		scanLimit = 1000 // bytes of the file that are inspected
		topLimit  = 300  // leading bytes treated as the header area
	)

	n := len(b)
	if n > scanLimit {
		n = scanLimit
	}
	content := string(bytes.ToLower(b[:n]))

	top := content
	if len(top) > topLimit {
		top = top[:topLimit]
	}

	switch {
	case hasCopyrightNoticePattern(top):
		// A notice in the header area is judged only by what the header
		// area itself says about ownership.
		return !mentionsHashiCorpOrIBM(top)
	case hasCopyrightNoticePattern(content):
		// A notice further down is judged against the whole window.
		return !mentionsHashiCorpOrIBM(content)
	default:
		return false
	}
}
err := processFileUpdate(f, t, license, checkonly, verbose, logger) + return err + }) + } + out = wg.Wait() + close(done) + }() + + for _, d := range patterns { + if err := walk(ch, d, logger); err != nil { + return err + } + } + close(ch) + <-done + + return out +} + +// processFileUpdate processes a file for the update command, which handles both +// adding headers to files without them and replacing HashiCorp headers with IBM headers +func processFileUpdate(f *file, t *template.Template, license LicenseData, checkonly bool, verbose bool, logger *log.Logger) error { + if checkonly { + // Check if file extension is known + lic, err := licenseHeader(f.path, t, license) + if err != nil { + logger.Printf("%s: %v", f.path, err) + return err + } + if lic == nil { // Unknown fileExtension + return nil + } + + // Check if file needs updating (either no license or has HashiCorp header) + needsUpdate, err := fileNeedsUpdate(f.path) + if err != nil { + logger.Printf("%s: %v", f.path, err) + return err + } + if needsUpdate { + logger.Printf("%s\n", f.path) + return errors.New("file needs header update") + } + } else { + modified, err := updateLicense(f.path, f.mode, t, license) + if err != nil { + logger.Printf("%s: %v", f.path, err) + return err + } + if verbose && modified { + logger.Printf("%s modified", f.path) + } + } + return nil +} + +// fileNeedsUpdate reports whether the file at path needs a header update +// (only HashiCorp headers or IBM headers with different year/license info) +func fileNeedsUpdate(path string) (bool, error) { + b, err := os.ReadFile(path) + if err != nil { + return false, err + } + + // If generated, we don't update it + if isGenerated(b) { + return false, nil + } + + // If no license header at all, do NOT update (removed this feature) + if !hasLicense(b) { + return false, nil + } + + // If it has a HashiCorp header, it needs to be replaced + if hasHashiCorpHeader(b) { + return true, nil + } + + // If it has an IBM header, check if it needs to 
be updated + if hasIBMHeader(b) { + return hasIBMHeaderNeedingUpdate(b), nil + } + + // If it has SPDX but no copyright header (license-only files), it needs copyright added + n := len(b) + if n > 1000 { + n = 1000 + } + + content := strings.ToLower(string(b[:n])) + + // First, check for actual copyright header patterns in the top few lines (first 300 chars) + // This is where copyright headers typically appear + topContent := content + if len(content) > 300 { + topContent = content[:300] + } + + // Check for copyright regex patterns primarily in the top section + copyrightRegex := regexp.MustCompile(`copyright\s+\d{4}`) + if copyrightRegex.MatchString(topContent) { + return false, nil // Found copyright with year in top section, don't modify + } + + // If no copyright regex pattern in top section, check for other copyright header patterns + hasCopyrightHeader := strings.Contains(topContent, "copyright (c)") || + strings.Contains(topContent, "copyright ©") || + strings.Contains(topContent, "copyright:") + + if hasCopyrightHeader { + return false, nil // Found copyright header patterns in top section, don't modify + } + + // No copyright header patterns found in top section, check entire content for any copyright mentions + // but be more restrictive - only consider it a real copyright if it has specific patterns + fullContentHasCopyright := strings.Contains(content, "copyright (c)") || + strings.Contains(content, "copyright ©") || + strings.Contains(content, "copyright:") || + copyrightRegex.MatchString(content) + + if !fullContentHasCopyright { + return true, nil // No actual copyright header found anywhere, needs update + } + + // File has copyright from other companies, don't modify + return false, nil +} + +// hasHashiCorpHeader checks if the file contains a HashiCorp copyright header +// This function is comprehensive and detects various forms of HashiCorp headers, +// including those with additional text or formatting variations +func hasHashiCorpHeader(b 
[]byte) bool { + n := 1000 + if len(b) < 1000 { + n = len(b) + } + content := string(bytes.ToLower(b[:n])) + + // Split content into lines for line-by-line analysis + lines := strings.Split(content, "\n") + + for _, line := range lines { + // Clean the line by removing comment markers and extra whitespace + cleanLine := strings.TrimSpace(line) + cleanLine = strings.TrimPrefix(cleanLine, "//") + cleanLine = strings.TrimPrefix(cleanLine, "/*") + cleanLine = strings.TrimPrefix(cleanLine, "*") + cleanLine = strings.TrimPrefix(cleanLine, "#") + cleanLine = strings.TrimPrefix(cleanLine, "