diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index aa2fcee765507..48305ea27df86 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2655,7 +2655,12 @@ LEVEL = Info ;LIMIT_SIZE_HELM = -1 ;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_MAVEN = -1 +;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive. +;; The cleanup of expired packages/data then targets the files within all Maven snapshot versions. +;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1 +;; Enable debug mode for Maven cleanup. While enabled, snapshot version artifacts are not deleted; the files that would have been deleted are only logged. +;DEBUG_MAVEN_CLEANUP = true ;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_NPM = -1 ;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`) ;LIMIT_SIZE_NUGET = -1 diff --git a/models/packages/package_file.go b/models/packages/package_file.go index bf877485d62a6..d407241e93b6d 100644 --- a/models/packages/package_file.go +++ b/models/packages/package_file.go @@ -5,6 +5,9 @@ package packages import ( "context" + "errors" + "fmt" + "sort" "strconv" "strings" "time" @@ -21,6 +24,8 @@ func init() { } var ( + // ErrMetadataFile indicates a Maven metadata file (maven-metadata.xml) + ErrMetadataFile = errors.New("metadata file") // ErrDuplicatePackageFile indicates a duplicated package file error ErrDuplicatePackageFile = util.NewAlreadyExistErrorf("package file already exists") // ErrPackageFileNotExist indicates a package file not exist error @@ -231,6 +236,79 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error) return db.Exist[PackageFile](ctx, opts.toConds()) } +// GetFilesBelowBuildNumber retrieves all files for maven snapshot version where the build number is <= 
maxBuildNumber. +// It returns the matching files and, separately, the files that were skipped because no build number could be parsed from their name; maven-metadata.xml files appear in neither slice. +func GetFilesBelowBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int, classifiers ...string) ([]*PackageFile, []*PackageFile, error) { + if maxBuildNumber <= 0 { + return nil, nil, errors.New("maxBuildNumber must be a positive integer") + } + + files, err := GetFilesByVersionID(ctx, versionID) + if err != nil { + return nil, nil, fmt.Errorf("failed to retrieve files: %w", err) + } + + // Sort classifiers by length (longest first) once per call, so a longer classifier is tried before + // any shorter one it ends with. Work on a copy: the variadic slice may alias the caller's slice. + classifiers = append([]string(nil), classifiers...) + sort.SliceStable(classifiers, func(i, j int) bool { return len(classifiers[i]) > len(classifiers[j]) }) + + var filteredFiles, skippedFiles []*PackageFile + for _, file := range files { + buildNumber, err := ExtractBuildNumberFromFileName(file.Name, classifiers...) + if err != nil { + if !errors.Is(err, ErrMetadataFile) { + skippedFiles = append(skippedFiles, file) + } + continue + } + if buildNumber <= maxBuildNumber { + filteredFiles = append(filteredFiles, file) + } + } + + return filteredFiles, skippedFiles, nil +} + +// ExtractBuildNumberFromFileName extracts the build number from a Maven snapshot file name. It returns ErrMetadataFile for maven-metadata.xml files and a descriptive error for names it cannot parse. +// Expected formats: +// +// "artifact-1.0.0-20250311.083409-9.tgz" returns 9 +// "artifact-to-test-2.0.0-20250311.083409-10-sources.tgz" returns 10 +func ExtractBuildNumberFromFileName(filename string, classifiers ...string) (int, error) { + if strings.Contains(filename, "maven-metadata.xml") { + return 0, ErrMetadataFile + } + + dotIdx := strings.LastIndex(filename, ".") + if dotIdx == -1 { + return 0, fmt.Errorf("extract build number from filename: no file extension found in '%s'", filename) + } + base := filename[:dotIdx] + + // Remove classifier suffix if present. + for _, classifier := range classifiers { + suffix := "-" + classifier + if strings.HasSuffix(base, suffix) { + base = base[:len(base)-len(suffix)] + break + } + } + + // The build number should be the token after the last dash. 
+ lastDash := strings.LastIndex(base, "-") + if lastDash == -1 { + return 0, fmt.Errorf("extract build number from filename: invalid file name format in '%s'", filename) + } + buildNumberStr := base[lastDash+1:] + buildNumber, err := strconv.Atoi(buildNumberStr) + if err != nil { + return 0, fmt.Errorf("extract build number from filename: failed to convert build number '%s' to integer in '%s': %w", buildNumberStr, filename, err) + } + + return buildNumber, nil +} + // CalculateFileSize sums up all blob sizes matching the search options. // It does NOT respect the deduplication of blobs. func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) { diff --git a/models/packages/package_version.go b/models/packages/package_version.go index 0a478c03234c8..f030e28ca86da 100644 --- a/models/packages/package_version.go +++ b/models/packages/package_version.go @@ -129,11 +129,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType // GetVersionsByPackageType gets all versions of a specific type func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) { - pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{ - OwnerID: ownerID, + opts := &PackageSearchOptions{ Type: packageType, IsInternal: optional.Some(false), - }) + } + + if ownerID != 0 { + opts.OwnerID = ownerID + } + + pvs, _, err := SearchVersions(ctx, opts) return pvs, err } diff --git a/modules/packages/maven/metadata.go b/modules/packages/maven/metadata.go index a61a62c086208..8dcbf14d18135 100644 --- a/modules/packages/maven/metadata.go +++ b/modules/packages/maven/metadata.go @@ -5,7 +5,9 @@ package maven import ( "encoding/xml" + "errors" "io" + "strconv" "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/validation" @@ -31,6 +33,12 @@ type Dependency struct { Version string `json:"version,omitempty"` } +// SnapshotMetadata struct holds the build number and the list of classifiers 
for a snapshot version +type SnapshotMetadata struct { + BuildNumber int `json:"build_number,omitempty"` + Classifiers []string `json:"classifiers,omitempty"` +} + type pomStruct struct { XMLName xml.Name `xml:"project"` @@ -61,6 +69,26 @@ type pomStruct struct { } `xml:"dependencies>dependency"` } +type snapshotMetadataStruct struct { + XMLName xml.Name `xml:"metadata"` + GroupID string `xml:"groupId"` + ArtifactID string `xml:"artifactId"` + Version string `xml:"version"` + Versioning struct { + LastUpdated string `xml:"lastUpdated"` + Snapshot struct { + Timestamp string `xml:"timestamp"` + BuildNumber string `xml:"buildNumber"` + } `xml:"snapshot"` + SnapshotVersions []struct { + Extension string `xml:"extension"` + Classifier string `xml:"classifier"` + Value string `xml:"value"` + Updated string `xml:"updated"` + } `xml:"snapshotVersions>snapshotVersion"` + } `xml:"versioning"` +} + // ParsePackageMetaData parses the metadata of a pom file func ParsePackageMetaData(r io.Reader) (*Metadata, error) { var pom pomStruct @@ -109,3 +137,31 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) { Dependencies: dependencies, }, nil } + +// ParseSnapshotVersionMetaData parses Maven snapshot version metadata XML and returns its build number together with every non-empty classifier listed under snapshotVersions; it fails when the XML cannot be decoded or the build number is not an integer. 
+func ParseSnapshotVersionMetaData(r io.Reader) (*SnapshotMetadata, error) { + var metadata snapshotMetadataStruct + + dec := xml.NewDecoder(r) + dec.CharsetReader = charset.NewReaderLabel + if err := dec.Decode(&metadata); err != nil { + return nil, err + } + + buildNumber, err := strconv.Atoi(metadata.Versioning.Snapshot.BuildNumber) + if err != nil { + return nil, errors.New("invalid or missing build number in snapshot metadata") + } + + var classifiers []string + for _, snapshotVersion := range metadata.Versioning.SnapshotVersions { + if snapshotVersion.Classifier != "" { + classifiers = append(classifiers, snapshotVersion.Classifier) + } + } + + return &SnapshotMetadata{ + BuildNumber: buildNumber, + Classifiers: classifiers, + }, nil +} diff --git a/modules/setting/packages.go b/modules/setting/packages.go index b598424064832..c1628a8ca80b3 100644 --- a/modules/setting/packages.go +++ b/modules/setting/packages.go @@ -41,10 +41,13 @@ var ( LimitSizeSwift int64 LimitSizeVagrant int64 - DefaultRPMSignEnabled bool + DefaultRPMSignEnabled bool + RetainMavenSnapshotBuilds int + DebugMavenCleanup bool }{ - Enabled: true, - LimitTotalOwnerCount: -1, + Enabled: true, + LimitTotalOwnerCount: -1, + RetainMavenSnapshotBuilds: -1, } ) @@ -88,6 +91,8 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) { Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT") Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT") Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false) + Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds) + Packages.DebugMavenCleanup = sec.Key("DEBUG_MAVEN_CLEANUP").MustBool(true) return nil } diff --git a/services/packages/cleanup/cleanup.go b/services/packages/cleanup/cleanup.go index ec860db1bbd42..391bd28e71d49 100644 --- a/services/packages/cleanup/cleanup.go +++ b/services/packages/cleanup/cleanup.go @@ -1,7 +1,7 @@ // Copyright 
2022 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package container +package cleanup import ( "context" @@ -20,6 +20,7 @@ import ( cargo_service "code.gitea.io/gitea/services/packages/cargo" container_service "code.gitea.io/gitea/services/packages/container" debian_service "code.gitea.io/gitea/services/packages/debian" + maven_service "code.gitea.io/gitea/services/packages/maven" rpm_service "code.gitea.io/gitea/services/packages/rpm" ) @@ -171,6 +172,10 @@ func CleanupExpiredData(ctx context.Context, olderThan time.Duration) error { return err } + if err := maven_service.CleanupSnapshotVersions(ctx); err != nil { + log.Error("Error cleaning up Maven snapshot versions: %v", err) + } + ps, err := packages_model.FindUnreferencedPackages(ctx) if err != nil { return err diff --git a/services/packages/maven/cleanup.go b/services/packages/maven/cleanup.go new file mode 100644 index 0000000000000..4e5a18eb6f51b --- /dev/null +++ b/services/packages/maven/cleanup.go @@ -0,0 +1,168 @@ +package maven + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "strings" + + "code.gitea.io/gitea/models/packages" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/packages/maven" + "code.gitea.io/gitea/modules/setting" + packages_service "code.gitea.io/gitea/services/packages" +) + +// CleanupSnapshotVersions removes outdated files for SNAPSHOT versions for all Maven packages. +// For every snapshot version it keeps the files of the setting.Packages.RetainMavenSnapshotBuilds most recent builds; values below 1 disable the cleanup. +// Failures tied to maven-metadata.xml are only logged as warnings; any other failure is logged and returned as part of a joined error. +func CleanupSnapshotVersions(ctx context.Context) error { + retainBuilds := setting.Packages.RetainMavenSnapshotBuilds + debugSession := setting.Packages.DebugMavenCleanup + log.Debug("Maven Cleanup: starting with retainBuilds: %d, debugSession: %t", retainBuilds, debugSession) + + // -1 is the default "retain all builds" value, so skipping is expected and not worth a warning. + if retainBuilds == -1 { + log.Debug("Maven Cleanup: skipped as retainBuilds is -1, all builds are retained") + return nil + } + + if retainBuilds < 1 { + log.Warn("Maven Cleanup: skipped as value for retainBuilds less than 1: %d. 
Minimum 1 build should be retained", retainBuilds) + return nil + } + + versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven) + if err != nil { + return fmt.Errorf("maven Cleanup: failed to retrieve Maven package versions: %w", err) + } + + var errs []error + var metadataErrors []error + + for _, version := range versions { + if !isSnapshotVersion(version.Version) { + continue + } + + var artifactID, groupID string + if version.MetadataJSON != "" { + var metadata map[string]any + if err := json.Unmarshal([]byte(version.MetadataJSON), &metadata); err != nil { + log.Warn("Maven Cleanup: error during cleanup: failed to unmarshal metadataJSON for package version ID: %d: %v", version.ID, err) + } else { + artifactID, _ = metadata["artifact_id"].(string) + groupID, _ = metadata["group_id"].(string) + log.Trace("Maven Cleanup: processing package version with ID: %d, Group ID: %s, Artifact ID: %s, Version: %s", version.ID, groupID, artifactID, version.Version) + } + } + + if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds, debugSession); err != nil { + formattedErr := fmt.Errorf("version '%s' (ID: %d, Group ID: %s, Artifact ID: %s): %w", + version.Version, version.ID, groupID, artifactID, err) + + if errors.Is(err, packages.ErrMetadataFile) { + metadataErrors = append(metadataErrors, formattedErr) + } else { + errs = append(errs, formattedErr) + } + } + } + + for _, err := range metadataErrors { + log.Warn("Maven Cleanup: error during cleanup: %v", err) + } + + if len(errs) > 0 { + for _, err := range errs { + log.Error("Maven Cleanup: error during cleanup: %v", err) + } + return fmt.Errorf("maven Cleanup: completed with errors: %w", errors.Join(errs...)) + } + + log.Trace("Completed Maven Cleanup") + return nil +} + +// isSnapshotVersion reports whether the version string ends with "-SNAPSHOT". +func isSnapshotVersion(version string) bool { + return strings.HasSuffix(version, "-SNAPSHOT") +} + +// cleanSnapshotFiles deletes the files of a single snapshot version whose build number is at most the build number +// recorded in maven-metadata.xml minus retainBuilds. When debugSession is set nothing is deleted; the candidates are only logged. +// Failures while locating or reading maven-metadata.xml are wrapped with packages.ErrMetadataFile. +func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int, debugSession bool) error { + log.Trace("Maven 
Cleanup: starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d, debugSession: %t", versionID, retainBuilds, debugSession) + + metadataFile, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey) + if err != nil { + return fmt.Errorf("%w: failed to retrieve maven-metadata.xml: %w", packages.ErrMetadataFile, err) + } + + maxBuildNumber, classifiers, err := extractMaxBuildNumber(ctx, metadataFile) + if err != nil { + return fmt.Errorf("%w: failed to extract max build number from maven-metadata.xml: %w", packages.ErrMetadataFile, err) + } + + thresholdBuildNumber := maxBuildNumber - retainBuilds + if thresholdBuildNumber <= 0 { + log.Trace("Maven Cleanup: no files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID) + return nil + } + + filesToRemove, skippedFiles, err := packages.GetFilesBelowBuildNumber(ctx, versionID, thresholdBuildNumber, classifiers...) + if err != nil { + return fmt.Errorf("cleanSnapshotFiles: failed to retrieve files for version: %w", err) + } + + if debugSession { + var fileNamesToRemove, skippedFileNames []string + + for _, file := range filesToRemove { + fileNamesToRemove = append(fileNamesToRemove, file.Name) + } + + for _, file := range skippedFiles { + skippedFileNames = append(skippedFileNames, file.Name) + } + + log.Debug("Maven Cleanup: debug session active. 
Files to remove: %v, Skipped files: %v", fileNamesToRemove, skippedFileNames) + return nil + } + + for _, file := range filesToRemove { + log.Trace("Maven Cleanup: removing file '%s' below threshold %d", file.Name, thresholdBuildNumber) + if err := packages_service.DeletePackageFile(ctx, file); err != nil { + return fmt.Errorf("cleanSnapshotFiles: failed to delete file '%s': %w", file.Name, err) + } + } + + return nil +} + +// extractMaxBuildNumber parses the given maven-metadata.xml file and returns the build number and classifiers it declares. +func extractMaxBuildNumber(ctx context.Context, metadataFile *packages.PackageFile) (int, []string, error) { + pb, err := packages.GetBlobByID(ctx, metadataFile.BlobID) + if err != nil { + return 0, nil, fmt.Errorf("failed to get package blob: %w", err) + } + + // Read the blob directly instead of via OpenBlobForDownload, which also increases the download counter. + content, err := packages_service.OpenBlobStream(pb) + if err != nil { + return 0, nil, fmt.Errorf("failed to get package file stream: %w", err) + } + defer content.Close() + + snapshotMetadata, err := maven.ParseSnapshotVersionMetaData(content) + if err != nil { + return 0, nil, fmt.Errorf("failed to parse maven-metadata.xml: %w", err) + } + + buildNumber := snapshotMetadata.BuildNumber + classifiers := snapshotMetadata.Classifiers + + return buildNumber, classifiers, nil +} diff --git a/services/packages/packages.go b/services/packages/packages.go index 22b26b65637ab..96a9b5f9c8034 100644 --- a/services/packages/packages.go +++ b/services/packages/packages.go @@ -599,7 +599,7 @@ func OpenBlobStream(pb *packages_model.PackageBlob) (io.ReadSeekCloser, error) { // OpenBlobForDownload returns the content of the specific package blob and increases the download counter. // If the storage supports direct serving and it's enabled, only the direct serving url is returned. 
-func OpenBlobForDownload(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, method string, serveDirectReqParams url.Values) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) { +func OpenBlobForDownload(ctx context.Context, pf *packages_model.PackageFile, pb *packages_model.PackageBlob, method string, serveDirectReqParams url.Values, forceInternalServe ...bool) (io.ReadSeekCloser, *url.URL, *packages_model.PackageFile, error) { key := packages_module.BlobHash256Key(pb.HashSHA256) cs := packages_module.NewContentStore() @@ -608,7 +608,9 @@ func OpenBlobForDownload(ctx context.Context, pf *packages_model.PackageFile, pb var u *url.URL var err error - if cs.ShouldServeDirect() { + internalServe := len(forceInternalServe) > 0 && forceInternalServe[0] + + if !internalServe && cs.ShouldServeDirect() { u, err = cs.GetServeDirectURL(key, pf.Name, method, serveDirectReqParams) if err != nil && !errors.Is(err, storage.ErrURLNotSupported) { log.Error("Error getting serve direct url (fallback to local reader): %v", err)