From 2563231e022261a1f4d74f1b6ed4040d907b2b27 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 1 Jul 2022 17:22:53 +0200 Subject: [PATCH 001/129] started new PageRank branch --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 552dfa64b..176a8b022 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# hackage-server +# Hackage-server [![Build Status](https://travis-ci.org/haskell/hackage-server.png?branch=master)](https://travis-ci.org/haskell/hackage-server) [![Build status](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml) From 64b7f9ba705ba1a0505100e9ceb9e971c3648ad2 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 1 Jul 2022 17:45:41 +0200 Subject: [PATCH 002/129] correct the error --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 176a8b022..552dfa64b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Hackage-server +# hackage-server [![Build Status](https://travis-ci.org/haskell/hackage-server.png?branch=master)](https://travis-ci.org/haskell/hackage-server) [![Build status](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml) From b937df55bde0ddc4219803d0389238f04b44668c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 3 Jul 2022 21:27:14 +0200 Subject: [PATCH 003/129] Created PackageRank module and added it to build --- hackage-server.cabal | 1 + src/Distribution/Server/Features/PackageRank.hs | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 src/Distribution/Server/Features/PackageRank.hs diff --git a/hackage-server.cabal b/hackage-server.cabal index c32615ea9..78e9d3e9f 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -356,6 +356,7 @@ library lib-server Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap + Distribution.Server.Features.PackageRank if flag(debug) cpp-options: -DDEBUG diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs new file mode 100644 index 000000000..3bbe62215 --- /dev/null +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -0,0 +1,9 @@ +module Distribution.Server.Features.PackageRank ( + rankPackage + ) where + +import Distribution.Package +import Distribution.Server.Packages.Types + +rankPackage :: (Package a) => a -> IO Double +rankPackage p=return 0 From 5eed33c028b05885e9eabc0f3d5338fce0470b53 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 3 Jul 2022 21:37:37 +0200 Subject: [PATCH 004/129] write out ranking criteria --- src/Distribution/Server/Features/PackageRank.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 3bbe62215..4438f5668 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -3,7 +3,15 @@ module Distribution.Server.Features.PackageRank ( ) where import Distribution.Package -import Distribution.Server.Packages.Types rankPackage :: (Package a) => a -> IO Double -rankPackage p=return 0 +rankPackage p=return$ reverseDeps+usageTrend+docScore+stability + +authNum+goodMetadata+weightUniqueDeps+activelyMaintained + where reverseDeps=1 + usageTrend=1 + docScore=1 + stability=1 + authNum=1 + goodMetadata=1 + weightUniqueDeps=1 + activelyMaintained=1 From d6abb2fcea666c0efa038f1c50dbf5a6caa30d82 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 4 Jul 2022 23:22:32 +0200 Subject: [PATCH 005/129] started with maintainer number --- .../Server/Features/PackageRank.hs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 4438f5668..0d51bffc2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -3,15 +3,25 @@ module Distribution.Server.Features.PackageRank ( ) where import Distribution.Package +import Distribution.PackageDescription +import Distribution.Server.Features.Upload +import Distribution.Server.Users.UserIdSet as UserIdSet -rankPackage :: (Package a) => a -> IO Double -rankPackage p=return$ reverseDeps+usageTrend+docScore+stability - +authNum+goodMetadata+weightUniqueDeps+activelyMaintained +rankPackage :: PackageDescription -> IO Double +rankPackage p=do + maintainers <- maintNum + return maintainers+reverseDeps+usageTrend+docScore+stability + +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 stability=1 - authNum=1 + maintNum :: IO Double + maintNum=do + maintSet<-queryUserGroup$maintainersGroupDescription pkgNm + return fromInteger.UserIdSet$size maintSet goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 + pkgNm :: PackageName + pkgNm=pkgName$package p From 1cf1a9f7e741c915f599eec725c81099ab5b6c58 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 22:31:19 +0200 Subject: [PATCH 006/129] added Upload Feature and got number of maintainers for package --- src/Distribution/Server/Features/PackageRank.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0d51bffc2..ae5d7fda7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,22 +4,22 @@ module Distribution.Server.Features.PackageRank ( import Distribution.Package import Distribution.PackageDescription +import Distribution.Server.Users.Group import Distribution.Server.Features.Upload -import Distribution.Server.Users.UserIdSet as UserIdSet -rankPackage :: PackageDescription -> IO Double -rankPackage p=do +rankPackage :: UploadFeature -> PackageDescription -> IO Double +rankPackage upload p=do maintainers <- maintNum - return maintainers+reverseDeps+usageTrend+docScore+stability + return$maintainers+reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 - stability=1 + stabilityScore=1 maintNum :: IO Double maintNum=do - maintSet<-queryUserGroup$maintainersGroupDescription pkgNm - return fromInteger.UserIdSet$size maintSet + maint<-queryUserGroups$[maintainersGroup upload pkgNm] + return.fromInteger.toInteger$size maint goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 From 6ad9db214202cbee52bf60b7c4c3489932becdac Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 22:42:20 +0200 Subject: [PATCH 007/129] divided rankPackage to pure and IO version --- .../Server/Features/PackageRank.hs | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ae5d7fda7..aa3921cf2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -7,21 +7,23 @@ import Distribution.PackageDescription import Distribution.Server.Users.Group import Distribution.Server.Features.Upload -rankPackage :: UploadFeature -> PackageDescription -> IO Double -rankPackage upload p=do - maintainers <- maintNum - return$maintainers+reverseDeps+usageTrend+docScore+stabilityScore +rankPackageIO upload p=maintNum + where + maintNum :: IO Double + maintNum=do + maint<-queryUserGroups$[maintainersGroup upload pkgNm] + return.fromInteger.toInteger$size maint + pkgNm :: PackageName + pkgNm=pkgName$package p +rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 stabilityScore=1 - maintNum :: IO Double - maintNum=do - maint<-queryUserGroups$[maintainersGroup upload pkgNm] - return.fromInteger.toInteger$size maint goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 - pkgNm :: PackageName - pkgNm=pkgName$package p + +rankPackage :: UploadFeature -> PackageDescription -> IO Double +rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) From b9a2417537307ca855bf5193b47e45bca6921695 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 23:04:25 +0200 Subject: [PATCH 008/129] added benchmark and test info --- src/Distribution/Server/Features/PackageRank.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index aa3921cf2..8d11ad8c3 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -21,9 +21,13 @@ rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore usageTrend=1 docScore=1 stabilityScore=1 + testsBench=(bool2Double.hasTests) p + (bool2Double.hasBenchmarks) p goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 + bool2Double :: Bool -> Double + bool2Double true=1 + bool2Double false=0 rankPackage :: UploadFeature -> PackageDescription -> IO Double rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) From 9dbea877fa3275e4d1a25cffd8c5532817a30661 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 7 Jul 2022 23:29:12 +0200 Subject: [PATCH 009/129] added Download Feature --- src/Distribution/Server/Features/PackageRank.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8d11ad8c3..7fa7ca665 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,8 +6,9 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Users.Group import Distribution.Server.Features.Upload +import Distribution.Server.Features.DownloadCount -rankPackageIO upload p=maintNum +rankPackageIO download upload p=maintNum where maintNum :: IO Double maintNum=do @@ -29,5 +30,5 @@ rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore bool2Double true=1 bool2Double false=0 -rankPackage :: UploadFeature -> PackageDescription -> IO Double -rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) +rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double +rankPackage download upload p=rankPackageIO download upload p>>=(\x->return$x + rankPackagePure p) From fcbdd2d5769a972e4054580675f9fba6b9501ed2 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 8 Jul 2022 23:06:24 +0200 Subject: [PATCH 010/129] formatted a bit --- .../Server/Features/PackageRank.hs | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 7fa7ca665..43fc0a025 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -8,27 +8,35 @@ import Distribution.Server.Users.Group import Distribution.Server.Features.Upload import Distribution.Server.Features.DownloadCount -rankPackageIO download upload p=maintNum +rankPackageIO download upload p = maintNum where + -- Number of maintainers maintNum :: IO Double - maintNum=do - maint<-queryUserGroups$[maintainersGroup upload pkgNm] - return.fromInteger.toInteger$size maint + maintNum = do + maint <- queryUserGroups $ + [maintainersGroup upload pkgNm] + return . fromInteger . toInteger $ size maint pkgNm :: PackageName - pkgNm=pkgName$package p -rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore + pkgNm = pkgName $ package p + +rankPackagePure p = reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained - where reverseDeps=1 - usageTrend=1 - docScore=1 - stabilityScore=1 - testsBench=(bool2Double.hasTests) p + (bool2Double.hasBenchmarks) p - goodMetadata=1 - weightUniqueDeps=1 - activelyMaintained=1 + where reverseDeps = 1 + usageTrend = 1 + docScore = 1 + stabilityScore = 1 + -- Does the Package have tests and Benchmarks + testsBench = (bool2Double . hasTests) p + + (bool2Double . hasBenchmarks) p + goodMetadata = 1 + weightUniqueDeps = 1 + activelyMaintained = 1 bool2Double :: Bool -> Double - bool2Double true=1 - bool2Double false=0 + bool2Double true = 1 + bool2Double false = 0 rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p=rankPackageIO download upload p>>=(\x->return$x + rankPackagePure p) +rankPackage download upload p = rankPackageIO download upload p + >>= (\x->return$x + rankPackagePure p) + + From d0b73cf61b913766b948c4bd6d17aff5f93ce0bc Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 8 Jul 2022 23:17:27 +0200 Subject: [PATCH 011/129] used brittany --- .../Server/Features/PackageRank.hs | 74 ++++++++++--------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 43fc0a025..aee6adb5d 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,42 +1,48 @@ -module Distribution.Server.Features.PackageRank ( - rankPackage +module Distribution.Server.Features.PackageRank + ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Server.Users.Group -import Distribution.Server.Features.Upload -import Distribution.Server.Features.DownloadCount +import Distribution.Package +import Distribution.PackageDescription +import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.Upload +import Distribution.Server.Users.Group rankPackageIO download upload p = maintNum - where - -- Number of maintainers - maintNum :: IO Double - maintNum = do - maint <- queryUserGroups $ - [maintainersGroup upload pkgNm] - return . fromInteger . toInteger $ size maint - pkgNm :: PackageName - pkgNm = pkgName $ package p + where + -- Number of maintainers + maintNum :: IO Double + maintNum = do + maint <- queryUserGroups [maintainersGroup upload pkgNm] + return . fromInteger . toInteger $ size maint + pkgNm :: PackageName + pkgNm = pkgName $ package p -rankPackagePure p = reverseDeps+usageTrend+docScore+stabilityScore - +goodMetadata+weightUniqueDeps+activelyMaintained - where reverseDeps = 1 - usageTrend = 1 - docScore = 1 - stabilityScore = 1 - -- Does the Package have tests and Benchmarks - testsBench = (bool2Double . hasTests) p - + (bool2Double . hasBenchmarks) p - goodMetadata = 1 - weightUniqueDeps = 1 - activelyMaintained = 1 - bool2Double :: Bool -> Double - bool2Double true = 1 - bool2Double false = 0 +rankPackagePure p = + reverseDeps + + usageTrend + + docScore + + stabilityScore + + goodMetadata + + weightUniqueDeps + + activelyMaintained + where + reverseDeps = 1 + usageTrend = 1 + docScore = 1 + stabilityScore = 1 + -- Does the Package have tests and Benchmarks + testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p + goodMetadata = 1 + weightUniqueDeps = 1 + activelyMaintained = 1 + bool2Double :: Bool -> Double + bool2Double true = 1 + bool2Double false = 0 -rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p = rankPackageIO download upload p - >>= (\x->return$x + rankPackagePure p) +rankPackage + :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double +rankPackage download upload p = + rankPackageIO download upload p >>= (\x -> return $ x + rankPackagePure p) From 8a8027788f2b71d7e06734f505fc71fa85002f97 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 11 Jul 2022 17:43:04 +0200 Subject: [PATCH 012/129] added further info about the package --- src/Distribution/Server/Features/PackageRank.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index aee6adb5d..4db507005 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,6 +4,7 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription +import Distribution.Types.Version import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Upload import Distribution.Server.Users.Group @@ -28,6 +29,8 @@ rankPackagePure p = + activelyMaintained where reverseDeps = 1 + versions = versionNumbers . pkgVersion $ package p + dependencies = allBuildDepends p usageTrend = 1 docScore = 1 stabilityScore = 1 From 9d683038e28226f07dba43fa5ea8f5d4fa13c07d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 12 Jul 2022 22:34:59 +0200 Subject: [PATCH 013/129] added isApp function --- src/Distribution/Server/Features/PackageRank.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 4db507005..5184ed1ae 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -9,6 +9,8 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Upload import Distribution.Server.Users.Group +import Data.Maybe (isNothing) + rankPackageIO download upload p = maintNum where -- Number of maintainers @@ -39,6 +41,7 @@ rankPackagePure p = goodMetadata = 1 weightUniqueDeps = 1 activelyMaintained = 1 + isApp = (isNothing.library) p && (not.null.executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 From 2af5b62ca4cc23c788b330832553b3af18e3728c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 13 Jul 2022 11:31:19 +0200 Subject: [PATCH 014/129] added Scorer type and started to extract versions --- .../Server/Features/PackageRank.hs | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 5184ed1ae..e009c87d2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,12 +4,25 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.PackageInfoJSON.State + ( getVersionsFor ) import Distribution.Server.Features.Upload import Distribution.Server.Users.Group + ( queryUserGroups + , size + ) +import Distribution.Types.Version + +import Data.Maybe ( isNothing ) + +data Scorer = Scorer + { total :: Double + , score :: Double + } -import Data.Maybe (isNothing) +instance Num Scorer where + Scorer a b + Scorer c d = Scorer (a + c) (b + d) rankPackageIO download upload p = maintNum where @@ -18,6 +31,8 @@ rankPackageIO download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint + versionsPkg :: IO Double + versionsPkg = getVersionsFor pkgNm >>= return length pkgNm :: PackageName pkgNm = pkgName $ package p @@ -25,23 +40,14 @@ rankPackagePure p = reverseDeps + usageTrend + docScore - + stabilityScore - + goodMetadata - + weightUniqueDeps - + activelyMaintained + + reverseDeps where reverseDeps = 1 - versions = versionNumbers . pkgVersion $ package p dependencies = allBuildDepends p usageTrend = 1 docScore = 1 - stabilityScore = 1 - -- Does the Package have tests and Benchmarks testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - goodMetadata = 1 - weightUniqueDeps = 1 - activelyMaintained = 1 - isApp = (isNothing.library) p && (not.null.executables) p + isApp = (isNothing . library) p && (not . null . executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 From 3d6b96e16f9309ba68eb7b296ec2c5365d5a1c46 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 14 Jul 2022 21:46:06 +0200 Subject: [PATCH 015/129] more work on versions --- .../Server/Features/PackageRank.hs | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e009c87d2..e333a3f71 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -5,8 +5,8 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.PackageInfoJSON.State - ( getVersionsFor ) +import Distribution.Server.Features.HaskellPlatform +import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.Upload import Distribution.Server.Users.Group ( queryUserGroups @@ -21,40 +21,47 @@ data Scorer = Scorer , score :: Double } -instance Num Scorer where - Scorer a b + Scorer c d = Scorer (a + c) (b + d) +add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO download upload p = maintNum +rankPackageIO + :: VersionsFeature + -> PlatformFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> IO Double +rankPackageIO prefferedV platform download upload p = maintNum where + pkgNm :: PackageName + pkgNm = pkgName $ package p -- Number of maintainers maintNum :: IO Double maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint - versionsPkg :: IO Double - versionsPkg = getVersionsFor pkgNm >>= return length - pkgNm :: PackageName - pkgNm = pkgName $ package p + versions = platformVersions platform pkgNm -rankPackagePure p = - reverseDeps - + usageTrend - + docScore - + reverseDeps +rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where - reverseDeps = 1 - dependencies = allBuildDepends p - usageTrend = 1 - docScore = 1 - testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - isApp = (isNothing . library) p && (not . null . executables) p + reverseDeps = 1 + dependencies = allBuildDepends p + usageTrend = 1 + docScore = 1 + testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p + isApp = (isNothing . library) p && (not . null . executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 rankPackage - :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p = - rankPackageIO download upload p >>= (\x -> return $ x + rankPackagePure p) + :: VersionsFeature + -> PlatformFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> IO Double +rankPackage versions platform download upload p = + rankPackageIO versions platform download upload p + >>= (\x -> return $ x + rankPackagePure p) From 7dc45891b80479b5dbf64ebb6b19bae5c6696274 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 11:36:56 +0200 Subject: [PATCH 016/129] got versions and partitioned based on status --- .../Server/Features/HaskellPlatform.hs | 2 +- .../Server/Features/PackageRank.hs | 41 ++++++++++++------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Distribution/Server/Features/HaskellPlatform.hs b/src/Distribution/Server/Features/HaskellPlatform.hs index 9d0840bd8..15be3e815 100644 --- a/src/Distribution/Server/Features/HaskellPlatform.hs +++ b/src/Distribution/Server/Features/HaskellPlatform.hs @@ -1,6 +1,6 @@ {-# LANGUAGE RankNTypes, NamedFieldPuns, RecordWildCards #-} module Distribution.Server.Features.HaskellPlatform ( - PlatformFeature, + PlatformFeature(..), PlatformResource(..), initPlatformFeature, ) where diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e333a3f71..b26887e63 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,10 +4,17 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription +import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.HaskellPlatform import Distribution.Server.Features.PreferredVersions +import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload +import Distribution.Server.Framework +import Distribution.Server.Packages.PackageIndex + ( PackageIndex ) +import qualified Distribution.Server.Packages.PackageIndex + as PackageIndex +import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups , size @@ -23,14 +30,7 @@ data Scorer = Scorer add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO - :: VersionsFeature - -> PlatformFeature - -> DownloadFeature - -> UploadFeature - -> PackageDescription - -> IO Double -rankPackageIO prefferedV platform download upload p = maintNum +rankPackageIO core versions download upload p = maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -39,7 +39,20 @@ rankPackageIO prefferedV platform download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint - versions = platformVersions platform pkgNm + descriptions = do + desc <- lookupPackageName core pkgNm + return (pkgDesc <$> desc) + partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer = do + desc <- descriptions + return + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions + x + (map (pkgVersion . package . packageDescription) desc) + ) + + rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -54,14 +67,14 @@ rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps bool2Double false = 0 rankPackage - :: VersionsFeature - -> PlatformFeature + :: CoreResource + -> VersionsFeature -> DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage versions platform download upload p = - rankPackageIO versions platform download upload p +rankPackage core versions download upload p = + rankPackageIO core versions download upload p >>= (\x -> return $ x + rankPackagePure p) From 96cdd019ac2f5473b2e41b25eaec89c5a955a7e2 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 11:46:31 +0200 Subject: [PATCH 017/129] separated versions into versionList and verPart --- .../Server/Features/PackageRank.hs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index b26887e63..20876e98f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -42,15 +42,20 @@ rankPackageIO core versions download upload p = maintNum descriptions = do desc <- lookupPackageName core pkgNm return (pkgDesc <$> desc) - partVer :: ServerPartE (IO ([Version], [Version], [Version])) - partVer = do + + versionList = do desc <- descriptions - return - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions - x - (map (pkgVersion . package . packageDescription) desc) + return (map (pkgVersion . package . packageDescription) desc) + + partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer = + versionList + >>= (\y -> + return + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions x y) ) + From 206a41251d793d24d7c5069004e83630f59db89a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 14:41:49 +0200 Subject: [PATCH 018/129] added last upload times --- .../Server/Features/PackageRank.hs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 20876e98f..ff1c37d5e 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -21,7 +21,11 @@ import Distribution.Server.Users.Group ) import Distribution.Types.Version +import Data.List ( sort + , sortBy + ) import Data.Maybe ( isNothing ) +import Data.Time.Clock ( UTCTime(..) ) data Scorer = Scorer { total :: Double @@ -39,9 +43,10 @@ rankPackageIO core versions download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint + info = lookupPackageName core pkgNm descriptions = do - desc <- lookupPackageName core pkgNm - return (pkgDesc <$> desc) + infPkg <- info + return (pkgDesc <$> infPkg) versionList = do desc <- descriptions @@ -55,9 +60,12 @@ rankPackageIO core versions download upload p = maintNum $ queryGetPreferredInfo versions pkgNm >>= (\x -> return $ partitionVersions x y) ) - - - + lastUploads = do + infPkg <- info + return + $ sortBy (flip compare) + $ (\x -> fst (pkgOriginalUploadInfo x)) + <$> infPkg rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where From 1676b3d99bcbfbffc78a6edfa96f96e877cd3dc0 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 20:40:51 +0200 Subject: [PATCH 019/129] added freshnessScore --- .../Server/Features/PackageRank.hs | 95 ++++++++++++++----- 1 file changed, 72 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ff1c37d5e..1401cb9e5 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -9,11 +9,7 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload -import Distribution.Server.Framework -import Distribution.Server.Packages.PackageIndex - ( PackageIndex ) -import qualified Distribution.Server.Packages.PackageIndex - as PackageIndex +import Distribution.Server.Framework ( ServerPartE ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -21,20 +17,73 @@ import Distribution.Server.Users.Group ) import Distribution.Types.Version +import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort , sortBy ) import Data.Maybe ( isNothing ) -import Data.Time.Clock ( UTCTime(..) ) +import Data.Ord ( max + , min + ) +import Data.Time.Clock ( UTCTime(..) + , diffUTCTime + , getCurrentTime + , nominalDay + ) +import GHC.Float ( int2Double ) data Scorer = Scorer - { total :: Double - , score :: Double + { maximum :: Double + , score :: Double } add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO core versions download upload p = maintNum +total (Scorer a b) = a / b + +freshnessScore :: [Version] -> UTCTime -> Bool -> IO Double +freshnessScore [] _ app = return 0 +freshnessScore (x : xs) lastUpd app = + daysPastExpiration + >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) + where + versionLatest = versionNumbers x + isNightly = case major versionLatest of + 0 -> True + _ -> False + daysPastExpiration = + age >>= (\a -> return $ max 0 a - expectedUpdateInterval) + expectedUpdateInterval = + int2Double (min (versionStabilityInterval versionLatest) $ length (x : xs)) + / (if isNightly then 4 else 1) + versionStabilityInterval v | patches v > 3 && major v > 0 = 700 + | patches v > 3 = 450 + | patches v > 0 = 300 + | major v > 0 = 200 + | minor v > 3 = 140 + | otherwise = 80 + age = + getCurrentTime + >>= (\x -> + return + $ fromRational + $ toRational + $ diffUTCTime x lastUpd + / fromRational (toRational nominalDay) + ) + -- expected_update_interval/2 + if cr.is_nightly { 30 } else if is_app_only {300} else {200}; + decayDays = + expectedUpdateInterval + / 2 + + (if isNightly then 30 else (if app then 300 else 200)) + major (x : xs) = x + major _ = 0 + minor (x : y : xs) = y + minor _ = 0 + patches (x : y : xs) = sum xs + patches _ = 0 + +rankPackageIO core versions download upload p = liftIO maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -42,30 +91,29 @@ rankPackageIO core versions download upload p = maintNum maintNum :: IO Double maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] - return . fromInteger . toInteger $ size maint + return . int2Double $ size maint info = lookupPackageName core pkgNm descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - versionList = do - desc <- descriptions - return (map (pkgVersion . package . packageDescription) desc) + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions - partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer :: ServerPartE ([Version], [Version], [Version]) partVer = versionList >>= (\y -> - return + liftIO $ queryGetPreferredInfo versions pkgNm >>= (\x -> return $ partitionVersions x y) ) lastUploads = do infPkg <- info - return - $ sortBy (flip compare) - $ (\x -> fst (pkgOriginalUploadInfo x)) - <$> infPkg + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -75,9 +123,10 @@ rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps docScore = 1 testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p isApp = (isNothing . library) p && (not . null . executables) p - bool2Double :: Bool -> Double - bool2Double true = 1 - bool2Double false = 0 + +bool2Double :: Bool -> Double +bool2Double true = 1 +bool2Double false = 0 rankPackage :: CoreResource @@ -85,7 +134,7 @@ rankPackage -> DownloadFeature -> UploadFeature -> PackageDescription - -> IO Double + -> ServerPartE Double rankPackage core versions download upload p = rankPackageIO core versions download upload p >>= (\x -> return $ x + rankPackagePure p) From 19d200d5db2e8fc9de765bc785ef876acc0a6a71 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 22:07:08 +0200 Subject: [PATCH 020/129] added Download Scorer --- src/Distribution/Server/Features/PackageRank.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1401cb9e5..db5ab28ad 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,6 +6,7 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.DownloadCount.State import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload @@ -16,6 +17,7 @@ import Distribution.Server.Users.Group , size ) import Distribution.Types.Version +import Distribution.Server.Util.CountingMap (cmFind) import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort @@ -87,6 +89,7 @@ rankPackageIO core versions download upload p = liftIO maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p + isApp = (isNothing . library) p && (not . null . executables) p -- Number of maintainers maintNum :: IO Double maintNum = do @@ -96,7 +99,9 @@ rankPackageIO core versions download upload p = liftIO maintNum descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - + downloadScore :: IO Scorer + downloadScore = recentPackageDownloads download >>=return.calcDownScore.(cmFind pkgNm) + calcDownScore i = Scorer 5 $ (logBase 2 (int2Double$max 0 (i-100) + 100) - 6.6) / (if isApp then 5 else 6) versionList = do sortBy (flip compare) From 22ed2b48efd7b28c562c1fbc937f42e2b49a8a84 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 16 Jul 2022 20:00:56 +0200 Subject: [PATCH 021/129] finished simple temporalScore (rankPackageIO) --- .../Server/Features/PackageRank.hs | 66 ++++++++++++------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index db5ab28ad..c5c480b89 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,7 +6,6 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.DownloadCount.State import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload @@ -16,8 +15,9 @@ import Distribution.Server.Users.Group ( queryUserGroups , size ) +import Distribution.Server.Util.CountingMap + ( cmFind ) import Distribution.Types.Version -import Distribution.Server.Util.CountingMap (cmFind) import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort @@ -39,25 +39,27 @@ data Scorer = Scorer , score :: Double } +-- frac 0<=frac<=1 +fracScor maxim frac = Scorer maxim (maxim * frac) + +boolScor k true = Scorer k k +boolScor k true = Scorer k 0 + add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) total (Scorer a b) = a / b -freshnessScore :: [Version] -> UTCTime -> Bool -> IO Double -freshnessScore [] _ app = return 0 -freshnessScore (x : xs) lastUpd app = +freshness :: [Version] -> UTCTime -> Bool -> IO Double +freshness [] _ app = return 0 +freshness (x : xs) lastUpd app = daysPastExpiration >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) where versionLatest = versionNumbers x - isNightly = case major versionLatest of - 0 -> True - _ -> False daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = - int2Double (min (versionStabilityInterval versionLatest) $ length (x : xs)) - / (if isNightly then 4 else 1) + expectedUpdateInterval = int2Double + (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 | patches v > 0 = 300 @@ -73,11 +75,7 @@ freshnessScore (x : xs) lastUpd app = $ diffUTCTime x lastUpd / fromRational (toRational nominalDay) ) - -- expected_update_interval/2 + if cr.is_nightly { 30 } else if is_app_only {300} else {200}; - decayDays = - expectedUpdateInterval - / 2 - + (if isNightly then 30 else (if app then 300 else 200)) + decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) major (x : xs) = x major _ = 0 minor (x : y : xs) = y @@ -85,11 +83,15 @@ freshnessScore (x : xs) lastUpd app = patches (x : y : xs) = sum xs patches _ = 0 -rankPackageIO core versions download upload p = liftIO maintNum +temporalScore core versions download upload p = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ add tract $ add fresh downs where pkgNm :: PackageName pkgNm = pkgName $ package p - isApp = (isNothing . library) p && (not . null . executables) p + isApp = (isNothing . library) p && (not . null . executables) p -- Number of maintainers maintNum :: IO Double maintNum = do @@ -99,9 +101,14 @@ rankPackageIO core versions download upload p = liftIO maintNum descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - downloadScore :: IO Scorer - downloadScore = recentPackageDownloads download >>=return.calcDownScore.(cmFind pkgNm) - calcDownScore i = Scorer 5 $ (logBase 2 (int2Double$max 0 (i-100) + 100) - 6.6) / (if isApp then 5 else 6) + downloadScore = downloadsPerMonth >>= return . calcDownScore + downloadsPerMonth = + liftIO $ recentPackageDownloads download >>= return . cmFind pkgNm + calcDownScore i = Scorer 5 $ max + ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) + / (if isApp then 5 else 6) + ) + 5 versionList = do sortBy (flip compare) @@ -119,6 +126,19 @@ rankPackageIO core versions download upload p = liftIO maintNum lastUploads = do infPkg <- info return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + -- [Version] -> UTCTime -> Bool + packageFreshness = do + ups <- lastUploads + vers <- versionList + case ups of + [] -> return 0 + _ -> liftIO $ freshness vers (head ups) isApp + freshnessScore = packageFreshness >>= return . fracScor 10 + -- Missing dependencyFreshnessScore for reasonable effectivity needs caching + tractionScore = do + fresh <- packageFreshness + downs <- downloadsPerMonth + return $ boolScor 1 (fresh * int2Double downs > 1000) rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -141,7 +161,7 @@ rankPackage -> PackageDescription -> ServerPartE Double rankPackage core versions download upload p = - rankPackageIO core versions download upload p - >>= (\x -> return $ x + rankPackagePure p) + temporalScore core versions download upload p + >>= (\x -> return $ total x + rankPackagePure p) From 05c74bf518b7bb7c19c57db703c58736027b7d40 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 17 Jul 2022 21:00:29 +0200 Subject: [PATCH 022/129] separated rankIO from temporalScore --- .../Server/Features/PackageRank.hs | 81 ++++++++++++------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c5c480b89..c8cd2fdfa 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -83,7 +83,55 @@ freshness (x : xs) lastUpd app = patches (x : y : xs) = sum xs patches _ = 0 -temporalScore core versions download upload p = do + +-- partVer :: ServerPartE ([Version], [Version], [Version]) +-- partVer = +-- versionList +-- >>= (\y -> +-- liftIO +-- $ queryGetPreferredInfo versions pkgNm +-- >>= (\x -> return $ partitionVersions x y) +-- ) +-- +-- -- Number of maintainers +-- maintNum :: IO Double +-- maintNum = do +-- maint <- queryUserGroups [maintainersGroup upload pkgNm] +-- return . int2Double $ size maint + +rankIO + :: CoreResource + -> VersionsFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> ServerPartE Scorer + +rankIO core vers downs upl pkg = do + temp <- temporalScore core vers downs upl pkg lastUploads versionList downloadsPerMonth + return temp + + where + pkgNm :: PackageName + pkgNm = pkgName $ package pkg + info = lookupPackageName core pkgNm + descriptions = do + infPkg <- info + return (pkgDesc <$> infPkg) + lastUploads = do + infPkg <- info + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions + downloadsPerMonth = + liftIO $ recentPackageDownloads downs >>= return . cmFind pkgNm + + + +temporalScore core versions download upload p lastUploads versionList downloadsPerMonth= do fresh <- freshnessScore downs <- downloadScore tract <- tractionScore @@ -92,41 +140,12 @@ temporalScore core versions download upload p = do pkgNm :: PackageName pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p - -- Number of maintainers - maintNum :: IO Double - maintNum = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - return . int2Double $ size maint - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) downloadScore = downloadsPerMonth >>= return . calcDownScore - downloadsPerMonth = - liftIO $ recentPackageDownloads download >>= return . cmFind pkgNm calcDownScore i = Scorer 5 $ max ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) 5 - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - - partVer :: ServerPartE ([Version], [Version], [Version]) - partVer = - versionList - >>= (\y -> - liftIO - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions x y) - ) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - -- [Version] -> UTCTime -> Bool packageFreshness = do ups <- lastUploads vers <- versionList @@ -161,7 +180,7 @@ rankPackage -> PackageDescription -> ServerPartE Double rankPackage core versions download upload p = - temporalScore core versions download upload p + rankIO core versions download upload p >>= (\x -> return $ total x + rankPackagePure p) From f8f78bca595ad027a2e5acafe0086592fdf52339 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 17 Jul 2022 22:10:39 +0200 Subject: [PATCH 023/129] added pageRank --- .../Server/Features/PackageRank.hs | 102 +++++++++--------- 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c8cd2fdfa..e7011085f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -32,6 +32,7 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) data Scorer = Scorer @@ -42,10 +43,11 @@ data Scorer = Scorer -- frac 0<=frac<=1 fracScor maxim frac = Scorer maxim (maxim * frac) -boolScor k true = Scorer k k -boolScor k true = Scorer k 0 +boolScor k true = Scorer k k +boolScor k false = Scorer k 0 -add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) +(><) :: Scorer -> Scorer -> Scorer +(><) (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) total (Scorer a b) = a / b @@ -108,39 +110,46 @@ rankIO -> ServerPartE Scorer rankIO core vers downs upl pkg = do - temp <- temporalScore core vers downs upl pkg lastUploads versionList downloadsPerMonth - return temp - - where - pkgNm :: PackageName - pkgNm = pkgName $ package pkg - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - downloadsPerMonth = - liftIO $ recentPackageDownloads downs >>= return . cmFind pkgNm - - - -temporalScore core versions download upload p lastUploads versionList downloadsPerMonth= do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ add tract $ add fresh downs + temp <- temporalScore core + vers + downs + upl + pkg + lastUploads + versionList + downloadsPerMonth + return temp + + where + pkgNm :: PackageName + pkgNm = pkgName $ package pkg + info = lookupPackageName core pkgNm + descriptions = do + infPkg <- info + return (pkgDesc <$> infPkg) + lastUploads = do + infPkg <- info + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions + downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + + + +temporalScore core versions download upload p lastUploads versionList downloadsPerMonth + = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract >< fresh >< downs where pkgNm :: PackageName - pkgNm = pkgName $ package p - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = downloadsPerMonth >>= return . calcDownScore + pkgNm = pkgName $ package p + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ max ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -152,25 +161,22 @@ temporalScore core versions download upload p lastUploads versionList downloadsP case ups of [] -> return 0 _ -> liftIO $ freshness vers (head ups) isApp - freshnessScore = packageFreshness >>= return . fracScor 10 - -- Missing dependencyFreshnessScore for reasonable effectivity needs caching + freshnessScore = fracScor 10 <$> packageFreshness +-- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) -rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps +rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats where - reverseDeps = 1 - dependencies = allBuildDepends p - usageTrend = 1 - docScore = 1 - testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - isApp = (isNothing . library) p && (not . null . executables) p - -bool2Double :: Bool -> Double -bool2Double true = 1 -bool2Double false = 0 + tests = boolScor 50 (hasTests p) + benchs = boolScor 10 (hasBenchmarks p) + desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) + -- ducumentation = boolScor 30 () + homeP = boolScor 30 (not $ S.null $ homepage p) + sourceRp = boolScor 8 (not $ null $ sourceRepos p) + cats = boolScor 5 (not $ S.null $ category p) rankPackage :: CoreResource @@ -181,6 +187,6 @@ rankPackage -> ServerPartE Double rankPackage core versions download upload p = rankIO core versions download upload p - >>= (\x -> return $ total x + rankPackagePure p) + >>= (\x -> return $ total x + total (rankPackagePage p)) From a1f5315efa074ff120d1ae3aed74cc949b7a0ff3 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 18 Jul 2022 23:34:51 +0200 Subject: [PATCH 024/129] added versionScore --- .../Server/Features/PackageRank.hs | 105 ++++++++++++------ 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e7011085f..df2868c93 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -24,7 +24,8 @@ import Data.List ( sort , sortBy ) import Data.Maybe ( isNothing ) -import Data.Ord ( max +import Data.Ord ( comparing + , max , min ) import Data.Time.Clock ( UTCTime(..) @@ -32,6 +33,9 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import Distribution.Simple.Utils ( safeHead + , safeLast + ) import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) @@ -40,6 +44,9 @@ data Scorer = Scorer , score :: Double } +scorer maxim frac = case maxim >= frac of + true -> Scorer maxim frac + false -> Scorer maxim maxim -- frac 0<=frac<=1 fracScor maxim frac = Scorer maxim (maxim * frac) @@ -51,6 +58,19 @@ boolScor k false = Scorer k 0 total (Scorer a b) = a / b +major (x : xs) = x +major _ = 0 +minor (x : y : xs) = y +minor _ = 0 +patches (x : y : xs) = sum xs +patches _ = 0 + +numDays :: Maybe UTCTime -> Maybe UTCTime -> Double +numDays (Just first) (Just last) = + fromRational $ toRational $ diffUTCTime first last / fromRational + (toRational nominalDay) +numDays _ _ = 0 + freshness :: [Version] -> UTCTime -> Bool -> IO Double freshness [] _ app = return 0 freshness (x : xs) lastUpd app = @@ -68,33 +88,10 @@ freshness (x : xs) lastUpd app = | major v > 0 = 200 | minor v > 3 = 140 | otherwise = 80 - age = - getCurrentTime - >>= (\x -> - return - $ fromRational - $ toRational - $ diffUTCTime x lastUpd - / fromRational (toRational nominalDay) - ) + age = flip numDays (Just lastUpd) . Just <$> getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) - major (x : xs) = x - major _ = 0 - minor (x : y : xs) = y - minor _ = 0 - patches (x : y : xs) = sum xs - patches _ = 0 - - --- partVer :: ServerPartE ([Version], [Version], [Version]) --- partVer = --- versionList --- >>= (\y -> --- liftIO --- $ queryGetPreferredInfo versions pkgNm --- >>= (\x -> return $ partitionVersions x y) --- ) --- + + -- -- Number of maintainers -- maintNum :: IO Double -- maintNum = do @@ -118,10 +115,10 @@ rankIO core vers downs upl pkg = do lastUploads versionList downloadsPerMonth - return temp + vers <- versionScore versionList vers lastUploads pkg + return (temp >< vers) where - pkgNm :: PackageName pkgNm = pkgName $ package pkg info = lookupPackageName core pkgNm descriptions = do @@ -137,7 +134,53 @@ rankIO core vers downs upl pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs - +versionScore + :: ServerPartE [Version] + -> VersionsFeature + -> ServerPartE [UTCTime] + -> PackageDescription + -> ServerPartE Scorer +versionScore versionList versions lastUploads desc = do + intUse <- intUsable + depre <- deprec + lUps <- lastUploads + return $ calculateScore depre lUps intUse + where + pkgNm = pkgName $ package desc + partVers = + versionList + >>= (\y -> + liftIO + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions x y) + ) + intUsable = do + (norm, _, unpref) <- partVers + return $ versionNumbers <$> norm ++ unpref + deprec = do + (_, deprec, _) <- partVers + return deprec + calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer + calculateScore [] _ _ = Scorer 118 0 + calculateScore depre lUps intUse = + boolScor 20 (length intUse > 1) + >< scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + >< scorer + 15 + (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) + intUse + ) + >< scorer + 20 + (int2Double $ 4 * length + (filter (\x -> major x > 0 && patches x > 0) intUse) + ) + >< scorer + 10 + (int2Double $ patches $ head $ sortBy (comparing patches) intUse) + >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + >< boolScor 5 (not $ null $ depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do @@ -150,7 +193,7 @@ temporalScore core versions download upload p lastUploads versionList downloadsP pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore <$> downloadsPerMonth - calcDownScore i = Scorer 5 $ max + calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) From 0e6db6757111c1c11c7ef2c26f660bfd633503cc Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 19 Jul 2022 22:27:38 +0200 Subject: [PATCH 025/129] added authorScore --- .../Server/Features/PackageRank.hs | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index df2868c93..0315c079b 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -20,7 +20,8 @@ import Distribution.Server.Util.CountingMap import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) -import Data.List ( sort +import Data.List ( maximumBy + , sort , sortBy ) import Data.Maybe ( isNothing ) @@ -92,12 +93,6 @@ freshness (x : xs) lastUpd app = decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) --- -- Number of maintainers --- maintNum :: IO Double --- maintNum = do --- maint <- queryUserGroups [maintainersGroup upload pkgNm] --- return . int2Double $ size maint - rankIO :: CoreResource -> VersionsFeature @@ -116,7 +111,8 @@ rankIO core vers downs upl pkg = do versionList downloadsPerMonth vers <- versionScore versionList vers lastUploads pkg - return (temp >< vers) + auth <- authorScore upl pkg + return (temp >< vers >< auth) where pkgNm = pkgName $ package pkg @@ -134,6 +130,19 @@ rankIO core vers downs upl pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs +authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore upload desc = + liftIO maintScore + >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) >< x) + where + pkgNm = pkgName $ package desc + maintScore :: IO Scorer + maintScore = do + maint <- queryUserGroups [maintainersGroup upload pkgNm] + + return $ boolScor 3 (size maint > 1) >< scorer 5 (int2Double $ size maint) + + versionScore :: ServerPartE [Version] -> VersionsFeature @@ -175,12 +184,10 @@ versionScore versionList versions lastUploads desc = do (int2Double $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - >< scorer - 10 - (int2Double $ patches $ head $ sortBy (comparing patches) intUse) - >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + >< scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - >< boolScor 5 (not $ null $ depre) + >< boolScor 5 (not $ null depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do From 10d14a21377196546d089d3c094a49e55f72c0ff Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Thu, 21 Jul 2022 12:33:17 +0200 Subject: [PATCH 026/129] Fix `non-canonical-return` warnings --- src/Distribution/Server/Features/Security/Migration.hs | 4 ++-- src/Distribution/Server/Framework/BackupRestore.hs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/Security/Migration.hs b/src/Distribution/Server/Features/Security/Migration.hs index 5fb09f054..b9ee61f50 100644 --- a/src/Distribution/Server/Features/Security/Migration.hs +++ b/src/Distribution/Server/Features/Security/Migration.hs @@ -206,11 +206,11 @@ data Migrated a = Migrated MigrationStats a | AlreadyMigrated a deriving (Functor) instance Applicative Migrated where - pure = return + pure = AlreadyMigrated f <*> x = do f' <- f ; x' <- x ; return $ f' x' instance Monad Migrated where - return = AlreadyMigrated + return = pure AlreadyMigrated a >>= f = f a Migrated stats a >>= f = case f a of diff --git a/src/Distribution/Server/Framework/BackupRestore.hs b/src/Distribution/Server/Framework/BackupRestore.hs index 64e5c0bfa..d2158f74f 100644 --- a/src/Distribution/Server/Framework/BackupRestore.hs +++ b/src/Distribution/Server/Framework/BackupRestore.hs @@ -251,7 +251,7 @@ data Restore a = RestoreDone a | RestoreFindBlob BlobId (Bool -> Restore a) instance Monad Restore where - return = RestoreDone + return = pure RestoreDone x >>= g = g x RestoreFail err >>= _ = RestoreFail err RestoreAddBlob bs f >>= g = RestoreAddBlob bs $ \bid -> f bid >>= g @@ -270,7 +270,7 @@ instance Functor Restore where fmap = liftM instance Applicative Restore where - pure = return + pure = RestoreDone mf <*> mx = do f <- mf ; x <- mx ; return (f x) runRestore :: BlobStores -> Restore a -> IO (Either String a) From 1daad17a9defc56211db49ebd0dc7a0c9ab10870 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Thu, 21 Jul 2022 13:03:26 +0200 Subject: [PATCH 027/129] Bump CI to GHC 9.2.3 and restrict to master branch --- .github/workflows/haskell-ci.yml | 32 +++++++++++++++++++------------- .github/workflows/nix-shell.yml | 10 ++++++++-- cabal.haskell-ci | 2 ++ hackage-server.cabal | 2 +- 4 files changed, 30 insertions(+), 16 deletions(-) diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml index 0aa3a3c9e..84b405fcf 100644 --- a/.github/workflows/haskell-ci.yml +++ b/.github/workflows/haskell-ci.yml @@ -8,18 +8,24 @@ # # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.14.3.20220416 +# version: 0.15.20220710 # -# REGENDATA ("0.14.3.20220416",["github","hackage-server.cabal"]) +# REGENDATA ("0.15.20220710",["github","hackage-server.cabal"]) # name: Haskell-CI on: - - push - - pull_request + push: + branches: + - master + - ci* + pull_request: + branches: + - master + - ci* jobs: linux: name: Haskell-CI - Linux - ${{ matrix.compiler }} - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 timeout-minutes: 60 container: @@ -28,9 +34,9 @@ jobs: strategy: matrix: include: - - compiler: ghc-9.2.2 + - compiler: ghc-9.2.3 compilerKind: ghc - compilerVersion: 9.2.2 + compilerVersion: 9.2.3 setup-method: ghcup allow-failure: false - compiler: ghc-9.0.2 @@ -56,10 +62,10 @@ jobs: apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 if [ "${{ matrix.setup-method }}" = ghcup ]; then mkdir -p "$HOME/.ghcup/bin" - curl -sL https://downloads.haskell.org/ghcup/0.1.17.5/x86_64-linux-ghcup-0.1.17.5 > "$HOME/.ghcup/bin/ghcup" + curl -sL https://downloads.haskell.org/ghcup/0.1.17.8/x86_64-linux-ghcup-0.1.17.8 > "$HOME/.ghcup/bin/ghcup" chmod a+x "$HOME/.ghcup/bin/ghcup" - "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" - "$HOME/.ghcup/bin/ghcup" install cabal 3.6.2.0 + "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) + "$HOME/.ghcup/bin/ghcup" install cabal 3.6.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) apt-get update apt-get install -y libbrotli-dev else @@ -67,9 +73,9 @@ jobs: apt-get update apt-get install -y "$HCNAME" libbrotli-dev mkdir -p "$HOME/.ghcup/bin" - curl -sL https://downloads.haskell.org/ghcup/0.1.17.5/x86_64-linux-ghcup-0.1.17.5 > "$HOME/.ghcup/bin/ghcup" + curl -sL https://downloads.haskell.org/ghcup/0.1.17.8/x86_64-linux-ghcup-0.1.17.8 > "$HOME/.ghcup/bin/ghcup" chmod a+x "$HOME/.ghcup/bin/ghcup" - "$HOME/.ghcup/bin/ghcup" install cabal 3.6.2.0 + "$HOME/.ghcup/bin/ghcup" install cabal 3.6.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) fi env: HCKIND: ${{ matrix.compilerKind }} @@ -212,7 +218,7 @@ jobs: ${CABAL} -vnormal check - name: haddock run: | - $CABAL v2-haddock $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all + $CABAL v2-haddock --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all - name: unconstrained build run: | rm -f cabal.project.local diff --git a/.github/workflows/nix-shell.yml b/.github/workflows/nix-shell.yml index 06b6e2781..80cb6f1ac 100644 --- a/.github/workflows/nix-shell.yml +++ b/.github/workflows/nix-shell.yml @@ -1,7 +1,13 @@ name: "Test nix-shell" on: - - push - - pull_request + push: + branches: + - master + - ci* + pull_request: + branches: + - master + - ci* jobs: nix-shell: runs-on: ubuntu-latest diff --git a/cabal.haskell-ci b/cabal.haskell-ci index bc8774bef..8b3444e8f 100644 --- a/cabal.haskell-ci +++ b/cabal.haskell-ci @@ -1,3 +1,5 @@ +branches: master ci* + installed: +all -Cabal -text -parsec -- -- irc-channels works with GHA, but why send to a channel diff --git a/hackage-server.cabal b/hackage-server.cabal index c32615ea9..ba6b86341 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -27,7 +27,7 @@ copyright: 2008-2015 Duncan Coutts, license: BSD-3-Clause license-file: LICENSE -tested-with: GHC == { 9.2.2, 9.0.2, 8.10.7, 8.8.4 } +tested-with: GHC == { 9.2.3, 9.0.2, 8.10.7, 8.8.4 } data-dir: datafiles data-files: From 53295949e966567f5977ea1d99880b52cfee2630 Mon Sep 17 00:00:00 2001 From: Alias Qli <2576814881@qq.com> Date: Fri, 22 Jul 2022 04:15:10 +0800 Subject: [PATCH 028/129] Check authorisation (#1111) --- src/Distribution/Server/Features/UserDetails.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Distribution/Server/Features/UserDetails.hs b/src/Distribution/Server/Features/UserDetails.hs index 04b5e750a..6240e9e8c 100644 --- a/src/Distribution/Server/Features/UserDetails.hs +++ b/src/Distribution/Server/Features/UserDetails.hs @@ -330,6 +330,7 @@ userDetailsFeature templates userDetailsState UserFeature{..} CoreFeature{..} Up handlerGetUserNameContactHtml :: DynamicPath -> ServerPartE Response handlerGetUserNameContactHtml dpath = do (uid, uinfo) <- lookupUserNameFull =<< userNameInPath dpath + guardAuthorised_ [IsUserId uid, InGroup adminGroup] template <- getTemplate templates "user-details-form.html" udetails <- queryUserDetails uid showConfirmationOfSave <- not . null <$> queryString (lookBSs "showConfirmationOfSave") From 22614b524a1d9c4f4017e245d322496393c9d3a2 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 22 Jul 2022 23:19:38 +0200 Subject: [PATCH 029/129] instance Semigroup Scorer --- .../Server/Features/PackageRank.hs | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0315c079b..2d5caeaaa 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -10,6 +10,8 @@ import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) +import Distribution.Server.Framework.Feature + ( queryState ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -45,6 +47,9 @@ data Scorer = Scorer , score :: Double } +instance Semigroup Scorer where + (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) + scorer maxim frac = case maxim >= frac of true -> Scorer maxim frac false -> Scorer maxim maxim @@ -54,9 +59,6 @@ fracScor maxim frac = Scorer maxim (maxim * frac) boolScor k true = Scorer k k boolScor k false = Scorer k 0 -(><) :: Scorer -> Scorer -> Scorer -(><) (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) - total (Scorer a b) = a / b major (x : xs) = x @@ -92,9 +94,11 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) +-- lookupPackageId +-- queryHasDocumentation rankIO - :: CoreResource + :: CoreFeature -> VersionsFeature -> DownloadFeature -> UploadFeature @@ -102,7 +106,7 @@ rankIO -> ServerPartE Scorer rankIO core vers downs upl pkg = do - temp <- temporalScore core + temp <- temporalScore coreR vers downs upl @@ -112,11 +116,13 @@ rankIO core vers downs upl pkg = do downloadsPerMonth vers <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - return (temp >< vers >< auth) + return (temp <> vers <> auth) where + pkgId = package pkg pkgNm = pkgName $ package pkg - info = lookupPackageName core pkgNm + info = lookupPackageName coreR pkgNm + coreR = coreResource core descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -133,14 +139,14 @@ rankIO core vers downs upl pkg = do authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = liftIO maintScore - >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) >< x) + >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do maint <- queryUserGroups [maintainersGroup upload pkgNm] - return $ boolScor 3 (size maint > 1) >< scorer 5 (int2Double $ size maint) + return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) versionScore @@ -173,28 +179,28 @@ versionScore versionList versions lastUploads desc = do calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) - >< scorer 40 (numDays (safeHead lUps) (safeLast lUps)) - >< scorer + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + <> scorer 15 (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) intUse ) - >< scorer + <> scorer 20 (int2Double $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - >< scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) - >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) - >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - >< boolScor 5 (not $ null depre) + <> scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + <> boolScor 5 (not $ null depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do fresh <- freshnessScore downs <- downloadScore tract <- tractionScore - return $ tract >< fresh >< downs + return $ tract <> fresh <> downs where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -218,7 +224,7 @@ temporalScore core versions download upload p lastUploads versionList downloadsP downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) -rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats +rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -229,7 +235,7 @@ rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats cats = boolScor 5 (not $ S.null $ category p) rankPackage - :: CoreResource + :: CoreFeature -> VersionsFeature -> DownloadFeature -> UploadFeature From 90717ddb2f407376e701d714a5dffb3f31759a2e Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 23 Jul 2022 15:41:21 +0200 Subject: [PATCH 030/129] got tarballs and fixed warnings --- .../Server/Features/PackageRank.hs | 126 +++++++++++------- 1 file changed, 80 insertions(+), 46 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 2d5caeaaa..0ff9dfec4 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -5,11 +5,15 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core +import Distribution.Server.Features.Documentation + ( DocumentationFeature(..) ) import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) +import Distribution.Server.Framework.BlobStorage + ( BlobId ) import Distribution.Server.Framework.Feature ( queryState ) import Distribution.Server.Packages.Types @@ -23,7 +27,6 @@ import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) import Data.List ( maximumBy - , sort , sortBy ) import Data.Maybe ( isNothing ) @@ -50,32 +53,37 @@ data Scorer = Scorer instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) -scorer maxim frac = case maxim >= frac of - true -> Scorer maxim frac - false -> Scorer maxim maxim --- frac 0<=frac<=1 -fracScor maxim frac = Scorer maxim (maxim * frac) +scorer :: Double -> Double -> Scorer +scorer maxim scr = if maxim >= scr then (Scorer maxim scr) else (Scorer maxim maxim) -boolScor k true = Scorer k k -boolScor k false = Scorer k 0 +fracScor :: Double -> Double -> Scorer +fracScor maxim frac = scorer maxim (maxim * frac) +boolScor :: Double -> Bool -> Scorer +boolScor k True = Scorer k k +boolScor k False = Scorer k 0 + +total :: Scorer -> Double total (Scorer a b) = a / b -major (x : xs) = x -major _ = 0 -minor (x : y : xs) = y -minor _ = 0 -patches (x : y : xs) = sum xs +major :: Num a => [a] -> a +major (x : _) = x +major _ = 0 +minor :: Num a => [a] -> a +minor (_ : y : _) = y +minor _ = 0 +patches :: Num a => [a] -> a +patches (_ : _ : xs) = sum xs patches _ = 0 numDays :: Maybe UTCTime -> Maybe UTCTime -> Double -numDays (Just first) (Just last) = - fromRational $ toRational $ diffUTCTime first last / fromRational +numDays (Just first) (Just end) = + fromRational $ toRational $ diffUTCTime first end / fromRational (toRational nominalDay) numDays _ _ = 0 freshness :: [Version] -> UTCTime -> Bool -> IO Double -freshness [] _ app = return 0 +freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) @@ -97,32 +105,26 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation +-- TODO CoreFeature can be substituted by CoreResource rankIO - :: CoreFeature + :: CoreResource -> VersionsFeature -> DownloadFeature -> UploadFeature + -> DocumentationFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl pkg = do - temp <- temporalScore coreR - vers - downs - upl - pkg - lastUploads - versionList - downloadsPerMonth - vers <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg - return (temp <> vers <> auth) +rankIO core vers downs upl docs pkg = do + temp <- temporalScore pkg lastUploads versionList downloadsPerMonth + versS <- versionScore versionList vers lastUploads pkg + auth <- authorScore upl pkg + return (temp <> versS <> auth) where pkgId = package pkg - pkgNm = pkgName $ package pkg - info = lookupPackageName coreR pkgNm - coreR = coreResource core + pkgNm = pkgName pkgId + info = lookupPackageName core pkgNm descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -135,6 +137,31 @@ rankIO core vers downs upl pkg = do . map (pkgVersion . package . packageDescription) <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + -- TODO get appropriate pkgInfo (head might fail) + packageTarball = pkgLatestTarball . head <$> info + documentTarball :: ServerPartE (Maybe BlobId) + documentTarball = queryDocumentation docs pkgId + +-- mdocs <- queryState documentationState $ LookupDocumentation pkgid +-- case mdocs of +-- Nothing -> +-- errNotFoundH "Not Found" +-- [ MText "There is no documentation for " +-- , MLink (display pkgid) ("/package/" ++ display pkgid) +-- , MText ". See " +-- , MLink canonicalLink canonicalLink +-- , MText " for the latest version." +-- ] +-- where +-- -- Essentially errNotFound, but overloaded to specify a header. +-- -- (Needed since errNotFound throws away result of setHeaderM) +-- errNotFoundH title message = throwError +-- (ErrorResponse 404 +-- [("Link", canonicalHeader)] +-- title message) +-- Just blob -> do +-- index <- liftIO $ cachedTarIndex blob +-- func pkgid blob index authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -173,8 +200,8 @@ versionScore versionList versions lastUploads desc = do (norm, _, unpref) <- partVers return $ versionNumbers <$> norm ++ unpref deprec = do - (_, deprec, _) <- partVers - return deprec + (_, deprecN, _) <- partVers + return deprecN calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = @@ -195,15 +222,18 @@ versionScore versionList versions lastUploads desc = do <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) <> boolScor 5 (not $ null depre) -temporalScore core versions download upload p lastUploads versionList downloadsPerMonth - = do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ tract <> fresh <> downs +temporalScore + :: PackageDescription + -> ServerPartE [UTCTime] + -> ServerPartE [Version] + -> ServerPartE Int + -> ServerPartE Scorer +temporalScore p lastUploads versionList downloadsPerMonth = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract <> fresh <> downs where - pkgNm :: PackageName - pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ min @@ -224,25 +254,29 @@ temporalScore core versions download upload p lastUploads versionList downloadsP downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) +rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) - -- ducumentation = boolScor 30 () + -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) +-- TODO fix the function Signature replace PackageDescription to PackageName/Identifier + rankPackage - :: CoreFeature + :: CoreResource -> VersionsFeature -> DownloadFeature -> UploadFeature + -> DocumentationFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload p = - rankIO core versions download upload p +rankPackage core versions download upload docs p = + rankIO core versions download upload docs p >>= (\x -> return $ total x + total (rankPackagePage p)) From 973a957fbe819d6c5a99b6b0c0d03faa8cbad0f9 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 25 Jul 2022 14:32:18 +0200 Subject: [PATCH 031/129] extracted documentation length --- .../Server/Features/PackageRank.hs | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0ff9dfec4..c107789ca 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -14,6 +14,8 @@ import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import Distribution.Server.Framework.BlobStorage ( BlobId ) +import qualified Distribution.Server.Framework.BlobStorage + as BlobStorage import Distribution.Server.Framework.Feature ( queryState ) import Distribution.Server.Packages.Types @@ -26,6 +28,7 @@ import Distribution.Server.Util.CountingMap import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) +import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) @@ -39,6 +42,8 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import Distribution.Server.Framework.ServerEnv + ( ServerEnv(..) ) import Distribution.Simple.Utils ( safeHead , safeLast ) @@ -54,7 +59,8 @@ instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) scorer :: Double -> Double -> Scorer -scorer maxim scr = if maxim >= scr then (Scorer maxim scr) else (Scorer maxim maxim) +scorer maxim scr = + if maxim >= scr then Scorer maxim scr else Scorer maxim maxim fracScor :: Double -> Double -> Scorer fracScor maxim frac = scorer maxim (maxim * frac) @@ -112,10 +118,11 @@ rankIO -> DownloadFeature -> UploadFeature -> DocumentationFeature + -> ServerEnv -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs pkg = do +rankIO core vers downs upl docs env pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg @@ -139,29 +146,19 @@ rankIO core vers downs upl docs pkg = do downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) packageTarball = pkgLatestTarball . head <$> info - documentTarball :: ServerPartE (Maybe BlobId) - documentTarball = queryDocumentation docs pkgId + documentBlob :: ServerPartE (Maybe BlobId) + documentBlob = queryDocumentation docs pkgId + blobStore = serverBlobStore env + documentation = do + blob <- documentBlob + maybeIO blob + where + maybeIO Nothing = return Nothing + maybeIO (Just a) = liftIO (Just <$> BlobStorage.fetch blobStore a) --- mdocs <- queryState documentationState $ LookupDocumentation pkgid --- case mdocs of --- Nothing -> --- errNotFoundH "Not Found" --- [ MText "There is no documentation for " --- , MLink (display pkgid) ("/package/" ++ display pkgid) --- , MText ". See " --- , MLink canonicalLink canonicalLink --- , MText " for the latest version." --- ] --- where --- -- Essentially errNotFound, but overloaded to specify a header. --- -- (Needed since errNotFound throws away result of setHeaderM) --- errNotFoundH title message = throwError --- (ErrorResponse 404 --- [("Link", canonicalHeader)] --- title message) --- Just blob -> do --- index <- liftIO $ cachedTarIndex blob --- func pkgid blob index + documLines = + (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) + <$> documentation -- 10 is \n authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -273,10 +270,11 @@ rankPackage -> DownloadFeature -> UploadFeature -> DocumentationFeature + -> ServerEnv -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs p = - rankIO core versions download upload docs p +rankPackage core versions download upload docs env p = + rankIO core versions download upload docs env p >>= (\x -> return $ total x + total (rankPackagePage p)) From 03b3f818e8c488e2a3cbe27222cf34378054e374 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 27 Jul 2022 22:50:20 +0200 Subject: [PATCH 032/129] got tarEntries for package and fixed it for documentation --- .../Server/Features/PackageRank.hs | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c107789ca..9166638f7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -10,10 +10,9 @@ import Distribution.Server.Features.Documentation import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State +import Distribution.Server.Features.TarIndexCache import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) -import Distribution.Server.Framework.BlobStorage - ( BlobId ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.Feature @@ -27,6 +26,11 @@ import Distribution.Server.Util.CountingMap ( cmFind ) import Distribution.Types.Version +import Control.Monad ( forM + , join + , liftM2 + , mapM + ) import Control.Monad.IO.Class ( liftIO ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy @@ -37,6 +41,7 @@ import Data.Ord ( comparing , max , min ) +import qualified Data.TarIndex as TarIndex import Data.Time.Clock ( UTCTime(..) , diffUTCTime , getCurrentTime @@ -54,6 +59,7 @@ data Scorer = Scorer { maximum :: Double , score :: Double } + deriving Show instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) @@ -119,10 +125,11 @@ rankIO -> UploadFeature -> DocumentationFeature -> ServerEnv + -> TarIndexCacheFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs env pkg = do +rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg @@ -145,20 +152,32 @@ rankIO core vers downs upl docs env pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageTarball = pkgLatestTarball . head <$> info - documentBlob :: ServerPartE (Maybe BlobId) - documentBlob = queryDocumentation docs pkgId - blobStore = serverBlobStore env - documentation = do + packageTarB = info >>= liftIO . packageTarball tarCache . head + packageTarEntr = do + tarB <- packageTarB + return + . join + $ (\(path, _, index) -> TarIndex.lookup index path) + <$> rightToMaybe tarB + rightToMaybe (Right a) = Just a + rightToMaybe (Left _) = Nothing + documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) + documentBlob = queryDocumentation docs pkgId + documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentationEntry = do + index <- documentIndex + path <- documentPath + return . join $ liftM2 TarIndex.lookup index path + + blobStore = serverBlobStore env + documentPath = do blob <- documentBlob - maybeIO blob - where - maybeIO Nothing = return Nothing - maybeIO (Just a) = liftIO (Just <$> BlobStorage.fetch blobStore a) + return $ (BlobStorage.filepath blobStore) <$> blob - documLines = - (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) - <$> documentation -- 10 is \n + -- TODO fix this + --documLines = + -- (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) + -- <$> documentation -- 10 is \n authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -271,10 +290,11 @@ rankPackage -> UploadFeature -> DocumentationFeature -> ServerEnv + -> TarIndexCacheFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs env p = - rankIO core versions download upload docs env p +rankPackage core versions download upload docs env tarCache p = + rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) From c84f467c2453e59f6bfd24f06ee3615befddcb18 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Sun, 24 Jul 2022 16:57:54 +0200 Subject: [PATCH 033/129] Fix #1105: change order of markdown parsers to allow pipes in lists --- src/Distribution/Server/Util/Markdown.hs | 56 +++++++++++++++++++++--- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/src/Distribution/Server/Util/Markdown.hs b/src/Distribution/Server/Util/Markdown.hs index bc8fb84a6..033ab4f00 100644 --- a/src/Distribution/Server/Util/Markdown.hs +++ b/src/Distribution/Server/Util/Markdown.hs @@ -120,6 +120,30 @@ adjustRelativeLink url --

Published to http://hackage.haskell.org/foo3/bar.

-- -- +-- >>> renderMarkdown "test" "Issue #1105:\n- pipes\n- like `a|b`\n- should be allowed in lists" +--

Issue #1105:

+--
    +--
  • pipes +--
  • +--
  • like a|b +--
  • +--
  • should be allowed in lists +--
  • +--
+-- +-- +-- >>> renderMarkdown "test" "Tables should be supported:\n\nfoo|bar\n---|---\n" +--

Tables should be supported:

+-- +-- +-- +-- +-- +-- +-- +--
foobar
+-- +-- renderMarkdown :: String -- ^ Name or path of input. -> BS.ByteString -- ^ Commonmark text input. @@ -160,11 +184,33 @@ renderMarkdown' -> BS.ByteString -- ^ Commonmark text input. -> XHtml.Html -- ^ Rendered HTML. renderMarkdown' render name md = - either (const $ XHtml.pre XHtml.<< T.unpack txt) (XHtml.primHtml . T.unpack . sanitizeBalance . TL.toStrict . render) $ - runIdentity (commonmarkWith (mathSpec <> gfmExtensions <> defaultSyntaxSpec) - name - txt) - where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md + either (const $ fallback) mdToHTML $ + runIdentity $ commonmarkWith spec name txt + where + -- Input + txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md + -- Fall back to HTML if there is a parse error for markdown + fallback = XHtml.pre XHtml.<< T.unpack txt + -- Conversion of parsed md to HTML + mdToHTML = XHtml.primHtml . T.unpack . sanitizeBalance . TL.toStrict . render + -- Specification of the markdown parser. + -- Andreas Abel, 2022-07-21, issue #1105. + -- Workaround for https://github.com/jgm/commonmark-hs/issues/95: + -- Put the table parser last. + spec = mconcat $ + mathSpec : + -- all the gfm extensions except for tables + emojiSpec : + strikethroughSpec : + autolinkSpec : + autoIdentifiersSpec : + taskListSpec : + footnoteSpec : + -- the default syntax + defaultSyntaxSpec : + -- the problematic table parser + pipeTableSpec : + [] -- | Does the file extension suggest that the file is in markdown syntax? supposedToBeMarkdown :: FilePath -> Bool From 09e058c583ef7f03308165080893afd950dd19c3 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 29 Jul 2022 17:18:22 +0200 Subject: [PATCH 034/129] added codeScore --- .../Server/Features/PackageRank.hs | 109 ++++++++++++------ 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9166638f7..1c4564ea6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,9 +1,9 @@ +{-# LANGUAGE TupleSections #-} + module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) @@ -15,8 +15,8 @@ import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage -import Distribution.Server.Framework.Feature - ( queryState ) +import Distribution.Server.Framework.ServerEnv + ( ServerEnv(..) ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -24,12 +24,18 @@ import Distribution.Server.Users.Group ) import Distribution.Server.Util.CountingMap ( cmFind ) +import Distribution.Package +import Distribution.PackageDescription import Distribution.Types.Version +import Distribution.Simple.Utils ( safeHead + , safeLast + ) +import qualified Distribution.Utils.ShortText as S -import Control.Monad ( forM - , join +import qualified Codec.Archive.Tar as Tar +import qualified Codec.Archive.Tar.Entry as Tar +import Control.Monad ( join , liftM2 - , mapM ) import Control.Monad.IO.Class ( liftIO ) import qualified Data.ByteString.Lazy as BSL @@ -37,23 +43,16 @@ import Data.List ( maximumBy , sortBy ) import Data.Maybe ( isNothing ) -import Data.Ord ( comparing - , max - , min - ) -import qualified Data.TarIndex as TarIndex +import Data.Ord ( comparing ) +import qualified Data.TarIndex as T import Data.Time.Clock ( UTCTime(..) , diffUTCTime , getCurrentTime , nominalDay ) -import Distribution.Server.Framework.ServerEnv - ( ServerEnv(..) ) -import Distribution.Simple.Utils ( safeHead - , safeLast - ) -import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) +import System.FilePath ( isExtensionOf ) +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -133,7 +132,8 @@ rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - return (temp <> versS <> auth) + codeS <- codeScore documentLines srcLines packageLines + return (temp <> versS <> auth <> codeS) where pkgId = package pkg @@ -152,32 +152,53 @@ rankIO core vers downs upl docs env tarCache pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageTarB = info >>= liftIO . packageTarball tarCache . head - packageTarEntr = do - tarB <- packageTarB + packageEntr = do + inf <- info + tarB <- liftIO . packageTarball tarCache . head $ inf return - . join - $ (\(path, _, index) -> TarIndex.lookup index path) - <$> rightToMaybe tarB + $ (\(path, _, index) -> (path, ) <$> T.lookup index path) + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing + documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) - documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) - documentationEntry = do + documentBlob = queryDocumentation docs pkgId + documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentationEntr = do index <- documentIndex path <- documentPath - return . join $ liftM2 TarIndex.lookup index path + return $ liftM2 (,) path (join $ liftM2 T.lookup index path) + documentLines = documentationEntr >>= liftIO . filterLinesTar (const True) + srcLines = packageEntr >>= liftIO . filterLinesTar (isExtensionOf ".hs") + packageLines = packageEntr >>= liftIO . filterLinesTar (const True) + + filterLinesTar + :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double + filterLinesTar f (Just (path, T.TarFileEntry offset)) = + if f path then getLines path offset else return 0 + filterLinesTar f (Just (_, T.TarDir dir)) = + sum <$> mapM (filterLinesTar f . Just) dir + filterLinesTar _ _ = return 0 + + -- TODO if size is too big give it a good score and do not read the file + getLines path offset = do + handle <- SIO.openFile path SIO.ReadMode + SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512) + header <- BSL.hGet handle 512 + case Tar.read header of + (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do + body <- BSL.hGet handle (fromIntegral siz) + return + $ int2Double + . length + . filter (not . BSL.null) + . BSL.split 10 + $ body + _ -> return 0 - blobStore = serverBlobStore env documentPath = do blob <- documentBlob - return $ (BlobStorage.filepath blobStore) <$> blob - - -- TODO fix this - --documLines = - -- (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) - -- <$> documentation -- 10 is \n + return $ BlobStorage.filepath (serverBlobStore env) <$> blob authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -191,6 +212,20 @@ authorScore upload desc = return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) +codeScore + :: ServerPartE Double + -> ServerPartE Double + -> ServerPartE Double + -> ServerPartE Scorer +codeScore documentL haskellL packageL = do + docum <- documentL + haskell <- haskellL + pkg <- packageL + return + $ boolScor 1 (pkg > 700) + <> boolScor 1 (pkg < 80000) + <> fracScor 2 (min 1 (haskell / 5000)) + <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) versionScore :: ServerPartE [Version] @@ -296,5 +331,3 @@ rankPackage rankPackage core versions download upload docs env tarCache p = rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) - - From a8ae12e6db9353000839dd7b4a97e8f2022d82d4 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 2 Aug 2022 17:10:42 +0200 Subject: [PATCH 035/129] replaced some Features by ListFeature --- .../Server/Features/PackageRank.hs | 81 +++++++++---------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1c4564ea6..c7b0c68cd 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,32 +4,25 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where +import Distribution.Package +import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.PackageList import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache -import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types -import Distribution.Server.Users.Group - ( queryUserGroups - , size - ) -import Distribution.Server.Util.CountingMap - ( cmFind ) -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead , safeLast ) +import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar @@ -52,7 +45,7 @@ import Data.Time.Clock ( UTCTime(..) ) import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -116,22 +109,20 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation --- TODO CoreFeature can be substituted by CoreResource rankIO :: CoreResource -> VersionsFeature - -> DownloadFeature - -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature + -> ListFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs env tarCache pkg = do +rankIO core vers docs env tarCache list pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg + auth <- authorScore pkg pkgIt codeS <- codeScore documentLines srcLines packageLines return (temp <> versS <> auth <> codeS) @@ -139,6 +130,7 @@ rankIO core vers downs upl docs env tarCache pkg = do pkgId = package pkg pkgNm = pkgName pkgId info = lookupPackageName core pkgNm + pkgIt = safeHead <$> makeItemList list [pkgNm] descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -150,14 +142,18 @@ rankIO core vers downs upl docs env tarCache pkg = do sortBy (flip compare) . map (pkgVersion . package . packageDescription) <$> descriptions - downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + downloadsPerMonth :: ServerPartE (Maybe Int) + downloadsPerMonth = liftIO $ do + items <- pkgIt + return (itemDownloads <$> items) -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do inf <- info - tarB <- liftIO . packageTarball tarCache . head $ inf + tarB <- liftIO $ mapM (packageTarball tarCache) (safeHead inf) return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB + =<< (join $ rightToMaybe <$> tarB) + rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -200,17 +196,19 @@ rankIO core vers downs upl docs env tarCache pkg = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer -authorScore upload desc = +authorScore + :: PackageDescription -> IO (Maybe PackageItem) -> ServerPartE Scorer +authorScore desc item = liftIO maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - - return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) + it <- item + return $ boolScor 3 (nMaint it > 1) <> scorer 5 (int2Double $ nMaint it) + nMaint (Just iT) = length $ itemMaintainer iT + nMaint Nothing = 0 codeScore :: ServerPartE Double @@ -277,16 +275,18 @@ temporalScore :: PackageDescription -> ServerPartE [UTCTime] -> ServerPartE [Version] - -> ServerPartE Int + -> ServerPartE (Maybe Int) -> ServerPartE Scorer -temporalScore p lastUploads versionList downloadsPerMonth = do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ tract <> fresh <> downs +temporalScore p lastUploads versionList downloadsPM = do + download <- downloadsPM + fresh <- freshnessScore + downS <- downloadScore download + tract <- tractionScore download + return $ tract <> fresh <> downS where - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = calcDownScore <$> downloadsPerMonth + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore Nothing = return $ scorer 5 0 + downloadScore (Just downloads) = return $ calcDownScore downloads calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -300,10 +300,10 @@ temporalScore p lastUploads versionList downloadsPerMonth = do _ -> liftIO $ freshness vers (head ups) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching - tractionScore = do + tractionScore Nothing = return $ scorer 1 0 + tractionScore (Just downloads) = do fresh <- packageFreshness - downs <- downloadsPerMonth - return $ boolScor 1 (fresh * int2Double downs > 1000) + return $ boolScor 1 (fresh * int2Double downloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -321,13 +321,12 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: CoreResource -> VersionsFeature - -> DownloadFeature - -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature + -> ListFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs env tarCache p = - rankIO core versions download upload docs env tarCache p +rankPackage core versions docs env tarCache list p = + rankIO core versions docs env tarCache list p >>= (\x -> return $ total x + total (rankPackagePage p)) From 124006fe3c1d3b01942def21d560b6f13b9e6dec Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 3 Aug 2022 18:57:20 +0200 Subject: [PATCH 036/129] added some Features to BrowseFeatures - prototype --- src/Distribution/Server/Features.hs | 4 ++++ src/Distribution/Server/Features/Browse.hs | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index f8a8e362e..6d7507903 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -351,6 +351,10 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do searchFeature distroFeature candidatesFeature + versionsFeature + documentationCoreFeature + tarIndexCacheFeature + env packageInfoJSONFeature <- mkPackageJSONFeature coreFeature diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index ada4b622c..5da4fa97d 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -25,6 +25,9 @@ import Distribution.Server.Features.PackageList (ListFeature(ListFeature), Packa import Distribution.Server.Features.Search (SearchFeature(SearchFeature), searchPackages) import Distribution.Server.Features.Tags (Tag(..), TagsFeature(TagsFeature), TagsResource, tagUri, tagsResource) import Distribution.Server.Features.Users (UserFeature(UserFeature), UserResource, userResource, userPageUri) +import Distribution.Server.Features.PreferredVersions (VersionsFeature) +import Distribution.Server.Features.Documentation (DocumentationFeature) +import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature) import Distribution.Server.Framework.Error (ErrorResponse(ErrorResponse)) import Distribution.Server.Framework.Feature (HackageFeature(..), emptyHackageFeature) import Distribution.Server.Framework.RequestContentTypes (expectAesonContent) @@ -47,6 +50,10 @@ type BrowseFeature = -> SearchFeature -> DistroFeature -> PackageCandidatesFeature + -> VersionsFeature + -> DocumentationFeature + -> TarIndexCacheFeature + -> ServerEnv -> IO HackageFeature data ResponseFormatWithMethod @@ -63,7 +70,7 @@ initBrowseFeature ServerEnv{serverTemplatesDir, serverTemplatesMode} = do , "noscript-search-form.html" , "noscript-next-page-form.html" ] - pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature -> do + pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature versionsFeature documentationFeature tarIndexCacheFeature serverEnv -> do let html = htmlUtilities coreFeature packageCandidatesFeature tagsFeature userFeature renderer :: ResponseFormatWithMethod -> ServerPartT (ExceptT ErrorResponse IO) Response From f8d176b3afa9b7c71c1c52f3924e246e15736c8f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 4 Aug 2022 22:26:54 +0200 Subject: [PATCH 037/129] Revert "added some Features to BrowseFeatures - prototype" This reverts commit 124006fe3c1d3b01942def21d560b6f13b9e6dec. --- src/Distribution/Server/Features.hs | 4 ---- src/Distribution/Server/Features/Browse.hs | 9 +-------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index 6d7507903..f8a8e362e 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -351,10 +351,6 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do searchFeature distroFeature candidatesFeature - versionsFeature - documentationCoreFeature - tarIndexCacheFeature - env packageInfoJSONFeature <- mkPackageJSONFeature coreFeature diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index 5da4fa97d..ada4b622c 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -25,9 +25,6 @@ import Distribution.Server.Features.PackageList (ListFeature(ListFeature), Packa import Distribution.Server.Features.Search (SearchFeature(SearchFeature), searchPackages) import Distribution.Server.Features.Tags (Tag(..), TagsFeature(TagsFeature), TagsResource, tagUri, tagsResource) import Distribution.Server.Features.Users (UserFeature(UserFeature), UserResource, userResource, userPageUri) -import Distribution.Server.Features.PreferredVersions (VersionsFeature) -import Distribution.Server.Features.Documentation (DocumentationFeature) -import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature) import Distribution.Server.Framework.Error (ErrorResponse(ErrorResponse)) import Distribution.Server.Framework.Feature (HackageFeature(..), emptyHackageFeature) import Distribution.Server.Framework.RequestContentTypes (expectAesonContent) @@ -50,10 +47,6 @@ type BrowseFeature = -> SearchFeature -> DistroFeature -> PackageCandidatesFeature - -> VersionsFeature - -> DocumentationFeature - -> TarIndexCacheFeature - -> ServerEnv -> IO HackageFeature data ResponseFormatWithMethod @@ -70,7 +63,7 @@ initBrowseFeature ServerEnv{serverTemplatesDir, serverTemplatesMode} = do , "noscript-search-form.html" , "noscript-next-page-form.html" ] - pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature versionsFeature documentationFeature tarIndexCacheFeature serverEnv -> do + pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature -> do let html = htmlUtilities coreFeature packageCandidatesFeature tagsFeature userFeature renderer :: ResponseFormatWithMethod -> ServerPartT (ExceptT ErrorResponse IO) Response From 18e769de386b2faf3acb04f2925b322d28536a57 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 11:20:34 +0200 Subject: [PATCH 038/129] Revert "replaced some Features by ListFeature" This reverts commit a8ae12e6db9353000839dd7b4a97e8f2022d82d4. --- .../Server/Features/PackageRank.hs | 81 ++++++++++--------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c7b0c68cd..1c4564ea6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,25 +4,32 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.PackageList +import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache +import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types +import Distribution.Server.Users.Group + ( queryUserGroups + , size + ) +import Distribution.Server.Util.CountingMap + ( cmFind ) +import Distribution.Package +import Distribution.PackageDescription +import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead , safeLast ) -import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar @@ -45,7 +52,7 @@ import Data.Time.Clock ( UTCTime(..) ) import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -109,20 +116,22 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation +-- TODO CoreFeature can be substituted by CoreResource rankIO :: CoreResource -> VersionsFeature + -> DownloadFeature + -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> ListFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers docs env tarCache list pkg = do +rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore pkg pkgIt + auth <- authorScore upl pkg codeS <- codeScore documentLines srcLines packageLines return (temp <> versS <> auth <> codeS) @@ -130,7 +139,6 @@ rankIO core vers docs env tarCache list pkg = do pkgId = package pkg pkgNm = pkgName pkgId info = lookupPackageName core pkgNm - pkgIt = safeHead <$> makeItemList list [pkgNm] descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -142,18 +150,14 @@ rankIO core vers docs env tarCache list pkg = do sortBy (flip compare) . map (pkgVersion . package . packageDescription) <$> descriptions - downloadsPerMonth :: ServerPartE (Maybe Int) - downloadsPerMonth = liftIO $ do - items <- pkgIt - return (itemDownloads <$> items) + downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do inf <- info - tarB <- liftIO $ mapM (packageTarball tarCache) (safeHead inf) + tarB <- liftIO . packageTarball tarCache . head $ inf return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< (join $ rightToMaybe <$> tarB) - + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -196,19 +200,17 @@ rankIO core vers docs env tarCache list pkg = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore - :: PackageDescription -> IO (Maybe PackageItem) -> ServerPartE Scorer -authorScore desc item = +authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore upload desc = liftIO maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do - it <- item - return $ boolScor 3 (nMaint it > 1) <> scorer 5 (int2Double $ nMaint it) - nMaint (Just iT) = length $ itemMaintainer iT - nMaint Nothing = 0 + maint <- queryUserGroups [maintainersGroup upload pkgNm] + + return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) codeScore :: ServerPartE Double @@ -275,18 +277,16 @@ temporalScore :: PackageDescription -> ServerPartE [UTCTime] -> ServerPartE [Version] - -> ServerPartE (Maybe Int) + -> ServerPartE Int -> ServerPartE Scorer -temporalScore p lastUploads versionList downloadsPM = do - download <- downloadsPM - fresh <- freshnessScore - downS <- downloadScore download - tract <- tractionScore download - return $ tract <> fresh <> downS +temporalScore p lastUploads versionList downloadsPerMonth = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract <> fresh <> downs where - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore Nothing = return $ scorer 5 0 - downloadScore (Just downloads) = return $ calcDownScore downloads + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -300,10 +300,10 @@ temporalScore p lastUploads versionList downloadsPM = do _ -> liftIO $ freshness vers (head ups) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching - tractionScore Nothing = return $ scorer 1 0 - tractionScore (Just downloads) = do + tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double downloads > 1000) + downs <- downloadsPerMonth + return $ boolScor 1 (fresh * int2Double downs > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -321,12 +321,13 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: CoreResource -> VersionsFeature + -> DownloadFeature + -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> ListFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions docs env tarCache list p = - rankIO core versions docs env tarCache list p +rankPackage core versions download upload docs env tarCache p = + rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) From 0bae69846888148c3149889cd1d407b52c72127c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 11:48:50 +0200 Subject: [PATCH 039/129] changed ListFeature to fit PackageRank --- src/Distribution/Server/Features.hs | 2 ++ .../Server/Features/PackageList.hs | 24 +++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index f8a8e362e..2b478d5fb 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -285,6 +285,8 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do versionsFeature usersFeature uploadFeature + documentationCoreFeature + tarIndexCacheFeature searchFeature <- mkSearchFeature coreFeature diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 1a719fc22..abc594373 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -15,6 +15,9 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Tags import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) +import Distribution.Server.Features.Documentation (DocumentationFeature(..)) +import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) + import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet import Distribution.Server.Users.Group(UserGroup(..), GroupDescription(..)) @@ -39,7 +42,6 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Time.Clock (UTCTime(..)) - data ListFeature = ListFeature { listFeatureInterface :: HackageFeature, @@ -108,6 +110,8 @@ initListFeature :: ServerEnv -> VersionsFeature -> UserFeature -> UploadFeature + -> DocumentationFeature + -> TarIndexCacheFeature -> IO ListFeature) initListFeature _env = do itemCache <- newMemStateWHNF Map.empty @@ -120,11 +124,12 @@ initListFeature _env = do tagsf@TagsFeature{..} versions@VersionsFeature{..} users@UserFeature{..} - uploads@UploadFeature{..} -> do + uploads@UploadFeature{..} + docum tar -> do let (feature, modifyItem, updateDesc) = listFeature core download votesf tagsf versions users uploads - itemCache itemUpdate + itemCache itemUpdate docum tar _env registerHookJust packageChangeHook isPackageChangeAny $ \(pkgid, _) -> updateDesc (packageName pkgid) @@ -180,6 +185,9 @@ listFeature :: CoreFeature -> UploadFeature -> MemState (Map PackageName PackageItem) -> Hook (Set PackageName) () + -> DocumentationFeature + -> TarIndexCacheFeature + -> ServerEnv -> (ListFeature, PackageName -> (PackageItem -> PackageItem) -> IO (), PackageName -> IO ()) @@ -192,6 +200,7 @@ listFeature CoreFeature{..} UserFeature{..} UploadFeature{..} itemCache itemUpdate + docum tar env = (ListFeature{..}, modifyItem, updateDesc) where listFeatureInterface = (emptyHackageFeature "list") { @@ -222,7 +231,7 @@ listFeature CoreFeature{..} let pkgs = PackageIndex.lookupPackageName index pkgname case pkgs of [] -> return () --this shouldn't happen - _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem (last pkgs) + _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem pkgs updateDesc pkgname = do index <- queryGetPackageIndex @@ -243,12 +252,13 @@ listFeature CoreFeature{..} constructItemIndex :: IO (Map PackageName PackageItem) constructItemIndex = do index <- queryGetPackageIndex - items <- mapM (constructItem . last) $ PackageIndex.allPackagesByName index + items <- mapM constructItem $ PackageIndex.allPackagesByName index return $ Map.fromList items - constructItem :: PkgInfo -> IO (PackageName, PackageItem) - constructItem pkg = do + constructItem :: [PkgInfo] -> IO (PackageName, PackageItem) + constructItem pkgs = do let pkgname = packageName pkg + pkg = last pkgs -- [reverse index disabled] revCount <- query . GetReverseCount $ pkgname users <- queryGetUserDb tags <- queryTagsForPackage pkgname From 500571b9dee1589e3dddab1b04894c8965e82a8f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 16:02:58 +0200 Subject: [PATCH 040/129] changed PackageRank to fit in constructItem --- .../Server/Features/PackageRank.hs | 171 +++++++----------- 1 file changed, 66 insertions(+), 105 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1c4564ea6..53f552a4e 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,7 +4,8 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Server.Features.Core +import Distribution.Package +import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) import Distribution.Server.Features.DownloadCount @@ -12,7 +13,6 @@ import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import Distribution.Server.Features.Upload -import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv @@ -20,39 +20,28 @@ import Distribution.Server.Framework.ServerEnv import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups - , size - ) + , size) import Distribution.Server.Util.CountingMap ( cmFind ) -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead - , safeLast - ) + , safeLast) +import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad ( join - , liftM2 - ) -import Control.Monad.IO.Class ( liftIO ) + , liftM2) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy - ) + , sortBy) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T -import Data.Time.Clock ( UTCTime(..) - , diffUTCTime - , getCurrentTime - , nominalDay - ) +import qualified Data.Time.Clock as CL import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -87,13 +76,13 @@ patches :: Num a => [a] -> a patches (_ : _ : xs) = sum xs patches _ = 0 -numDays :: Maybe UTCTime -> Maybe UTCTime -> Double +numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Double numDays (Just first) (Just end) = - fromRational $ toRational $ diffUTCTime first end / fromRational - (toRational nominalDay) + fromRational $ toRational $ CL.diffUTCTime first end / fromRational + (toRational CL.nominalDay) numDays _ _ = 0 -freshness :: [Version] -> UTCTime -> Bool -> IO Double +freshness :: [Version] -> CL.UTCTime -> Bool -> IO Double freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration @@ -110,7 +99,7 @@ freshness (x : xs) lastUpd app = | major v > 0 = 200 | minor v > 3 = 140 | otherwise = 80 - age = flip numDays (Just lastUpd) . Just <$> getCurrentTime + age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) -- lookupPackageId @@ -118,59 +107,54 @@ freshness (x : xs) lastUpd app = -- TODO CoreFeature can be substituted by CoreResource rankIO - :: CoreResource - -> VersionsFeature + :: VersionsFeature -> DownloadFeature -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> PackageDescription - -> ServerPartE Scorer + -> [PkgInfo] + -> IO Scorer -rankIO core vers downs upl docs env tarCache pkg = do +rankIO vers downs upl docs env tarCache pkgs = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - codeS <- codeScore documentLines srcLines packageLines + codeS <- codeScore documentLines srcLines return (temp <> versS <> auth <> codeS) where - pkgId = package pkg - pkgNm = pkgName pkgId - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + pkg = packageDescription <$> pkgDesc $ last pkgs + pkgId = package pkg + pkgNm = pkgName pkgId + lastUploads = + sortBy (flip compare) + $ (fst . pkgOriginalUploadInfo <$> pkgs) + ++ (fst . pkgLatestUploadInfo <$> pkgs) + versionList :: [Version] + versionList = sortBy (flip compare) + $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + downloadsPerMonth = cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) packageEntr = do - inf <- info - tarB <- liftIO . packageTarball tarCache . head $ inf + tarB <- packageTarball tarCache . head $ pkgs return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing - documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) + documentBlob :: IO (Maybe BlobStorage.BlobId) documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache) documentationEntr = do index <- documentIndex path <- documentPath return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines = documentationEntr >>= liftIO . filterLinesTar (const True) - srcLines = packageEntr >>= liftIO . filterLinesTar (isExtensionOf ".hs") - packageLines = packageEntr >>= liftIO . filterLinesTar (const True) + documentLines :: IO Double + documentLines = documentationEntr >>= filterLinesTar (const True) + srcLines :: IO Double + srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double @@ -188,22 +172,16 @@ rankIO core vers downs upl docs env tarCache pkg = do case Tar.read header of (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do body <- BSL.hGet handle (fromIntegral siz) - return - $ int2Double - . length - . filter (not . BSL.null) - . BSL.split 10 - $ body + return $ int2Double . length . BSL.split 10 $ body _ -> return 0 documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore :: UploadFeature -> PackageDescription -> IO Scorer authorScore upload desc = - liftIO maintScore - >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) + maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer @@ -212,48 +190,37 @@ authorScore upload desc = return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) -codeScore - :: ServerPartE Double - -> ServerPartE Double - -> ServerPartE Double - -> ServerPartE Scorer -codeScore documentL haskellL packageL = do +codeScore :: IO Double -> IO Double -> IO Scorer +codeScore documentL haskellL = do docum <- documentL haskell <- haskellL - pkg <- packageL return - $ boolScor 1 (pkg > 700) - <> boolScor 1 (pkg < 80000) + $ boolScor 1 (haskell > 700) + <> boolScor 1 (haskell < 80000) <> fracScor 2 (min 1 (haskell / 5000)) <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) versionScore - :: ServerPartE [Version] + :: [Version] -> VersionsFeature - -> ServerPartE [UTCTime] + -> [CL.UTCTime] -> PackageDescription - -> ServerPartE Scorer + -> IO Scorer versionScore versionList versions lastUploads desc = do - intUse <- intUsable - depre <- deprec - lUps <- lastUploads - return $ calculateScore depre lUps intUse + use <- intUsable + depre <- deprec + return $ calculateScore depre lastUploads use where pkgNm = pkgName $ package desc partVers = - versionList - >>= (\y -> - liftIO - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions x y) - ) + flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm intUsable = do (norm, _, unpref) <- partVers return $ versionNumbers <$> norm ++ unpref deprec = do (_, deprecN, _) <- partVers return deprecN - calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer + calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) @@ -274,11 +241,7 @@ versionScore versionList versions lastUploads desc = do <> boolScor 5 (not $ null depre) temporalScore - :: PackageDescription - -> ServerPartE [UTCTime] - -> ServerPartE [Version] - -> ServerPartE Int - -> ServerPartE Scorer + :: PackageDescription -> [CL.UTCTime] -> [Version] -> IO Int -> IO Scorer temporalScore p lastUploads versionList downloadsPerMonth = do fresh <- freshnessScore downs <- downloadScore @@ -292,18 +255,15 @@ temporalScore p lastUploads versionList downloadsPerMonth = do / (if isApp then 5 else 6) ) 5 - packageFreshness = do - ups <- lastUploads - vers <- versionList - case ups of - [] -> return 0 - _ -> liftIO $ freshness vers (head ups) isApp + packageFreshness = case lastUploads of + [] -> return 0 + _ -> freshness versionList (head lastUploads) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do + dows <- downloadsPerMonth fresh <- packageFreshness - downs <- downloadsPerMonth - return $ boolScor 1 (fresh * int2Double downs > 1000) + return $ boolScor 1 (fresh * int2Double dows > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -319,15 +279,16 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats -- TODO fix the function Signature replace PackageDescription to PackageName/Identifier rankPackage - :: CoreResource - -> VersionsFeature + :: VersionsFeature -> DownloadFeature -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> PackageDescription - -> ServerPartE Double -rankPackage core versions download upload docs env tarCache p = - rankIO core versions download upload docs env tarCache p - >>= (\x -> return $ total x + total (rankPackagePage p)) + -> [PkgInfo] + -> IO Double +rankPackage versions download upload docs env tarCache pkgs = + total + . (<>) (rankPackagePage pkgD) + <$> rankIO versions download upload docs env tarCache pkgs + where pkgD = packageDescription $ pkgDesc $ last pkgs From de621f793b46e54f5fd87fb368f43d65be9c5934 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 14:30:46 +0200 Subject: [PATCH 041/129] integrated PackageRank into ListFeature --- .../Server/Features/PackageRank.hs | 70 ++++++++----------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 53f552a4e..adf496ef6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,5 +1,7 @@ {-# LANGUAGE TupleSections #-} +-- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank + module Distribution.Server.Features.PackageRank ( rankPackage ) where @@ -8,33 +10,29 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache -import Distribution.Server.Features.Upload import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types -import Distribution.Server.Users.Group - ( queryUserGroups - , size) -import Distribution.Server.Util.CountingMap - ( cmFind ) import Distribution.Simple.Utils ( safeHead - , safeLast) + , safeLast + ) import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad ( join - , liftM2) + , liftM2 + ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy) + , sortBy + ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T @@ -108,25 +106,23 @@ freshness (x : xs) lastUpd app = -- TODO CoreFeature can be substituted by CoreResource rankIO :: VersionsFeature - -> DownloadFeature - -> UploadFeature + -> Int + -> Int -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] -> IO Scorer -rankIO vers downs upl docs env tarCache pkgs = do - temp <- temporalScore pkg lastUploads versionList downloadsPerMonth +rankIO vers recentDownloads maintainers docs env tarCache pkgs = do + temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg codeS <- codeScore documentLines srcLines - return (temp <> versS <> auth <> codeS) + return (temp <> versS <> codeS <> authorScore maintainers pkg) where pkg = packageDescription <$> pkgDesc $ last pkgs pkgId = package pkg - pkgNm = pkgName pkgId lastUploads = sortBy (flip compare) $ (fst . pkgOriginalUploadInfo <$> pkgs) @@ -134,9 +130,7 @@ rankIO vers downs upl docs env tarCache pkgs = do versionList :: [Version] versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) - downloadsPerMonth = cmFind pkgNm <$> recentPackageDownloads downs - -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do tarB <- packageTarball tarCache . head $ pkgs return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) @@ -179,16 +173,12 @@ rankIO vers downs upl docs env tarCache pkgs = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> IO Scorer -authorScore upload desc = - maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) +authorScore :: Int -> PackageDescription -> Scorer +authorScore maintainers desc = + boolScor 1 (not $ S.null $ author desc) <> maintScore where - pkgNm = pkgName $ package desc - maintScore :: IO Scorer - maintScore = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - - return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) + maintScore = + boolScor 3 (maintainers > 1) <> scorer 5 (int2Double maintainers) codeScore :: IO Double -> IO Double -> IO Scorer codeScore documentL haskellL = do @@ -241,15 +231,14 @@ versionScore versionList versions lastUploads desc = do <> boolScor 5 (not $ null depre) temporalScore - :: PackageDescription -> [CL.UTCTime] -> [Version] -> IO Int -> IO Scorer -temporalScore p lastUploads versionList downloadsPerMonth = do + :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer +temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore - downs <- downloadScore tract <- tractionScore - return $ tract <> fresh <> downs + return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = calcDownScore <$> downloadsPerMonth + downloadScore = calcDownScore recentDownloads calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -261,9 +250,8 @@ temporalScore p lastUploads versionList downloadsPerMonth = do freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do - dows <- downloadsPerMonth fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double dows > 1000) + return $ boolScor 1 (fresh * int2Double recentDownloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -280,15 +268,15 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: VersionsFeature - -> DownloadFeature - -> UploadFeature + -> Int + -> Int -> DocumentationFeature - -> ServerEnv -> TarIndexCacheFeature + -> ServerEnv -> [PkgInfo] -> IO Double -rankPackage versions download upload docs env tarCache pkgs = +rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) - <$> rankIO versions download upload docs env tarCache pkgs + <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs where pkgD = packageDescription $ pkgDesc $ last pkgs From 327af26ddfc51ab29f674b5e2cc926f53319c6aa Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 14:33:10 +0200 Subject: [PATCH 042/129] --no-edit --- .../Server/Features/PackageList.hs | 22 ++++++++++++------- src/Distribution/Server/Framework/MemSize.hs | 3 +++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index abc594373..960039201 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -17,6 +17,7 @@ import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) import Distribution.Server.Features.Documentation (DocumentationFeature(..)) import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) +import Distribution.Server.Features.PackageRank import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet @@ -87,18 +88,20 @@ data PackageItem = PackageItem { -- How many benchmarks (>=0) this package has. itemNumBenchmarks :: !Int, -- Last upload date - itemLastUpload :: !UTCTime + itemLastUpload :: !UTCTime, -- Hotness: a more heuristic way to sort packages. presently non-existent. - --itemHotness :: Int + --itemHotness :: Int + -- heuristic way to sort packages + itemPackageRank :: !Double } instance MemSize PackageItem where - memSize (PackageItem a b c d e f g h i j k l) = memSize12 a b c d e f g h i j k l + memSize (PackageItem a b c d e f g h i j k l m) = memSize13 a b c d e f g h i j k l m emptyPackageItem :: PackageName -> PackageItem emptyPackageItem pkg = PackageItem pkg Set.empty Nothing "" [] - 0 0 False 0 0 0 (UTCTime (toEnum 0) 0) + 0 0 False 0 0 0 (UTCTime (toEnum 0) 0) 0 initListFeature :: ServerEnv @@ -125,11 +128,11 @@ initListFeature _env = do versions@VersionsFeature{..} users@UserFeature{..} uploads@UploadFeature{..} - docum tar -> do + documentation tar -> do let (feature, modifyItem, updateDesc) = listFeature core download votesf tagsf versions users uploads - itemCache itemUpdate docum tar _env + itemCache itemUpdate documentation tar _env registerHookJust packageChangeHook isPackageChangeAny $ \(pkgid, _) -> updateDesc (packageName pkgid) @@ -196,11 +199,11 @@ listFeature CoreFeature{..} DownloadFeature{..} VotesFeature{..} TagsFeature{..} - VersionsFeature{..} + versions@VersionsFeature{..} UserFeature{..} UploadFeature{..} itemCache itemUpdate - docum tar env + documentation tar env = (ListFeature{..}, modifyItem, updateDesc) where listFeatureInterface = (emptyHackageFeature "list") { @@ -266,6 +269,8 @@ listFeature CoreFeature{..} votes <- pkgNumScore pkgname deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) + packageR <- rankPackage versions (cmFind pkgname downs) + (UserIdSet.size maintainers) documentation tar env pkgs return $ (,) pkgname $ (updateDescriptionItem (pkgDesc pkg) $ emptyPackageItem pkgname) { itemTags = tags @@ -275,6 +280,7 @@ listFeature CoreFeature{..} -- [reverse index disabled] , itemRevDepsCount = directReverseCount revCount , itemVotes = votes , itemLastUpload = fst (pkgOriginalUploadInfo pkg) + , itemPackageRank = packageR } ------------------------------ diff --git a/src/Distribution/Server/Framework/MemSize.hs b/src/Distribution/Server/Framework/MemSize.hs index 4af5d251f..f3e05f42a 100644 --- a/src/Distribution/Server/Framework/MemSize.hs +++ b/src/Distribution/Server/Framework/MemSize.hs @@ -135,6 +135,9 @@ instance MemSize Integer where instance MemSize Float where memSize _ = 2 +instance MemSize Double where + memSize _ = 3 + instance MemSize UTCTime where memSize _ = 7 From 343af58335d955c1471c9a81cf9b7097b8aa2aff Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 17:10:29 +0200 Subject: [PATCH 043/129] tried to add an column and failed --- datafiles/templates/Html/noscript-search-form.html.st | 1 + src/Distribution/Server/Features/Browse.hs | 3 ++- src/Distribution/Server/Features/Browse/ApplyFilter.hs | 1 + src/Distribution/Server/Features/Browse/Options.hs | 4 +++- src/Distribution/Server/Features/Browse/Parsers.hs | 3 ++- 5 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datafiles/templates/Html/noscript-search-form.html.st b/datafiles/templates/Html/noscript-search-form.html.st index 7c1f318e4..0193d6e97 100644 --- a/datafiles/templates/Html/noscript-search-form.html.st +++ b/datafiles/templates/Html/noscript-search-form.html.st @@ -23,6 +23,7 @@ + diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index ada4b622c..d5d4497da 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -138,7 +138,7 @@ packageIndexInfoToValue :: CoreResource -> TagsResource -> UserResource -> Packa packageIndexInfoToValue coreResource tagsResource userResource PackageItem{itemName, itemDownloads, itemVotes, - itemDesc, itemTags, itemLastUpload, itemMaintainer} = + itemDesc, itemTags, itemLastUpload, itemMaintainer, itemPackageRank} = object [ Key.fromString "name" .= renderPackage itemName , Key.fromString "downloads" .= itemDownloads @@ -147,6 +147,7 @@ packageIndexInfoToValue , Key.fromString "tags" .= map renderTag (S.toAscList itemTags) , Key.fromString "lastUpload" .= iso8601Show itemLastUpload , Key.fromString "maintainers" .= map renderUser itemMaintainer + , Key.fromString "packageRank" .= itemPackageRank ] where renderTag :: Tag -> Value diff --git a/src/Distribution/Server/Features/Browse/ApplyFilter.hs b/src/Distribution/Server/Features/Browse/ApplyFilter.hs index f96a3367c..d085819ba 100644 --- a/src/Distribution/Server/Features/Browse/ApplyFilter.hs +++ b/src/Distribution/Server/Features/Browse/ApplyFilter.hs @@ -64,6 +64,7 @@ sort isSearch sortColumn sortDirection = Tags -> comparing (S.toAscList . itemTags) LastUpload -> comparing itemLastUpload Maintainers -> comparing itemMaintainer + PackageRank -> comparing itemPackageRank in sortBy (maybeReverse comparer) where maybeReverse = diff --git a/src/Distribution/Server/Features/Browse/Options.hs b/src/Distribution/Server/Features/Browse/Options.hs index 269be66ef..942681bc3 100644 --- a/src/Distribution/Server/Features/Browse/Options.hs +++ b/src/Distribution/Server/Features/Browse/Options.hs @@ -9,7 +9,7 @@ import Distribution.Server.Features.Browse.Parsers (Filter, conditions, condsToF data IsSearch = IsSearch | IsNotSearch -data NormalColumn = Name | Downloads | Rating | Description | Tags | LastUpload | Maintainers +data NormalColumn = Name | Downloads | Rating | Description | Tags | LastUpload | Maintainers | PackageRank deriving (Show, Eq) data Column = DefaultColumn | NormalColumn NormalColumn @@ -37,6 +37,7 @@ instance FromJSON Column where "tags" -> pure $ NormalColumn Tags "lastUpload" -> pure $ NormalColumn LastUpload "maintainers" -> pure $ NormalColumn Maintainers + "packageRank" -> pure $ NormalColumn PackageRank t -> fail $ "Column invalid: " ++ T.unpack t columnToTemplateName :: Column -> String @@ -49,6 +50,7 @@ columnToTemplateName = \case NormalColumn Tags -> "tags" NormalColumn LastUpload -> "lastUpload" NormalColumn Maintainers -> "maintainers" + NormalColumn PackageRank -> "packageRank" instance FromJSON Direction where parseJSON = diff --git a/src/Distribution/Server/Features/Browse/Parsers.hs b/src/Distribution/Server/Features/Browse/Parsers.hs index 6445bbc1c..2775cd797 100644 --- a/src/Distribution/Server/Features/Browse/Parsers.hs +++ b/src/Distribution/Server/Features/Browse/Parsers.hs @@ -85,7 +85,7 @@ allowedAfterOpeningBrace AllowNot = "not " <|> allowedAfterOpeningBrace Disallow allowedAfterOpeningBrace _ = asum [ "downloads", "rating", "lastUpload" , "ageOfLastUpload" - , "tag:", "maintainer:", "deprecated:", "distro:" + , "tag:", "maintainer:", "deprecated:", "distro:", "packageRank" ] -- Whether the 'not' operator can be used. @@ -113,6 +113,7 @@ filterWith allowNot = do "maintainer:" -> MaintainerFilter <$> wordWoSpaceOrParens "deprecated:" -> DeprecatedFilter <$> deprecatedOption "distro:" -> DistroFilter <$> wordWoSpaceOrParens + "packageRank" -> DownloadsFilter <$> opAndSndParam decimal _ -> fail "Impossible since fieldName possibilities are known at compile time" pure filt From 3ec7a86a89521bf2d426974069d300383c813983 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 17:54:45 +0200 Subject: [PATCH 044/129] switch Doubles for Floats --- .../Server/Features/PackageList.hs | 2 +- .../Server/Features/PackageRank.hs | 46 +++++++++---------- src/Distribution/Server/Framework/MemSize.hs | 3 -- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 960039201..c75ada37f 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -92,7 +92,7 @@ data PackageItem = PackageItem { -- Hotness: a more heuristic way to sort packages. presently non-existent. --itemHotness :: Int -- heuristic way to sort packages - itemPackageRank :: !Double + itemPackageRank :: !Float } instance MemSize PackageItem where diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index adf496ef6..e8bc74fd7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -37,31 +37,31 @@ import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T import qualified Data.Time.Clock as CL -import GHC.Float ( int2Double ) +import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximum :: Double - , score :: Double + { maximum :: Float + , score :: Float } deriving Show instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) -scorer :: Double -> Double -> Scorer +scorer :: Float -> Float -> Scorer scorer maxim scr = if maxim >= scr then Scorer maxim scr else Scorer maxim maxim -fracScor :: Double -> Double -> Scorer +fracScor :: Float -> Float -> Scorer fracScor maxim frac = scorer maxim (maxim * frac) -boolScor :: Double -> Bool -> Scorer +boolScor :: Float -> Bool -> Scorer boolScor k True = Scorer k k boolScor k False = Scorer k 0 -total :: Scorer -> Double +total :: Scorer -> Float total (Scorer a b) = a / b major :: Num a => [a] -> a @@ -74,13 +74,13 @@ patches :: Num a => [a] -> a patches (_ : _ : xs) = sum xs patches _ = 0 -numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Double +numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Float numDays (Just first) (Just end) = fromRational $ toRational $ CL.diffUTCTime first end / fromRational (toRational CL.nominalDay) numDays _ _ = 0 -freshness :: [Version] -> CL.UTCTime -> Bool -> IO Double +freshness :: [Version] -> CL.UTCTime -> Bool -> IO Float freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration @@ -89,7 +89,7 @@ freshness (x : xs) lastUpd app = versionLatest = versionNumbers x daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = int2Double + expectedUpdateInterval = int2Float (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 @@ -145,13 +145,13 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do index <- documentIndex path <- documentPath return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines :: IO Double + documentLines :: IO Float documentLines = documentationEntr >>= filterLinesTar (const True) - srcLines :: IO Double + srcLines :: IO Float srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar - :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double + :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float filterLinesTar f (Just (path, T.TarFileEntry offset)) = if f path then getLines path offset else return 0 filterLinesTar f (Just (_, T.TarDir dir)) = @@ -166,7 +166,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do case Tar.read header of (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do body <- BSL.hGet handle (fromIntegral siz) - return $ int2Double . length . BSL.split 10 $ body + return $ int2Float . length . BSL.split 10 $ body _ -> return 0 documentPath = do @@ -178,9 +178,9 @@ authorScore maintainers desc = boolScor 1 (not $ S.null $ author desc) <> maintScore where maintScore = - boolScor 3 (maintainers > 1) <> scorer 5 (int2Double maintainers) + boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) -codeScore :: IO Double -> IO Double -> IO Scorer +codeScore :: IO Float -> IO Float -> IO Scorer codeScore documentL haskellL = do docum <- documentL haskell <- haskellL @@ -217,15 +217,15 @@ versionScore versionList versions lastUploads desc = do <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) <> scorer 15 - (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) + (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) intUse ) <> scorer 20 - (int2Double $ 4 * length + (int2Float $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - <> scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) <> boolScor 5 (not $ null depre) @@ -240,7 +240,7 @@ temporalScore p lastUploads versionList recentDownloads = do isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads calcDownScore i = Scorer 5 $ min - ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) + ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) 5 @@ -251,14 +251,14 @@ temporalScore p lastUploads versionList recentDownloads = do -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double recentDownloads > 1000) + return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) - desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) + desc = Scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) @@ -274,7 +274,7 @@ rankPackage -> TarIndexCacheFeature -> ServerEnv -> [PkgInfo] - -> IO Double + -> IO Float rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) diff --git a/src/Distribution/Server/Framework/MemSize.hs b/src/Distribution/Server/Framework/MemSize.hs index f3e05f42a..4af5d251f 100644 --- a/src/Distribution/Server/Framework/MemSize.hs +++ b/src/Distribution/Server/Framework/MemSize.hs @@ -135,9 +135,6 @@ instance MemSize Integer where instance MemSize Float where memSize _ = 2 -instance MemSize Double where - memSize _ = 3 - instance MemSize UTCTime where memSize _ = 7 From 82444ac229cd8e3bfc4614cb37dc59ee9ea79ead Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 9 Aug 2022 17:48:38 +0200 Subject: [PATCH 045/129] added the column and redid some packageRank issues --- datafiles/static/browse.js | 1 + src/Distribution/Server/Features/PackageRank.hs | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafiles/static/browse.js b/datafiles/static/browse.js index 905239afc..cfb8a7dfd 100644 --- a/datafiles/static/browse.js +++ b/datafiles/static/browse.js @@ -134,6 +134,7 @@ const replaceRows = (response) => { tr.appendChild(createTags(row.tags)); tr.appendChild(createLastUpload(row.lastUpload)); tr.appendChild(createMaintainers(row.maintainers)); + tr.appendChild(createSimpleText(row.packageRank)); l.appendChild(tr); } }; diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e8bc74fd7..a6cf628a3 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -89,8 +89,8 @@ freshness (x : xs) lastUpd app = versionLatest = versionNumbers x daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = int2Float - (min (versionStabilityInterval versionLatest) $ length (x : xs)) + expectedUpdateInterval = + int2Float (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 | patches v > 0 = 300 @@ -177,8 +177,7 @@ authorScore :: Int -> PackageDescription -> Scorer authorScore maintainers desc = boolScor 1 (not $ S.null $ author desc) <> maintScore where - maintScore = - boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) + maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) codeScore :: IO Float -> IO Float -> IO Scorer codeScore documentL haskellL = do @@ -218,7 +217,7 @@ versionScore versionList versions lastUploads desc = do <> scorer 15 (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) - intUse + intUse ) <> scorer 20 From c3aa8168801a829a76b98317df07dae23a828c1c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 9 Aug 2022 18:04:18 +0200 Subject: [PATCH 046/129] fixed some basic bugs --- src/Distribution/Server/Features/PackageRank.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index a6cf628a3..8462f6000 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -42,13 +42,13 @@ import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximum :: Float + { maximumS :: Float , score :: Float } deriving Show instance Semigroup Scorer where - (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) + (Scorer a b) <> (Scorer c d) = Scorer (a + c) (b + d) scorer :: Float -> Float -> Scorer scorer maxim scr = @@ -62,7 +62,7 @@ boolScor k True = Scorer k k boolScor k False = Scorer k 0 total :: Scorer -> Float -total (Scorer a b) = a / b +total (Scorer a b) = b / a major :: Num a => [a] -> a major (x : _) = x @@ -257,7 +257,7 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) - desc = Scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) + desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) From 212d169a8948fc6ef9d2f8c73ebbff4a1793e7eb Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:22:57 +0200 Subject: [PATCH 047/129] removed Browse/parser changes --- src/Distribution/Server/Features/Browse/Parsers.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Distribution/Server/Features/Browse/Parsers.hs b/src/Distribution/Server/Features/Browse/Parsers.hs index 2775cd797..6445bbc1c 100644 --- a/src/Distribution/Server/Features/Browse/Parsers.hs +++ b/src/Distribution/Server/Features/Browse/Parsers.hs @@ -85,7 +85,7 @@ allowedAfterOpeningBrace AllowNot = "not " <|> allowedAfterOpeningBrace Disallow allowedAfterOpeningBrace _ = asum [ "downloads", "rating", "lastUpload" , "ageOfLastUpload" - , "tag:", "maintainer:", "deprecated:", "distro:", "packageRank" + , "tag:", "maintainer:", "deprecated:", "distro:" ] -- Whether the 'not' operator can be used. @@ -113,7 +113,6 @@ filterWith allowNot = do "maintainer:" -> MaintainerFilter <$> wordWoSpaceOrParens "deprecated:" -> DeprecatedFilter <$> deprecatedOption "distro:" -> DistroFilter <$> wordWoSpaceOrParens - "packageRank" -> DownloadsFilter <$> opAndSndParam decimal _ -> fail "Impossible since fieldName possibilities are known at compile time" pure filt From 7304a068403565da7dd15e4eb4d250f026f5d34c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:39:50 +0200 Subject: [PATCH 048/129] Fixed missing titile and changed fixed description --- datafiles/templates/Html/browse.html.st | 4 ++++ datafiles/templates/Html/noscript-search-form.html.st | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/datafiles/templates/Html/browse.html.st b/datafiles/templates/Html/browse.html.st index a7b85a496..f9d2b58b6 100644 --- a/datafiles/templates/Html/browse.html.st +++ b/datafiles/templates/Html/browse.html.st @@ -130,6 +130,9 @@ #arrow-maintainers { width: 100px; } + #arrow-packageRank { + width: 150px; + } .lastUpload, #sliderAndOutput { white-space: nowrap; } @@ -250,6 +253,7 @@ Tags Last U/L Maintainers + Package Rank diff --git a/datafiles/templates/Html/noscript-search-form.html.st b/datafiles/templates/Html/noscript-search-form.html.st index 0193d6e97..55c242afe 100644 --- a/datafiles/templates/Html/noscript-search-form.html.st +++ b/datafiles/templates/Html/noscript-search-form.html.st @@ -23,7 +23,7 @@ - + From 8380020497ab1b1a64a00ab32371c98b3aba0763 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:51:04 +0200 Subject: [PATCH 049/129] Strict Scorer --- src/Distribution/Server/Features/PackageRank.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8462f6000..a3e9ded00 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections, BangPatterns #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -42,8 +42,8 @@ import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximumS :: Float - , score :: Float + { maximumS :: !Float + , score :: !Float } deriving Show From 868e58d3fe4c16cea6c73027fdf7b77a2304f1fb Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 20:58:51 +0200 Subject: [PATCH 050/129] fixed some partial functions --- .../Server/Features/PackageRank.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index a3e9ded00..9b4c83569 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -114,6 +114,7 @@ rankIO -> [PkgInfo] -> IO Scorer +rankIO _ _ _ _ _ _ [] = return (Scorer (118 + 16 + 4 + 1) 0) rankIO vers recentDownloads maintainers docs env tarCache pkgs = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg @@ -131,10 +132,10 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- packageTarball tarCache . head $ pkgs + tarB <- mapM (packageTarball tarCache) (safeHead pkgs) return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB + =<< (join $ rightToMaybe <$> tarB) rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -243,17 +244,18 @@ temporalScore p lastUploads versionList recentDownloads = do / (if isApp then 5 else 6) ) 5 - packageFreshness = case lastUploads of - [] -> return 0 - _ -> freshness versionList (head lastUploads) isApp + packageFreshness = case safeHead lastUploads of + Nothing -> return 0 + (Just l) -> freshness versionList l isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) -rankPackagePage :: PackageDescription -> Scorer -rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats +rankPackagePage :: Maybe PackageDescription -> Scorer +rankPackagePage Nothing = Scorer 233 0 +rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -278,4 +280,4 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs - where pkgD = packageDescription $ pkgDesc $ last pkgs + where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From e033aa5db01f45b78fd56cfe1e4ef18d8d70016f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 21:09:17 +0200 Subject: [PATCH 051/129] fixed some bugs --- src/Distribution/Server/Features/PackageRank.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9b4c83569..855e38876 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -112,17 +112,17 @@ rankIO -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] + -> PkgInfo -> IO Scorer -rankIO _ _ _ _ _ _ [] = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs = do +rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) +rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines return (temp <> versS <> codeS <> authorScore maintainers pkg) where - pkg = packageDescription <$> pkgDesc $ last pkgs pkgId = package pkg lastUploads = sortBy (flip compare) @@ -132,10 +132,10 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- mapM (packageTarball tarCache) (safeHead pkgs) + tarB <- packageTarball tarCache $ pkg return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< (join $ rightToMaybe <$> tarB) + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -279,5 +279,5 @@ rankPackage rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) - <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs + <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs (safeLast pkgs) where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From 60f8b8320c9d7820e9622d6d292805c060a303a4 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 21:22:45 +0200 Subject: [PATCH 052/129] fixed a bug --- .../Server/Features/PackageRank.hs | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 855e38876..8c0e78025 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections, BangPatterns #-} +{-# LANGUAGE TupleSections #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -43,7 +43,7 @@ import qualified System.IO as SIO data Scorer = Scorer { maximumS :: !Float - , score :: !Float + , score :: !Float } deriving Show @@ -112,17 +112,18 @@ rankIO -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] - -> PkgInfo + -> Maybe PkgInfo -> IO Scorer rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do +rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines return (temp <> versS <> codeS <> authorScore maintainers pkg) where + pkg = packageDescription $ pkgDesc pkgI pkgId = package pkg lastUploads = sortBy (flip compare) @@ -132,7 +133,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- packageTarball tarCache $ pkg + tarB <- packageTarball tarCache pkgI return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) =<< rightToMaybe tarB @@ -245,8 +246,8 @@ temporalScore p lastUploads versionList recentDownloads = do ) 5 packageFreshness = case safeHead lastUploads of - Nothing -> return 0 - (Just l) -> freshness versionList l isApp + Nothing -> return 0 + (Just l) -> freshness versionList l isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do @@ -254,7 +255,7 @@ temporalScore p lastUploads versionList recentDownloads = do return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) rankPackagePage :: Maybe PackageDescription -> Scorer -rankPackagePage Nothing = Scorer 233 0 +rankPackagePage Nothing = Scorer 233 0 rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) @@ -277,7 +278,12 @@ rankPackage -> [PkgInfo] -> IO Float rankPackage versions recentDownloads maintainers docs tarCache env pkgs = - total - . (<>) (rankPackagePage pkgD) - <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs (safeLast pkgs) + total . (<>) (rankPackagePage pkgD) <$> rankIO versions + recentDownloads + maintainers + docs + env + tarCache + pkgs + (safeLast pkgs) where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From ff667de6dbc12928ae6b5bc3952fc58627c90771 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 18 Aug 2022 23:08:04 +0200 Subject: [PATCH 053/129] retrieves src correctly --- .../Server/Features/PackageRank.hs | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8c0e78025..085f7da2f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE BangPatterns #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -120,7 +120,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines - return (temp <> versS <> codeS <> authorScore maintainers pkg) + return $ temp <> versS <> codeS <> authorScore maintainers pkg where pkg = packageDescription $ pkgDesc pkgI @@ -132,13 +132,17 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do versionList :: [Version] versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) - packageEntr = do - tarB <- packageTarball tarCache pkgI - return - $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB - rightToMaybe (Right a) = Just a - rightToMaybe (Left _) = Nothing + srcLines = do + Right (path, _, _) <- packageTarball tarCache pkgI + filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path + + filterLines f = Tar.foldEntries (countLines f) 0 (const 0) + countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float + countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns + where + !lns = case Tar.entryContent entry of + (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) + _ -> l documentBlob :: IO (Maybe BlobStorage.BlobId) documentBlob = queryDocumentation docs pkgId @@ -149,8 +153,6 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do return $ liftM2 (,) path (join $ liftM2 T.lookup index path) documentLines :: IO Float documentLines = documentationEntr >>= filterLinesTar (const True) - srcLines :: IO Float - srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float From 137d09d28618545a23a77cc76f9bda456161696a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 19 Aug 2022 22:18:17 +0200 Subject: [PATCH 054/129] fixed documentation retrieval --- .../Server/Features/PackageRank.hs | 59 +++++++------------ 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 085f7da2f..9a93e3dd3 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -25,21 +25,17 @@ import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar -import qualified Codec.Archive.Tar.Entry as Tar -import Control.Monad ( join - , liftM2 - ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) -import qualified Data.TarIndex as T import qualified Data.Time.Clock as CL import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO + +-- import Debug.Trace (trace) data Scorer = Scorer { maximumS :: !Float @@ -119,7 +115,7 @@ rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documentLines srcLines + codeS <- codeScore documSize srcLines return $ temp <> versS <> codeS <> authorScore maintainers pkg where @@ -134,45 +130,34 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI - filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path + filterLines (isExtensionOf ".hs") countLines + . Tar.read + <$> BSL.readFile path + documSize = do + path <- documentPath + case path of + Nothing -> return 0 + Just pth -> + filterLines (isExtensionOf ".html") countSize + . Tar.read + <$> BSL.readFile pth - filterLines f = Tar.foldEntries (countLines f) 0 (const 0) + filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns where !lns = case Tar.entryContent entry of (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) _ -> l + countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float + countSize f entry l = if not . f . Tar.entryPath $ entry then l else s + where + !s = case Tar.entryContent entry of + (Tar.NormalFile _ siz) -> l + fromInteger (toInteger siz) + _ -> l documentBlob :: IO (Maybe BlobStorage.BlobId) - documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache) - documentationEntr = do - index <- documentIndex - path <- documentPath - return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines :: IO Float - documentLines = documentationEntr >>= filterLinesTar (const True) - - filterLinesTar - :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float - filterLinesTar f (Just (path, T.TarFileEntry offset)) = - if f path then getLines path offset else return 0 - filterLinesTar f (Just (_, T.TarDir dir)) = - sum <$> mapM (filterLinesTar f . Just) dir - filterLinesTar _ _ = return 0 - - -- TODO if size is too big give it a good score and do not read the file - getLines path offset = do - handle <- SIO.openFile path SIO.ReadMode - SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512) - header <- BSL.hGet handle 512 - case Tar.read header of - (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do - body <- BSL.hGet handle (fromIntegral siz) - return $ int2Float . length . BSL.split 10 $ body - _ -> return 0 - + documentBlob = queryDocumentation docs pkgId documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob From 33d7807f14c6a4d529d53a54f28dd6f7db63ad98 Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Tue, 23 Aug 2022 15:08:39 +0200 Subject: [PATCH 055/129] Fix #1128, fix #1130 by adding bounds to Cabal-syntax and haddock-library --- hackage-server.cabal | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/hackage-server.cabal b/hackage-server.cabal index ba6b86341..934ac195c 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -118,6 +118,9 @@ common defaults build-depends: , aeson ^>= 2.0.3.0 , Cabal ^>= 3.6.3.0 + , Cabal-syntax ^>= 3.6.0.0 + -- Cabal-syntax needs to be bound to constrain hackage-security + -- see https://github.com/haskell/hackage-server/issues/1130 , fail ^>= 4.9.0 -- we use Control.Monad.Except, introduced in mtl-2.2.1 , network >= 3 && < 3.2 @@ -390,9 +393,14 @@ library lib-server , cryptohash-sha256 ^>= 0.11.100 , csv ^>= 0.1 , ed25519 ^>= 0.0.5 - , hackage-security ^>= 0.6 + , hackage-security >= 0.6 && < 0.7 + -- N.B: hackage-security-0.6.2 uses Cabal-syntax-3.8.1.0 + -- see https://github.com/haskell/hackage-server/issues/1130 + -- Thus, we need to include Cabal-syntax as dependency explicitly , hackage-security-HTTP ^>= 0.1.1 - , haddock-library > 1.7 && < 2 + , haddock-library >= 1.7.0 && < 1.11 + -- haddock-library-1.11.0 changed type of markupOrderedList + -- see https://github.com/haskell/hackage-server/issues/1128 , happstack-server ^>= 7.7.1 , hashable ^>= 1.3 || ^>= 1.4 , hslogger ^>= 1.3.1 From 2377900828ba1d3086cbb01f15f45573ae05edfa Mon Sep 17 00:00:00 2001 From: Andreas Abel Date: Tue, 23 Aug 2022 13:35:33 +0200 Subject: [PATCH 056/129] Bump CI to 9.2.4 and some deps --- .github/workflows/haskell-ci.yml | 12 ++++++------ cabal.haskell-ci | 7 ++++++- hackage-server.cabal | 18 +++++++++--------- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/.github/workflows/haskell-ci.yml b/.github/workflows/haskell-ci.yml index 84b405fcf..4f2631d62 100644 --- a/.github/workflows/haskell-ci.yml +++ b/.github/workflows/haskell-ci.yml @@ -8,9 +8,9 @@ # # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.15.20220710 +# version: 0.15.20220822 # -# REGENDATA ("0.15.20220710",["github","hackage-server.cabal"]) +# REGENDATA ("0.15.20220822",["github","hackage-server.cabal"]) # name: Haskell-CI on: @@ -34,9 +34,9 @@ jobs: strategy: matrix: include: - - compiler: ghc-9.2.3 + - compiler: ghc-9.2.4 compilerKind: ghc - compilerVersion: 9.2.3 + compilerVersion: 9.2.4 setup-method: ghcup allow-failure: false - compiler: ghc-9.0.2 @@ -62,7 +62,7 @@ jobs: apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 if [ "${{ matrix.setup-method }}" = ghcup ]; then mkdir -p "$HOME/.ghcup/bin" - curl -sL https://downloads.haskell.org/ghcup/0.1.17.8/x86_64-linux-ghcup-0.1.17.8 > "$HOME/.ghcup/bin/ghcup" + curl -sL https://downloads.haskell.org/ghcup/0.1.18.0/x86_64-linux-ghcup-0.1.18.0 > "$HOME/.ghcup/bin/ghcup" chmod a+x "$HOME/.ghcup/bin/ghcup" "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) "$HOME/.ghcup/bin/ghcup" install cabal 3.6.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) @@ -73,7 +73,7 @@ jobs: apt-get update apt-get install -y "$HCNAME" libbrotli-dev mkdir -p "$HOME/.ghcup/bin" - curl -sL https://downloads.haskell.org/ghcup/0.1.17.8/x86_64-linux-ghcup-0.1.17.8 > "$HOME/.ghcup/bin/ghcup" + curl -sL https://downloads.haskell.org/ghcup/0.1.18.0/x86_64-linux-ghcup-0.1.18.0 > "$HOME/.ghcup/bin/ghcup" chmod a+x "$HOME/.ghcup/bin/ghcup" "$HOME/.ghcup/bin/ghcup" install cabal 3.6.2.0 || (cat "$HOME"/.ghcup/logs/*.* && false) fi diff --git a/cabal.haskell-ci b/cabal.haskell-ci index 8b3444e8f..e8ddbd02d 100644 --- a/cabal.haskell-ci +++ b/cabal.haskell-ci @@ -13,4 +13,9 @@ installed: +all -Cabal -text -parsec -- Use Ubuntu 20.04 distribution: focal -apt: libbrotli-dev \ No newline at end of file +apt: libbrotli-dev + +-- Make sure the haddock step is included, +-- even though we don't define any library. +haddock-components: all + -- since haskell-ci 0.15.20220822 diff --git a/hackage-server.cabal b/hackage-server.cabal index 934ac195c..329faf15e 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -27,7 +27,7 @@ copyright: 2008-2015 Duncan Coutts, license: BSD-3-Clause license-file: LICENSE -tested-with: GHC == { 9.2.3, 9.0.2, 8.10.7, 8.8.4 } +tested-with: GHC == { 9.2.4, 9.0.2, 8.10.7, 8.8.4 } data-dir: datafiles data-files: @@ -111,12 +111,12 @@ common defaults , process >= 1.6 && < 1.7 , text ^>= 1.2.5.0 || ^>= 2.0 , time >= 1.9 && < 1.13 - , transformers >= 0.5 && < 0.6 - , unix >= 2.7 && < 2.8 + , transformers >= 0.5 && < 0.7 + , unix >= 2.7 && < 2.9 , scientific -- other dependencies shared by most components build-depends: - , aeson ^>= 2.0.3.0 + , aeson ^>= 2.0.3.0 || ^>= 2.1.0.0 , Cabal ^>= 3.6.3.0 , Cabal-syntax ^>= 3.6.0.0 -- Cabal-syntax needs to be bound to constrain hackage-security @@ -129,7 +129,7 @@ common defaults , parsec ^>= 3.1.13 , tar ^>= 0.5 , unordered-containers ^>= 0.2.10 - , vector ^>= 0.12 + , vector ^>= 0.12 || ^>= 0.13.0.0 , zlib ^>= 0.6.2 ghc-options: -Wall -fwarn-tabs -fno-warn-unused-do-bind -fno-warn-deprecated-flags -funbox-strict-fields @@ -377,7 +377,7 @@ library lib-server , async ^>= 2.2.1 -- requires bumping http-io-streams , attoparsec ^>= 0.14.4 - , attoparsec-iso8601 ^>= 1.0 + , attoparsec-iso8601 ^>= 1.0 || ^>= 1.1.0.0 , base16-bytestring ^>= 1.0 -- requires bumping http-io-streams , base64-bytestring ^>= 1.2.1.0 @@ -401,15 +401,15 @@ library lib-server , haddock-library >= 1.7.0 && < 1.11 -- haddock-library-1.11.0 changed type of markupOrderedList -- see https://github.com/haskell/hackage-server/issues/1128 - , happstack-server ^>= 7.7.1 - , hashable ^>= 1.3 || ^>= 1.4 + , happstack-server ^>= 7.7.1 || ^>= 7.8.0 + , hashable ^>= 1.3 || ^>= 1.4 , hslogger ^>= 1.3.1 , lifted-base ^>= 0.2.1 , mime-mail ^>= 0.5 , random ^>= 1.2 , rss ^>= 3000.2.0.7 , safecopy ^>= 0.10 - , semigroups ^>= 0.19 + , semigroups ^>= 0.20 , split ^>= 0.2 , stm ^>= 2.5.0 , tagged ^>= 0.8.5 From a36abf9a98519fbd8a5f8f0b50721a70282cdd82 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 23 Aug 2022 23:20:34 +0200 Subject: [PATCH 057/129] changed the algorithm to match cargo --- .../Server/Features/PackageList.hs | 5 +- .../Server/Features/PackageRank.hs | 122 +++++++++++------- 2 files changed, 75 insertions(+), 52 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index c75ada37f..2af483bdc 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -34,6 +34,7 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.PackageDescription.Configuration import Distribution.Utils.ShortText (fromShortText) +import Distribution.Simple.Utils (safeLast) import Control.Concurrent import Data.Maybe (mapMaybe) @@ -269,8 +270,8 @@ listFeature CoreFeature{..} votes <- pkgNumScore pkgname deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) - packageR <- rankPackage versions (cmFind pkgname downs) - (UserIdSet.size maintainers) documentation tar env pkgs + packageR <- rankPackage versions (cmFind pkgname downs) + (UserIdSet.size maintainers) documentation tar env pkgs (safeLast pkgs) return $ (,) pkgname $ (updateDescriptionItem (pkgDesc pkg) $ emptyPackageItem pkgname) { itemTags = tags diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9a93e3dd3..ac5c49ba5 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -32,6 +32,7 @@ import Data.List ( maximumBy import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL +import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) @@ -60,6 +61,9 @@ boolScor k False = Scorer k 0 total :: Scorer -> Float total (Scorer a b) = b / a +scale :: Float -> Scorer -> Scorer +scale mx sc = fracScor mx (total sc) + major :: Num a => [a] -> a major (x : _) = x major _ = 0 @@ -96,38 +100,46 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) --- lookupPackageId --- queryHasDocumentation +cabalScore :: PackageDescription -> IO Bool -> IO Scorer +cabalScore p docum = + (<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats) + <$> (boolScor 30 <$> docum) + where + tests = boolScor 50 (hasTests p) + benchs = boolScor 10 (hasBenchmarks p) + desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) + -- documentation = boolScor 30 () + homeP = boolScor 30 (not $ S.null $ homepage p) + sourceRp = boolScor 8 (not $ null $ sourceRepos p) + cats = boolScor 5 (not $ S.null $ category p) + +readmeScore _ = Scorer 0 0 --- TODO CoreFeature can be substituted by CoreResource -rankIO +-- queryHasDocumentation +baseScore :: VersionsFeature -> Int - -> Int -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> [PkgInfo] - -> Maybe PkgInfo + -> [Version] + -> [CL.UTCTime] + -> PkgInfo -> IO Scorer -rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do - temp <- temporalScore pkg lastUploads versionList recentDownloads - versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documSize srcLines - return $ temp <> versS <> codeS <> authorScore maintainers pkg - +baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do + versS <- versionScore versionList vers lastUploads pkg + codeS <- codeScore documSize srcLines + cabalS <- cabalScore pkg documHas + return + $ scale 5 versS + <> scale 2 codeS + <> scale 3 (authorScore maintainers pkg) + <> scale 2 cabalS + <> scale 5 (readmeScore readme) where - pkg = packageDescription $ pkgDesc pkgI - pkgId = package pkg - lastUploads = - sortBy (flip compare) - $ (fst . pkgOriginalUploadInfo <$> pkgs) - ++ (fst . pkgLatestUploadInfo <$> pkgs) - versionList :: [Version] - versionList = sortBy (flip compare) - $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + pkg = packageDescription $ pkgDesc pkgI + pkgId = package pkg srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI filterLines (isExtensionOf ".hs") countLines @@ -141,6 +153,8 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do filterLines (isExtensionOf ".html") countSize . Tar.read <$> BSL.readFile pth + readme = findToplevelFile tarCache pkgI isReadmeFile + >>= either (\_ -> return Nothing) (return . Just) filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float @@ -161,6 +175,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob + documHas = queryHasDocumentation docs pkgId authorScore :: Int -> PackageDescription -> Scorer authorScore maintainers desc = @@ -169,14 +184,14 @@ authorScore maintainers desc = maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) codeScore :: IO Float -> IO Float -> IO Scorer -codeScore documentL haskellL = do - docum <- documentL +codeScore documentS haskellL = do + docum <- documentS haskell <- haskellL return $ boolScor 1 (haskell > 700) <> boolScor 1 (haskell < 80000) <> fracScor 2 (min 1 (haskell / 5000)) - <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) + <> fracScor 2 (min 1 docum / ((3000 + haskell) * 200)) versionScore :: [Version] @@ -241,20 +256,6 @@ temporalScore p lastUploads versionList recentDownloads = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) -rankPackagePage :: Maybe PackageDescription -> Scorer -rankPackagePage Nothing = Scorer 233 0 -rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats - where - tests = boolScor 50 (hasTests p) - benchs = boolScor 10 (hasBenchmarks p) - desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) - -- documentation = boolScor 30 () - homeP = boolScor 30 (not $ S.null $ homepage p) - sourceRp = boolScor 8 (not $ null $ sourceRepos p) - cats = boolScor 5 (not $ S.null $ category p) - --- TODO fix the function Signature replace PackageDescription to PackageName/Identifier - rankPackage :: VersionsFeature -> Int @@ -263,14 +264,35 @@ rankPackage -> TarIndexCacheFeature -> ServerEnv -> [PkgInfo] + -> Maybe PkgInfo -> IO Float -rankPackage versions recentDownloads maintainers docs tarCache env pkgs = - total . (<>) (rankPackagePage pkgD) <$> rankIO versions - recentDownloads - maintainers - docs - env - tarCache - pkgs - (safeLast pkgs) - where pkgD = packageDescription . pkgDesc <$> safeLast pkgs +rankPackage _ _ _ _ _ _ _ Nothing = return 0 +rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) + = do + t <- temporalScore pkgD uploads versionList recentDownloads + + b <- baseScore versions + maintainers + docs + env + tarCache + versionList + uploads + pkgUsed + depr <- deprP + return $ sAverage t b * case depr of + Nothing -> 1 + _ -> 0.2 + where + pkgname = pkgName . package $ pkgD + pkgD = packageDescription . pkgDesc $ pkgUsed + deprP = queryGetDeprecatedFor versions pkgname + sAverage x y = (total x + total y) * 0.5 + + versionList :: [Version] + versionList = sortBy (flip compare) + $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + uploads = + sortBy (flip compare) + $ (fst . pkgOriginalUploadInfo <$> pkgs) + ++ (fst . pkgLatestUploadInfo <$> pkgs) From c8a07a6e93627a10fc71e810fd54424e55bf4575 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 25 Aug 2022 23:11:02 +0200 Subject: [PATCH 058/129] prototype for readme parser (collects some info about markdown) --- hackage-server.cabal | 1 + src/Distribution/Server/Features/PackageRank.hs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hackage-server.cabal b/hackage-server.cabal index 78e9d3e9f..3ee07dfc8 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -357,6 +357,7 @@ library lib-server Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap Distribution.Server.Features.PackageRank + Distribution.Server.Features.PackageRank.Parser if flag(debug) cpp-options: -DDEBUG diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ac5c49ba5..0cef111fc 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,6 +6,8 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where +import Distribution.Server.Features.PackageRank.Parser + import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation From 919b31117c0ff421da9968343d8708da842a08e5 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 25 Aug 2022 23:20:46 +0200 Subject: [PATCH 059/129] forgot to add the parser --- .../Server/Features/PackageRank/Parser.hs | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 src/Distribution/Server/Features/PackageRank/Parser.hs diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageRank/Parser.hs new file mode 100644 index 000000000..5b02ed598 --- /dev/null +++ b/src/Distribution/Server/Features/PackageRank/Parser.hs @@ -0,0 +1,104 @@ +{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} +module Distribution.Server.Features.PackageRank.Parser + ( parseM + ) where + + +import Commonmark +import Commonmark.Extensions +import Control.Monad +import Control.Monad.Identity +import qualified Data.ByteString.Lazy as BS + ( ByteString + , toStrict + ) +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Encoding.Error as T + ( lenientDecode ) +import qualified Data.Text.IO as TIO +import qualified Data.Text.Lazy.IO as TLIO +import Data.Typeable ( Typeable ) +import System.FilePath + +type MarkdownRenderable a b + = (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a) + +parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] +parseM md name = runIdentity + (commonmarkWith (pipeTableSpec <> defaultSyntaxSpec) name txt) + where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md + +data MStats = MStats Int Int --number of pictures, number of chars + deriving Show + +instance Monoid MStats where + mempty = MStats 0 0 + +instance Rangeable MStats where + ranged = const id + +instance HasAttributes MStats where + addAttributes = const id + +instance Semigroup MStats where + (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) + +data MarkdownStats = NotImportant | + HCode MStats | + Code MStats | + Section | -- Int? + Table Int | + PText MStats | + List Int + deriving (Show) + +sumMStat [] = mempty +sumMStat (x : xs) = case x of + NotImportant -> sumMStat xs + Section -> sumMStat xs + (List a) -> sumMStat xs + (Table a) -> sumMStat xs + (HCode a) -> a <> sumMStat xs + (Code a) -> a <> sumMStat xs + (PText a) -> a <> sumMStat xs + +instance Rangeable [MarkdownStats] where + ranged = const id + +instance HasAttributes [MarkdownStats] where + addAttributes = const id + +instance HasPipeTable MStats [MarkdownStats] where + pipeTable _ _ rows = [Table $ length rows] + +instance IsInline MStats where + lineBreak = MStats 0 1 + softBreak = MStats 0 1 + str t = MStats 0 (T.length t) + entity t = MStats 0 (T.length t) + escapedChar _ = MStats 0 1 + emph = id + strong = id + link _ _ a = a + image _ _ (MStats a b) = MStats (a + 1) b + code t = MStats 0 (T.length t) + rawInline _ t = MStats 0 (T.length t) + +instance IsBlock MStats [MarkdownStats] where + paragraph a = [PText a] + plain a = [PText a] + thematicBreak = [NotImportant] + blockQuote = id + codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)] + | otherwise = [Code (code codeT)] + heading _ _ = [Section] + rawBlock _ r = [NotImportant] + referenceLinkDefinition _ _ = [NotImportant] + list _ _ l = [List (length l + depSum l)] + +depSum [] = 0 +depSum ([] : xs) = depSum xs +depSum ((List a : ys) : xs) = a + depSum (ys : xs) +depSum ((_ : ys) : xs) = depSum (ys : xs) + From db0f10ad45b674f16c92b0e611ff1ad5b717ba90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A9cate=20Moonlight?= Date: Fri, 26 Aug 2022 00:37:31 +0200 Subject: [PATCH 060/129] Force .txt and .text to have UTF-8 MIME charset (#1133) --- src/Distribution/Server/Framework/HappstackUtils.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Distribution/Server/Framework/HappstackUtils.hs b/src/Distribution/Server/Framework/HappstackUtils.hs index 41bce1b74..f10491f08 100644 --- a/src/Distribution/Server/Framework/HappstackUtils.hs +++ b/src/Distribution/Server/Framework/HappstackUtils.hs @@ -81,6 +81,8 @@ mime x = , ("chs", "text/plain; charset=utf-8") , ("c", " text/plain; charset=utf-8") , ("h", " text/plain; charset=utf-8") + , ("text", "text/plain; charset=utf-8") + , ("txt", "text/plain; charset=utf-8") ] From f6c1e48f8da9293055ff9e86aabd8a33f6594893 Mon Sep 17 00:00:00 2001 From: Alias Qli <2576814881@qq.com> Date: Fri, 26 Aug 2022 23:38:32 +0800 Subject: [PATCH 061/129] Upgrade to haddock-library-1.11.0 (#1126) --- hackage-server.cabal | 6 +++--- .../Server/Features/Search/ExtractDescriptionTerms.hs | 2 +- src/Distribution/Server/Pages/Package/HaddockHtml.hs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hackage-server.cabal b/hackage-server.cabal index 329faf15e..74e511955 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -111,8 +111,8 @@ common defaults , process >= 1.6 && < 1.7 , text ^>= 1.2.5.0 || ^>= 2.0 , time >= 1.9 && < 1.13 - , transformers >= 0.5 && < 0.7 - , unix >= 2.7 && < 2.9 + , transformers >= 0.5 && < 0.6 + , unix >= 2.7 && < 2.8 , scientific -- other dependencies shared by most components build-depends: @@ -398,7 +398,7 @@ library lib-server -- see https://github.com/haskell/hackage-server/issues/1130 -- Thus, we need to include Cabal-syntax as dependency explicitly , hackage-security-HTTP ^>= 0.1.1 - , haddock-library >= 1.7.0 && < 1.11 + , haddock-library ^>= 1.11.0 -- haddock-library-1.11.0 changed type of markupOrderedList -- see https://github.com/haskell/hackage-server/issues/1128 , happstack-server ^>= 7.7.1 || ^>= 7.8.0 diff --git a/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs b/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs index 24a8334df..d07ed63e3 100644 --- a/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs +++ b/src/Distribution/Server/Features/Search/ExtractDescriptionTerms.hs @@ -78,7 +78,7 @@ termsMarkup = Markup { markupBold = id, markupMonospaced = \s -> if length s > 1 then [] else s, markupUnorderedList = concat, - markupOrderedList = concat, + markupOrderedList = concat . map snd, markupDefList = concatMap (\(d,t) -> d ++ t), markupCodeBlock = const [], markupTable = concat . F.toList, diff --git a/src/Distribution/Server/Pages/Package/HaddockHtml.hs b/src/Distribution/Server/Pages/Package/HaddockHtml.hs index dba250b9f..8b5b3f0d5 100644 --- a/src/Distribution/Server/Pages/Package/HaddockHtml.hs +++ b/src/Distribution/Server/Pages/Package/HaddockHtml.hs @@ -24,7 +24,7 @@ htmlMarkup modResolv = Markup { markupBold = strong, markupMonospaced = thecode, markupUnorderedList = unordList, - markupOrderedList = ordList, + markupOrderedList = ordList . map snd, markupDefList = defList, markupCodeBlock = pre, markupHyperlink = \(Hyperlink url mLabel) -> anchor ! [href url] << maybe url showHtmlFragment mLabel, From 0a8a85d168a0fe1cbea544fabddcd2a85259b7e9 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 26 Aug 2022 23:38:50 +0200 Subject: [PATCH 062/129] finished readmeScore --- .../Server/Features/PackageRank.hs | 85 +++++++++++++------ .../Server/Features/PackageRank/Parser.hs | 82 +++++++++++------- 2 files changed, 112 insertions(+), 55 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0cef111fc..dca027e0b 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,8 +6,9 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageRank.Parser +import Distribution.Server.Features.PackageRank.Parser +import Data.TarIndex ( TarEntryOffset ) import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation @@ -17,9 +18,14 @@ import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import qualified Distribution.Server.Framework.BlobStorage as BlobStorage +import Distribution.Server.Framework.CacheControl import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types +import Distribution.Server.Util.Markdown + ( supposedToBeMarkdown ) +import Distribution.Server.Util.ServeTarball + ( loadTarEntry ) import Distribution.Simple.Utils ( safeHead , safeLast ) @@ -38,8 +44,6 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) --- import Debug.Trace (trace) - data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -54,7 +58,7 @@ scorer maxim scr = if maxim >= scr then Scorer maxim scr else Scorer maxim maxim fracScor :: Float -> Float -> Scorer -fracScor maxim frac = scorer maxim (maxim * frac) +fracScor maxim frac = scorer maxim (min (maxim * frac) maxim) boolScor :: Float -> Bool -> Scorer boolScor k True = Scorer k k @@ -102,10 +106,9 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) -cabalScore :: PackageDescription -> IO Bool -> IO Scorer +cabalScore :: PackageDescription -> Bool -> Scorer cabalScore p docum = - (<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats) - <$> (boolScor 30 <$> docum) + tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -115,9 +118,38 @@ cabalScore p docum = sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) -readmeScore _ = Scorer 0 0 +readmeScore + :: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath) + -> Bool + -> IO Scorer +readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max +readmeScore (Just (tarfile, _, offset, name)) app = do + entr <- loadTarEntry tarfile offset + case entr of + (Right (size, str)) -> return $ calcScore str size name + _ -> return $ Scorer 1 0 + where + calcScore str size filename = + scorer 75 (min 1 (fromInteger (toInteger size) / 3000)) + <> if supposedToBeMarkdown filename + then case parseM str filename of + Left _ -> Scorer 0 0 + Right mdStats -> format mdStats + else Scorer 0 0 + format stats = + fracScor (if app then 25 else 100) (min 1 $ int2Float hlength / 2000) + <> scorer (if app then 15 else 27) (int2Float blocks * 3) + <> boolScor (if app then 10 else 30) (clength > 150) + <> scorer 35 (int2Float images * 10) + <> scorer 30 (int2Float sections * 4) + <> scorer 25 (int2Float rows * 2) + where + (blocks, clength) = getCode stats + (_ , hlength) = getHCode stats + MStats _ images = sumMStat stats + rows = getListsTables stats + sections = getSections stats --- queryHasDocumentation baseScore :: VersionsFeature -> Int @@ -130,18 +162,25 @@ baseScore -> IO Scorer baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do - versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documSize srcLines - cabalS <- cabalScore pkg documHas + + readM <- readme + hasDocum <- documHas + documS <- documSize + srcL <- srcLines + + versS <- versionScore versionList vers lastUploads pkg + readmeS <- readmeScore readM isApp + return $ scale 5 versS - <> scale 2 codeS + <> scale 2 (codeScore documS srcL) <> scale 3 (authorScore maintainers pkg) - <> scale 2 cabalS - <> scale 5 (readmeScore readme) + <> scale 2 (cabalScore pkg hasDocum) + <> scale 5 readmeS where pkg = packageDescription $ pkgDesc pkgI pkgId = package pkg + isApp = (isNothing . library) pkg && (not . null . executables) pkg srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI filterLines (isExtensionOf ".hs") countLines @@ -165,6 +204,7 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do !lns = case Tar.entryContent entry of (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) _ -> l + -- TODO might need to decode/add the other separator countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countSize f entry l = if not . f . Tar.entryPath $ entry then l else s where @@ -185,15 +225,12 @@ authorScore maintainers desc = where maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) -codeScore :: IO Float -> IO Float -> IO Scorer -codeScore documentS haskellL = do - docum <- documentS - haskell <- haskellL - return - $ boolScor 1 (haskell > 700) - <> boolScor 1 (haskell < 80000) - <> fracScor 2 (min 1 (haskell / 5000)) - <> fracScor 2 (min 1 docum / ((3000 + haskell) * 200)) +codeScore :: Float -> Float -> Scorer +codeScore documentS haskellL = + boolScor 1 (haskellL > 700) + <> boolScor 1 (haskellL < 80000) + <> fracScor 2 (min 1 (haskellL / 5000)) + <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200)) versionScore :: [Version] diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageRank/Parser.hs index 5b02ed598..431228d84 100644 --- a/src/Distribution/Server/Features/PackageRank/Parser.hs +++ b/src/Distribution/Server/Features/PackageRank/Parser.hs @@ -1,12 +1,17 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} module Distribution.Server.Features.PackageRank.Parser ( parseM + , sumMStat + , getListsTables + , getCode + , getHCode + , getSections + , MStats(..) ) where import Commonmark import Commonmark.Extensions -import Control.Monad import Control.Monad.Identity import qualified Data.ByteString.Lazy as BS ( ByteString @@ -16,13 +21,6 @@ import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T ( lenientDecode ) -import qualified Data.Text.IO as TIO -import qualified Data.Text.Lazy.IO as TLIO -import Data.Typeable ( Typeable ) -import System.FilePath - -type MarkdownRenderable a b - = (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a) parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] parseM md name = runIdentity @@ -44,24 +42,51 @@ instance HasAttributes MStats where instance Semigroup MStats where (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) -data MarkdownStats = NotImportant | +data MarkdownStats = NotImportant MStats | HCode MStats | Code MStats | - Section | -- Int? - Table Int | + Section MStats | + Table Int MStats | -- Int of rows PText MStats | - List Int + List Int MStats -- Int of elements deriving (Show) +getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code +getCode [] = (0, 0) +getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs +getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs +getCode (_ : xs) = getCode xs + +getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code +getHCode [] = (0, 0) +getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs +getHCode (_ : xs) = getHCode xs + +getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code +getSections [] = 0 +getSections (Section _ : xs) = 1 + getSections xs +getSections (_ : xs) = getSections xs + +(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) +(><) (a, b) (c, d) = (a + c, b + d) + + +sumMStat :: [MarkdownStats] -> MStats sumMStat [] = mempty sumMStat (x : xs) = case x of - NotImportant -> sumMStat xs - Section -> sumMStat xs - (List a) -> sumMStat xs - (Table a) -> sumMStat xs - (HCode a) -> a <> sumMStat xs - (Code a) -> a <> sumMStat xs - (PText a) -> a <> sumMStat xs + (NotImportant a) -> a <> sumMStat xs + (Section a) -> a <> sumMStat xs + (List _ a ) -> a <> sumMStat xs + (Table _ a ) -> a <> sumMStat xs + (HCode a ) -> a <> sumMStat xs + (Code a ) -> a <> sumMStat xs + (PText a ) -> a <> sumMStat xs + +getListsTables :: [MarkdownStats] -> Int +getListsTables [] = 0 +getListsTables ((List a _) : ys) = a + getListsTables ys +getListsTables ((Table a _) : ys) = a + getListsTables ys +getListsTables (_ : ys) = getListsTables ys instance Rangeable [MarkdownStats] where ranged = const id @@ -70,7 +95,7 @@ instance HasAttributes [MarkdownStats] where addAttributes = const id instance HasPipeTable MStats [MarkdownStats] where - pipeTable _ _ rows = [Table $ length rows] + pipeTable _ _ rows = [Table (length rows) (mconcat $ mconcat <$> rows)] instance IsInline MStats where lineBreak = MStats 0 1 @@ -88,17 +113,12 @@ instance IsInline MStats where instance IsBlock MStats [MarkdownStats] where paragraph a = [PText a] plain a = [PText a] - thematicBreak = [NotImportant] + thematicBreak = [NotImportant mempty] blockQuote = id codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)] | otherwise = [Code (code codeT)] - heading _ _ = [Section] - rawBlock _ r = [NotImportant] - referenceLinkDefinition _ _ = [NotImportant] - list _ _ l = [List (length l + depSum l)] - -depSum [] = 0 -depSum ([] : xs) = depSum xs -depSum ((List a : ys) : xs) = a + depSum (ys : xs) -depSum ((_ : ys) : xs) = depSum (ys : xs) - + heading _ a = [Section a] + rawBlock _ _ = [NotImportant mempty] + referenceLinkDefinition _ _ = [NotImportant mempty] + list _ _ l = [List (length l + sumLT l) (mconcat $ sumMStat <$> l)] + where sumLT a = sum (getListsTables <$> a) From a0e7a8f405ce4661fb06de14104d71775b456b7c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 27 Aug 2022 22:34:11 +0200 Subject: [PATCH 063/129] changed documentation parameter to get reasonable output --- src/Distribution/Server/Features/PackageRank.hs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index dca027e0b..334427f88 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -113,7 +113,6 @@ cabalScore p docum = tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) - -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) @@ -230,7 +229,7 @@ codeScore documentS haskellL = boolScor 1 (haskellL > 700) <> boolScor 1 (haskellL < 80000) <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200)) + <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 1600)) versionScore :: [Version] @@ -281,11 +280,10 @@ temporalScore p lastUploads versionList recentDownloads = do where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = Scorer 5 $ min + calcDownScore i = scorer 5 $ min ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) - 5 packageFreshness = case safeHead lastUploads of Nothing -> return 0 (Just l) -> freshness versionList l isApp From 07fb0a29fb529ebed2c4c57efd06b31f2c26cb7d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 27 Aug 2022 23:35:03 +0200 Subject: [PATCH 064/129] changed some parameters to reflect hackage --- .../Server/Features/PackageRank.hs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 334427f88..341dacbaf 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -110,7 +110,7 @@ cabalScore :: PackageDescription -> Bool -> Scorer cabalScore p docum = tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum where - tests = boolScor 50 (hasTests p) + tests = boolScor 30 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) homeP = boolScor 30 (not $ S.null $ homepage p) @@ -169,7 +169,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do versS <- versionScore versionList vers lastUploads pkg readmeS <- readmeScore readM isApp - return $ scale 5 versS <> scale 2 (codeScore documS srcL) @@ -229,7 +228,7 @@ codeScore documentS haskellL = boolScor 1 (haskellL > 700) <> boolScor 1 (haskellL < 80000) <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 1600)) + <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) versionScore :: [Version] @@ -252,10 +251,9 @@ versionScore versionList versions lastUploads desc = do (_, deprecN, _) <- partVers return deprecN calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer - calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) - <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) <> scorer 15 (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) @@ -276,13 +274,16 @@ temporalScore temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore tract <- tractionScore + -- Reverse dependencies are to be done + + f <- packageFreshness return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = scorer 5 $ min - ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) - / (if isApp then 5 else 6) + calcDownScore i = fracScor 5 + ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) + / (if isApp then 6 else 8) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 @@ -291,7 +292,7 @@ temporalScore p lastUploads versionList recentDownloads = do -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) + return $ boolScor 1 (fresh * int2Float recentDownloads > 200) rankPackage :: VersionsFeature From 413038c2be69c496083472e05ef7d6d6617d4f0f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 28 Aug 2022 21:23:06 +0200 Subject: [PATCH 065/129] moved PackageRank into PackageList Feature and changed UI so packageRank will display as Int between 1000 and 0 --- hackage-server.cabal | 4 ++-- src/Distribution/Server/Features/Browse.hs | 3 ++- src/Distribution/Server/Features/PackageList.hs | 2 +- .../{PackageRank/Parser.hs => PackageList/MStats.hs} | 8 ++++---- .../Server/Features/{ => PackageList}/PackageRank.hs | 8 +++----- 5 files changed, 12 insertions(+), 13 deletions(-) rename src/Distribution/Server/Features/{PackageRank/Parser.hs => PackageList/MStats.hs} (94%) rename src/Distribution/Server/Features/{ => PackageList}/PackageRank.hs (98%) diff --git a/hackage-server.cabal b/hackage-server.cabal index 3ee07dfc8..c6866b7c5 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -307,6 +307,8 @@ library lib-server Distribution.Server.Features.PackageCandidates.Backup Distribution.Server.Features.PackageFeed Distribution.Server.Features.PackageList + Distribution.Server.Features.PackageList.PackageRank + Distribution.Server.Features.PackageList.MStats Distribution.Server.Features.Distro Distribution.Server.Features.Distro.Distributions Distribution.Server.Features.Distro.Backup @@ -356,8 +358,6 @@ library lib-server Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap - Distribution.Server.Features.PackageRank - Distribution.Server.Features.PackageRank.Parser if flag(debug) cpp-options: -DDEBUG diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index d5d4497da..b50d02c67 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -9,6 +9,7 @@ import qualified Data.Set as S import Data.Time (getCurrentTime) import Data.Time.Format.ISO8601 (iso8601Show) import System.FilePath (()) +import GHC.Float.RealFracMethods (roundFloatInteger) import Data.Aeson (Value(Array), object, toJSON, (.=)) import qualified Data.Aeson.Key as Key @@ -147,7 +148,7 @@ packageIndexInfoToValue , Key.fromString "tags" .= map renderTag (S.toAscList itemTags) , Key.fromString "lastUpload" .= iso8601Show itemLastUpload , Key.fromString "maintainers" .= map renderUser itemMaintainer - , Key.fromString "packageRank" .= itemPackageRank + , Key.fromString "packageRank" .= (roundFloatInteger (1000 * itemPackageRank)) ] where renderTag :: Tag -> Value diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 2af483bdc..835235e77 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -17,7 +17,7 @@ import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) import Distribution.Server.Features.Documentation (DocumentationFeature(..)) import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) -import Distribution.Server.Features.PackageRank +import Distribution.Server.Features.PackageList.PackageRank import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageList/MStats.hs similarity index 94% rename from src/Distribution/Server/Features/PackageRank/Parser.hs rename to src/Distribution/Server/Features/PackageList/MStats.hs index 431228d84..33934ebbb 100644 --- a/src/Distribution/Server/Features/PackageRank/Parser.hs +++ b/src/Distribution/Server/Features/PackageList/MStats.hs @@ -1,5 +1,5 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} -module Distribution.Server.Features.PackageRank.Parser +module Distribution.Server.Features.PackageList.MStats ( parseM , sumMStat , getListsTables @@ -53,13 +53,13 @@ data MarkdownStats = NotImportant MStats | getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getCode [] = (0, 0) -getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs -getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs +getCode (Code (MStats codeT _) : xs) = (1, codeT) >< getCode xs +getCode (HCode (MStats codeT _) : xs) = (1, codeT) >< getCode xs getCode (_ : xs) = getCode xs getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getHCode [] = (0, 0) -getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs +getHCode (HCode (MStats codeT _) : xs) = (1, codeT) >< getHCode xs getHCode (_ : xs) = getHCode xs getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs similarity index 98% rename from src/Distribution/Server/Features/PackageRank.hs rename to src/Distribution/Server/Features/PackageList/PackageRank.hs index 341dacbaf..367731beb 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -2,11 +2,11 @@ -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank -module Distribution.Server.Features.PackageRank +module Distribution.Server.Features.PackageList.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageRank.Parser +import Distribution.Server.Features.PackageList.MStats import Data.TarIndex ( TarEntryOffset ) import Distribution.Package @@ -274,9 +274,7 @@ temporalScore temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore tract <- tractionScore - -- Reverse dependencies are to be done - - f <- packageFreshness + -- Reverse dependencies are added return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p From 834a12f84938fe24ffd06e2c09af95985dbcf435 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 30 Aug 2022 22:44:42 +0200 Subject: [PATCH 066/129] added some Exception handling --- .../Features/PackageList/PackageRank.hs | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index 367731beb..c58f82884 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -1,24 +1,18 @@ -{-# LANGUAGE BangPatterns #-} - --- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank - +{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} module Distribution.Server.Features.PackageList.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageList.MStats - -import Data.TarIndex ( TarEntryOffset ) import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) +import Distribution.Server.Features.PackageList.MStats import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import qualified Distribution.Server.Framework.BlobStorage as BlobStorage -import Distribution.Server.Framework.CacheControl import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types @@ -33,6 +27,9 @@ import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar +import Control.Exception ( SomeException(..) + , handle + ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy @@ -44,6 +41,9 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) +handleConst :: a -> IO a -> IO a +handleConst c = handle (\(_ :: SomeException) -> return c) + data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -117,17 +117,16 @@ cabalScore p docum = sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) -readmeScore - :: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath) - -> Bool - -> IO Scorer -readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max -readmeScore (Just (tarfile, _, offset, name)) app = do - entr <- loadTarEntry tarfile offset +readmeScore :: TarIndexCacheFeature -> PkgInfo -> Bool -> IO Scorer +readmeScore tarCache pkgI app = do + Just (tarfile, _, offset, name) <- readme + entr <- loadTarEntry tarfile offset case entr of (Right (size, str)) -> return $ calcScore str size name _ -> return $ Scorer 1 0 where + readme = findToplevelFile tarCache pkgI isReadmeFile + >>= either (\_ -> return Nothing) (return . Just) calcScore str size filename = scorer 75 (min 1 (fromInteger (toInteger size) / 3000)) <> if supposedToBeMarkdown filename @@ -162,13 +161,13 @@ baseScore baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do - readM <- readme - hasDocum <- documHas - documS <- documSize - srcL <- srcLines + hasDocum <- handleConst False documHas -- Probably redundant + documS <- handleConst 0 documSize + srcL <- handleConst 0 srcLines - versS <- versionScore versionList vers lastUploads pkg - readmeS <- readmeScore readM isApp + versS <- handleConst (Scorer 1 0) + (versionScore versionList vers lastUploads pkg) + readmeS <- handleConst (Scorer 1 0) (readmeScore tarCache pkgI isApp) return $ scale 5 versS <> scale 2 (codeScore documS srcL) @@ -192,9 +191,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do filterLines (isExtensionOf ".html") countSize . Tar.read <$> BSL.readFile pth - readme = findToplevelFile tarCache pkgI isReadmeFile - >>= either (\_ -> return Nothing) (return . Just) - filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns @@ -279,15 +275,16 @@ temporalScore p lastUploads versionList recentDownloads = do where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = fracScor 5 + calcDownScore i = fracScor + 5 ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) / (if isApp then 6 else 8) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 - (Just l) -> freshness versionList l isApp + (Just l) -> freshness versionList l isApp -- Getting time hopefully does not throw Exc. freshnessScore = fracScor 10 <$> packageFreshness --- Missing dependencyFreshnessScore for reasonable effectivity needs caching + -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 200) @@ -315,7 +312,7 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk versionList uploads pkgUsed - depr <- deprP + depr <- handleConst Nothing deprP return $ sAverage t b * case depr of Nothing -> 1 _ -> 0.2 From 7c36cf7e42d8d0a19ba3d9b0c46d56c5cde52d86 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 4 Sep 2022 13:32:29 +0200 Subject: [PATCH 067/129] some comments and refactoring --- .../Server/Features/PackageList/MStats.hs | 32 ++--- .../Features/PackageList/PackageRank.hs | 119 +++++++++--------- 2 files changed, 78 insertions(+), 73 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList/MStats.hs b/src/Distribution/Server/Features/PackageList/MStats.hs index 33934ebbb..b9dc04936 100644 --- a/src/Distribution/Server/Features/PackageList/MStats.hs +++ b/src/Distribution/Server/Features/PackageList/MStats.hs @@ -9,24 +9,32 @@ module Distribution.Server.Features.PackageList.MStats , MStats(..) ) where - import Commonmark import Commonmark.Extensions import Control.Monad.Identity import qualified Data.ByteString.Lazy as BS ( ByteString - , toStrict - ) + , toStrict ) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T ( lenientDecode ) +-- parses markdown into statistics needed for readmeScore parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] parseM md name = runIdentity (commonmarkWith (pipeTableSpec <> defaultSyntaxSpec) name txt) where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md +data MarkdownStats = NotImportant MStats | + HCode MStats | + Code MStats | + Section MStats | + Table Int MStats | -- Int of rows + PText MStats | + List Int MStats -- Int of elements + deriving (Show) + data MStats = MStats Int Int --number of pictures, number of chars deriving Show @@ -42,14 +50,7 @@ instance HasAttributes MStats where instance Semigroup MStats where (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) -data MarkdownStats = NotImportant MStats | - HCode MStats | - Code MStats | - Section MStats | - Table Int MStats | -- Int of rows - PText MStats | - List Int MStats -- Int of elements - deriving (Show) +-- Getter functions getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getCode [] = (0, 0) @@ -67,10 +68,6 @@ getSections [] = 0 getSections (Section _ : xs) = 1 + getSections xs getSections (_ : xs) = getSections xs -(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) -(><) (a, b) (c, d) = (a + c, b + d) - - sumMStat :: [MarkdownStats] -> MStats sumMStat [] = mempty sumMStat (x : xs) = case x of @@ -88,6 +85,11 @@ getListsTables ((List a _) : ys) = a + getListsTables ys getListsTables ((Table a _) : ys) = a + getListsTables ys getListsTables (_ : ys) = getListsTables ys +-- helper +(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) +(><) (a, b) (c, d) = (a + c, b + d) + +-- INSTANCES instance Rangeable [MarkdownStats] where ranged = const id diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index c58f82884..d259cdc2d 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -21,19 +21,16 @@ import Distribution.Server.Util.Markdown import Distribution.Server.Util.ServeTarball ( loadTarEntry ) import Distribution.Simple.Utils ( safeHead - , safeLast - ) + , safeLast ) import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import Control.Exception ( SomeException(..) - , handle - ) + , handle ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy - ) + , sortBy ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL @@ -41,9 +38,12 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) +-- HELPER FUNCTIONS + handleConst :: a -> IO a -> IO a handleConst c = handle (\(_ :: SomeException) -> return c) +-- Scorer stores rank information data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -70,6 +70,7 @@ total (Scorer a b) = b / a scale :: Float -> Scorer -> Scorer scale mx sc = fracScor mx (total sc) +-- calculates number of versions from version list major :: Num a => [a] -> a major (x : _) = x major _ = 0 @@ -86,6 +87,8 @@ numDays (Just first) (Just end) = (toRational CL.nominalDay) numDays _ _ = 0 +-- Score Calculations + freshness :: [Version] -> CL.UTCTime -> Bool -> IO Float freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = @@ -148,6 +151,58 @@ readmeScore tarCache pkgI app = do rows = getListsTables stats sections = getSections stats +authorScore :: Int -> PackageDescription -> Scorer +authorScore maintainers desc = + boolScor 1 (not $ S.null $ author desc) <> maintScore + where + maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) + +codeScore :: Float -> Float -> Scorer +codeScore documentS haskellL = + boolScor 1 (haskellL > 700) + <> boolScor 1 (haskellL < 80000) + <> fracScor 2 (min 1 (haskellL / 5000)) + <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) + +versionScore + :: [Version] + -> VersionsFeature + -> [CL.UTCTime] + -> PackageDescription + -> IO Scorer +versionScore versionList versions lastUploads desc = do + use <- intUsable + depre <- deprec + return $ calculateScore depre lastUploads use + where + pkgNm = pkgName $ package desc + partVers = + flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm + intUsable = do + (norm, _, unpref) <- partVers + return $ versionNumbers <$> norm ++ unpref + deprec = do + (_, deprecN, _) <- partVers + return deprecN + calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer + calculateScore depre lUps intUse = + boolScor 20 (length intUse > 1) + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) + <> scorer + 15 + (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) + intUse + ) + <> scorer + 20 + (int2Float $ 4 * length + (filter (\x -> major x > 0 && patches x > 0) intUse) + ) + <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) + <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + <> boolScor 5 (not $ null depre) + baseScore :: VersionsFeature -> Int @@ -213,58 +268,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do return $ BlobStorage.filepath (serverBlobStore env) <$> blob documHas = queryHasDocumentation docs pkgId -authorScore :: Int -> PackageDescription -> Scorer -authorScore maintainers desc = - boolScor 1 (not $ S.null $ author desc) <> maintScore - where - maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) - -codeScore :: Float -> Float -> Scorer -codeScore documentS haskellL = - boolScor 1 (haskellL > 700) - <> boolScor 1 (haskellL < 80000) - <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) - -versionScore - :: [Version] - -> VersionsFeature - -> [CL.UTCTime] - -> PackageDescription - -> IO Scorer -versionScore versionList versions lastUploads desc = do - use <- intUsable - depre <- deprec - return $ calculateScore depre lastUploads use - where - pkgNm = pkgName $ package desc - partVers = - flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm - intUsable = do - (norm, _, unpref) <- partVers - return $ versionNumbers <$> norm ++ unpref - deprec = do - (_, deprecN, _) <- partVers - return deprecN - calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer - calculateScore depre lUps intUse = - boolScor 20 (length intUse > 1) - <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) - <> scorer - 15 - (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) - intUse - ) - <> scorer - 20 - (int2Float $ 4 * length - (filter (\x -> major x > 0 && patches x > 0) intUse) - ) - <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) - <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) - <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - <> boolScor 5 (not $ null depre) - temporalScore :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer temporalScore p lastUploads versionList recentDownloads = do From 3a4ebbcca817515ead9a67913d7f783bbc2887a2 Mon Sep 17 00:00:00 2001 From: Janus Troelsen Date: Wed, 7 Sep 2022 06:55:53 -0500 Subject: [PATCH 068/129] Use NonEmpty (#1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ondřej Kubánek <71923533+kubaneko@users.noreply.github.com> --- .../Server/Features/PackageList.hs | 20 ++++++++++--------- .../Features/PackageList/PackageRank.hs | 15 +++++++------- .../Server/Packages/PackageIndex.hs | 10 +++++++++- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 835235e77..718354b13 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -34,9 +34,10 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.PackageDescription.Configuration import Distribution.Utils.ShortText (fromShortText) -import Distribution.Simple.Utils (safeLast) import Control.Concurrent +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty (NonEmpty) import Data.Maybe (mapMaybe) import Data.Map (Map) import qualified Data.Map as Map @@ -233,9 +234,9 @@ listFeature CoreFeature{..} False -> do index <- queryGetPackageIndex let pkgs = PackageIndex.lookupPackageName index pkgname - case pkgs of - [] -> return () --this shouldn't happen - _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem pkgs + case NE.nonEmpty pkgs of + Nothing -> return () --this shouldn't happen + Just ne -> modifyMemState itemCache . uncurry Map.insert =<< constructItem ne updateDesc pkgname = do index <- queryGetPackageIndex @@ -256,13 +257,14 @@ listFeature CoreFeature{..} constructItemIndex :: IO (Map PackageName PackageItem) constructItemIndex = do index <- queryGetPackageIndex - items <- mapM constructItem $ PackageIndex.allPackagesByName index - return $ Map.fromList items + let byName = PackageIndex.allPackagesByNameNE index + mPkgInfos <- traverse (mapM constructItem) (NE.nonEmpty byName) + pure $ foldMap (Map.fromList . NE.toList) mPkgInfos - constructItem :: [PkgInfo] -> IO (PackageName, PackageItem) + constructItem :: NonEmpty PkgInfo -> IO (PackageName, PackageItem) constructItem pkgs = do let pkgname = packageName pkg - pkg = last pkgs + pkg = NE.last pkgs -- [reverse index disabled] revCount <- query . GetReverseCount $ pkgname users <- queryGetUserDb tags <- queryTagsForPackage pkgname @@ -271,7 +273,7 @@ listFeature CoreFeature{..} deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) packageR <- rankPackage versions (cmFind pkgname downs) - (UserIdSet.size maintainers) documentation tar env pkgs (safeLast pkgs) + (UserIdSet.size maintainers) documentation tar env pkgs return $ (,) pkgname $ (updateDescriptionItem (pkgDesc pkg) $ emptyPackageItem pkgname) { itemTags = tags diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index d259cdc2d..5b25bc3e1 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -31,6 +31,8 @@ import Control.Exception ( SomeException(..) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty ( NonEmpty ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL @@ -299,11 +301,9 @@ rankPackage -> DocumentationFeature -> TarIndexCacheFeature -> ServerEnv - -> [PkgInfo] - -> Maybe PkgInfo + -> NonEmpty PkgInfo -> IO Float -rankPackage _ _ _ _ _ _ _ Nothing = return 0 -rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) +rankPackage versions recentDownloads maintainers docs tarCache env pkgs = do t <- temporalScore pkgD uploads versionList recentDownloads @@ -320,6 +320,7 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk Nothing -> 1 _ -> 0.2 where + pkgUsed = NE.last pkgs pkgname = pkgName . package $ pkgD pkgD = packageDescription . pkgDesc $ pkgUsed deprP = queryGetDeprecatedFor versions pkgname @@ -327,8 +328,8 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk versionList :: [Version] versionList = sortBy (flip compare) - $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + $ map (pkgVersion . package . packageDescription . pkgDesc) (NE.toList pkgs) uploads = sortBy (flip compare) - $ (fst . pkgOriginalUploadInfo <$> pkgs) - ++ (fst . pkgLatestUploadInfo <$> pkgs) + $ (fst . pkgOriginalUploadInfo <$> NE.toList pkgs) + ++ (fst . pkgLatestUploadInfo <$> NE.toList pkgs) diff --git a/src/Distribution/Server/Packages/PackageIndex.hs b/src/Distribution/Server/Packages/PackageIndex.hs index 4b862d649..f48750e73 100644 --- a/src/Distribution/Server/Packages/PackageIndex.hs +++ b/src/Distribution/Server/Packages/PackageIndex.hs @@ -44,7 +44,8 @@ module Distribution.Server.Packages.PackageIndex ( -- ** Bulk queries allPackageNames, allPackages, - allPackagesByName + allPackagesByName, + allPackagesByNameNE ) where import Distribution.Server.Prelude hiding (lookup) @@ -58,6 +59,8 @@ import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) import qualified Data.Foldable as Foldable import Data.List (groupBy, find, isInfixOf) +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty (NonEmpty) import Data.SafeCopy import Distribution.Types.PackageName @@ -258,6 +261,11 @@ allPackages (PackageIndex m) = concat (Map.elems m) allPackagesByName :: Package pkg => PackageIndex pkg -> [[pkg]] allPackagesByName (PackageIndex m) = Map.elems m +allPackagesByNameNE :: Package pkg => PackageIndex pkg -> [NonEmpty pkg] +allPackagesByNameNE (PackageIndex m) = + -- This is safe because there will always be at least one version of a package + NE.fromList <$> Map.elems m + allPackageNames :: PackageIndex pkg -> [PackageName] allPackageNames (PackageIndex m) = Map.keys m From 4f60d74118ce86cf44ef363d3a5de0f86a170d79 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 1 Jul 2022 17:22:53 +0200 Subject: [PATCH 069/129] started new PageRank branch --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 552dfa64b..176a8b022 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# hackage-server +# Hackage-server [![Build Status](https://travis-ci.org/haskell/hackage-server.png?branch=master)](https://travis-ci.org/haskell/hackage-server) [![Build status](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml) From d071bd798977d2b8fada8daa7eea8815d9cd8dde Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 1 Jul 2022 17:45:41 +0200 Subject: [PATCH 070/129] correct the error --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 176a8b022..552dfa64b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Hackage-server +# hackage-server [![Build Status](https://travis-ci.org/haskell/hackage-server.png?branch=master)](https://travis-ci.org/haskell/hackage-server) [![Build status](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml/badge.svg)](https://github.com/haskell/hackage-server/actions/workflows/haskell-ci.yml) From c5eb10630624b30ab092a55455d812e2fe18b07f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 3 Jul 2022 21:27:14 +0200 Subject: [PATCH 071/129] Created PackageRank module and added it to build --- hackage-server.cabal | 1 + src/Distribution/Server/Features/PackageRank.hs | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 src/Distribution/Server/Features/PackageRank.hs diff --git a/hackage-server.cabal b/hackage-server.cabal index 74e511955..51c22ab5b 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -359,6 +359,7 @@ library lib-server Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap + Distribution.Server.Features.PackageRank if flag(debug) cpp-options: -DDEBUG diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs new file mode 100644 index 000000000..3bbe62215 --- /dev/null +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -0,0 +1,9 @@ +module Distribution.Server.Features.PackageRank ( + rankPackage + ) where + +import Distribution.Package +import Distribution.Server.Packages.Types + +rankPackage :: (Package a) => a -> IO Double +rankPackage p=return 0 From a8f4d09c0b8ce0cbd7e0d9db3b9ce165ac6bb256 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 3 Jul 2022 21:37:37 +0200 Subject: [PATCH 072/129] write out ranking criteria --- src/Distribution/Server/Features/PackageRank.hs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 3bbe62215..4438f5668 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -3,7 +3,15 @@ module Distribution.Server.Features.PackageRank ( ) where import Distribution.Package -import Distribution.Server.Packages.Types rankPackage :: (Package a) => a -> IO Double -rankPackage p=return 0 +rankPackage p=return$ reverseDeps+usageTrend+docScore+stability + +authNum+goodMetadata+weightUniqueDeps+activelyMaintained + where reverseDeps=1 + usageTrend=1 + docScore=1 + stability=1 + authNum=1 + goodMetadata=1 + weightUniqueDeps=1 + activelyMaintained=1 From 6abacffdac0aa62cd7918488960d2ce4200c708b Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 4 Jul 2022 23:22:32 +0200 Subject: [PATCH 073/129] started with maintainer number --- .../Server/Features/PackageRank.hs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 4438f5668..0d51bffc2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -3,15 +3,25 @@ module Distribution.Server.Features.PackageRank ( ) where import Distribution.Package +import Distribution.PackageDescription +import Distribution.Server.Features.Upload +import Distribution.Server.Users.UserIdSet as UserIdSet -rankPackage :: (Package a) => a -> IO Double -rankPackage p=return$ reverseDeps+usageTrend+docScore+stability - +authNum+goodMetadata+weightUniqueDeps+activelyMaintained +rankPackage :: PackageDescription -> IO Double +rankPackage p=do + maintainers <- maintNum + return maintainers+reverseDeps+usageTrend+docScore+stability + +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 stability=1 - authNum=1 + maintNum :: IO Double + maintNum=do + maintSet<-queryUserGroup$maintainersGroupDescription pkgNm + return fromInteger.UserIdSet$size maintSet goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 + pkgNm :: PackageName + pkgNm=pkgName$package p From 745ba37ddba4b917972762913d5919d1b1d47984 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 22:31:19 +0200 Subject: [PATCH 074/129] added Upload Feature and got number of maintainers for package --- src/Distribution/Server/Features/PackageRank.hs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0d51bffc2..ae5d7fda7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,22 +4,22 @@ module Distribution.Server.Features.PackageRank ( import Distribution.Package import Distribution.PackageDescription +import Distribution.Server.Users.Group import Distribution.Server.Features.Upload -import Distribution.Server.Users.UserIdSet as UserIdSet -rankPackage :: PackageDescription -> IO Double -rankPackage p=do +rankPackage :: UploadFeature -> PackageDescription -> IO Double +rankPackage upload p=do maintainers <- maintNum - return maintainers+reverseDeps+usageTrend+docScore+stability + return$maintainers+reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 - stability=1 + stabilityScore=1 maintNum :: IO Double maintNum=do - maintSet<-queryUserGroup$maintainersGroupDescription pkgNm - return fromInteger.UserIdSet$size maintSet + maint<-queryUserGroups$[maintainersGroup upload pkgNm] + return.fromInteger.toInteger$size maint goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 From 5b83f9a371ffaa18af4796ac78c5db9bc2de7510 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 22:42:20 +0200 Subject: [PATCH 075/129] divided rankPackage to pure and IO version --- .../Server/Features/PackageRank.hs | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ae5d7fda7..aa3921cf2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -7,21 +7,23 @@ import Distribution.PackageDescription import Distribution.Server.Users.Group import Distribution.Server.Features.Upload -rankPackage :: UploadFeature -> PackageDescription -> IO Double -rankPackage upload p=do - maintainers <- maintNum - return$maintainers+reverseDeps+usageTrend+docScore+stabilityScore +rankPackageIO upload p=maintNum + where + maintNum :: IO Double + maintNum=do + maint<-queryUserGroups$[maintainersGroup upload pkgNm] + return.fromInteger.toInteger$size maint + pkgNm :: PackageName + pkgNm=pkgName$package p +rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained where reverseDeps=1 usageTrend=1 docScore=1 stabilityScore=1 - maintNum :: IO Double - maintNum=do - maint<-queryUserGroups$[maintainersGroup upload pkgNm] - return.fromInteger.toInteger$size maint goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 - pkgNm :: PackageName - pkgNm=pkgName$package p + +rankPackage :: UploadFeature -> PackageDescription -> IO Double +rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) From 1b3ddb1ed5f38f4dfcb5410601c3ae4306dfdd4f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 5 Jul 2022 23:04:25 +0200 Subject: [PATCH 076/129] added benchmark and test info --- src/Distribution/Server/Features/PackageRank.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index aa3921cf2..8d11ad8c3 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -21,9 +21,13 @@ rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore usageTrend=1 docScore=1 stabilityScore=1 + testsBench=(bool2Double.hasTests) p + (bool2Double.hasBenchmarks) p goodMetadata=1 weightUniqueDeps=1 activelyMaintained=1 + bool2Double :: Bool -> Double + bool2Double true=1 + bool2Double false=0 rankPackage :: UploadFeature -> PackageDescription -> IO Double rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) From d28983a22e1255339df3cf2ee1229ada42c9b242 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 7 Jul 2022 23:29:12 +0200 Subject: [PATCH 077/129] added Download Feature --- src/Distribution/Server/Features/PackageRank.hs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8d11ad8c3..7fa7ca665 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,8 +6,9 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Users.Group import Distribution.Server.Features.Upload +import Distribution.Server.Features.DownloadCount -rankPackageIO upload p=maintNum +rankPackageIO download upload p=maintNum where maintNum :: IO Double maintNum=do @@ -29,5 +30,5 @@ rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore bool2Double true=1 bool2Double false=0 -rankPackage :: UploadFeature -> PackageDescription -> IO Double -rankPackage upload p=rankPackageIO upload p>>=(\x->return$x + rankPackagePure p) +rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double +rankPackage download upload p=rankPackageIO download upload p>>=(\x->return$x + rankPackagePure p) From 7b989ee550ae249d61ac330f6a07b209e836a4b7 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 8 Jul 2022 23:06:24 +0200 Subject: [PATCH 078/129] formatted a bit --- .../Server/Features/PackageRank.hs | 42 +++++++++++-------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 7fa7ca665..43fc0a025 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -8,27 +8,35 @@ import Distribution.Server.Users.Group import Distribution.Server.Features.Upload import Distribution.Server.Features.DownloadCount -rankPackageIO download upload p=maintNum +rankPackageIO download upload p = maintNum where + -- Number of maintainers maintNum :: IO Double - maintNum=do - maint<-queryUserGroups$[maintainersGroup upload pkgNm] - return.fromInteger.toInteger$size maint + maintNum = do + maint <- queryUserGroups $ + [maintainersGroup upload pkgNm] + return . fromInteger . toInteger $ size maint pkgNm :: PackageName - pkgNm=pkgName$package p -rankPackagePure p=reverseDeps+usageTrend+docScore+stabilityScore + pkgNm = pkgName $ package p + +rankPackagePure p = reverseDeps+usageTrend+docScore+stabilityScore +goodMetadata+weightUniqueDeps+activelyMaintained - where reverseDeps=1 - usageTrend=1 - docScore=1 - stabilityScore=1 - testsBench=(bool2Double.hasTests) p + (bool2Double.hasBenchmarks) p - goodMetadata=1 - weightUniqueDeps=1 - activelyMaintained=1 + where reverseDeps = 1 + usageTrend = 1 + docScore = 1 + stabilityScore = 1 + -- Does the Package have tests and Benchmarks + testsBench = (bool2Double . hasTests) p + + (bool2Double . hasBenchmarks) p + goodMetadata = 1 + weightUniqueDeps = 1 + activelyMaintained = 1 bool2Double :: Bool -> Double - bool2Double true=1 - bool2Double false=0 + bool2Double true = 1 + bool2Double false = 0 rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p=rankPackageIO download upload p>>=(\x->return$x + rankPackagePure p) +rankPackage download upload p = rankPackageIO download upload p + >>= (\x->return$x + rankPackagePure p) + + From cec9c721b96ebb041c536cad4ea3f9ed6f091076 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 8 Jul 2022 23:17:27 +0200 Subject: [PATCH 079/129] used brittany --- .../Server/Features/PackageRank.hs | 74 ++++++++++--------- 1 file changed, 40 insertions(+), 34 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 43fc0a025..aee6adb5d 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,42 +1,48 @@ -module Distribution.Server.Features.PackageRank ( - rankPackage +module Distribution.Server.Features.PackageRank + ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Server.Users.Group -import Distribution.Server.Features.Upload -import Distribution.Server.Features.DownloadCount +import Distribution.Package +import Distribution.PackageDescription +import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.Upload +import Distribution.Server.Users.Group rankPackageIO download upload p = maintNum - where - -- Number of maintainers - maintNum :: IO Double - maintNum = do - maint <- queryUserGroups $ - [maintainersGroup upload pkgNm] - return . fromInteger . toInteger $ size maint - pkgNm :: PackageName - pkgNm = pkgName $ package p + where + -- Number of maintainers + maintNum :: IO Double + maintNum = do + maint <- queryUserGroups [maintainersGroup upload pkgNm] + return . fromInteger . toInteger $ size maint + pkgNm :: PackageName + pkgNm = pkgName $ package p -rankPackagePure p = reverseDeps+usageTrend+docScore+stabilityScore - +goodMetadata+weightUniqueDeps+activelyMaintained - where reverseDeps = 1 - usageTrend = 1 - docScore = 1 - stabilityScore = 1 - -- Does the Package have tests and Benchmarks - testsBench = (bool2Double . hasTests) p - + (bool2Double . hasBenchmarks) p - goodMetadata = 1 - weightUniqueDeps = 1 - activelyMaintained = 1 - bool2Double :: Bool -> Double - bool2Double true = 1 - bool2Double false = 0 +rankPackagePure p = + reverseDeps + + usageTrend + + docScore + + stabilityScore + + goodMetadata + + weightUniqueDeps + + activelyMaintained + where + reverseDeps = 1 + usageTrend = 1 + docScore = 1 + stabilityScore = 1 + -- Does the Package have tests and Benchmarks + testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p + goodMetadata = 1 + weightUniqueDeps = 1 + activelyMaintained = 1 + bool2Double :: Bool -> Double + bool2Double true = 1 + bool2Double false = 0 -rankPackage :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p = rankPackageIO download upload p - >>= (\x->return$x + rankPackagePure p) +rankPackage + :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double +rankPackage download upload p = + rankPackageIO download upload p >>= (\x -> return $ x + rankPackagePure p) From 51ee598a0cdaa60da2500d0baff8532ab49f9a82 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 11 Jul 2022 17:43:04 +0200 Subject: [PATCH 080/129] added further info about the package --- src/Distribution/Server/Features/PackageRank.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index aee6adb5d..4db507005 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,6 +4,7 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription +import Distribution.Types.Version import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Upload import Distribution.Server.Users.Group @@ -28,6 +29,8 @@ rankPackagePure p = + activelyMaintained where reverseDeps = 1 + versions = versionNumbers . pkgVersion $ package p + dependencies = allBuildDepends p usageTrend = 1 docScore = 1 stabilityScore = 1 From ce5d6aeb3ec013dc0efae6fbbae6cc49d7fc2724 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 12 Jul 2022 22:34:59 +0200 Subject: [PATCH 081/129] added isApp function --- src/Distribution/Server/Features/PackageRank.hs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 4db507005..5184ed1ae 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -9,6 +9,8 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Upload import Distribution.Server.Users.Group +import Data.Maybe (isNothing) + rankPackageIO download upload p = maintNum where -- Number of maintainers @@ -39,6 +41,7 @@ rankPackagePure p = goodMetadata = 1 weightUniqueDeps = 1 activelyMaintained = 1 + isApp = (isNothing.library) p && (not.null.executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 From 902f40440187eca15fcc1550dbee47fcaa22fdf8 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 13 Jul 2022 11:31:19 +0200 Subject: [PATCH 082/129] added Scorer type and started to extract versions --- .../Server/Features/PackageRank.hs | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 5184ed1ae..e009c87d2 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,12 +4,25 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.PackageInfoJSON.State + ( getVersionsFor ) import Distribution.Server.Features.Upload import Distribution.Server.Users.Group + ( queryUserGroups + , size + ) +import Distribution.Types.Version + +import Data.Maybe ( isNothing ) + +data Scorer = Scorer + { total :: Double + , score :: Double + } -import Data.Maybe (isNothing) +instance Num Scorer where + Scorer a b + Scorer c d = Scorer (a + c) (b + d) rankPackageIO download upload p = maintNum where @@ -18,6 +31,8 @@ rankPackageIO download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint + versionsPkg :: IO Double + versionsPkg = getVersionsFor pkgNm >>= return length pkgNm :: PackageName pkgNm = pkgName $ package p @@ -25,23 +40,14 @@ rankPackagePure p = reverseDeps + usageTrend + docScore - + stabilityScore - + goodMetadata - + weightUniqueDeps - + activelyMaintained + + reverseDeps where reverseDeps = 1 - versions = versionNumbers . pkgVersion $ package p dependencies = allBuildDepends p usageTrend = 1 docScore = 1 - stabilityScore = 1 - -- Does the Package have tests and Benchmarks testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - goodMetadata = 1 - weightUniqueDeps = 1 - activelyMaintained = 1 - isApp = (isNothing.library) p && (not.null.executables) p + isApp = (isNothing . library) p && (not . null . executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 From 29104faaf87f0d8e3be1b60e8bc56bed7dd126ed Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 14 Jul 2022 21:46:06 +0200 Subject: [PATCH 083/129] more work on versions --- .../Server/Features/PackageRank.hs | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e009c87d2..e333a3f71 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -5,8 +5,8 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.PackageInfoJSON.State - ( getVersionsFor ) +import Distribution.Server.Features.HaskellPlatform +import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.Upload import Distribution.Server.Users.Group ( queryUserGroups @@ -21,40 +21,47 @@ data Scorer = Scorer , score :: Double } -instance Num Scorer where - Scorer a b + Scorer c d = Scorer (a + c) (b + d) +add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO download upload p = maintNum +rankPackageIO + :: VersionsFeature + -> PlatformFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> IO Double +rankPackageIO prefferedV platform download upload p = maintNum where + pkgNm :: PackageName + pkgNm = pkgName $ package p -- Number of maintainers maintNum :: IO Double maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint - versionsPkg :: IO Double - versionsPkg = getVersionsFor pkgNm >>= return length - pkgNm :: PackageName - pkgNm = pkgName $ package p + versions = platformVersions platform pkgNm -rankPackagePure p = - reverseDeps - + usageTrend - + docScore - + reverseDeps +rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where - reverseDeps = 1 - dependencies = allBuildDepends p - usageTrend = 1 - docScore = 1 - testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - isApp = (isNothing . library) p && (not . null . executables) p + reverseDeps = 1 + dependencies = allBuildDepends p + usageTrend = 1 + docScore = 1 + testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p + isApp = (isNothing . library) p && (not . null . executables) p bool2Double :: Bool -> Double bool2Double true = 1 bool2Double false = 0 rankPackage - :: DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage download upload p = - rankPackageIO download upload p >>= (\x -> return $ x + rankPackagePure p) + :: VersionsFeature + -> PlatformFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> IO Double +rankPackage versions platform download upload p = + rankPackageIO versions platform download upload p + >>= (\x -> return $ x + rankPackagePure p) From 62021baf8560b0eefc4fe87735bcf7f684c6880d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 11:36:56 +0200 Subject: [PATCH 084/129] got versions and partitioned based on status --- .../Server/Features/HaskellPlatform.hs | 2 +- .../Server/Features/PackageRank.hs | 41 ++++++++++++------- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/Distribution/Server/Features/HaskellPlatform.hs b/src/Distribution/Server/Features/HaskellPlatform.hs index 9d0840bd8..15be3e815 100644 --- a/src/Distribution/Server/Features/HaskellPlatform.hs +++ b/src/Distribution/Server/Features/HaskellPlatform.hs @@ -1,6 +1,6 @@ {-# LANGUAGE RankNTypes, NamedFieldPuns, RecordWildCards #-} module Distribution.Server.Features.HaskellPlatform ( - PlatformFeature, + PlatformFeature(..), PlatformResource(..), initPlatformFeature, ) where diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e333a3f71..b26887e63 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,10 +4,17 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription +import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.HaskellPlatform import Distribution.Server.Features.PreferredVersions +import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload +import Distribution.Server.Framework +import Distribution.Server.Packages.PackageIndex + ( PackageIndex ) +import qualified Distribution.Server.Packages.PackageIndex + as PackageIndex +import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups , size @@ -23,14 +30,7 @@ data Scorer = Scorer add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO - :: VersionsFeature - -> PlatformFeature - -> DownloadFeature - -> UploadFeature - -> PackageDescription - -> IO Double -rankPackageIO prefferedV platform download upload p = maintNum +rankPackageIO core versions download upload p = maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -39,7 +39,20 @@ rankPackageIO prefferedV platform download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint - versions = platformVersions platform pkgNm + descriptions = do + desc <- lookupPackageName core pkgNm + return (pkgDesc <$> desc) + partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer = do + desc <- descriptions + return + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions + x + (map (pkgVersion . package . packageDescription) desc) + ) + + rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -54,14 +67,14 @@ rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps bool2Double false = 0 rankPackage - :: VersionsFeature - -> PlatformFeature + :: CoreResource + -> VersionsFeature -> DownloadFeature -> UploadFeature -> PackageDescription -> IO Double -rankPackage versions platform download upload p = - rankPackageIO versions platform download upload p +rankPackage core versions download upload p = + rankPackageIO core versions download upload p >>= (\x -> return $ x + rankPackagePure p) From 4a521b8c9f7fe8f38ff0852aaf99bcc17cc37f0c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 11:46:31 +0200 Subject: [PATCH 085/129] separated versions into versionList and verPart --- .../Server/Features/PackageRank.hs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index b26887e63..20876e98f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -42,15 +42,20 @@ rankPackageIO core versions download upload p = maintNum descriptions = do desc <- lookupPackageName core pkgNm return (pkgDesc <$> desc) - partVer :: ServerPartE (IO ([Version], [Version], [Version])) - partVer = do + + versionList = do desc <- descriptions - return - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions - x - (map (pkgVersion . package . packageDescription) desc) + return (map (pkgVersion . package . packageDescription) desc) + + partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer = + versionList + >>= (\y -> + return + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions x y) ) + From c0d0e34735c2cc12873b4d9547bf42772eb5c9b6 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 14:41:49 +0200 Subject: [PATCH 086/129] added last upload times --- .../Server/Features/PackageRank.hs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 20876e98f..ff1c37d5e 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -21,7 +21,11 @@ import Distribution.Server.Users.Group ) import Distribution.Types.Version +import Data.List ( sort + , sortBy + ) import Data.Maybe ( isNothing ) +import Data.Time.Clock ( UTCTime(..) ) data Scorer = Scorer { total :: Double @@ -39,9 +43,10 @@ rankPackageIO core versions download upload p = maintNum maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] return . fromInteger . toInteger $ size maint + info = lookupPackageName core pkgNm descriptions = do - desc <- lookupPackageName core pkgNm - return (pkgDesc <$> desc) + infPkg <- info + return (pkgDesc <$> infPkg) versionList = do desc <- descriptions @@ -55,9 +60,12 @@ rankPackageIO core versions download upload p = maintNum $ queryGetPreferredInfo versions pkgNm >>= (\x -> return $ partitionVersions x y) ) - - - + lastUploads = do + infPkg <- info + return + $ sortBy (flip compare) + $ (\x -> fst (pkgOriginalUploadInfo x)) + <$> infPkg rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where From 9413a0da51b01e8f8463667708a0322f9d6f8152 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 20:40:51 +0200 Subject: [PATCH 087/129] added freshnessScore --- .../Server/Features/PackageRank.hs | 95 ++++++++++++++----- 1 file changed, 72 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ff1c37d5e..1401cb9e5 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -9,11 +9,7 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload -import Distribution.Server.Framework -import Distribution.Server.Packages.PackageIndex - ( PackageIndex ) -import qualified Distribution.Server.Packages.PackageIndex - as PackageIndex +import Distribution.Server.Framework ( ServerPartE ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -21,20 +17,73 @@ import Distribution.Server.Users.Group ) import Distribution.Types.Version +import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort , sortBy ) import Data.Maybe ( isNothing ) -import Data.Time.Clock ( UTCTime(..) ) +import Data.Ord ( max + , min + ) +import Data.Time.Clock ( UTCTime(..) + , diffUTCTime + , getCurrentTime + , nominalDay + ) +import GHC.Float ( int2Double ) data Scorer = Scorer - { total :: Double - , score :: Double + { maximum :: Double + , score :: Double } add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) -rankPackageIO core versions download upload p = maintNum +total (Scorer a b) = a / b + +freshnessScore :: [Version] -> UTCTime -> Bool -> IO Double +freshnessScore [] _ app = return 0 +freshnessScore (x : xs) lastUpd app = + daysPastExpiration + >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) + where + versionLatest = versionNumbers x + isNightly = case major versionLatest of + 0 -> True + _ -> False + daysPastExpiration = + age >>= (\a -> return $ max 0 a - expectedUpdateInterval) + expectedUpdateInterval = + int2Double (min (versionStabilityInterval versionLatest) $ length (x : xs)) + / (if isNightly then 4 else 1) + versionStabilityInterval v | patches v > 3 && major v > 0 = 700 + | patches v > 3 = 450 + | patches v > 0 = 300 + | major v > 0 = 200 + | minor v > 3 = 140 + | otherwise = 80 + age = + getCurrentTime + >>= (\x -> + return + $ fromRational + $ toRational + $ diffUTCTime x lastUpd + / fromRational (toRational nominalDay) + ) + -- expected_update_interval/2 + if cr.is_nightly { 30 } else if is_app_only {300} else {200}; + decayDays = + expectedUpdateInterval + / 2 + + (if isNightly then 30 else (if app then 300 else 200)) + major (x : xs) = x + major _ = 0 + minor (x : y : xs) = y + minor _ = 0 + patches (x : y : xs) = sum xs + patches _ = 0 + +rankPackageIO core versions download upload p = liftIO maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -42,30 +91,29 @@ rankPackageIO core versions download upload p = maintNum maintNum :: IO Double maintNum = do maint <- queryUserGroups [maintainersGroup upload pkgNm] - return . fromInteger . toInteger $ size maint + return . int2Double $ size maint info = lookupPackageName core pkgNm descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - versionList = do - desc <- descriptions - return (map (pkgVersion . package . packageDescription) desc) + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions - partVer :: ServerPartE (IO ([Version], [Version], [Version])) + partVer :: ServerPartE ([Version], [Version], [Version]) partVer = versionList >>= (\y -> - return + liftIO $ queryGetPreferredInfo versions pkgNm >>= (\x -> return $ partitionVersions x y) ) lastUploads = do infPkg <- info - return - $ sortBy (flip compare) - $ (\x -> fst (pkgOriginalUploadInfo x)) - <$> infPkg + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -75,9 +123,10 @@ rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps docScore = 1 testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p isApp = (isNothing . library) p && (not . null . executables) p - bool2Double :: Bool -> Double - bool2Double true = 1 - bool2Double false = 0 + +bool2Double :: Bool -> Double +bool2Double true = 1 +bool2Double false = 0 rankPackage :: CoreResource @@ -85,7 +134,7 @@ rankPackage -> DownloadFeature -> UploadFeature -> PackageDescription - -> IO Double + -> ServerPartE Double rankPackage core versions download upload p = rankPackageIO core versions download upload p >>= (\x -> return $ x + rankPackagePure p) From 8f0f49c69a408b34eb76adcbda474657cb20b1ce Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 15 Jul 2022 22:07:08 +0200 Subject: [PATCH 088/129] added Download Scorer --- src/Distribution/Server/Features/PackageRank.hs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1401cb9e5..db5ab28ad 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,6 +6,7 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.DownloadCount.State import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload @@ -16,6 +17,7 @@ import Distribution.Server.Users.Group , size ) import Distribution.Types.Version +import Distribution.Server.Util.CountingMap (cmFind) import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort @@ -87,6 +89,7 @@ rankPackageIO core versions download upload p = liftIO maintNum where pkgNm :: PackageName pkgNm = pkgName $ package p + isApp = (isNothing . library) p && (not . null . executables) p -- Number of maintainers maintNum :: IO Double maintNum = do @@ -96,7 +99,9 @@ rankPackageIO core versions download upload p = liftIO maintNum descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - + downloadScore :: IO Scorer + downloadScore = recentPackageDownloads download >>=return.calcDownScore.(cmFind pkgNm) + calcDownScore i = Scorer 5 $ (logBase 2 (int2Double$max 0 (i-100) + 100) - 6.6) / (if isApp then 5 else 6) versionList = do sortBy (flip compare) From 7ae9ac856d1f7fb56bf2cb7e747ef8f52170e0ee Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 16 Jul 2022 20:00:56 +0200 Subject: [PATCH 089/129] finished simple temporalScore (rankPackageIO) --- .../Server/Features/PackageRank.hs | 66 ++++++++++++------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index db5ab28ad..c5c480b89 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,7 +6,6 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.DownloadCount -import Distribution.Server.Features.DownloadCount.State import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload @@ -16,8 +15,9 @@ import Distribution.Server.Users.Group ( queryUserGroups , size ) +import Distribution.Server.Util.CountingMap + ( cmFind ) import Distribution.Types.Version -import Distribution.Server.Util.CountingMap (cmFind) import Control.Monad.IO.Class ( liftIO ) import Data.List ( sort @@ -39,25 +39,27 @@ data Scorer = Scorer , score :: Double } +-- frac 0<=frac<=1 +fracScor maxim frac = Scorer maxim (maxim * frac) + +boolScor k true = Scorer k k +boolScor k true = Scorer k 0 + add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) total (Scorer a b) = a / b -freshnessScore :: [Version] -> UTCTime -> Bool -> IO Double -freshnessScore [] _ app = return 0 -freshnessScore (x : xs) lastUpd app = +freshness :: [Version] -> UTCTime -> Bool -> IO Double +freshness [] _ app = return 0 +freshness (x : xs) lastUpd app = daysPastExpiration >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) where versionLatest = versionNumbers x - isNightly = case major versionLatest of - 0 -> True - _ -> False daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = - int2Double (min (versionStabilityInterval versionLatest) $ length (x : xs)) - / (if isNightly then 4 else 1) + expectedUpdateInterval = int2Double + (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 | patches v > 0 = 300 @@ -73,11 +75,7 @@ freshnessScore (x : xs) lastUpd app = $ diffUTCTime x lastUpd / fromRational (toRational nominalDay) ) - -- expected_update_interval/2 + if cr.is_nightly { 30 } else if is_app_only {300} else {200}; - decayDays = - expectedUpdateInterval - / 2 - + (if isNightly then 30 else (if app then 300 else 200)) + decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) major (x : xs) = x major _ = 0 minor (x : y : xs) = y @@ -85,11 +83,15 @@ freshnessScore (x : xs) lastUpd app = patches (x : y : xs) = sum xs patches _ = 0 -rankPackageIO core versions download upload p = liftIO maintNum +temporalScore core versions download upload p = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ add tract $ add fresh downs where pkgNm :: PackageName pkgNm = pkgName $ package p - isApp = (isNothing . library) p && (not . null . executables) p + isApp = (isNothing . library) p && (not . null . executables) p -- Number of maintainers maintNum :: IO Double maintNum = do @@ -99,9 +101,14 @@ rankPackageIO core versions download upload p = liftIO maintNum descriptions = do infPkg <- info return (pkgDesc <$> infPkg) - downloadScore :: IO Scorer - downloadScore = recentPackageDownloads download >>=return.calcDownScore.(cmFind pkgNm) - calcDownScore i = Scorer 5 $ (logBase 2 (int2Double$max 0 (i-100) + 100) - 6.6) / (if isApp then 5 else 6) + downloadScore = downloadsPerMonth >>= return . calcDownScore + downloadsPerMonth = + liftIO $ recentPackageDownloads download >>= return . cmFind pkgNm + calcDownScore i = Scorer 5 $ max + ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) + / (if isApp then 5 else 6) + ) + 5 versionList = do sortBy (flip compare) @@ -119,6 +126,19 @@ rankPackageIO core versions download upload p = liftIO maintNum lastUploads = do infPkg <- info return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + -- [Version] -> UTCTime -> Bool + packageFreshness = do + ups <- lastUploads + vers <- versionList + case ups of + [] -> return 0 + _ -> liftIO $ freshness vers (head ups) isApp + freshnessScore = packageFreshness >>= return . fracScor 10 + -- Missing dependencyFreshnessScore for reasonable effectivity needs caching + tractionScore = do + fresh <- packageFreshness + downs <- downloadsPerMonth + return $ boolScor 1 (fresh * int2Double downs > 1000) rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps where @@ -141,7 +161,7 @@ rankPackage -> PackageDescription -> ServerPartE Double rankPackage core versions download upload p = - rankPackageIO core versions download upload p - >>= (\x -> return $ x + rankPackagePure p) + temporalScore core versions download upload p + >>= (\x -> return $ total x + rankPackagePure p) From f0e792042551ed5216afbdcfb2fba47c9dd2601c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 17 Jul 2022 21:00:29 +0200 Subject: [PATCH 090/129] separated rankIO from temporalScore --- .../Server/Features/PackageRank.hs | 81 ++++++++++++------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c5c480b89..c8cd2fdfa 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -83,7 +83,55 @@ freshness (x : xs) lastUpd app = patches (x : y : xs) = sum xs patches _ = 0 -temporalScore core versions download upload p = do + +-- partVer :: ServerPartE ([Version], [Version], [Version]) +-- partVer = +-- versionList +-- >>= (\y -> +-- liftIO +-- $ queryGetPreferredInfo versions pkgNm +-- >>= (\x -> return $ partitionVersions x y) +-- ) +-- +-- -- Number of maintainers +-- maintNum :: IO Double +-- maintNum = do +-- maint <- queryUserGroups [maintainersGroup upload pkgNm] +-- return . int2Double $ size maint + +rankIO + :: CoreResource + -> VersionsFeature + -> DownloadFeature + -> UploadFeature + -> PackageDescription + -> ServerPartE Scorer + +rankIO core vers downs upl pkg = do + temp <- temporalScore core vers downs upl pkg lastUploads versionList downloadsPerMonth + return temp + + where + pkgNm :: PackageName + pkgNm = pkgName $ package pkg + info = lookupPackageName core pkgNm + descriptions = do + infPkg <- info + return (pkgDesc <$> infPkg) + lastUploads = do + infPkg <- info + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions + downloadsPerMonth = + liftIO $ recentPackageDownloads downs >>= return . cmFind pkgNm + + + +temporalScore core versions download upload p lastUploads versionList downloadsPerMonth= do fresh <- freshnessScore downs <- downloadScore tract <- tractionScore @@ -92,41 +140,12 @@ temporalScore core versions download upload p = do pkgNm :: PackageName pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p - -- Number of maintainers - maintNum :: IO Double - maintNum = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - return . int2Double $ size maint - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) downloadScore = downloadsPerMonth >>= return . calcDownScore - downloadsPerMonth = - liftIO $ recentPackageDownloads download >>= return . cmFind pkgNm calcDownScore i = Scorer 5 $ max ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) 5 - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - - partVer :: ServerPartE ([Version], [Version], [Version]) - partVer = - versionList - >>= (\y -> - liftIO - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions x y) - ) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - -- [Version] -> UTCTime -> Bool packageFreshness = do ups <- lastUploads vers <- versionList @@ -161,7 +180,7 @@ rankPackage -> PackageDescription -> ServerPartE Double rankPackage core versions download upload p = - temporalScore core versions download upload p + rankIO core versions download upload p >>= (\x -> return $ total x + rankPackagePure p) From 0eb4c94c13dc5bc14197649816d15cfa1f8d72fd Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 17 Jul 2022 22:10:39 +0200 Subject: [PATCH 091/129] added pageRank --- .../Server/Features/PackageRank.hs | 102 +++++++++--------- 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c8cd2fdfa..e7011085f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -32,6 +32,7 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) data Scorer = Scorer @@ -42,10 +43,11 @@ data Scorer = Scorer -- frac 0<=frac<=1 fracScor maxim frac = Scorer maxim (maxim * frac) -boolScor k true = Scorer k k -boolScor k true = Scorer k 0 +boolScor k true = Scorer k k +boolScor k false = Scorer k 0 -add (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) +(><) :: Scorer -> Scorer -> Scorer +(><) (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) total (Scorer a b) = a / b @@ -108,39 +110,46 @@ rankIO -> ServerPartE Scorer rankIO core vers downs upl pkg = do - temp <- temporalScore core vers downs upl pkg lastUploads versionList downloadsPerMonth - return temp - - where - pkgNm :: PackageName - pkgNm = pkgName $ package pkg - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - downloadsPerMonth = - liftIO $ recentPackageDownloads downs >>= return . cmFind pkgNm - - - -temporalScore core versions download upload p lastUploads versionList downloadsPerMonth= do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ add tract $ add fresh downs + temp <- temporalScore core + vers + downs + upl + pkg + lastUploads + versionList + downloadsPerMonth + return temp + + where + pkgNm :: PackageName + pkgNm = pkgName $ package pkg + info = lookupPackageName core pkgNm + descriptions = do + infPkg <- info + return (pkgDesc <$> infPkg) + lastUploads = do + infPkg <- info + return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg + versionList = + do + sortBy (flip compare) + . map (pkgVersion . package . packageDescription) + <$> descriptions + downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + + + +temporalScore core versions download upload p lastUploads versionList downloadsPerMonth + = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract >< fresh >< downs where pkgNm :: PackageName - pkgNm = pkgName $ package p - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = downloadsPerMonth >>= return . calcDownScore + pkgNm = pkgName $ package p + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ max ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -152,25 +161,22 @@ temporalScore core versions download upload p lastUploads versionList downloadsP case ups of [] -> return 0 _ -> liftIO $ freshness vers (head ups) isApp - freshnessScore = packageFreshness >>= return . fracScor 10 - -- Missing dependencyFreshnessScore for reasonable effectivity needs caching + freshnessScore = fracScor 10 <$> packageFreshness +-- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) -rankPackagePure p = reverseDeps + usageTrend + docScore + reverseDeps +rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats where - reverseDeps = 1 - dependencies = allBuildDepends p - usageTrend = 1 - docScore = 1 - testsBench = (bool2Double . hasTests) p + (bool2Double . hasBenchmarks) p - isApp = (isNothing . library) p && (not . null . executables) p - -bool2Double :: Bool -> Double -bool2Double true = 1 -bool2Double false = 0 + tests = boolScor 50 (hasTests p) + benchs = boolScor 10 (hasBenchmarks p) + desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) + -- ducumentation = boolScor 30 () + homeP = boolScor 30 (not $ S.null $ homepage p) + sourceRp = boolScor 8 (not $ null $ sourceRepos p) + cats = boolScor 5 (not $ S.null $ category p) rankPackage :: CoreResource @@ -181,6 +187,6 @@ rankPackage -> ServerPartE Double rankPackage core versions download upload p = rankIO core versions download upload p - >>= (\x -> return $ total x + rankPackagePure p) + >>= (\x -> return $ total x + total (rankPackagePage p)) From 924c06a38ffd73c8a1f4e704a5b7f5716eed5238 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 18 Jul 2022 23:34:51 +0200 Subject: [PATCH 092/129] added versionScore --- .../Server/Features/PackageRank.hs | 105 ++++++++++++------ 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e7011085f..df2868c93 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -24,7 +24,8 @@ import Data.List ( sort , sortBy ) import Data.Maybe ( isNothing ) -import Data.Ord ( max +import Data.Ord ( comparing + , max , min ) import Data.Time.Clock ( UTCTime(..) @@ -32,6 +33,9 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import Distribution.Simple.Utils ( safeHead + , safeLast + ) import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) @@ -40,6 +44,9 @@ data Scorer = Scorer , score :: Double } +scorer maxim frac = case maxim >= frac of + true -> Scorer maxim frac + false -> Scorer maxim maxim -- frac 0<=frac<=1 fracScor maxim frac = Scorer maxim (maxim * frac) @@ -51,6 +58,19 @@ boolScor k false = Scorer k 0 total (Scorer a b) = a / b +major (x : xs) = x +major _ = 0 +minor (x : y : xs) = y +minor _ = 0 +patches (x : y : xs) = sum xs +patches _ = 0 + +numDays :: Maybe UTCTime -> Maybe UTCTime -> Double +numDays (Just first) (Just last) = + fromRational $ toRational $ diffUTCTime first last / fromRational + (toRational nominalDay) +numDays _ _ = 0 + freshness :: [Version] -> UTCTime -> Bool -> IO Double freshness [] _ app = return 0 freshness (x : xs) lastUpd app = @@ -68,33 +88,10 @@ freshness (x : xs) lastUpd app = | major v > 0 = 200 | minor v > 3 = 140 | otherwise = 80 - age = - getCurrentTime - >>= (\x -> - return - $ fromRational - $ toRational - $ diffUTCTime x lastUpd - / fromRational (toRational nominalDay) - ) + age = flip numDays (Just lastUpd) . Just <$> getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) - major (x : xs) = x - major _ = 0 - minor (x : y : xs) = y - minor _ = 0 - patches (x : y : xs) = sum xs - patches _ = 0 - - --- partVer :: ServerPartE ([Version], [Version], [Version]) --- partVer = --- versionList --- >>= (\y -> --- liftIO --- $ queryGetPreferredInfo versions pkgNm --- >>= (\x -> return $ partitionVersions x y) --- ) --- + + -- -- Number of maintainers -- maintNum :: IO Double -- maintNum = do @@ -118,10 +115,10 @@ rankIO core vers downs upl pkg = do lastUploads versionList downloadsPerMonth - return temp + vers <- versionScore versionList vers lastUploads pkg + return (temp >< vers) where - pkgNm :: PackageName pkgNm = pkgName $ package pkg info = lookupPackageName core pkgNm descriptions = do @@ -137,7 +134,53 @@ rankIO core vers downs upl pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs - +versionScore + :: ServerPartE [Version] + -> VersionsFeature + -> ServerPartE [UTCTime] + -> PackageDescription + -> ServerPartE Scorer +versionScore versionList versions lastUploads desc = do + intUse <- intUsable + depre <- deprec + lUps <- lastUploads + return $ calculateScore depre lUps intUse + where + pkgNm = pkgName $ package desc + partVers = + versionList + >>= (\y -> + liftIO + $ queryGetPreferredInfo versions pkgNm + >>= (\x -> return $ partitionVersions x y) + ) + intUsable = do + (norm, _, unpref) <- partVers + return $ versionNumbers <$> norm ++ unpref + deprec = do + (_, deprec, _) <- partVers + return deprec + calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer + calculateScore [] _ _ = Scorer 118 0 + calculateScore depre lUps intUse = + boolScor 20 (length intUse > 1) + >< scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + >< scorer + 15 + (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) + intUse + ) + >< scorer + 20 + (int2Double $ 4 * length + (filter (\x -> major x > 0 && patches x > 0) intUse) + ) + >< scorer + 10 + (int2Double $ patches $ head $ sortBy (comparing patches) intUse) + >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + >< boolScor 5 (not $ null $ depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do @@ -150,7 +193,7 @@ temporalScore core versions download upload p lastUploads versionList downloadsP pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore <$> downloadsPerMonth - calcDownScore i = Scorer 5 $ max + calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) From a6bfafd286215524e24773f08c5a97227e6c44cf Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 19 Jul 2022 22:27:38 +0200 Subject: [PATCH 093/129] added authorScore --- .../Server/Features/PackageRank.hs | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index df2868c93..0315c079b 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -20,7 +20,8 @@ import Distribution.Server.Util.CountingMap import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) -import Data.List ( sort +import Data.List ( maximumBy + , sort , sortBy ) import Data.Maybe ( isNothing ) @@ -92,12 +93,6 @@ freshness (x : xs) lastUpd app = decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) --- -- Number of maintainers --- maintNum :: IO Double --- maintNum = do --- maint <- queryUserGroups [maintainersGroup upload pkgNm] --- return . int2Double $ size maint - rankIO :: CoreResource -> VersionsFeature @@ -116,7 +111,8 @@ rankIO core vers downs upl pkg = do versionList downloadsPerMonth vers <- versionScore versionList vers lastUploads pkg - return (temp >< vers) + auth <- authorScore upl pkg + return (temp >< vers >< auth) where pkgNm = pkgName $ package pkg @@ -134,6 +130,19 @@ rankIO core vers downs upl pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs +authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore upload desc = + liftIO maintScore + >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) >< x) + where + pkgNm = pkgName $ package desc + maintScore :: IO Scorer + maintScore = do + maint <- queryUserGroups [maintainersGroup upload pkgNm] + + return $ boolScor 3 (size maint > 1) >< scorer 5 (int2Double $ size maint) + + versionScore :: ServerPartE [Version] -> VersionsFeature @@ -175,12 +184,10 @@ versionScore versionList versions lastUploads desc = do (int2Double $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - >< scorer - 10 - (int2Double $ patches $ head $ sortBy (comparing patches) intUse) - >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + >< scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - >< boolScor 5 (not $ null $ depre) + >< boolScor 5 (not $ null depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do From 0186f6e98b0751b320b87fd861eb5a16f015a039 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 22 Jul 2022 23:19:38 +0200 Subject: [PATCH 094/129] instance Semigroup Scorer --- .../Server/Features/PackageRank.hs | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0315c079b..2d5caeaaa 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -10,6 +10,8 @@ import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) +import Distribution.Server.Framework.Feature + ( queryState ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -45,6 +47,9 @@ data Scorer = Scorer , score :: Double } +instance Semigroup Scorer where + (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) + scorer maxim frac = case maxim >= frac of true -> Scorer maxim frac false -> Scorer maxim maxim @@ -54,9 +59,6 @@ fracScor maxim frac = Scorer maxim (maxim * frac) boolScor k true = Scorer k k boolScor k false = Scorer k 0 -(><) :: Scorer -> Scorer -> Scorer -(><) (Scorer a b) (Scorer c d) = Scorer (a + c) (b + d) - total (Scorer a b) = a / b major (x : xs) = x @@ -92,9 +94,11 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) +-- lookupPackageId +-- queryHasDocumentation rankIO - :: CoreResource + :: CoreFeature -> VersionsFeature -> DownloadFeature -> UploadFeature @@ -102,7 +106,7 @@ rankIO -> ServerPartE Scorer rankIO core vers downs upl pkg = do - temp <- temporalScore core + temp <- temporalScore coreR vers downs upl @@ -112,11 +116,13 @@ rankIO core vers downs upl pkg = do downloadsPerMonth vers <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - return (temp >< vers >< auth) + return (temp <> vers <> auth) where + pkgId = package pkg pkgNm = pkgName $ package pkg - info = lookupPackageName core pkgNm + info = lookupPackageName coreR pkgNm + coreR = coreResource core descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -133,14 +139,14 @@ rankIO core vers downs upl pkg = do authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = liftIO maintScore - >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) >< x) + >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do maint <- queryUserGroups [maintainersGroup upload pkgNm] - return $ boolScor 3 (size maint > 1) >< scorer 5 (int2Double $ size maint) + return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) versionScore @@ -173,28 +179,28 @@ versionScore versionList versions lastUploads desc = do calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) - >< scorer 40 (numDays (safeHead lUps) (safeLast lUps)) - >< scorer + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + <> scorer 15 (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) intUse ) - >< scorer + <> scorer 20 (int2Double $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - >< scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) - >< boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) - >< boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - >< boolScor 5 (not $ null depre) + <> scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + <> boolScor 5 (not $ null depre) temporalScore core versions download upload p lastUploads versionList downloadsPerMonth = do fresh <- freshnessScore downs <- downloadScore tract <- tractionScore - return $ tract >< fresh >< downs + return $ tract <> fresh <> downs where pkgNm :: PackageName pkgNm = pkgName $ package p @@ -218,7 +224,7 @@ temporalScore core versions download upload p lastUploads versionList downloadsP downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) -rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats +rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -229,7 +235,7 @@ rankPackagePage p = tests >< benchs >< desc >< homeP >< sourceRp >< cats cats = boolScor 5 (not $ S.null $ category p) rankPackage - :: CoreResource + :: CoreFeature -> VersionsFeature -> DownloadFeature -> UploadFeature From d3a6c06584a80d36121da10aaa9041948b1cc099 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 23 Jul 2022 15:41:21 +0200 Subject: [PATCH 095/129] got tarballs and fixed warnings --- .../Server/Features/PackageRank.hs | 126 +++++++++++------- 1 file changed, 80 insertions(+), 46 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 2d5caeaaa..0ff9dfec4 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -5,11 +5,15 @@ module Distribution.Server.Features.PackageRank import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Core +import Distribution.Server.Features.Documentation + ( DocumentationFeature(..) ) import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) +import Distribution.Server.Framework.BlobStorage + ( BlobId ) import Distribution.Server.Framework.Feature ( queryState ) import Distribution.Server.Packages.Types @@ -23,7 +27,6 @@ import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) import Data.List ( maximumBy - , sort , sortBy ) import Data.Maybe ( isNothing ) @@ -50,32 +53,37 @@ data Scorer = Scorer instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) -scorer maxim frac = case maxim >= frac of - true -> Scorer maxim frac - false -> Scorer maxim maxim --- frac 0<=frac<=1 -fracScor maxim frac = Scorer maxim (maxim * frac) +scorer :: Double -> Double -> Scorer +scorer maxim scr = if maxim >= scr then (Scorer maxim scr) else (Scorer maxim maxim) -boolScor k true = Scorer k k -boolScor k false = Scorer k 0 +fracScor :: Double -> Double -> Scorer +fracScor maxim frac = scorer maxim (maxim * frac) +boolScor :: Double -> Bool -> Scorer +boolScor k True = Scorer k k +boolScor k False = Scorer k 0 + +total :: Scorer -> Double total (Scorer a b) = a / b -major (x : xs) = x -major _ = 0 -minor (x : y : xs) = y -minor _ = 0 -patches (x : y : xs) = sum xs +major :: Num a => [a] -> a +major (x : _) = x +major _ = 0 +minor :: Num a => [a] -> a +minor (_ : y : _) = y +minor _ = 0 +patches :: Num a => [a] -> a +patches (_ : _ : xs) = sum xs patches _ = 0 numDays :: Maybe UTCTime -> Maybe UTCTime -> Double -numDays (Just first) (Just last) = - fromRational $ toRational $ diffUTCTime first last / fromRational +numDays (Just first) (Just end) = + fromRational $ toRational $ diffUTCTime first end / fromRational (toRational nominalDay) numDays _ _ = 0 freshness :: [Version] -> UTCTime -> Bool -> IO Double -freshness [] _ app = return 0 +freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration >>= (\dExp -> return $ max 0 $ (decayDays - dExp) / decayDays) @@ -97,32 +105,26 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation +-- TODO CoreFeature can be substituted by CoreResource rankIO - :: CoreFeature + :: CoreResource -> VersionsFeature -> DownloadFeature -> UploadFeature + -> DocumentationFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl pkg = do - temp <- temporalScore coreR - vers - downs - upl - pkg - lastUploads - versionList - downloadsPerMonth - vers <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg - return (temp <> vers <> auth) +rankIO core vers downs upl docs pkg = do + temp <- temporalScore pkg lastUploads versionList downloadsPerMonth + versS <- versionScore versionList vers lastUploads pkg + auth <- authorScore upl pkg + return (temp <> versS <> auth) where pkgId = package pkg - pkgNm = pkgName $ package pkg - info = lookupPackageName coreR pkgNm - coreR = coreResource core + pkgNm = pkgName pkgId + info = lookupPackageName core pkgNm descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -135,6 +137,31 @@ rankIO core vers downs upl pkg = do . map (pkgVersion . package . packageDescription) <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + -- TODO get appropriate pkgInfo (head might fail) + packageTarball = pkgLatestTarball . head <$> info + documentTarball :: ServerPartE (Maybe BlobId) + documentTarball = queryDocumentation docs pkgId + +-- mdocs <- queryState documentationState $ LookupDocumentation pkgid +-- case mdocs of +-- Nothing -> +-- errNotFoundH "Not Found" +-- [ MText "There is no documentation for " +-- , MLink (display pkgid) ("/package/" ++ display pkgid) +-- , MText ". See " +-- , MLink canonicalLink canonicalLink +-- , MText " for the latest version." +-- ] +-- where +-- -- Essentially errNotFound, but overloaded to specify a header. +-- -- (Needed since errNotFound throws away result of setHeaderM) +-- errNotFoundH title message = throwError +-- (ErrorResponse 404 +-- [("Link", canonicalHeader)] +-- title message) +-- Just blob -> do +-- index <- liftIO $ cachedTarIndex blob +-- func pkgid blob index authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -173,8 +200,8 @@ versionScore versionList versions lastUploads desc = do (norm, _, unpref) <- partVers return $ versionNumbers <$> norm ++ unpref deprec = do - (_, deprec, _) <- partVers - return deprec + (_, deprecN, _) <- partVers + return deprecN calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = @@ -195,15 +222,18 @@ versionScore versionList versions lastUploads desc = do <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) <> boolScor 5 (not $ null depre) -temporalScore core versions download upload p lastUploads versionList downloadsPerMonth - = do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ tract <> fresh <> downs +temporalScore + :: PackageDescription + -> ServerPartE [UTCTime] + -> ServerPartE [Version] + -> ServerPartE Int + -> ServerPartE Scorer +temporalScore p lastUploads versionList downloadsPerMonth = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract <> fresh <> downs where - pkgNm :: PackageName - pkgNm = pkgName $ package p isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ min @@ -224,25 +254,29 @@ temporalScore core versions download upload p lastUploads versionList downloadsP downs <- downloadsPerMonth return $ boolScor 1 (fresh * int2Double downs > 1000) +rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) - -- ducumentation = boolScor 30 () + -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) +-- TODO fix the function Signature replace PackageDescription to PackageName/Identifier + rankPackage - :: CoreFeature + :: CoreResource -> VersionsFeature -> DownloadFeature -> UploadFeature + -> DocumentationFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload p = - rankIO core versions download upload p +rankPackage core versions download upload docs p = + rankIO core versions download upload docs p >>= (\x -> return $ total x + total (rankPackagePage p)) From a34fed2d96a70bc1278d047f28cd0bd0b103eddb Mon Sep 17 00:00:00 2001 From: kubaneko Date: Mon, 25 Jul 2022 14:32:18 +0200 Subject: [PATCH 096/129] extracted documentation length --- .../Server/Features/PackageRank.hs | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0ff9dfec4..c107789ca 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -14,6 +14,8 @@ import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import Distribution.Server.Framework.BlobStorage ( BlobId ) +import qualified Distribution.Server.Framework.BlobStorage + as BlobStorage import Distribution.Server.Framework.Feature ( queryState ) import Distribution.Server.Packages.Types @@ -26,6 +28,7 @@ import Distribution.Server.Util.CountingMap import Distribution.Types.Version import Control.Monad.IO.Class ( liftIO ) +import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) @@ -39,6 +42,8 @@ import Data.Time.Clock ( UTCTime(..) , getCurrentTime , nominalDay ) +import Distribution.Server.Framework.ServerEnv + ( ServerEnv(..) ) import Distribution.Simple.Utils ( safeHead , safeLast ) @@ -54,7 +59,8 @@ instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) scorer :: Double -> Double -> Scorer -scorer maxim scr = if maxim >= scr then (Scorer maxim scr) else (Scorer maxim maxim) +scorer maxim scr = + if maxim >= scr then Scorer maxim scr else Scorer maxim maxim fracScor :: Double -> Double -> Scorer fracScor maxim frac = scorer maxim (maxim * frac) @@ -112,10 +118,11 @@ rankIO -> DownloadFeature -> UploadFeature -> DocumentationFeature + -> ServerEnv -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs pkg = do +rankIO core vers downs upl docs env pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg @@ -139,29 +146,19 @@ rankIO core vers downs upl docs pkg = do downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) packageTarball = pkgLatestTarball . head <$> info - documentTarball :: ServerPartE (Maybe BlobId) - documentTarball = queryDocumentation docs pkgId + documentBlob :: ServerPartE (Maybe BlobId) + documentBlob = queryDocumentation docs pkgId + blobStore = serverBlobStore env + documentation = do + blob <- documentBlob + maybeIO blob + where + maybeIO Nothing = return Nothing + maybeIO (Just a) = liftIO (Just <$> BlobStorage.fetch blobStore a) --- mdocs <- queryState documentationState $ LookupDocumentation pkgid --- case mdocs of --- Nothing -> --- errNotFoundH "Not Found" --- [ MText "There is no documentation for " --- , MLink (display pkgid) ("/package/" ++ display pkgid) --- , MText ". See " --- , MLink canonicalLink canonicalLink --- , MText " for the latest version." --- ] --- where --- -- Essentially errNotFound, but overloaded to specify a header. --- -- (Needed since errNotFound throws away result of setHeaderM) --- errNotFoundH title message = throwError --- (ErrorResponse 404 --- [("Link", canonicalHeader)] --- title message) --- Just blob -> do --- index <- liftIO $ cachedTarIndex blob --- func pkgid blob index + documLines = + (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) + <$> documentation -- 10 is \n authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -273,10 +270,11 @@ rankPackage -> DownloadFeature -> UploadFeature -> DocumentationFeature + -> ServerEnv -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs p = - rankIO core versions download upload docs p +rankPackage core versions download upload docs env p = + rankIO core versions download upload docs env p >>= (\x -> return $ total x + total (rankPackagePage p)) From 60582491d826877ef0bfd92a256541e7e254decb Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 27 Jul 2022 22:50:20 +0200 Subject: [PATCH 097/129] got tarEntries for package and fixed it for documentation --- .../Server/Features/PackageRank.hs | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c107789ca..9166638f7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -10,10 +10,9 @@ import Distribution.Server.Features.Documentation import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State +import Distribution.Server.Features.TarIndexCache import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) -import Distribution.Server.Framework.BlobStorage - ( BlobId ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.Feature @@ -27,6 +26,11 @@ import Distribution.Server.Util.CountingMap ( cmFind ) import Distribution.Types.Version +import Control.Monad ( forM + , join + , liftM2 + , mapM + ) import Control.Monad.IO.Class ( liftIO ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy @@ -37,6 +41,7 @@ import Data.Ord ( comparing , max , min ) +import qualified Data.TarIndex as TarIndex import Data.Time.Clock ( UTCTime(..) , diffUTCTime , getCurrentTime @@ -54,6 +59,7 @@ data Scorer = Scorer { maximum :: Double , score :: Double } + deriving Show instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) @@ -119,10 +125,11 @@ rankIO -> UploadFeature -> DocumentationFeature -> ServerEnv + -> TarIndexCacheFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs env pkg = do +rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg @@ -145,20 +152,32 @@ rankIO core vers downs upl docs env pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageTarball = pkgLatestTarball . head <$> info - documentBlob :: ServerPartE (Maybe BlobId) - documentBlob = queryDocumentation docs pkgId - blobStore = serverBlobStore env - documentation = do + packageTarB = info >>= liftIO . packageTarball tarCache . head + packageTarEntr = do + tarB <- packageTarB + return + . join + $ (\(path, _, index) -> TarIndex.lookup index path) + <$> rightToMaybe tarB + rightToMaybe (Right a) = Just a + rightToMaybe (Left _) = Nothing + documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) + documentBlob = queryDocumentation docs pkgId + documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentationEntry = do + index <- documentIndex + path <- documentPath + return . join $ liftM2 TarIndex.lookup index path + + blobStore = serverBlobStore env + documentPath = do blob <- documentBlob - maybeIO blob - where - maybeIO Nothing = return Nothing - maybeIO (Just a) = liftIO (Just <$> BlobStorage.fetch blobStore a) + return $ (BlobStorage.filepath blobStore) <$> blob - documLines = - (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) - <$> documentation -- 10 is \n + -- TODO fix this + --documLines = + -- (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) + -- <$> documentation -- 10 is \n authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -271,10 +290,11 @@ rankPackage -> UploadFeature -> DocumentationFeature -> ServerEnv + -> TarIndexCacheFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs env p = - rankIO core versions download upload docs env p +rankPackage core versions download upload docs env tarCache p = + rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) From b1ac101c333b0c58e6bb3e8de116bdd3512ab497 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 29 Jul 2022 17:18:22 +0200 Subject: [PATCH 098/129] added codeScore --- .../Server/Features/PackageRank.hs | 109 ++++++++++++------ 1 file changed, 71 insertions(+), 38 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9166638f7..1c4564ea6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,9 +1,9 @@ +{-# LANGUAGE TupleSections #-} + module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) @@ -15,8 +15,8 @@ import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage -import Distribution.Server.Framework.Feature - ( queryState ) +import Distribution.Server.Framework.ServerEnv + ( ServerEnv(..) ) import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups @@ -24,12 +24,18 @@ import Distribution.Server.Users.Group ) import Distribution.Server.Util.CountingMap ( cmFind ) +import Distribution.Package +import Distribution.PackageDescription import Distribution.Types.Version +import Distribution.Simple.Utils ( safeHead + , safeLast + ) +import qualified Distribution.Utils.ShortText as S -import Control.Monad ( forM - , join +import qualified Codec.Archive.Tar as Tar +import qualified Codec.Archive.Tar.Entry as Tar +import Control.Monad ( join , liftM2 - , mapM ) import Control.Monad.IO.Class ( liftIO ) import qualified Data.ByteString.Lazy as BSL @@ -37,23 +43,16 @@ import Data.List ( maximumBy , sortBy ) import Data.Maybe ( isNothing ) -import Data.Ord ( comparing - , max - , min - ) -import qualified Data.TarIndex as TarIndex +import Data.Ord ( comparing ) +import qualified Data.TarIndex as T import Data.Time.Clock ( UTCTime(..) , diffUTCTime , getCurrentTime , nominalDay ) -import Distribution.Server.Framework.ServerEnv - ( ServerEnv(..) ) -import Distribution.Simple.Utils ( safeHead - , safeLast - ) -import qualified Distribution.Utils.ShortText as S import GHC.Float ( int2Double ) +import System.FilePath ( isExtensionOf ) +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -133,7 +132,8 @@ rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - return (temp <> versS <> auth) + codeS <- codeScore documentLines srcLines packageLines + return (temp <> versS <> auth <> codeS) where pkgId = package pkg @@ -152,32 +152,53 @@ rankIO core vers downs upl docs env tarCache pkg = do <$> descriptions downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageTarB = info >>= liftIO . packageTarball tarCache . head - packageTarEntr = do - tarB <- packageTarB + packageEntr = do + inf <- info + tarB <- liftIO . packageTarball tarCache . head $ inf return - . join - $ (\(path, _, index) -> TarIndex.lookup index path) - <$> rightToMaybe tarB + $ (\(path, _, index) -> (path, ) <$> T.lookup index path) + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing + documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) - documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) - documentationEntry = do + documentBlob = queryDocumentation docs pkgId + documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentationEntr = do index <- documentIndex path <- documentPath - return . join $ liftM2 TarIndex.lookup index path + return $ liftM2 (,) path (join $ liftM2 T.lookup index path) + documentLines = documentationEntr >>= liftIO . filterLinesTar (const True) + srcLines = packageEntr >>= liftIO . filterLinesTar (isExtensionOf ".hs") + packageLines = packageEntr >>= liftIO . filterLinesTar (const True) + + filterLinesTar + :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double + filterLinesTar f (Just (path, T.TarFileEntry offset)) = + if f path then getLines path offset else return 0 + filterLinesTar f (Just (_, T.TarDir dir)) = + sum <$> mapM (filterLinesTar f . Just) dir + filterLinesTar _ _ = return 0 + + -- TODO if size is too big give it a good score and do not read the file + getLines path offset = do + handle <- SIO.openFile path SIO.ReadMode + SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512) + header <- BSL.hGet handle 512 + case Tar.read header of + (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do + body <- BSL.hGet handle (fromIntegral siz) + return + $ int2Double + . length + . filter (not . BSL.null) + . BSL.split 10 + $ body + _ -> return 0 - blobStore = serverBlobStore env documentPath = do blob <- documentBlob - return $ (BlobStorage.filepath blobStore) <$> blob - - -- TODO fix this - --documLines = - -- (int2Double . length . filter (not . BSL.null) . BSL.split 10 <$>) - -- <$> documentation -- 10 is \n + return $ BlobStorage.filepath (serverBlobStore env) <$> blob authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer authorScore upload desc = @@ -191,6 +212,20 @@ authorScore upload desc = return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) +codeScore + :: ServerPartE Double + -> ServerPartE Double + -> ServerPartE Double + -> ServerPartE Scorer +codeScore documentL haskellL packageL = do + docum <- documentL + haskell <- haskellL + pkg <- packageL + return + $ boolScor 1 (pkg > 700) + <> boolScor 1 (pkg < 80000) + <> fracScor 2 (min 1 (haskell / 5000)) + <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) versionScore :: ServerPartE [Version] @@ -296,5 +331,3 @@ rankPackage rankPackage core versions download upload docs env tarCache p = rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) - - From 68d2d67213b59f4f7877f5da87d36135109de475 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 2 Aug 2022 17:10:42 +0200 Subject: [PATCH 099/129] replaced some Features by ListFeature --- .../Server/Features/PackageRank.hs | 81 +++++++++---------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1c4564ea6..c7b0c68cd 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,32 +4,25 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where +import Distribution.Package +import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.DownloadCount +import Distribution.Server.Features.PackageList import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache -import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types -import Distribution.Server.Users.Group - ( queryUserGroups - , size - ) -import Distribution.Server.Util.CountingMap - ( cmFind ) -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead , safeLast ) +import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar @@ -52,7 +45,7 @@ import Data.Time.Clock ( UTCTime(..) ) import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -116,22 +109,20 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation --- TODO CoreFeature can be substituted by CoreResource rankIO :: CoreResource -> VersionsFeature - -> DownloadFeature - -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature + -> ListFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers downs upl docs env tarCache pkg = do +rankIO core vers docs env tarCache list pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg + auth <- authorScore pkg pkgIt codeS <- codeScore documentLines srcLines packageLines return (temp <> versS <> auth <> codeS) @@ -139,6 +130,7 @@ rankIO core vers downs upl docs env tarCache pkg = do pkgId = package pkg pkgNm = pkgName pkgId info = lookupPackageName core pkgNm + pkgIt = safeHead <$> makeItemList list [pkgNm] descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -150,14 +142,18 @@ rankIO core vers downs upl docs env tarCache pkg = do sortBy (flip compare) . map (pkgVersion . package . packageDescription) <$> descriptions - downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + downloadsPerMonth :: ServerPartE (Maybe Int) + downloadsPerMonth = liftIO $ do + items <- pkgIt + return (itemDownloads <$> items) -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do inf <- info - tarB <- liftIO . packageTarball tarCache . head $ inf + tarB <- liftIO $ mapM (packageTarball tarCache) (safeHead inf) return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB + =<< (join $ rightToMaybe <$> tarB) + rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -200,17 +196,19 @@ rankIO core vers downs upl docs env tarCache pkg = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer -authorScore upload desc = +authorScore + :: PackageDescription -> IO (Maybe PackageItem) -> ServerPartE Scorer +authorScore desc item = liftIO maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - - return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) + it <- item + return $ boolScor 3 (nMaint it > 1) <> scorer 5 (int2Double $ nMaint it) + nMaint (Just iT) = length $ itemMaintainer iT + nMaint Nothing = 0 codeScore :: ServerPartE Double @@ -277,16 +275,18 @@ temporalScore :: PackageDescription -> ServerPartE [UTCTime] -> ServerPartE [Version] - -> ServerPartE Int + -> ServerPartE (Maybe Int) -> ServerPartE Scorer -temporalScore p lastUploads versionList downloadsPerMonth = do - fresh <- freshnessScore - downs <- downloadScore - tract <- tractionScore - return $ tract <> fresh <> downs +temporalScore p lastUploads versionList downloadsPM = do + download <- downloadsPM + fresh <- freshnessScore + downS <- downloadScore download + tract <- tractionScore download + return $ tract <> fresh <> downS where - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = calcDownScore <$> downloadsPerMonth + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore Nothing = return $ scorer 5 0 + downloadScore (Just downloads) = return $ calcDownScore downloads calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -300,10 +300,10 @@ temporalScore p lastUploads versionList downloadsPerMonth = do _ -> liftIO $ freshness vers (head ups) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching - tractionScore = do + tractionScore Nothing = return $ scorer 1 0 + tractionScore (Just downloads) = do fresh <- packageFreshness - downs <- downloadsPerMonth - return $ boolScor 1 (fresh * int2Double downs > 1000) + return $ boolScor 1 (fresh * int2Double downloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -321,13 +321,12 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: CoreResource -> VersionsFeature - -> DownloadFeature - -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature + -> ListFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions download upload docs env tarCache p = - rankIO core versions download upload docs env tarCache p +rankPackage core versions docs env tarCache list p = + rankIO core versions docs env tarCache list p >>= (\x -> return $ total x + total (rankPackagePage p)) From 544e9c0d5af89e53afb34f1270eb58bdca13398a Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 3 Aug 2022 18:57:20 +0200 Subject: [PATCH 100/129] added some Features to BrowseFeatures - prototype --- src/Distribution/Server/Features.hs | 4 ++++ src/Distribution/Server/Features/Browse.hs | 9 ++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index f8a8e362e..6d7507903 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -351,6 +351,10 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do searchFeature distroFeature candidatesFeature + versionsFeature + documentationCoreFeature + tarIndexCacheFeature + env packageInfoJSONFeature <- mkPackageJSONFeature coreFeature diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index ada4b622c..5da4fa97d 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -25,6 +25,9 @@ import Distribution.Server.Features.PackageList (ListFeature(ListFeature), Packa import Distribution.Server.Features.Search (SearchFeature(SearchFeature), searchPackages) import Distribution.Server.Features.Tags (Tag(..), TagsFeature(TagsFeature), TagsResource, tagUri, tagsResource) import Distribution.Server.Features.Users (UserFeature(UserFeature), UserResource, userResource, userPageUri) +import Distribution.Server.Features.PreferredVersions (VersionsFeature) +import Distribution.Server.Features.Documentation (DocumentationFeature) +import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature) import Distribution.Server.Framework.Error (ErrorResponse(ErrorResponse)) import Distribution.Server.Framework.Feature (HackageFeature(..), emptyHackageFeature) import Distribution.Server.Framework.RequestContentTypes (expectAesonContent) @@ -47,6 +50,10 @@ type BrowseFeature = -> SearchFeature -> DistroFeature -> PackageCandidatesFeature + -> VersionsFeature + -> DocumentationFeature + -> TarIndexCacheFeature + -> ServerEnv -> IO HackageFeature data ResponseFormatWithMethod @@ -63,7 +70,7 @@ initBrowseFeature ServerEnv{serverTemplatesDir, serverTemplatesMode} = do , "noscript-search-form.html" , "noscript-next-page-form.html" ] - pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature -> do + pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature versionsFeature documentationFeature tarIndexCacheFeature serverEnv -> do let html = htmlUtilities coreFeature packageCandidatesFeature tagsFeature userFeature renderer :: ResponseFormatWithMethod -> ServerPartT (ExceptT ErrorResponse IO) Response From 5e5cc6a0d867f1cb026aa62ad80f2ac0ed5a47ae Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 4 Aug 2022 22:26:54 +0200 Subject: [PATCH 101/129] Revert "added some Features to BrowseFeatures - prototype" This reverts commit 124006fe3c1d3b01942def21d560b6f13b9e6dec. --- src/Distribution/Server/Features.hs | 4 ---- src/Distribution/Server/Features/Browse.hs | 9 +-------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index 6d7507903..f8a8e362e 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -351,10 +351,6 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do searchFeature distroFeature candidatesFeature - versionsFeature - documentationCoreFeature - tarIndexCacheFeature - env packageInfoJSONFeature <- mkPackageJSONFeature coreFeature diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index 5da4fa97d..ada4b622c 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -25,9 +25,6 @@ import Distribution.Server.Features.PackageList (ListFeature(ListFeature), Packa import Distribution.Server.Features.Search (SearchFeature(SearchFeature), searchPackages) import Distribution.Server.Features.Tags (Tag(..), TagsFeature(TagsFeature), TagsResource, tagUri, tagsResource) import Distribution.Server.Features.Users (UserFeature(UserFeature), UserResource, userResource, userPageUri) -import Distribution.Server.Features.PreferredVersions (VersionsFeature) -import Distribution.Server.Features.Documentation (DocumentationFeature) -import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature) import Distribution.Server.Framework.Error (ErrorResponse(ErrorResponse)) import Distribution.Server.Framework.Feature (HackageFeature(..), emptyHackageFeature) import Distribution.Server.Framework.RequestContentTypes (expectAesonContent) @@ -50,10 +47,6 @@ type BrowseFeature = -> SearchFeature -> DistroFeature -> PackageCandidatesFeature - -> VersionsFeature - -> DocumentationFeature - -> TarIndexCacheFeature - -> ServerEnv -> IO HackageFeature data ResponseFormatWithMethod @@ -70,7 +63,7 @@ initBrowseFeature ServerEnv{serverTemplatesDir, serverTemplatesMode} = do , "noscript-search-form.html" , "noscript-next-page-form.html" ] - pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature versionsFeature documentationFeature tarIndexCacheFeature serverEnv -> do + pure \coreFeature userFeature tagsFeature listFeature searchFeature distroFeature packageCandidatesFeature -> do let html = htmlUtilities coreFeature packageCandidatesFeature tagsFeature userFeature renderer :: ResponseFormatWithMethod -> ServerPartT (ExceptT ErrorResponse IO) Response From 8c08fd1ef1da6ede6a59e9b791d31792a753a719 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 11:20:34 +0200 Subject: [PATCH 102/129] Revert "replaced some Features by ListFeature" This reverts commit a8ae12e6db9353000839dd7b4a97e8f2022d82d4. --- .../Server/Features/PackageRank.hs | 81 ++++++++++--------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index c7b0c68cd..1c4564ea6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,25 +4,32 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Package -import Distribution.PackageDescription import Distribution.Server.Features.Core import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.PackageList +import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache +import Distribution.Server.Features.Upload import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types +import Distribution.Server.Users.Group + ( queryUserGroups + , size + ) +import Distribution.Server.Util.CountingMap + ( cmFind ) +import Distribution.Package +import Distribution.PackageDescription +import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead , safeLast ) -import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar @@ -45,7 +52,7 @@ import Data.Time.Clock ( UTCTime(..) ) import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -109,20 +116,22 @@ freshness (x : xs) lastUpd app = -- lookupPackageId -- queryHasDocumentation +-- TODO CoreFeature can be substituted by CoreResource rankIO :: CoreResource -> VersionsFeature + -> DownloadFeature + -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> ListFeature -> PackageDescription -> ServerPartE Scorer -rankIO core vers docs env tarCache list pkg = do +rankIO core vers downs upl docs env tarCache pkg = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore pkg pkgIt + auth <- authorScore upl pkg codeS <- codeScore documentLines srcLines packageLines return (temp <> versS <> auth <> codeS) @@ -130,7 +139,6 @@ rankIO core vers docs env tarCache list pkg = do pkgId = package pkg pkgNm = pkgName pkgId info = lookupPackageName core pkgNm - pkgIt = safeHead <$> makeItemList list [pkgNm] descriptions = do infPkg <- info return (pkgDesc <$> infPkg) @@ -142,18 +150,14 @@ rankIO core vers docs env tarCache list pkg = do sortBy (flip compare) . map (pkgVersion . package . packageDescription) <$> descriptions - downloadsPerMonth :: ServerPartE (Maybe Int) - downloadsPerMonth = liftIO $ do - items <- pkgIt - return (itemDownloads <$> items) + downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do inf <- info - tarB <- liftIO $ mapM (packageTarball tarCache) (safeHead inf) + tarB <- liftIO . packageTarball tarCache . head $ inf return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< (join $ rightToMaybe <$> tarB) - + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -196,19 +200,17 @@ rankIO core vers docs env tarCache list pkg = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore - :: PackageDescription -> IO (Maybe PackageItem) -> ServerPartE Scorer -authorScore desc item = +authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore upload desc = liftIO maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer maintScore = do - it <- item - return $ boolScor 3 (nMaint it > 1) <> scorer 5 (int2Double $ nMaint it) - nMaint (Just iT) = length $ itemMaintainer iT - nMaint Nothing = 0 + maint <- queryUserGroups [maintainersGroup upload pkgNm] + + return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) codeScore :: ServerPartE Double @@ -275,18 +277,16 @@ temporalScore :: PackageDescription -> ServerPartE [UTCTime] -> ServerPartE [Version] - -> ServerPartE (Maybe Int) + -> ServerPartE Int -> ServerPartE Scorer -temporalScore p lastUploads versionList downloadsPM = do - download <- downloadsPM - fresh <- freshnessScore - downS <- downloadScore download - tract <- tractionScore download - return $ tract <> fresh <> downS +temporalScore p lastUploads versionList downloadsPerMonth = do + fresh <- freshnessScore + downs <- downloadScore + tract <- tractionScore + return $ tract <> fresh <> downs where - isApp = (isNothing . library) p && (not . null . executables) p - downloadScore Nothing = return $ scorer 5 0 - downloadScore (Just downloads) = return $ calcDownScore downloads + isApp = (isNothing . library) p && (not . null . executables) p + downloadScore = calcDownScore <$> downloadsPerMonth calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -300,10 +300,10 @@ temporalScore p lastUploads versionList downloadsPM = do _ -> liftIO $ freshness vers (head ups) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching - tractionScore Nothing = return $ scorer 1 0 - tractionScore (Just downloads) = do + tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double downloads > 1000) + downs <- downloadsPerMonth + return $ boolScor 1 (fresh * int2Double downs > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -321,12 +321,13 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: CoreResource -> VersionsFeature + -> DownloadFeature + -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> ListFeature -> PackageDescription -> ServerPartE Double -rankPackage core versions docs env tarCache list p = - rankIO core versions docs env tarCache list p +rankPackage core versions download upload docs env tarCache p = + rankIO core versions download upload docs env tarCache p >>= (\x -> return $ total x + total (rankPackagePage p)) From 2e5ac4bbc1d321a20ffe5b203ef0902dff165d22 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 11:48:50 +0200 Subject: [PATCH 103/129] changed ListFeature to fit PackageRank --- src/Distribution/Server/Features.hs | 2 ++ .../Server/Features/PackageList.hs | 24 +++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/Distribution/Server/Features.hs b/src/Distribution/Server/Features.hs index f8a8e362e..2b478d5fb 100644 --- a/src/Distribution/Server/Features.hs +++ b/src/Distribution/Server/Features.hs @@ -285,6 +285,8 @@ initHackageFeatures env@ServerEnv{serverVerbosity = verbosity} = do versionsFeature usersFeature uploadFeature + documentationCoreFeature + tarIndexCacheFeature searchFeature <- mkSearchFeature coreFeature diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 1a719fc22..abc594373 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -15,6 +15,9 @@ import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.Tags import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) +import Distribution.Server.Features.Documentation (DocumentationFeature(..)) +import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) + import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet import Distribution.Server.Users.Group(UserGroup(..), GroupDescription(..)) @@ -39,7 +42,6 @@ import Data.Set (Set) import qualified Data.Set as Set import Data.Time.Clock (UTCTime(..)) - data ListFeature = ListFeature { listFeatureInterface :: HackageFeature, @@ -108,6 +110,8 @@ initListFeature :: ServerEnv -> VersionsFeature -> UserFeature -> UploadFeature + -> DocumentationFeature + -> TarIndexCacheFeature -> IO ListFeature) initListFeature _env = do itemCache <- newMemStateWHNF Map.empty @@ -120,11 +124,12 @@ initListFeature _env = do tagsf@TagsFeature{..} versions@VersionsFeature{..} users@UserFeature{..} - uploads@UploadFeature{..} -> do + uploads@UploadFeature{..} + docum tar -> do let (feature, modifyItem, updateDesc) = listFeature core download votesf tagsf versions users uploads - itemCache itemUpdate + itemCache itemUpdate docum tar _env registerHookJust packageChangeHook isPackageChangeAny $ \(pkgid, _) -> updateDesc (packageName pkgid) @@ -180,6 +185,9 @@ listFeature :: CoreFeature -> UploadFeature -> MemState (Map PackageName PackageItem) -> Hook (Set PackageName) () + -> DocumentationFeature + -> TarIndexCacheFeature + -> ServerEnv -> (ListFeature, PackageName -> (PackageItem -> PackageItem) -> IO (), PackageName -> IO ()) @@ -192,6 +200,7 @@ listFeature CoreFeature{..} UserFeature{..} UploadFeature{..} itemCache itemUpdate + docum tar env = (ListFeature{..}, modifyItem, updateDesc) where listFeatureInterface = (emptyHackageFeature "list") { @@ -222,7 +231,7 @@ listFeature CoreFeature{..} let pkgs = PackageIndex.lookupPackageName index pkgname case pkgs of [] -> return () --this shouldn't happen - _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem (last pkgs) + _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem pkgs updateDesc pkgname = do index <- queryGetPackageIndex @@ -243,12 +252,13 @@ listFeature CoreFeature{..} constructItemIndex :: IO (Map PackageName PackageItem) constructItemIndex = do index <- queryGetPackageIndex - items <- mapM (constructItem . last) $ PackageIndex.allPackagesByName index + items <- mapM constructItem $ PackageIndex.allPackagesByName index return $ Map.fromList items - constructItem :: PkgInfo -> IO (PackageName, PackageItem) - constructItem pkg = do + constructItem :: [PkgInfo] -> IO (PackageName, PackageItem) + constructItem pkgs = do let pkgname = packageName pkg + pkg = last pkgs -- [reverse index disabled] revCount <- query . GetReverseCount $ pkgname users <- queryGetUserDb tags <- queryTagsForPackage pkgname From 8e8bf8903b79d784810e208d8ed6cd3543ee51db Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 5 Aug 2022 16:02:58 +0200 Subject: [PATCH 104/129] changed PackageRank to fit in constructItem --- .../Server/Features/PackageRank.hs | 171 +++++++----------- 1 file changed, 66 insertions(+), 105 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 1c4564ea6..53f552a4e 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -4,7 +4,8 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Server.Features.Core +import Distribution.Package +import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) import Distribution.Server.Features.DownloadCount @@ -12,7 +13,6 @@ import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import Distribution.Server.Features.Upload -import Distribution.Server.Framework ( ServerPartE ) import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv @@ -20,39 +20,28 @@ import Distribution.Server.Framework.ServerEnv import Distribution.Server.Packages.Types import Distribution.Server.Users.Group ( queryUserGroups - , size - ) + , size) import Distribution.Server.Util.CountingMap ( cmFind ) -import Distribution.Package -import Distribution.PackageDescription -import Distribution.Types.Version import Distribution.Simple.Utils ( safeHead - , safeLast - ) + , safeLast) +import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad ( join - , liftM2 - ) -import Control.Monad.IO.Class ( liftIO ) + , liftM2) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy - ) + , sortBy) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T -import Data.Time.Clock ( UTCTime(..) - , diffUTCTime - , getCurrentTime - , nominalDay - ) +import qualified Data.Time.Clock as CL import GHC.Float ( int2Double ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO +import qualified System.IO as SIO data Scorer = Scorer { maximum :: Double @@ -87,13 +76,13 @@ patches :: Num a => [a] -> a patches (_ : _ : xs) = sum xs patches _ = 0 -numDays :: Maybe UTCTime -> Maybe UTCTime -> Double +numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Double numDays (Just first) (Just end) = - fromRational $ toRational $ diffUTCTime first end / fromRational - (toRational nominalDay) + fromRational $ toRational $ CL.diffUTCTime first end / fromRational + (toRational CL.nominalDay) numDays _ _ = 0 -freshness :: [Version] -> UTCTime -> Bool -> IO Double +freshness :: [Version] -> CL.UTCTime -> Bool -> IO Double freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration @@ -110,7 +99,7 @@ freshness (x : xs) lastUpd app = | major v > 0 = 200 | minor v > 3 = 140 | otherwise = 80 - age = flip numDays (Just lastUpd) . Just <$> getCurrentTime + age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) -- lookupPackageId @@ -118,59 +107,54 @@ freshness (x : xs) lastUpd app = -- TODO CoreFeature can be substituted by CoreResource rankIO - :: CoreResource - -> VersionsFeature + :: VersionsFeature -> DownloadFeature -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> PackageDescription - -> ServerPartE Scorer + -> [PkgInfo] + -> IO Scorer -rankIO core vers downs upl docs env tarCache pkg = do +rankIO vers downs upl docs env tarCache pkgs = do temp <- temporalScore pkg lastUploads versionList downloadsPerMonth versS <- versionScore versionList vers lastUploads pkg auth <- authorScore upl pkg - codeS <- codeScore documentLines srcLines packageLines + codeS <- codeScore documentLines srcLines return (temp <> versS <> auth <> codeS) where - pkgId = package pkg - pkgNm = pkgName pkgId - info = lookupPackageName core pkgNm - descriptions = do - infPkg <- info - return (pkgDesc <$> infPkg) - lastUploads = do - infPkg <- info - return $ sortBy (flip compare) $ fst . pkgOriginalUploadInfo <$> infPkg - versionList = - do - sortBy (flip compare) - . map (pkgVersion . package . packageDescription) - <$> descriptions - downloadsPerMonth = liftIO $ cmFind pkgNm <$> recentPackageDownloads downs + pkg = packageDescription <$> pkgDesc $ last pkgs + pkgId = package pkg + pkgNm = pkgName pkgId + lastUploads = + sortBy (flip compare) + $ (fst . pkgOriginalUploadInfo <$> pkgs) + ++ (fst . pkgLatestUploadInfo <$> pkgs) + versionList :: [Version] + versionList = sortBy (flip compare) + $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + downloadsPerMonth = cmFind pkgNm <$> recentPackageDownloads downs -- TODO get appropriate pkgInfo (head might fail) packageEntr = do - inf <- info - tarB <- liftIO . packageTarball tarCache . head $ inf + tarB <- packageTarball tarCache . head $ pkgs return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing - documentBlob :: ServerPartE (Maybe BlobStorage.BlobId) + documentBlob :: IO (Maybe BlobStorage.BlobId) documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= liftIO . mapM (cachedTarIndex tarCache) + documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache) documentationEntr = do index <- documentIndex path <- documentPath return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines = documentationEntr >>= liftIO . filterLinesTar (const True) - srcLines = packageEntr >>= liftIO . filterLinesTar (isExtensionOf ".hs") - packageLines = packageEntr >>= liftIO . filterLinesTar (const True) + documentLines :: IO Double + documentLines = documentationEntr >>= filterLinesTar (const True) + srcLines :: IO Double + srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double @@ -188,22 +172,16 @@ rankIO core vers downs upl docs env tarCache pkg = do case Tar.read header of (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do body <- BSL.hGet handle (fromIntegral siz) - return - $ int2Double - . length - . filter (not . BSL.null) - . BSL.split 10 - $ body + return $ int2Double . length . BSL.split 10 $ body _ -> return 0 documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> ServerPartE Scorer +authorScore :: UploadFeature -> PackageDescription -> IO Scorer authorScore upload desc = - liftIO maintScore - >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) + maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) where pkgNm = pkgName $ package desc maintScore :: IO Scorer @@ -212,48 +190,37 @@ authorScore upload desc = return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) -codeScore - :: ServerPartE Double - -> ServerPartE Double - -> ServerPartE Double - -> ServerPartE Scorer -codeScore documentL haskellL packageL = do +codeScore :: IO Double -> IO Double -> IO Scorer +codeScore documentL haskellL = do docum <- documentL haskell <- haskellL - pkg <- packageL return - $ boolScor 1 (pkg > 700) - <> boolScor 1 (pkg < 80000) + $ boolScor 1 (haskell > 700) + <> boolScor 1 (haskell < 80000) <> fracScor 2 (min 1 (haskell / 5000)) <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) versionScore - :: ServerPartE [Version] + :: [Version] -> VersionsFeature - -> ServerPartE [UTCTime] + -> [CL.UTCTime] -> PackageDescription - -> ServerPartE Scorer + -> IO Scorer versionScore versionList versions lastUploads desc = do - intUse <- intUsable - depre <- deprec - lUps <- lastUploads - return $ calculateScore depre lUps intUse + use <- intUsable + depre <- deprec + return $ calculateScore depre lastUploads use where pkgNm = pkgName $ package desc partVers = - versionList - >>= (\y -> - liftIO - $ queryGetPreferredInfo versions pkgNm - >>= (\x -> return $ partitionVersions x y) - ) + flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm intUsable = do (norm, _, unpref) <- partVers return $ versionNumbers <$> norm ++ unpref deprec = do (_, deprecN, _) <- partVers return deprecN - calculateScore :: [Version] -> [UTCTime] -> [[Int]] -> Scorer + calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) @@ -274,11 +241,7 @@ versionScore versionList versions lastUploads desc = do <> boolScor 5 (not $ null depre) temporalScore - :: PackageDescription - -> ServerPartE [UTCTime] - -> ServerPartE [Version] - -> ServerPartE Int - -> ServerPartE Scorer + :: PackageDescription -> [CL.UTCTime] -> [Version] -> IO Int -> IO Scorer temporalScore p lastUploads versionList downloadsPerMonth = do fresh <- freshnessScore downs <- downloadScore @@ -292,18 +255,15 @@ temporalScore p lastUploads versionList downloadsPerMonth = do / (if isApp then 5 else 6) ) 5 - packageFreshness = do - ups <- lastUploads - vers <- versionList - case ups of - [] -> return 0 - _ -> liftIO $ freshness vers (head ups) isApp + packageFreshness = case lastUploads of + [] -> return 0 + _ -> freshness versionList (head lastUploads) isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do + dows <- downloadsPerMonth fresh <- packageFreshness - downs <- downloadsPerMonth - return $ boolScor 1 (fresh * int2Double downs > 1000) + return $ boolScor 1 (fresh * int2Double dows > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -319,15 +279,16 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats -- TODO fix the function Signature replace PackageDescription to PackageName/Identifier rankPackage - :: CoreResource - -> VersionsFeature + :: VersionsFeature -> DownloadFeature -> UploadFeature -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> PackageDescription - -> ServerPartE Double -rankPackage core versions download upload docs env tarCache p = - rankIO core versions download upload docs env tarCache p - >>= (\x -> return $ total x + total (rankPackagePage p)) + -> [PkgInfo] + -> IO Double +rankPackage versions download upload docs env tarCache pkgs = + total + . (<>) (rankPackagePage pkgD) + <$> rankIO versions download upload docs env tarCache pkgs + where pkgD = packageDescription $ pkgDesc $ last pkgs From da3c345656a827c0a314a46dae21765cff1202ea Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 14:30:46 +0200 Subject: [PATCH 105/129] integrated PackageRank into ListFeature --- .../Server/Features/PackageRank.hs | 70 ++++++++----------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 53f552a4e..adf496ef6 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,5 +1,7 @@ {-# LANGUAGE TupleSections #-} +-- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank + module Distribution.Server.Features.PackageRank ( rankPackage ) where @@ -8,33 +10,29 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) -import Distribution.Server.Features.DownloadCount import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache -import Distribution.Server.Features.Upload import qualified Distribution.Server.Framework.BlobStorage as BlobStorage import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types -import Distribution.Server.Users.Group - ( queryUserGroups - , size) -import Distribution.Server.Util.CountingMap - ( cmFind ) import Distribution.Simple.Utils ( safeHead - , safeLast) + , safeLast + ) import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import qualified Codec.Archive.Tar.Entry as Tar import Control.Monad ( join - , liftM2) + , liftM2 + ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy) + , sortBy + ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T @@ -108,25 +106,23 @@ freshness (x : xs) lastUpd app = -- TODO CoreFeature can be substituted by CoreResource rankIO :: VersionsFeature - -> DownloadFeature - -> UploadFeature + -> Int + -> Int -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] -> IO Scorer -rankIO vers downs upl docs env tarCache pkgs = do - temp <- temporalScore pkg lastUploads versionList downloadsPerMonth +rankIO vers recentDownloads maintainers docs env tarCache pkgs = do + temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg - auth <- authorScore upl pkg codeS <- codeScore documentLines srcLines - return (temp <> versS <> auth <> codeS) + return (temp <> versS <> codeS <> authorScore maintainers pkg) where pkg = packageDescription <$> pkgDesc $ last pkgs pkgId = package pkg - pkgNm = pkgName pkgId lastUploads = sortBy (flip compare) $ (fst . pkgOriginalUploadInfo <$> pkgs) @@ -134,9 +130,7 @@ rankIO vers downs upl docs env tarCache pkgs = do versionList :: [Version] versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) - downloadsPerMonth = cmFind pkgNm <$> recentPackageDownloads downs - -- TODO get appropriate pkgInfo (head might fail) - packageEntr = do + packageEntr = do tarB <- packageTarball tarCache . head $ pkgs return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) @@ -179,16 +173,12 @@ rankIO vers downs upl docs env tarCache pkgs = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob -authorScore :: UploadFeature -> PackageDescription -> IO Scorer -authorScore upload desc = - maintScore >>= (\x -> return $ boolScor 1 (not $ S.null $ author desc) <> x) +authorScore :: Int -> PackageDescription -> Scorer +authorScore maintainers desc = + boolScor 1 (not $ S.null $ author desc) <> maintScore where - pkgNm = pkgName $ package desc - maintScore :: IO Scorer - maintScore = do - maint <- queryUserGroups [maintainersGroup upload pkgNm] - - return $ boolScor 3 (size maint > 1) <> scorer 5 (int2Double $ size maint) + maintScore = + boolScor 3 (maintainers > 1) <> scorer 5 (int2Double maintainers) codeScore :: IO Double -> IO Double -> IO Scorer codeScore documentL haskellL = do @@ -241,15 +231,14 @@ versionScore versionList versions lastUploads desc = do <> boolScor 5 (not $ null depre) temporalScore - :: PackageDescription -> [CL.UTCTime] -> [Version] -> IO Int -> IO Scorer -temporalScore p lastUploads versionList downloadsPerMonth = do + :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer +temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore - downs <- downloadScore tract <- tractionScore - return $ tract <> fresh <> downs + return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p - downloadScore = calcDownScore <$> downloadsPerMonth + downloadScore = calcDownScore recentDownloads calcDownScore i = Scorer 5 $ min ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) @@ -261,9 +250,8 @@ temporalScore p lastUploads versionList downloadsPerMonth = do freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do - dows <- downloadsPerMonth fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double dows > 1000) + return $ boolScor 1 (fresh * int2Double recentDownloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats @@ -280,15 +268,15 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats rankPackage :: VersionsFeature - -> DownloadFeature - -> UploadFeature + -> Int + -> Int -> DocumentationFeature - -> ServerEnv -> TarIndexCacheFeature + -> ServerEnv -> [PkgInfo] -> IO Double -rankPackage versions download upload docs env tarCache pkgs = +rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) - <$> rankIO versions download upload docs env tarCache pkgs + <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs where pkgD = packageDescription $ pkgDesc $ last pkgs From a9351aa3aaf8c114b2f290d2e0ece9cae68b69d0 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 14:33:10 +0200 Subject: [PATCH 106/129] --no-edit --- .../Server/Features/PackageList.hs | 22 ++++++++++++------- src/Distribution/Server/Framework/MemSize.hs | 3 +++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index abc594373..960039201 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -17,6 +17,7 @@ import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) import Distribution.Server.Features.Documentation (DocumentationFeature(..)) import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) +import Distribution.Server.Features.PackageRank import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet @@ -87,18 +88,20 @@ data PackageItem = PackageItem { -- How many benchmarks (>=0) this package has. itemNumBenchmarks :: !Int, -- Last upload date - itemLastUpload :: !UTCTime + itemLastUpload :: !UTCTime, -- Hotness: a more heuristic way to sort packages. presently non-existent. - --itemHotness :: Int + --itemHotness :: Int + -- heuristic way to sort packages + itemPackageRank :: !Double } instance MemSize PackageItem where - memSize (PackageItem a b c d e f g h i j k l) = memSize12 a b c d e f g h i j k l + memSize (PackageItem a b c d e f g h i j k l m) = memSize13 a b c d e f g h i j k l m emptyPackageItem :: PackageName -> PackageItem emptyPackageItem pkg = PackageItem pkg Set.empty Nothing "" [] - 0 0 False 0 0 0 (UTCTime (toEnum 0) 0) + 0 0 False 0 0 0 (UTCTime (toEnum 0) 0) 0 initListFeature :: ServerEnv @@ -125,11 +128,11 @@ initListFeature _env = do versions@VersionsFeature{..} users@UserFeature{..} uploads@UploadFeature{..} - docum tar -> do + documentation tar -> do let (feature, modifyItem, updateDesc) = listFeature core download votesf tagsf versions users uploads - itemCache itemUpdate docum tar _env + itemCache itemUpdate documentation tar _env registerHookJust packageChangeHook isPackageChangeAny $ \(pkgid, _) -> updateDesc (packageName pkgid) @@ -196,11 +199,11 @@ listFeature CoreFeature{..} DownloadFeature{..} VotesFeature{..} TagsFeature{..} - VersionsFeature{..} + versions@VersionsFeature{..} UserFeature{..} UploadFeature{..} itemCache itemUpdate - docum tar env + documentation tar env = (ListFeature{..}, modifyItem, updateDesc) where listFeatureInterface = (emptyHackageFeature "list") { @@ -266,6 +269,8 @@ listFeature CoreFeature{..} votes <- pkgNumScore pkgname deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) + packageR <- rankPackage versions (cmFind pkgname downs) + (UserIdSet.size maintainers) documentation tar env pkgs return $ (,) pkgname $ (updateDescriptionItem (pkgDesc pkg) $ emptyPackageItem pkgname) { itemTags = tags @@ -275,6 +280,7 @@ listFeature CoreFeature{..} -- [reverse index disabled] , itemRevDepsCount = directReverseCount revCount , itemVotes = votes , itemLastUpload = fst (pkgOriginalUploadInfo pkg) + , itemPackageRank = packageR } ------------------------------ diff --git a/src/Distribution/Server/Framework/MemSize.hs b/src/Distribution/Server/Framework/MemSize.hs index 4af5d251f..f3e05f42a 100644 --- a/src/Distribution/Server/Framework/MemSize.hs +++ b/src/Distribution/Server/Framework/MemSize.hs @@ -135,6 +135,9 @@ instance MemSize Integer where instance MemSize Float where memSize _ = 2 +instance MemSize Double where + memSize _ = 3 + instance MemSize UTCTime where memSize _ = 7 From 728fb0d899b35ed812e62c0a29805651a2baac6f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 17:10:29 +0200 Subject: [PATCH 107/129] tried to add an column and failed --- datafiles/templates/Html/noscript-search-form.html.st | 1 + src/Distribution/Server/Features/Browse.hs | 3 ++- src/Distribution/Server/Features/Browse/ApplyFilter.hs | 1 + src/Distribution/Server/Features/Browse/Options.hs | 4 +++- src/Distribution/Server/Features/Browse/Parsers.hs | 3 ++- 5 files changed, 9 insertions(+), 3 deletions(-) diff --git a/datafiles/templates/Html/noscript-search-form.html.st b/datafiles/templates/Html/noscript-search-form.html.st index 7c1f318e4..0193d6e97 100644 --- a/datafiles/templates/Html/noscript-search-form.html.st +++ b/datafiles/templates/Html/noscript-search-form.html.st @@ -23,6 +23,7 @@ + diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index ada4b622c..d5d4497da 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -138,7 +138,7 @@ packageIndexInfoToValue :: CoreResource -> TagsResource -> UserResource -> Packa packageIndexInfoToValue coreResource tagsResource userResource PackageItem{itemName, itemDownloads, itemVotes, - itemDesc, itemTags, itemLastUpload, itemMaintainer} = + itemDesc, itemTags, itemLastUpload, itemMaintainer, itemPackageRank} = object [ Key.fromString "name" .= renderPackage itemName , Key.fromString "downloads" .= itemDownloads @@ -147,6 +147,7 @@ packageIndexInfoToValue , Key.fromString "tags" .= map renderTag (S.toAscList itemTags) , Key.fromString "lastUpload" .= iso8601Show itemLastUpload , Key.fromString "maintainers" .= map renderUser itemMaintainer + , Key.fromString "packageRank" .= itemPackageRank ] where renderTag :: Tag -> Value diff --git a/src/Distribution/Server/Features/Browse/ApplyFilter.hs b/src/Distribution/Server/Features/Browse/ApplyFilter.hs index f96a3367c..d085819ba 100644 --- a/src/Distribution/Server/Features/Browse/ApplyFilter.hs +++ b/src/Distribution/Server/Features/Browse/ApplyFilter.hs @@ -64,6 +64,7 @@ sort isSearch sortColumn sortDirection = Tags -> comparing (S.toAscList . itemTags) LastUpload -> comparing itemLastUpload Maintainers -> comparing itemMaintainer + PackageRank -> comparing itemPackageRank in sortBy (maybeReverse comparer) where maybeReverse = diff --git a/src/Distribution/Server/Features/Browse/Options.hs b/src/Distribution/Server/Features/Browse/Options.hs index 269be66ef..942681bc3 100644 --- a/src/Distribution/Server/Features/Browse/Options.hs +++ b/src/Distribution/Server/Features/Browse/Options.hs @@ -9,7 +9,7 @@ import Distribution.Server.Features.Browse.Parsers (Filter, conditions, condsToF data IsSearch = IsSearch | IsNotSearch -data NormalColumn = Name | Downloads | Rating | Description | Tags | LastUpload | Maintainers +data NormalColumn = Name | Downloads | Rating | Description | Tags | LastUpload | Maintainers | PackageRank deriving (Show, Eq) data Column = DefaultColumn | NormalColumn NormalColumn @@ -37,6 +37,7 @@ instance FromJSON Column where "tags" -> pure $ NormalColumn Tags "lastUpload" -> pure $ NormalColumn LastUpload "maintainers" -> pure $ NormalColumn Maintainers + "packageRank" -> pure $ NormalColumn PackageRank t -> fail $ "Column invalid: " ++ T.unpack t columnToTemplateName :: Column -> String @@ -49,6 +50,7 @@ columnToTemplateName = \case NormalColumn Tags -> "tags" NormalColumn LastUpload -> "lastUpload" NormalColumn Maintainers -> "maintainers" + NormalColumn PackageRank -> "packageRank" instance FromJSON Direction where parseJSON = diff --git a/src/Distribution/Server/Features/Browse/Parsers.hs b/src/Distribution/Server/Features/Browse/Parsers.hs index 6445bbc1c..2775cd797 100644 --- a/src/Distribution/Server/Features/Browse/Parsers.hs +++ b/src/Distribution/Server/Features/Browse/Parsers.hs @@ -85,7 +85,7 @@ allowedAfterOpeningBrace AllowNot = "not " <|> allowedAfterOpeningBrace Disallow allowedAfterOpeningBrace _ = asum [ "downloads", "rating", "lastUpload" , "ageOfLastUpload" - , "tag:", "maintainer:", "deprecated:", "distro:" + , "tag:", "maintainer:", "deprecated:", "distro:", "packageRank" ] -- Whether the 'not' operator can be used. @@ -113,6 +113,7 @@ filterWith allowNot = do "maintainer:" -> MaintainerFilter <$> wordWoSpaceOrParens "deprecated:" -> DeprecatedFilter <$> deprecatedOption "distro:" -> DistroFilter <$> wordWoSpaceOrParens + "packageRank" -> DownloadsFilter <$> opAndSndParam decimal _ -> fail "Impossible since fieldName possibilities are known at compile time" pure filt From 0d538ece4c90e9aae96dfd1ee2a19620434d9276 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 7 Aug 2022 17:54:45 +0200 Subject: [PATCH 108/129] switch Doubles for Floats --- .../Server/Features/PackageList.hs | 2 +- .../Server/Features/PackageRank.hs | 46 +++++++++---------- src/Distribution/Server/Framework/MemSize.hs | 3 -- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 960039201..c75ada37f 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -92,7 +92,7 @@ data PackageItem = PackageItem { -- Hotness: a more heuristic way to sort packages. presently non-existent. --itemHotness :: Int -- heuristic way to sort packages - itemPackageRank :: !Double + itemPackageRank :: !Float } instance MemSize PackageItem where diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index adf496ef6..e8bc74fd7 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -37,31 +37,31 @@ import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.TarIndex as T import qualified Data.Time.Clock as CL -import GHC.Float ( int2Double ) +import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximum :: Double - , score :: Double + { maximum :: Float + , score :: Float } deriving Show instance Semigroup Scorer where (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) -scorer :: Double -> Double -> Scorer +scorer :: Float -> Float -> Scorer scorer maxim scr = if maxim >= scr then Scorer maxim scr else Scorer maxim maxim -fracScor :: Double -> Double -> Scorer +fracScor :: Float -> Float -> Scorer fracScor maxim frac = scorer maxim (maxim * frac) -boolScor :: Double -> Bool -> Scorer +boolScor :: Float -> Bool -> Scorer boolScor k True = Scorer k k boolScor k False = Scorer k 0 -total :: Scorer -> Double +total :: Scorer -> Float total (Scorer a b) = a / b major :: Num a => [a] -> a @@ -74,13 +74,13 @@ patches :: Num a => [a] -> a patches (_ : _ : xs) = sum xs patches _ = 0 -numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Double +numDays :: Maybe CL.UTCTime -> Maybe CL.UTCTime -> Float numDays (Just first) (Just end) = fromRational $ toRational $ CL.diffUTCTime first end / fromRational (toRational CL.nominalDay) numDays _ _ = 0 -freshness :: [Version] -> CL.UTCTime -> Bool -> IO Double +freshness :: [Version] -> CL.UTCTime -> Bool -> IO Float freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = daysPastExpiration @@ -89,7 +89,7 @@ freshness (x : xs) lastUpd app = versionLatest = versionNumbers x daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = int2Double + expectedUpdateInterval = int2Float (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 @@ -145,13 +145,13 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do index <- documentIndex path <- documentPath return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines :: IO Double + documentLines :: IO Float documentLines = documentationEntr >>= filterLinesTar (const True) - srcLines :: IO Double + srcLines :: IO Float srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar - :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Double + :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float filterLinesTar f (Just (path, T.TarFileEntry offset)) = if f path then getLines path offset else return 0 filterLinesTar f (Just (_, T.TarDir dir)) = @@ -166,7 +166,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do case Tar.read header of (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do body <- BSL.hGet handle (fromIntegral siz) - return $ int2Double . length . BSL.split 10 $ body + return $ int2Float . length . BSL.split 10 $ body _ -> return 0 documentPath = do @@ -178,9 +178,9 @@ authorScore maintainers desc = boolScor 1 (not $ S.null $ author desc) <> maintScore where maintScore = - boolScor 3 (maintainers > 1) <> scorer 5 (int2Double maintainers) + boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) -codeScore :: IO Double -> IO Double -> IO Scorer +codeScore :: IO Float -> IO Float -> IO Scorer codeScore documentL haskellL = do docum <- documentL haskell <- haskellL @@ -217,15 +217,15 @@ versionScore versionList versions lastUploads desc = do <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) <> scorer 15 - (int2Double $ length $ filter (\x -> major x > 0 || minor x > 0) + (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) intUse ) <> scorer 20 - (int2Double $ 4 * length + (int2Float $ 4 * length (filter (\x -> major x > 0 && patches x > 0) intUse) ) - <> scorer 10 (int2Double $ patches $ maximumBy (comparing patches) intUse) + <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) <> boolScor 5 (not $ null depre) @@ -240,7 +240,7 @@ temporalScore p lastUploads versionList recentDownloads = do isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads calcDownScore i = Scorer 5 $ min - ( (logBase 2 (int2Double $ max 0 (i - 100) + 100) - 6.6) + ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) 5 @@ -251,14 +251,14 @@ temporalScore p lastUploads versionList recentDownloads = do -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Double recentDownloads > 1000) + return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) rankPackagePage :: PackageDescription -> Scorer rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) - desc = Scorer 30 (min 1 (int2Double (S.length $ description p) / 300)) + desc = Scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) @@ -274,7 +274,7 @@ rankPackage -> TarIndexCacheFeature -> ServerEnv -> [PkgInfo] - -> IO Double + -> IO Float rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) diff --git a/src/Distribution/Server/Framework/MemSize.hs b/src/Distribution/Server/Framework/MemSize.hs index f3e05f42a..4af5d251f 100644 --- a/src/Distribution/Server/Framework/MemSize.hs +++ b/src/Distribution/Server/Framework/MemSize.hs @@ -135,9 +135,6 @@ instance MemSize Integer where instance MemSize Float where memSize _ = 2 -instance MemSize Double where - memSize _ = 3 - instance MemSize UTCTime where memSize _ = 7 From 83cf50c086e93489a48df3d582a409e70044ef0d Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 9 Aug 2022 17:48:38 +0200 Subject: [PATCH 109/129] added the column and redid some packageRank issues --- datafiles/static/browse.js | 1 + src/Distribution/Server/Features/PackageRank.hs | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafiles/static/browse.js b/datafiles/static/browse.js index 905239afc..cfb8a7dfd 100644 --- a/datafiles/static/browse.js +++ b/datafiles/static/browse.js @@ -134,6 +134,7 @@ const replaceRows = (response) => { tr.appendChild(createTags(row.tags)); tr.appendChild(createLastUpload(row.lastUpload)); tr.appendChild(createMaintainers(row.maintainers)); + tr.appendChild(createSimpleText(row.packageRank)); l.appendChild(tr); } }; diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index e8bc74fd7..a6cf628a3 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -89,8 +89,8 @@ freshness (x : xs) lastUpd app = versionLatest = versionNumbers x daysPastExpiration = age >>= (\a -> return $ max 0 a - expectedUpdateInterval) - expectedUpdateInterval = int2Float - (min (versionStabilityInterval versionLatest) $ length (x : xs)) + expectedUpdateInterval = + int2Float (min (versionStabilityInterval versionLatest) $ length (x : xs)) versionStabilityInterval v | patches v > 3 && major v > 0 = 700 | patches v > 3 = 450 | patches v > 0 = 300 @@ -177,8 +177,7 @@ authorScore :: Int -> PackageDescription -> Scorer authorScore maintainers desc = boolScor 1 (not $ S.null $ author desc) <> maintScore where - maintScore = - boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) + maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) codeScore :: IO Float -> IO Float -> IO Scorer codeScore documentL haskellL = do @@ -218,7 +217,7 @@ versionScore versionList versions lastUploads desc = do <> scorer 15 (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) - intUse + intUse ) <> scorer 20 From 1865b9d8a843d03aca7cfecd9d7d859eb2a4a079 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 9 Aug 2022 18:04:18 +0200 Subject: [PATCH 110/129] fixed some basic bugs --- src/Distribution/Server/Features/PackageRank.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index a6cf628a3..8462f6000 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -42,13 +42,13 @@ import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximum :: Float + { maximumS :: Float , score :: Float } deriving Show instance Semigroup Scorer where - (Scorer a b) <> (Scorer c d) = Scorer (a + b) (c + d) + (Scorer a b) <> (Scorer c d) = Scorer (a + c) (b + d) scorer :: Float -> Float -> Scorer scorer maxim scr = @@ -62,7 +62,7 @@ boolScor k True = Scorer k k boolScor k False = Scorer k 0 total :: Scorer -> Float -total (Scorer a b) = a / b +total (Scorer a b) = b / a major :: Num a => [a] -> a major (x : _) = x @@ -257,7 +257,7 @@ rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) - desc = Scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) + desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) From 3c562bff3e52e8673a20355b5e94af0dfc48f092 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:22:57 +0200 Subject: [PATCH 111/129] removed Browse/parser changes --- src/Distribution/Server/Features/Browse/Parsers.hs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Distribution/Server/Features/Browse/Parsers.hs b/src/Distribution/Server/Features/Browse/Parsers.hs index 2775cd797..6445bbc1c 100644 --- a/src/Distribution/Server/Features/Browse/Parsers.hs +++ b/src/Distribution/Server/Features/Browse/Parsers.hs @@ -85,7 +85,7 @@ allowedAfterOpeningBrace AllowNot = "not " <|> allowedAfterOpeningBrace Disallow allowedAfterOpeningBrace _ = asum [ "downloads", "rating", "lastUpload" , "ageOfLastUpload" - , "tag:", "maintainer:", "deprecated:", "distro:", "packageRank" + , "tag:", "maintainer:", "deprecated:", "distro:" ] -- Whether the 'not' operator can be used. @@ -113,7 +113,6 @@ filterWith allowNot = do "maintainer:" -> MaintainerFilter <$> wordWoSpaceOrParens "deprecated:" -> DeprecatedFilter <$> deprecatedOption "distro:" -> DistroFilter <$> wordWoSpaceOrParens - "packageRank" -> DownloadsFilter <$> opAndSndParam decimal _ -> fail "Impossible since fieldName possibilities are known at compile time" pure filt From 2e266dcc07fe3917331121efc7782dbb236dbd4f Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:39:50 +0200 Subject: [PATCH 112/129] Fixed missing titile and changed fixed description --- datafiles/templates/Html/browse.html.st | 4 ++++ datafiles/templates/Html/noscript-search-form.html.st | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/datafiles/templates/Html/browse.html.st b/datafiles/templates/Html/browse.html.st index a7b85a496..f9d2b58b6 100644 --- a/datafiles/templates/Html/browse.html.st +++ b/datafiles/templates/Html/browse.html.st @@ -130,6 +130,9 @@ #arrow-maintainers { width: 100px; } + #arrow-packageRank { + width: 150px; + } .lastUpload, #sliderAndOutput { white-space: nowrap; } @@ -250,6 +253,7 @@ Tags Last U/L Maintainers + Package Rank diff --git a/datafiles/templates/Html/noscript-search-form.html.st b/datafiles/templates/Html/noscript-search-form.html.st index 0193d6e97..55c242afe 100644 --- a/datafiles/templates/Html/noscript-search-form.html.st +++ b/datafiles/templates/Html/noscript-search-form.html.st @@ -23,7 +23,7 @@ - + From 7ed9cf511ef9af768f201367e4a34f00620e9132 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 16 Aug 2022 22:51:04 +0200 Subject: [PATCH 113/129] Strict Scorer --- src/Distribution/Server/Features/PackageRank.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8462f6000..a3e9ded00 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TupleSections, BangPatterns #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -42,8 +42,8 @@ import System.FilePath ( isExtensionOf ) import qualified System.IO as SIO data Scorer = Scorer - { maximumS :: Float - , score :: Float + { maximumS :: !Float + , score :: !Float } deriving Show From 30d48cc938e2033542b2975eba361c0451890d7c Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 20:58:51 +0200 Subject: [PATCH 114/129] fixed some partial functions --- .../Server/Features/PackageRank.hs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index a3e9ded00..9b4c83569 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -114,6 +114,7 @@ rankIO -> [PkgInfo] -> IO Scorer +rankIO _ _ _ _ _ _ [] = return (Scorer (118 + 16 + 4 + 1) 0) rankIO vers recentDownloads maintainers docs env tarCache pkgs = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg @@ -131,10 +132,10 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- packageTarball tarCache . head $ pkgs + tarB <- mapM (packageTarball tarCache) (safeHead pkgs) return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB + =<< (join $ rightToMaybe <$> tarB) rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -243,17 +244,18 @@ temporalScore p lastUploads versionList recentDownloads = do / (if isApp then 5 else 6) ) 5 - packageFreshness = case lastUploads of - [] -> return 0 - _ -> freshness versionList (head lastUploads) isApp + packageFreshness = case safeHead lastUploads of + Nothing -> return 0 + (Just l) -> freshness versionList l isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) -rankPackagePage :: PackageDescription -> Scorer -rankPackagePage p = tests <> benchs <> desc <> homeP <> sourceRp <> cats +rankPackagePage :: Maybe PackageDescription -> Scorer +rankPackagePage Nothing = Scorer 233 0 +rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -278,4 +280,4 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs - where pkgD = packageDescription $ pkgDesc $ last pkgs + where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From 830cfd52b17ce62ee83a4915b872f11d248935ed Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 21:09:17 +0200 Subject: [PATCH 115/129] fixed some bugs --- src/Distribution/Server/Features/PackageRank.hs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9b4c83569..855e38876 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -112,17 +112,17 @@ rankIO -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] + -> PkgInfo -> IO Scorer -rankIO _ _ _ _ _ _ [] = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs = do +rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) +rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines return (temp <> versS <> codeS <> authorScore maintainers pkg) where - pkg = packageDescription <$> pkgDesc $ last pkgs pkgId = package pkg lastUploads = sortBy (flip compare) @@ -132,10 +132,10 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- mapM (packageTarball tarCache) (safeHead pkgs) + tarB <- packageTarball tarCache $ pkg return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< (join $ rightToMaybe <$> tarB) + =<< rightToMaybe tarB rightToMaybe (Right a) = Just a rightToMaybe (Left _) = Nothing @@ -279,5 +279,5 @@ rankPackage rankPackage versions recentDownloads maintainers docs tarCache env pkgs = total . (<>) (rankPackagePage pkgD) - <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs + <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs (safeLast pkgs) where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From df26e92956d7e605ba0df46592274b41816e3902 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Wed, 17 Aug 2022 21:22:45 +0200 Subject: [PATCH 116/129] fixed a bug --- .../Server/Features/PackageRank.hs | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 855e38876..8c0e78025 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections, BangPatterns #-} +{-# LANGUAGE TupleSections #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -43,7 +43,7 @@ import qualified System.IO as SIO data Scorer = Scorer { maximumS :: !Float - , score :: !Float + , score :: !Float } deriving Show @@ -112,17 +112,18 @@ rankIO -> ServerEnv -> TarIndexCacheFeature -> [PkgInfo] - -> PkgInfo + -> Maybe PkgInfo -> IO Scorer rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do +rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines return (temp <> versS <> codeS <> authorScore maintainers pkg) where + pkg = packageDescription $ pkgDesc pkgI pkgId = package pkg lastUploads = sortBy (flip compare) @@ -132,7 +133,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs pkg = do versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) packageEntr = do - tarB <- packageTarball tarCache $ pkg + tarB <- packageTarball tarCache pkgI return $ (\(path, _, index) -> (path, ) <$> T.lookup index path) =<< rightToMaybe tarB @@ -245,8 +246,8 @@ temporalScore p lastUploads versionList recentDownloads = do ) 5 packageFreshness = case safeHead lastUploads of - Nothing -> return 0 - (Just l) -> freshness versionList l isApp + Nothing -> return 0 + (Just l) -> freshness versionList l isApp freshnessScore = fracScor 10 <$> packageFreshness -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do @@ -254,7 +255,7 @@ temporalScore p lastUploads versionList recentDownloads = do return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) rankPackagePage :: Maybe PackageDescription -> Scorer -rankPackagePage Nothing = Scorer 233 0 +rankPackagePage Nothing = Scorer 233 0 rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats where tests = boolScor 50 (hasTests p) @@ -277,7 +278,12 @@ rankPackage -> [PkgInfo] -> IO Float rankPackage versions recentDownloads maintainers docs tarCache env pkgs = - total - . (<>) (rankPackagePage pkgD) - <$> rankIO versions recentDownloads maintainers docs env tarCache pkgs (safeLast pkgs) + total . (<>) (rankPackagePage pkgD) <$> rankIO versions + recentDownloads + maintainers + docs + env + tarCache + pkgs + (safeLast pkgs) where pkgD = packageDescription . pkgDesc <$> safeLast pkgs From d889087068a00f4e70fb25e0b70bad6fd0fc0033 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 18 Aug 2022 23:08:04 +0200 Subject: [PATCH 117/129] retrieves src correctly --- .../Server/Features/PackageRank.hs | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 8c0e78025..085f7da2f 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -1,4 +1,4 @@ -{-# LANGUAGE TupleSections #-} +{-# LANGUAGE BangPatterns #-} -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank @@ -120,7 +120,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg codeS <- codeScore documentLines srcLines - return (temp <> versS <> codeS <> authorScore maintainers pkg) + return $ temp <> versS <> codeS <> authorScore maintainers pkg where pkg = packageDescription $ pkgDesc pkgI @@ -132,13 +132,17 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do versionList :: [Version] versionList = sortBy (flip compare) $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) - packageEntr = do - tarB <- packageTarball tarCache pkgI - return - $ (\(path, _, index) -> (path, ) <$> T.lookup index path) - =<< rightToMaybe tarB - rightToMaybe (Right a) = Just a - rightToMaybe (Left _) = Nothing + srcLines = do + Right (path, _, _) <- packageTarball tarCache pkgI + filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path + + filterLines f = Tar.foldEntries (countLines f) 0 (const 0) + countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float + countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns + where + !lns = case Tar.entryContent entry of + (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) + _ -> l documentBlob :: IO (Maybe BlobStorage.BlobId) documentBlob = queryDocumentation docs pkgId @@ -149,8 +153,6 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do return $ liftM2 (,) path (join $ liftM2 T.lookup index path) documentLines :: IO Float documentLines = documentationEntr >>= filterLinesTar (const True) - srcLines :: IO Float - srcLines = packageEntr >>= filterLinesTar (isExtensionOf ".hs") filterLinesTar :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float From 40bfc1b0eb2dd9f812721d6fc79e8e59e3aa6828 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 19 Aug 2022 22:18:17 +0200 Subject: [PATCH 118/129] fixed documentation retrieval --- .../Server/Features/PackageRank.hs | 59 +++++++------------ 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 085f7da2f..9a93e3dd3 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -25,21 +25,17 @@ import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar -import qualified Codec.Archive.Tar.Entry as Tar -import Control.Monad ( join - , liftM2 - ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) -import qualified Data.TarIndex as T import qualified Data.Time.Clock as CL import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) -import qualified System.IO as SIO + +-- import Debug.Trace (trace) data Scorer = Scorer { maximumS :: !Float @@ -119,7 +115,7 @@ rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do temp <- temporalScore pkg lastUploads versionList recentDownloads versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documentLines srcLines + codeS <- codeScore documSize srcLines return $ temp <> versS <> codeS <> authorScore maintainers pkg where @@ -134,45 +130,34 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI - filterLines (isExtensionOf ".hs") . Tar.read <$> BSL.readFile path + filterLines (isExtensionOf ".hs") countLines + . Tar.read + <$> BSL.readFile path + documSize = do + path <- documentPath + case path of + Nothing -> return 0 + Just pth -> + filterLines (isExtensionOf ".html") countSize + . Tar.read + <$> BSL.readFile pth - filterLines f = Tar.foldEntries (countLines f) 0 (const 0) + filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns where !lns = case Tar.entryContent entry of (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) _ -> l + countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float + countSize f entry l = if not . f . Tar.entryPath $ entry then l else s + where + !s = case Tar.entryContent entry of + (Tar.NormalFile _ siz) -> l + fromInteger (toInteger siz) + _ -> l documentBlob :: IO (Maybe BlobStorage.BlobId) - documentBlob = queryDocumentation docs pkgId - documentIndex = documentBlob >>= mapM (cachedTarIndex tarCache) - documentationEntr = do - index <- documentIndex - path <- documentPath - return $ liftM2 (,) path (join $ liftM2 T.lookup index path) - documentLines :: IO Float - documentLines = documentationEntr >>= filterLinesTar (const True) - - filterLinesTar - :: (FilePath -> Bool) -> Maybe (FilePath, T.TarIndexEntry) -> IO Float - filterLinesTar f (Just (path, T.TarFileEntry offset)) = - if f path then getLines path offset else return 0 - filterLinesTar f (Just (_, T.TarDir dir)) = - sum <$> mapM (filterLinesTar f . Just) dir - filterLinesTar _ _ = return 0 - - -- TODO if size is too big give it a good score and do not read the file - getLines path offset = do - handle <- SIO.openFile path SIO.ReadMode - SIO.hSeek handle SIO.AbsoluteSeek (fromIntegral $ offset * 512) - header <- BSL.hGet handle 512 - case Tar.read header of - (Tar.Next Tar.Entry { Tar.entryContent = Tar.NormalFile _ siz } _) -> do - body <- BSL.hGet handle (fromIntegral siz) - return $ int2Float . length . BSL.split 10 $ body - _ -> return 0 - + documentBlob = queryDocumentation docs pkgId documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob From a568a40a5f89dd42c042f3c094b047a6864903fd Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 23 Aug 2022 23:20:34 +0200 Subject: [PATCH 119/129] changed the algorithm to match cargo --- .../Server/Features/PackageList.hs | 5 +- .../Server/Features/PackageRank.hs | 122 +++++++++++------- 2 files changed, 75 insertions(+), 52 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index c75ada37f..2af483bdc 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -34,6 +34,7 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.PackageDescription.Configuration import Distribution.Utils.ShortText (fromShortText) +import Distribution.Simple.Utils (safeLast) import Control.Concurrent import Data.Maybe (mapMaybe) @@ -269,8 +270,8 @@ listFeature CoreFeature{..} votes <- pkgNumScore pkgname deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) - packageR <- rankPackage versions (cmFind pkgname downs) - (UserIdSet.size maintainers) documentation tar env pkgs + packageR <- rankPackage versions (cmFind pkgname downs) + (UserIdSet.size maintainers) documentation tar env pkgs (safeLast pkgs) return $ (,) pkgname $ (updateDescriptionItem (pkgDesc pkg) $ emptyPackageItem pkgname) { itemTags = tags diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 9a93e3dd3..ac5c49ba5 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -32,6 +32,7 @@ import Data.List ( maximumBy import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL +import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) @@ -60,6 +61,9 @@ boolScor k False = Scorer k 0 total :: Scorer -> Float total (Scorer a b) = b / a +scale :: Float -> Scorer -> Scorer +scale mx sc = fracScor mx (total sc) + major :: Num a => [a] -> a major (x : _) = x major _ = 0 @@ -96,38 +100,46 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) --- lookupPackageId --- queryHasDocumentation +cabalScore :: PackageDescription -> IO Bool -> IO Scorer +cabalScore p docum = + (<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats) + <$> (boolScor 30 <$> docum) + where + tests = boolScor 50 (hasTests p) + benchs = boolScor 10 (hasBenchmarks p) + desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) + -- documentation = boolScor 30 () + homeP = boolScor 30 (not $ S.null $ homepage p) + sourceRp = boolScor 8 (not $ null $ sourceRepos p) + cats = boolScor 5 (not $ S.null $ category p) + +readmeScore _ = Scorer 0 0 --- TODO CoreFeature can be substituted by CoreResource -rankIO +-- queryHasDocumentation +baseScore :: VersionsFeature -> Int - -> Int -> DocumentationFeature -> ServerEnv -> TarIndexCacheFeature - -> [PkgInfo] - -> Maybe PkgInfo + -> [Version] + -> [CL.UTCTime] + -> PkgInfo -> IO Scorer -rankIO _ _ _ _ _ _ _ Nothing = return (Scorer (118 + 16 + 4 + 1) 0) -rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do - temp <- temporalScore pkg lastUploads versionList recentDownloads - versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documSize srcLines - return $ temp <> versS <> codeS <> authorScore maintainers pkg - +baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do + versS <- versionScore versionList vers lastUploads pkg + codeS <- codeScore documSize srcLines + cabalS <- cabalScore pkg documHas + return + $ scale 5 versS + <> scale 2 codeS + <> scale 3 (authorScore maintainers pkg) + <> scale 2 cabalS + <> scale 5 (readmeScore readme) where - pkg = packageDescription $ pkgDesc pkgI - pkgId = package pkg - lastUploads = - sortBy (flip compare) - $ (fst . pkgOriginalUploadInfo <$> pkgs) - ++ (fst . pkgLatestUploadInfo <$> pkgs) - versionList :: [Version] - versionList = sortBy (flip compare) - $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + pkg = packageDescription $ pkgDesc pkgI + pkgId = package pkg srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI filterLines (isExtensionOf ".hs") countLines @@ -141,6 +153,8 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do filterLines (isExtensionOf ".html") countSize . Tar.read <$> BSL.readFile pth + readme = findToplevelFile tarCache pkgI isReadmeFile + >>= either (\_ -> return Nothing) (return . Just) filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float @@ -161,6 +175,7 @@ rankIO vers recentDownloads maintainers docs env tarCache pkgs (Just pkgI) = do documentPath = do blob <- documentBlob return $ BlobStorage.filepath (serverBlobStore env) <$> blob + documHas = queryHasDocumentation docs pkgId authorScore :: Int -> PackageDescription -> Scorer authorScore maintainers desc = @@ -169,14 +184,14 @@ authorScore maintainers desc = maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) codeScore :: IO Float -> IO Float -> IO Scorer -codeScore documentL haskellL = do - docum <- documentL +codeScore documentS haskellL = do + docum <- documentS haskell <- haskellL return $ boolScor 1 (haskell > 700) <> boolScor 1 (haskell < 80000) <> fracScor 2 (min 1 (haskell / 5000)) - <> fracScor 2 (min 1 (10 * docum) / (3000 + haskell)) + <> fracScor 2 (min 1 docum / ((3000 + haskell) * 200)) versionScore :: [Version] @@ -241,20 +256,6 @@ temporalScore p lastUploads versionList recentDownloads = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) -rankPackagePage :: Maybe PackageDescription -> Scorer -rankPackagePage Nothing = Scorer 233 0 -rankPackagePage (Just p) = tests <> benchs <> desc <> homeP <> sourceRp <> cats - where - tests = boolScor 50 (hasTests p) - benchs = boolScor 10 (hasBenchmarks p) - desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) - -- documentation = boolScor 30 () - homeP = boolScor 30 (not $ S.null $ homepage p) - sourceRp = boolScor 8 (not $ null $ sourceRepos p) - cats = boolScor 5 (not $ S.null $ category p) - --- TODO fix the function Signature replace PackageDescription to PackageName/Identifier - rankPackage :: VersionsFeature -> Int @@ -263,14 +264,35 @@ rankPackage -> TarIndexCacheFeature -> ServerEnv -> [PkgInfo] + -> Maybe PkgInfo -> IO Float -rankPackage versions recentDownloads maintainers docs tarCache env pkgs = - total . (<>) (rankPackagePage pkgD) <$> rankIO versions - recentDownloads - maintainers - docs - env - tarCache - pkgs - (safeLast pkgs) - where pkgD = packageDescription . pkgDesc <$> safeLast pkgs +rankPackage _ _ _ _ _ _ _ Nothing = return 0 +rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) + = do + t <- temporalScore pkgD uploads versionList recentDownloads + + b <- baseScore versions + maintainers + docs + env + tarCache + versionList + uploads + pkgUsed + depr <- deprP + return $ sAverage t b * case depr of + Nothing -> 1 + _ -> 0.2 + where + pkgname = pkgName . package $ pkgD + pkgD = packageDescription . pkgDesc $ pkgUsed + deprP = queryGetDeprecatedFor versions pkgname + sAverage x y = (total x + total y) * 0.5 + + versionList :: [Version] + versionList = sortBy (flip compare) + $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + uploads = + sortBy (flip compare) + $ (fst . pkgOriginalUploadInfo <$> pkgs) + ++ (fst . pkgLatestUploadInfo <$> pkgs) From bec7e052ebe6d86fc6b96b3580ab71d67d397efc Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 25 Aug 2022 23:11:02 +0200 Subject: [PATCH 120/129] prototype for readme parser (collects some info about markdown) --- hackage-server.cabal | 1 + src/Distribution/Server/Features/PackageRank.hs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hackage-server.cabal b/hackage-server.cabal index 51c22ab5b..fc7319446 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -360,6 +360,7 @@ library lib-server Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap Distribution.Server.Features.PackageRank + Distribution.Server.Features.PackageRank.Parser if flag(debug) cpp-options: -DDEBUG diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index ac5c49ba5..0cef111fc 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,6 +6,8 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where +import Distribution.Server.Features.PackageRank.Parser + import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation From c0d4cb681be53cb1eca4909a57ec4fc54599e9ce Mon Sep 17 00:00:00 2001 From: kubaneko Date: Thu, 25 Aug 2022 23:20:46 +0200 Subject: [PATCH 121/129] forgot to add the parser --- .../Server/Features/PackageRank/Parser.hs | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 src/Distribution/Server/Features/PackageRank/Parser.hs diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageRank/Parser.hs new file mode 100644 index 000000000..5b02ed598 --- /dev/null +++ b/src/Distribution/Server/Features/PackageRank/Parser.hs @@ -0,0 +1,104 @@ +{-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} +module Distribution.Server.Features.PackageRank.Parser + ( parseM + ) where + + +import Commonmark +import Commonmark.Extensions +import Control.Monad +import Control.Monad.Identity +import qualified Data.ByteString.Lazy as BS + ( ByteString + , toStrict + ) +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.Text.Encoding.Error as T + ( lenientDecode ) +import qualified Data.Text.IO as TIO +import qualified Data.Text.Lazy.IO as TLIO +import Data.Typeable ( Typeable ) +import System.FilePath + +type MarkdownRenderable a b + = (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a) + +parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] +parseM md name = runIdentity + (commonmarkWith (pipeTableSpec <> defaultSyntaxSpec) name txt) + where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md + +data MStats = MStats Int Int --number of pictures, number of chars + deriving Show + +instance Monoid MStats where + mempty = MStats 0 0 + +instance Rangeable MStats where + ranged = const id + +instance HasAttributes MStats where + addAttributes = const id + +instance Semigroup MStats where + (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) + +data MarkdownStats = NotImportant | + HCode MStats | + Code MStats | + Section | -- Int? + Table Int | + PText MStats | + List Int + deriving (Show) + +sumMStat [] = mempty +sumMStat (x : xs) = case x of + NotImportant -> sumMStat xs + Section -> sumMStat xs + (List a) -> sumMStat xs + (Table a) -> sumMStat xs + (HCode a) -> a <> sumMStat xs + (Code a) -> a <> sumMStat xs + (PText a) -> a <> sumMStat xs + +instance Rangeable [MarkdownStats] where + ranged = const id + +instance HasAttributes [MarkdownStats] where + addAttributes = const id + +instance HasPipeTable MStats [MarkdownStats] where + pipeTable _ _ rows = [Table $ length rows] + +instance IsInline MStats where + lineBreak = MStats 0 1 + softBreak = MStats 0 1 + str t = MStats 0 (T.length t) + entity t = MStats 0 (T.length t) + escapedChar _ = MStats 0 1 + emph = id + strong = id + link _ _ a = a + image _ _ (MStats a b) = MStats (a + 1) b + code t = MStats 0 (T.length t) + rawInline _ t = MStats 0 (T.length t) + +instance IsBlock MStats [MarkdownStats] where + paragraph a = [PText a] + plain a = [PText a] + thematicBreak = [NotImportant] + blockQuote = id + codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)] + | otherwise = [Code (code codeT)] + heading _ _ = [Section] + rawBlock _ r = [NotImportant] + referenceLinkDefinition _ _ = [NotImportant] + list _ _ l = [List (length l + depSum l)] + +depSum [] = 0 +depSum ([] : xs) = depSum xs +depSum ((List a : ys) : xs) = a + depSum (ys : xs) +depSum ((_ : ys) : xs) = depSum (ys : xs) + From 0a98ed24ce4538415f26dc7884226073c61508e8 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Fri, 26 Aug 2022 23:38:50 +0200 Subject: [PATCH 122/129] finished readmeScore --- .../Server/Features/PackageRank.hs | 85 +++++++++++++------ .../Server/Features/PackageRank/Parser.hs | 82 +++++++++++------- 2 files changed, 112 insertions(+), 55 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 0cef111fc..dca027e0b 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -6,8 +6,9 @@ module Distribution.Server.Features.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageRank.Parser +import Distribution.Server.Features.PackageRank.Parser +import Data.TarIndex ( TarEntryOffset ) import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation @@ -17,9 +18,14 @@ import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import qualified Distribution.Server.Framework.BlobStorage as BlobStorage +import Distribution.Server.Framework.CacheControl import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types +import Distribution.Server.Util.Markdown + ( supposedToBeMarkdown ) +import Distribution.Server.Util.ServeTarball + ( loadTarEntry ) import Distribution.Simple.Utils ( safeHead , safeLast ) @@ -38,8 +44,6 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) --- import Debug.Trace (trace) - data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -54,7 +58,7 @@ scorer maxim scr = if maxim >= scr then Scorer maxim scr else Scorer maxim maxim fracScor :: Float -> Float -> Scorer -fracScor maxim frac = scorer maxim (maxim * frac) +fracScor maxim frac = scorer maxim (min (maxim * frac) maxim) boolScor :: Float -> Bool -> Scorer boolScor k True = Scorer k k @@ -102,10 +106,9 @@ freshness (x : xs) lastUpd app = age = flip numDays (Just lastUpd) . Just <$> CL.getCurrentTime decayDays = expectedUpdateInterval / 2 + (if app then 300 else 200) -cabalScore :: PackageDescription -> IO Bool -> IO Scorer +cabalScore :: PackageDescription -> Bool -> Scorer cabalScore p docum = - (<>) (tests <> benchs <> desc <> homeP <> sourceRp <> cats) - <$> (boolScor 30 <$> docum) + tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum where tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) @@ -115,9 +118,38 @@ cabalScore p docum = sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) -readmeScore _ = Scorer 0 0 +readmeScore + :: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath) + -> Bool + -> IO Scorer +readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max +readmeScore (Just (tarfile, _, offset, name)) app = do + entr <- loadTarEntry tarfile offset + case entr of + (Right (size, str)) -> return $ calcScore str size name + _ -> return $ Scorer 1 0 + where + calcScore str size filename = + scorer 75 (min 1 (fromInteger (toInteger size) / 3000)) + <> if supposedToBeMarkdown filename + then case parseM str filename of + Left _ -> Scorer 0 0 + Right mdStats -> format mdStats + else Scorer 0 0 + format stats = + fracScor (if app then 25 else 100) (min 1 $ int2Float hlength / 2000) + <> scorer (if app then 15 else 27) (int2Float blocks * 3) + <> boolScor (if app then 10 else 30) (clength > 150) + <> scorer 35 (int2Float images * 10) + <> scorer 30 (int2Float sections * 4) + <> scorer 25 (int2Float rows * 2) + where + (blocks, clength) = getCode stats + (_ , hlength) = getHCode stats + MStats _ images = sumMStat stats + rows = getListsTables stats + sections = getSections stats --- queryHasDocumentation baseScore :: VersionsFeature -> Int @@ -130,18 +162,25 @@ baseScore -> IO Scorer baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do - versS <- versionScore versionList vers lastUploads pkg - codeS <- codeScore documSize srcLines - cabalS <- cabalScore pkg documHas + + readM <- readme + hasDocum <- documHas + documS <- documSize + srcL <- srcLines + + versS <- versionScore versionList vers lastUploads pkg + readmeS <- readmeScore readM isApp + return $ scale 5 versS - <> scale 2 codeS + <> scale 2 (codeScore documS srcL) <> scale 3 (authorScore maintainers pkg) - <> scale 2 cabalS - <> scale 5 (readmeScore readme) + <> scale 2 (cabalScore pkg hasDocum) + <> scale 5 readmeS where pkg = packageDescription $ pkgDesc pkgI pkgId = package pkg + isApp = (isNothing . library) pkg && (not . null . executables) pkg srcLines = do Right (path, _, _) <- packageTarball tarCache pkgI filterLines (isExtensionOf ".hs") countLines @@ -165,6 +204,7 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do !lns = case Tar.entryContent entry of (Tar.NormalFile str _) -> l + (int2Float . length $ BSL.split 10 str) _ -> l + -- TODO might need to decode/add the other separator countSize :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countSize f entry l = if not . f . Tar.entryPath $ entry then l else s where @@ -185,15 +225,12 @@ authorScore maintainers desc = where maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) -codeScore :: IO Float -> IO Float -> IO Scorer -codeScore documentS haskellL = do - docum <- documentS - haskell <- haskellL - return - $ boolScor 1 (haskell > 700) - <> boolScor 1 (haskell < 80000) - <> fracScor 2 (min 1 (haskell / 5000)) - <> fracScor 2 (min 1 docum / ((3000 + haskell) * 200)) +codeScore :: Float -> Float -> Scorer +codeScore documentS haskellL = + boolScor 1 (haskellL > 700) + <> boolScor 1 (haskellL < 80000) + <> fracScor 2 (min 1 (haskellL / 5000)) + <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200)) versionScore :: [Version] diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageRank/Parser.hs index 5b02ed598..431228d84 100644 --- a/src/Distribution/Server/Features/PackageRank/Parser.hs +++ b/src/Distribution/Server/Features/PackageRank/Parser.hs @@ -1,12 +1,17 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} module Distribution.Server.Features.PackageRank.Parser ( parseM + , sumMStat + , getListsTables + , getCode + , getHCode + , getSections + , MStats(..) ) where import Commonmark import Commonmark.Extensions -import Control.Monad import Control.Monad.Identity import qualified Data.ByteString.Lazy as BS ( ByteString @@ -16,13 +21,6 @@ import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T ( lenientDecode ) -import qualified Data.Text.IO as TIO -import qualified Data.Text.Lazy.IO as TLIO -import Data.Typeable ( Typeable ) -import System.FilePath - -type MarkdownRenderable a b - = (Typeable a, HasPipeTable a b, IsBlock a b, IsInline a) parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] parseM md name = runIdentity @@ -44,24 +42,51 @@ instance HasAttributes MStats where instance Semigroup MStats where (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) -data MarkdownStats = NotImportant | +data MarkdownStats = NotImportant MStats | HCode MStats | Code MStats | - Section | -- Int? - Table Int | + Section MStats | + Table Int MStats | -- Int of rows PText MStats | - List Int + List Int MStats -- Int of elements deriving (Show) +getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code +getCode [] = (0, 0) +getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs +getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs +getCode (_ : xs) = getCode xs + +getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code +getHCode [] = (0, 0) +getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs +getHCode (_ : xs) = getHCode xs + +getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code +getSections [] = 0 +getSections (Section _ : xs) = 1 + getSections xs +getSections (_ : xs) = getSections xs + +(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) +(><) (a, b) (c, d) = (a + c, b + d) + + +sumMStat :: [MarkdownStats] -> MStats sumMStat [] = mempty sumMStat (x : xs) = case x of - NotImportant -> sumMStat xs - Section -> sumMStat xs - (List a) -> sumMStat xs - (Table a) -> sumMStat xs - (HCode a) -> a <> sumMStat xs - (Code a) -> a <> sumMStat xs - (PText a) -> a <> sumMStat xs + (NotImportant a) -> a <> sumMStat xs + (Section a) -> a <> sumMStat xs + (List _ a ) -> a <> sumMStat xs + (Table _ a ) -> a <> sumMStat xs + (HCode a ) -> a <> sumMStat xs + (Code a ) -> a <> sumMStat xs + (PText a ) -> a <> sumMStat xs + +getListsTables :: [MarkdownStats] -> Int +getListsTables [] = 0 +getListsTables ((List a _) : ys) = a + getListsTables ys +getListsTables ((Table a _) : ys) = a + getListsTables ys +getListsTables (_ : ys) = getListsTables ys instance Rangeable [MarkdownStats] where ranged = const id @@ -70,7 +95,7 @@ instance HasAttributes [MarkdownStats] where addAttributes = const id instance HasPipeTable MStats [MarkdownStats] where - pipeTable _ _ rows = [Table $ length rows] + pipeTable _ _ rows = [Table (length rows) (mconcat $ mconcat <$> rows)] instance IsInline MStats where lineBreak = MStats 0 1 @@ -88,17 +113,12 @@ instance IsInline MStats where instance IsBlock MStats [MarkdownStats] where paragraph a = [PText a] plain a = [PText a] - thematicBreak = [NotImportant] + thematicBreak = [NotImportant mempty] blockQuote = id codeBlock language codeT | language == T.pack "haskell" = [HCode (code codeT)] | otherwise = [Code (code codeT)] - heading _ _ = [Section] - rawBlock _ r = [NotImportant] - referenceLinkDefinition _ _ = [NotImportant] - list _ _ l = [List (length l + depSum l)] - -depSum [] = 0 -depSum ([] : xs) = depSum xs -depSum ((List a : ys) : xs) = a + depSum (ys : xs) -depSum ((_ : ys) : xs) = depSum (ys : xs) - + heading _ a = [Section a] + rawBlock _ _ = [NotImportant mempty] + referenceLinkDefinition _ _ = [NotImportant mempty] + list _ _ l = [List (length l + sumLT l) (mconcat $ sumMStat <$> l)] + where sumLT a = sum (getListsTables <$> a) From e1e645244b38e88b996b7f5c3323559b469aa399 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 27 Aug 2022 22:34:11 +0200 Subject: [PATCH 123/129] changed documentation parameter to get reasonable output --- src/Distribution/Server/Features/PackageRank.hs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index dca027e0b..334427f88 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -113,7 +113,6 @@ cabalScore p docum = tests = boolScor 50 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) - -- documentation = boolScor 30 () homeP = boolScor 30 (not $ S.null $ homepage p) sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) @@ -230,7 +229,7 @@ codeScore documentS haskellL = boolScor 1 (haskellL > 700) <> boolScor 1 (haskellL < 80000) <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 200)) + <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 1600)) versionScore :: [Version] @@ -281,11 +280,10 @@ temporalScore p lastUploads versionList recentDownloads = do where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = Scorer 5 $ min + calcDownScore i = scorer 5 $ min ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) / (if isApp then 5 else 6) ) - 5 packageFreshness = case safeHead lastUploads of Nothing -> return 0 (Just l) -> freshness versionList l isApp From 618dea14492d2e24c5c9e96853c2c1df687d1379 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sat, 27 Aug 2022 23:35:03 +0200 Subject: [PATCH 124/129] changed some parameters to reflect hackage --- .../Server/Features/PackageRank.hs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageRank.hs index 334427f88..341dacbaf 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageRank.hs @@ -110,7 +110,7 @@ cabalScore :: PackageDescription -> Bool -> Scorer cabalScore p docum = tests <> benchs <> desc <> homeP <> sourceRp <> cats <> boolScor 30 docum where - tests = boolScor 50 (hasTests p) + tests = boolScor 30 (hasTests p) benchs = boolScor 10 (hasBenchmarks p) desc = scorer 30 (min 1 (int2Float (S.length $ description p) / 300)) homeP = boolScor 30 (not $ S.null $ homepage p) @@ -169,7 +169,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do versS <- versionScore versionList vers lastUploads pkg readmeS <- readmeScore readM isApp - return $ scale 5 versS <> scale 2 (codeScore documS srcL) @@ -229,7 +228,7 @@ codeScore documentS haskellL = boolScor 1 (haskellL > 700) <> boolScor 1 (haskellL < 80000) <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 documentS / ((3000 + haskellL) * 1600)) + <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) versionScore :: [Version] @@ -252,10 +251,9 @@ versionScore versionList versions lastUploads desc = do (_, deprecN, _) <- partVers return deprecN calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer - calculateScore [] _ _ = Scorer 118 0 calculateScore depre lUps intUse = boolScor 20 (length intUse > 1) - <> scorer 40 (numDays (safeHead lUps) (safeLast lUps)) + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) <> scorer 15 (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) @@ -276,13 +274,16 @@ temporalScore temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore tract <- tractionScore + -- Reverse dependencies are to be done + + f <- packageFreshness return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = scorer 5 $ min - ( (logBase 2 (int2Float $ max 0 (i - 100) + 100) - 6.6) - / (if isApp then 5 else 6) + calcDownScore i = fracScor 5 + ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) + / (if isApp then 6 else 8) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 @@ -291,7 +292,7 @@ temporalScore p lastUploads versionList recentDownloads = do -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness - return $ boolScor 1 (fresh * int2Float recentDownloads > 1000) + return $ boolScor 1 (fresh * int2Float recentDownloads > 200) rankPackage :: VersionsFeature From 560050ceac6db25a4be43798000818efabda1ee1 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 28 Aug 2022 21:23:06 +0200 Subject: [PATCH 125/129] moved PackageRank into PackageList Feature and changed UI so packageRank will display as Int between 1000 and 0 --- hackage-server.cabal | 4 ++-- src/Distribution/Server/Features/Browse.hs | 3 ++- src/Distribution/Server/Features/PackageList.hs | 2 +- .../{PackageRank/Parser.hs => PackageList/MStats.hs} | 8 ++++---- .../Server/Features/{ => PackageList}/PackageRank.hs | 8 +++----- 5 files changed, 12 insertions(+), 13 deletions(-) rename src/Distribution/Server/Features/{PackageRank/Parser.hs => PackageList/MStats.hs} (94%) rename src/Distribution/Server/Features/{ => PackageList}/PackageRank.hs (98%) diff --git a/hackage-server.cabal b/hackage-server.cabal index fc7319446..60a6a63f7 100644 --- a/hackage-server.cabal +++ b/hackage-server.cabal @@ -310,6 +310,8 @@ library lib-server Distribution.Server.Features.PackageCandidates.Backup Distribution.Server.Features.PackageFeed Distribution.Server.Features.PackageList + Distribution.Server.Features.PackageList.PackageRank + Distribution.Server.Features.PackageList.MStats Distribution.Server.Features.Distro Distribution.Server.Features.Distro.Distributions Distribution.Server.Features.Distro.Backup @@ -359,8 +361,6 @@ library lib-server Distribution.Server.Features.StaticFiles Distribution.Server.Features.ServerIntrospect Distribution.Server.Features.Sitemap - Distribution.Server.Features.PackageRank - Distribution.Server.Features.PackageRank.Parser if flag(debug) cpp-options: -DDEBUG diff --git a/src/Distribution/Server/Features/Browse.hs b/src/Distribution/Server/Features/Browse.hs index d5d4497da..b50d02c67 100644 --- a/src/Distribution/Server/Features/Browse.hs +++ b/src/Distribution/Server/Features/Browse.hs @@ -9,6 +9,7 @@ import qualified Data.Set as S import Data.Time (getCurrentTime) import Data.Time.Format.ISO8601 (iso8601Show) import System.FilePath (()) +import GHC.Float.RealFracMethods (roundFloatInteger) import Data.Aeson (Value(Array), object, toJSON, (.=)) import qualified Data.Aeson.Key as Key @@ -147,7 +148,7 @@ packageIndexInfoToValue , Key.fromString "tags" .= map renderTag (S.toAscList itemTags) , Key.fromString "lastUpload" .= iso8601Show itemLastUpload , Key.fromString "maintainers" .= map renderUser itemMaintainer - , Key.fromString "packageRank" .= itemPackageRank + , Key.fromString "packageRank" .= (roundFloatInteger (1000 * itemPackageRank)) ] where renderTag :: Tag -> Value diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 2af483bdc..835235e77 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -17,7 +17,7 @@ import Distribution.Server.Features.Users import Distribution.Server.Features.Upload(UploadFeature(..)) import Distribution.Server.Features.Documentation (DocumentationFeature(..)) import Distribution.Server.Features.TarIndexCache (TarIndexCacheFeature(..)) -import Distribution.Server.Features.PackageRank +import Distribution.Server.Features.PackageList.PackageRank import Distribution.Server.Users.Users (userIdToName) import qualified Distribution.Server.Users.UserIdSet as UserIdSet diff --git a/src/Distribution/Server/Features/PackageRank/Parser.hs b/src/Distribution/Server/Features/PackageList/MStats.hs similarity index 94% rename from src/Distribution/Server/Features/PackageRank/Parser.hs rename to src/Distribution/Server/Features/PackageList/MStats.hs index 431228d84..33934ebbb 100644 --- a/src/Distribution/Server/Features/PackageRank/Parser.hs +++ b/src/Distribution/Server/Features/PackageList/MStats.hs @@ -1,5 +1,5 @@ {-# LANGUAGE ScopedTypeVariables, FlexibleInstances, MultiParamTypeClasses, ConstraintKinds #-} -module Distribution.Server.Features.PackageRank.Parser +module Distribution.Server.Features.PackageList.MStats ( parseM , sumMStat , getListsTables @@ -53,13 +53,13 @@ data MarkdownStats = NotImportant MStats | getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getCode [] = (0, 0) -getCode (Code (MStats code _) : xs) = (1, code) >< getCode xs -getCode (HCode (MStats code _) : xs) = (1, code) >< getCode xs +getCode (Code (MStats codeT _) : xs) = (1, codeT) >< getCode xs +getCode (HCode (MStats codeT _) : xs) = (1, codeT) >< getCode xs getCode (_ : xs) = getCode xs getHCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getHCode [] = (0, 0) -getHCode (HCode (MStats code _) : xs) = (1, code) >< getHCode xs +getHCode (HCode (MStats codeT _) : xs) = (1, codeT) >< getHCode xs getHCode (_ : xs) = getHCode xs getSections :: [MarkdownStats] -> Int -- number of code blocks, size of code diff --git a/src/Distribution/Server/Features/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs similarity index 98% rename from src/Distribution/Server/Features/PackageRank.hs rename to src/Distribution/Server/Features/PackageList/PackageRank.hs index 341dacbaf..367731beb 100644 --- a/src/Distribution/Server/Features/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -2,11 +2,11 @@ -- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank -module Distribution.Server.Features.PackageRank +module Distribution.Server.Features.PackageList.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageRank.Parser +import Distribution.Server.Features.PackageList.MStats import Data.TarIndex ( TarEntryOffset ) import Distribution.Package @@ -274,9 +274,7 @@ temporalScore temporalScore p lastUploads versionList recentDownloads = do fresh <- freshnessScore tract <- tractionScore - -- Reverse dependencies are to be done - - f <- packageFreshness + -- Reverse dependencies are added return $ tract <> fresh <> downloadScore where isApp = (isNothing . library) p && (not . null . executables) p From 0bb394ef92a41aec4a2887a1ff23d4f6bdf11c62 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Tue, 30 Aug 2022 22:44:42 +0200 Subject: [PATCH 126/129] added some Exception handling --- .../Features/PackageList/PackageRank.hs | 53 +++++++++---------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index 367731beb..c58f82884 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -1,24 +1,18 @@ -{-# LANGUAGE BangPatterns #-} - --- TODO change the module name probably Distribution.Server.Features.PackageList.PackageRank - +{-# LANGUAGE BangPatterns, ScopedTypeVariables #-} module Distribution.Server.Features.PackageList.PackageRank ( rankPackage ) where -import Distribution.Server.Features.PackageList.MStats - -import Data.TarIndex ( TarEntryOffset ) import Distribution.Package import Distribution.PackageDescription import Distribution.Server.Features.Documentation ( DocumentationFeature(..) ) +import Distribution.Server.Features.PackageList.MStats import Distribution.Server.Features.PreferredVersions import Distribution.Server.Features.PreferredVersions.State import Distribution.Server.Features.TarIndexCache import qualified Distribution.Server.Framework.BlobStorage as BlobStorage -import Distribution.Server.Framework.CacheControl import Distribution.Server.Framework.ServerEnv ( ServerEnv(..) ) import Distribution.Server.Packages.Types @@ -33,6 +27,9 @@ import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar +import Control.Exception ( SomeException(..) + , handle + ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy @@ -44,6 +41,9 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) +handleConst :: a -> IO a -> IO a +handleConst c = handle (\(_ :: SomeException) -> return c) + data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -117,17 +117,16 @@ cabalScore p docum = sourceRp = boolScor 8 (not $ null $ sourceRepos p) cats = boolScor 5 (not $ S.null $ category p) -readmeScore - :: Maybe (FilePath, ETag, Data.TarIndex.TarEntryOffset, FilePath) - -> Bool - -> IO Scorer -readmeScore Nothing _ = return $ Scorer 1 0 -- readmeScore is scaled so it does not need correct max -readmeScore (Just (tarfile, _, offset, name)) app = do - entr <- loadTarEntry tarfile offset +readmeScore :: TarIndexCacheFeature -> PkgInfo -> Bool -> IO Scorer +readmeScore tarCache pkgI app = do + Just (tarfile, _, offset, name) <- readme + entr <- loadTarEntry tarfile offset case entr of (Right (size, str)) -> return $ calcScore str size name _ -> return $ Scorer 1 0 where + readme = findToplevelFile tarCache pkgI isReadmeFile + >>= either (\_ -> return Nothing) (return . Just) calcScore str size filename = scorer 75 (min 1 (fromInteger (toInteger size) / 3000)) <> if supposedToBeMarkdown filename @@ -162,13 +161,13 @@ baseScore baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do - readM <- readme - hasDocum <- documHas - documS <- documSize - srcL <- srcLines + hasDocum <- handleConst False documHas -- Probably redundant + documS <- handleConst 0 documSize + srcL <- handleConst 0 srcLines - versS <- versionScore versionList vers lastUploads pkg - readmeS <- readmeScore readM isApp + versS <- handleConst (Scorer 1 0) + (versionScore versionList vers lastUploads pkg) + readmeS <- handleConst (Scorer 1 0) (readmeScore tarCache pkgI isApp) return $ scale 5 versS <> scale 2 (codeScore documS srcL) @@ -192,9 +191,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do filterLines (isExtensionOf ".html") countSize . Tar.read <$> BSL.readFile pth - readme = findToplevelFile tarCache pkgI isReadmeFile - >>= either (\_ -> return Nothing) (return . Just) - filterLines f g = Tar.foldEntries (g f) 0 (const 0) countLines :: (FilePath -> Bool) -> Tar.Entry -> Float -> Float countLines f entry l = if not . f . Tar.entryPath $ entry then l else lns @@ -279,15 +275,16 @@ temporalScore p lastUploads versionList recentDownloads = do where isApp = (isNothing . library) p && (not . null . executables) p downloadScore = calcDownScore recentDownloads - calcDownScore i = fracScor 5 + calcDownScore i = fracScor + 5 ( (logBase 2 (int2Float $ max 0 (i - 32) + 32) - 5) / (if isApp then 6 else 8) ) packageFreshness = case safeHead lastUploads of Nothing -> return 0 - (Just l) -> freshness versionList l isApp + (Just l) -> freshness versionList l isApp -- Getting time hopefully does not throw Exc. freshnessScore = fracScor 10 <$> packageFreshness --- Missing dependencyFreshnessScore for reasonable effectivity needs caching + -- Missing dependencyFreshnessScore for reasonable effectivity needs caching tractionScore = do fresh <- packageFreshness return $ boolScor 1 (fresh * int2Float recentDownloads > 200) @@ -315,7 +312,7 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk versionList uploads pkgUsed - depr <- deprP + depr <- handleConst Nothing deprP return $ sAverage t b * case depr of Nothing -> 1 _ -> 0.2 From dbc9141270717b1c375920ccca1a2d8751a13849 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 4 Sep 2022 13:32:29 +0200 Subject: [PATCH 127/129] some comments and refactoring --- .../Server/Features/PackageList/MStats.hs | 32 ++--- .../Features/PackageList/PackageRank.hs | 119 +++++++++--------- 2 files changed, 78 insertions(+), 73 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList/MStats.hs b/src/Distribution/Server/Features/PackageList/MStats.hs index 33934ebbb..b9dc04936 100644 --- a/src/Distribution/Server/Features/PackageList/MStats.hs +++ b/src/Distribution/Server/Features/PackageList/MStats.hs @@ -9,24 +9,32 @@ module Distribution.Server.Features.PackageList.MStats , MStats(..) ) where - import Commonmark import Commonmark.Extensions import Control.Monad.Identity import qualified Data.ByteString.Lazy as BS ( ByteString - , toStrict - ) + , toStrict ) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding.Error as T ( lenientDecode ) +-- parses markdown into statistics needed for readmeScore parseM :: BS.ByteString -> FilePath -> Either ParseError [MarkdownStats] parseM md name = runIdentity (commonmarkWith (pipeTableSpec <> defaultSyntaxSpec) name txt) where txt = T.decodeUtf8With T.lenientDecode . BS.toStrict $ md +data MarkdownStats = NotImportant MStats | + HCode MStats | + Code MStats | + Section MStats | + Table Int MStats | -- Int of rows + PText MStats | + List Int MStats -- Int of elements + deriving (Show) + data MStats = MStats Int Int --number of pictures, number of chars deriving Show @@ -42,14 +50,7 @@ instance HasAttributes MStats where instance Semigroup MStats where (MStats a b) <> (MStats c d) = MStats (a + c) (b + d) -data MarkdownStats = NotImportant MStats | - HCode MStats | - Code MStats | - Section MStats | - Table Int MStats | -- Int of rows - PText MStats | - List Int MStats -- Int of elements - deriving (Show) +-- Getter functions getCode :: [MarkdownStats] -> (Int, Int) -- number of code blocks, size of code getCode [] = (0, 0) @@ -67,10 +68,6 @@ getSections [] = 0 getSections (Section _ : xs) = 1 + getSections xs getSections (_ : xs) = getSections xs -(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) -(><) (a, b) (c, d) = (a + c, b + d) - - sumMStat :: [MarkdownStats] -> MStats sumMStat [] = mempty sumMStat (x : xs) = case x of @@ -88,6 +85,11 @@ getListsTables ((List a _) : ys) = a + getListsTables ys getListsTables ((Table a _) : ys) = a + getListsTables ys getListsTables (_ : ys) = getListsTables ys +-- helper +(><) :: (Int, Int) -> (Int, Int) -> (Int, Int) +(><) (a, b) (c, d) = (a + c, b + d) + +-- INSTANCES instance Rangeable [MarkdownStats] where ranged = const id diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index c58f82884..d259cdc2d 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -21,19 +21,16 @@ import Distribution.Server.Util.Markdown import Distribution.Server.Util.ServeTarball ( loadTarEntry ) import Distribution.Simple.Utils ( safeHead - , safeLast - ) + , safeLast ) import Distribution.Types.Version import qualified Distribution.Utils.ShortText as S import qualified Codec.Archive.Tar as Tar import Control.Exception ( SomeException(..) - , handle - ) + , handle ) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy - , sortBy - ) + , sortBy ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL @@ -41,9 +38,12 @@ import Distribution.Server.Packages.Readme import GHC.Float ( int2Float ) import System.FilePath ( isExtensionOf ) +-- HELPER FUNCTIONS + handleConst :: a -> IO a -> IO a handleConst c = handle (\(_ :: SomeException) -> return c) +-- Scorer stores rank information data Scorer = Scorer { maximumS :: !Float , score :: !Float @@ -70,6 +70,7 @@ total (Scorer a b) = b / a scale :: Float -> Scorer -> Scorer scale mx sc = fracScor mx (total sc) +-- calculates number of versions from version list major :: Num a => [a] -> a major (x : _) = x major _ = 0 @@ -86,6 +87,8 @@ numDays (Just first) (Just end) = (toRational CL.nominalDay) numDays _ _ = 0 +-- Score Calculations + freshness :: [Version] -> CL.UTCTime -> Bool -> IO Float freshness [] _ _ = return 0 freshness (x : xs) lastUpd app = @@ -148,6 +151,58 @@ readmeScore tarCache pkgI app = do rows = getListsTables stats sections = getSections stats +authorScore :: Int -> PackageDescription -> Scorer +authorScore maintainers desc = + boolScor 1 (not $ S.null $ author desc) <> maintScore + where + maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) + +codeScore :: Float -> Float -> Scorer +codeScore documentS haskellL = + boolScor 1 (haskellL > 700) + <> boolScor 1 (haskellL < 80000) + <> fracScor 2 (min 1 (haskellL / 5000)) + <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) + +versionScore + :: [Version] + -> VersionsFeature + -> [CL.UTCTime] + -> PackageDescription + -> IO Scorer +versionScore versionList versions lastUploads desc = do + use <- intUsable + depre <- deprec + return $ calculateScore depre lastUploads use + where + pkgNm = pkgName $ package desc + partVers = + flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm + intUsable = do + (norm, _, unpref) <- partVers + return $ versionNumbers <$> norm ++ unpref + deprec = do + (_, deprecN, _) <- partVers + return deprecN + calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer + calculateScore depre lUps intUse = + boolScor 20 (length intUse > 1) + <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) + <> scorer + 15 + (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) + intUse + ) + <> scorer + 20 + (int2Float $ 4 * length + (filter (\x -> major x > 0 && patches x > 0) intUse) + ) + <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) + <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) + <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) + <> boolScor 5 (not $ null depre) + baseScore :: VersionsFeature -> Int @@ -213,58 +268,6 @@ baseScore vers maintainers docs env tarCache versionList lastUploads pkgI = do return $ BlobStorage.filepath (serverBlobStore env) <$> blob documHas = queryHasDocumentation docs pkgId -authorScore :: Int -> PackageDescription -> Scorer -authorScore maintainers desc = - boolScor 1 (not $ S.null $ author desc) <> maintScore - where - maintScore = boolScor 3 (maintainers > 1) <> scorer 5 (int2Float maintainers) - -codeScore :: Float -> Float -> Scorer -codeScore documentS haskellL = - boolScor 1 (haskellL > 700) - <> boolScor 1 (haskellL < 80000) - <> fracScor 2 (min 1 (haskellL / 5000)) - <> fracScor 2 (min 1 (documentS / ((3000 + haskellL) * 1600))) - -versionScore - :: [Version] - -> VersionsFeature - -> [CL.UTCTime] - -> PackageDescription - -> IO Scorer -versionScore versionList versions lastUploads desc = do - use <- intUsable - depre <- deprec - return $ calculateScore depre lastUploads use - where - pkgNm = pkgName $ package desc - partVers = - flip partitionVersions versionList <$> queryGetPreferredInfo versions pkgNm - intUsable = do - (norm, _, unpref) <- partVers - return $ versionNumbers <$> norm ++ unpref - deprec = do - (_, deprecN, _) <- partVers - return deprecN - calculateScore :: [Version] -> [CL.UTCTime] -> [[Int]] -> Scorer - calculateScore depre lUps intUse = - boolScor 20 (length intUse > 1) - <> scorer 40 (numDays (safeHead lUps) (safeLast lUps) / 11) - <> scorer - 15 - (int2Float $ length $ filter (\x -> major x > 0 || minor x > 0) - intUse - ) - <> scorer - 20 - (int2Float $ 4 * length - (filter (\x -> major x > 0 && patches x > 0) intUse) - ) - <> scorer 10 (int2Float $ patches $ maximumBy (comparing patches) intUse) - <> boolScor 8 (any (\x -> major x == 0 && patches x > 0) intUse) - <> boolScor 10 (any (\x -> major x > 0 && major x < 20) intUse) - <> boolScor 5 (not $ null depre) - temporalScore :: PackageDescription -> [CL.UTCTime] -> [Version] -> Int -> IO Scorer temporalScore p lastUploads versionList recentDownloads = do From b9b73cc78c8a5a5705dc1366fd35754fdf22a3d7 Mon Sep 17 00:00:00 2001 From: Janus Troelsen Date: Wed, 7 Sep 2022 06:55:53 -0500 Subject: [PATCH 128/129] Use NonEmpty (#1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ondřej Kubánek <71923533+kubaneko@users.noreply.github.com> --- .../Server/Features/PackageList.hs | 20 ++++++++++--------- .../Features/PackageList/PackageRank.hs | 15 +++++++------- .../Server/Packages/PackageIndex.hs | 10 +++++++++- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/Distribution/Server/Features/PackageList.hs b/src/Distribution/Server/Features/PackageList.hs index 835235e77..718354b13 100644 --- a/src/Distribution/Server/Features/PackageList.hs +++ b/src/Distribution/Server/Features/PackageList.hs @@ -34,9 +34,10 @@ import Distribution.Package import Distribution.PackageDescription import Distribution.PackageDescription.Configuration import Distribution.Utils.ShortText (fromShortText) -import Distribution.Simple.Utils (safeLast) import Control.Concurrent +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty (NonEmpty) import Data.Maybe (mapMaybe) import Data.Map (Map) import qualified Data.Map as Map @@ -233,9 +234,9 @@ listFeature CoreFeature{..} False -> do index <- queryGetPackageIndex let pkgs = PackageIndex.lookupPackageName index pkgname - case pkgs of - [] -> return () --this shouldn't happen - _ -> modifyMemState itemCache . uncurry Map.insert =<< constructItem pkgs + case NE.nonEmpty pkgs of + Nothing -> return () --this shouldn't happen + Just ne -> modifyMemState itemCache . uncurry Map.insert =<< constructItem ne updateDesc pkgname = do index <- queryGetPackageIndex @@ -256,13 +257,14 @@ listFeature CoreFeature{..} constructItemIndex :: IO (Map PackageName PackageItem) constructItemIndex = do index <- queryGetPackageIndex - items <- mapM constructItem $ PackageIndex.allPackagesByName index - return $ Map.fromList items + let byName = PackageIndex.allPackagesByNameNE index + mPkgInfos <- traverse (mapM constructItem) (NE.nonEmpty byName) + pure $ foldMap (Map.fromList . NE.toList) mPkgInfos - constructItem :: [PkgInfo] -> IO (PackageName, PackageItem) + constructItem :: NonEmpty PkgInfo -> IO (PackageName, PackageItem) constructItem pkgs = do let pkgname = packageName pkg - pkg = last pkgs + pkg = NE.last pkgs -- [reverse index disabled] revCount <- query . GetReverseCount $ pkgname users <- queryGetUserDb tags <- queryTagsForPackage pkgname @@ -271,7 +273,7 @@ listFeature CoreFeature{..} deprs <- queryGetDeprecatedFor pkgname maintainers <- queryUserGroup (maintainersGroup pkgname) packageR <- rankPackage versions (cmFind pkgname downs) - (UserIdSet.size maintainers) documentation tar env pkgs (safeLast pkgs) + (UserIdSet.size maintainers) documentation tar env pkgs return $ (,) pkgname $ (updateDescriptionItem (pkgDesc pkg) $ emptyPackageItem pkgname) { itemTags = tags diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index d259cdc2d..5b25bc3e1 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -31,6 +31,8 @@ import Control.Exception ( SomeException(..) import qualified Data.ByteString.Lazy as BSL import Data.List ( maximumBy , sortBy ) +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty ( NonEmpty ) import Data.Maybe ( isNothing ) import Data.Ord ( comparing ) import qualified Data.Time.Clock as CL @@ -299,11 +301,9 @@ rankPackage -> DocumentationFeature -> TarIndexCacheFeature -> ServerEnv - -> [PkgInfo] - -> Maybe PkgInfo + -> NonEmpty PkgInfo -> IO Float -rankPackage _ _ _ _ _ _ _ Nothing = return 0 -rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pkgUsed) +rankPackage versions recentDownloads maintainers docs tarCache env pkgs = do t <- temporalScore pkgD uploads versionList recentDownloads @@ -320,6 +320,7 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk Nothing -> 1 _ -> 0.2 where + pkgUsed = NE.last pkgs pkgname = pkgName . package $ pkgD pkgD = packageDescription . pkgDesc $ pkgUsed deprP = queryGetDeprecatedFor versions pkgname @@ -327,8 +328,8 @@ rankPackage versions recentDownloads maintainers docs tarCache env pkgs (Just pk versionList :: [Version] versionList = sortBy (flip compare) - $ map (pkgVersion . package . packageDescription) (pkgDesc <$> pkgs) + $ map (pkgVersion . package . packageDescription . pkgDesc) (NE.toList pkgs) uploads = sortBy (flip compare) - $ (fst . pkgOriginalUploadInfo <$> pkgs) - ++ (fst . pkgLatestUploadInfo <$> pkgs) + $ (fst . pkgOriginalUploadInfo <$> NE.toList pkgs) + ++ (fst . pkgLatestUploadInfo <$> NE.toList pkgs) diff --git a/src/Distribution/Server/Packages/PackageIndex.hs b/src/Distribution/Server/Packages/PackageIndex.hs index 4b862d649..f48750e73 100644 --- a/src/Distribution/Server/Packages/PackageIndex.hs +++ b/src/Distribution/Server/Packages/PackageIndex.hs @@ -44,7 +44,8 @@ module Distribution.Server.Packages.PackageIndex ( -- ** Bulk queries allPackageNames, allPackages, - allPackagesByName + allPackagesByName, + allPackagesByNameNE ) where import Distribution.Server.Prelude hiding (lookup) @@ -58,6 +59,8 @@ import qualified Data.Map.Strict as Map import Data.Map.Strict (Map) import qualified Data.Foldable as Foldable import Data.List (groupBy, find, isInfixOf) +import qualified Data.List.NonEmpty as NE +import Data.List.NonEmpty (NonEmpty) import Data.SafeCopy import Distribution.Types.PackageName @@ -258,6 +261,11 @@ allPackages (PackageIndex m) = concat (Map.elems m) allPackagesByName :: Package pkg => PackageIndex pkg -> [[pkg]] allPackagesByName (PackageIndex m) = Map.elems m +allPackagesByNameNE :: Package pkg => PackageIndex pkg -> [NonEmpty pkg] +allPackagesByNameNE (PackageIndex m) = + -- This is safe because there will always be at least one version of a package + NE.fromList <$> Map.elems m + allPackageNames :: PackageIndex pkg -> [PackageName] allPackageNames (PackageIndex m) = Map.keys m From a41b375fe9d0e3f15e825db9dff16a9a653acbc7 Mon Sep 17 00:00:00 2001 From: kubaneko Date: Sun, 2 Oct 2022 21:46:33 +0200 Subject: [PATCH 129/129] test commit --- src/Distribution/Server/Features/PackageList/PackageRank.hs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Distribution/Server/Features/PackageList/PackageRank.hs b/src/Distribution/Server/Features/PackageList/PackageRank.hs index 5b25bc3e1..2263419be 100644 --- a/src/Distribution/Server/Features/PackageList/PackageRank.hs +++ b/src/Distribution/Server/Features/PackageList/PackageRank.hs @@ -73,6 +73,7 @@ scale :: Float -> Scorer -> Scorer scale mx sc = fracScor mx (total sc) -- calculates number of versions from version list + major :: Num a => [a] -> a major (x : _) = x major _ = 0