feat: Add configurable Scraper list (#1113)
* Add configurable Scraper list

* Fix "Cusomt" typo

* Add url to scrapers.json and remove old sites

URL is now the main ID for a site in scrapers.json (see the example entry below).
ID is optional and defaults to a value derived from the URL.
Note: company is optional.
Create scraper attribute filters.

* Add scene scraper_id to enable site-scene linking

* Various

Fix bundle import/export, add version check to avoid importing old IDs
Improve logic for unknown studios from private forks

* Remove custom site accidentally left in migration

* fix icon url (five follow-up commits)

* shift wankzvr, milfvr and transvr to povr

also deletes the old wankzvr scraper

---------

Co-authored-by: crwxaj <[email protected]>
toshski and crwxaj authored Feb 6, 2023
1 parent 555ec5f commit 0d1c91b
Showing 47 changed files with 1,074 additions and 343 deletions.
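Since the URL is now the primary key for a site, a custom studio can be declared in scrapers.json with little more than a url and a name. Below is a minimal sketch of a custom SLR entry matching the ScraperConfig struct added in pkg/config/scraper_list.go; the studio URL, names, and avatar are hypothetical:

{
  "custom": {
    "slr": [
      {
        "url": "https://www.sexlikereal.com/studios/example-studio",
        "name": "Example Studio",
        "company": "Example Company",
        "avatar_url": "https://example.com/logo.png"
      }
    ]
  }
}

For this entry, SetSiteId would derive the ID example-studio-slr. An explicit "id" is only honored for official entries; custom IDs are always rebuilt from the URL plus the aggregator suffix.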
4 changes: 4 additions & 0 deletions pkg/api/scenes.go
@@ -340,6 +340,10 @@ func (i SceneResource) getFilters(req *restful.Request, resp *restful.Response)
outAttributes = append(outAttributes, "MKX200")
outAttributes = append(outAttributes, "MKX220")
outAttributes = append(outAttributes, "VRCA220")
outAttributes = append(outAttributes, "POVR Scraper")
outAttributes = append(outAttributes, "SLR Scraper")
outAttributes = append(outAttributes, "VRPHub Scraper")
outAttributes = append(outAttributes, "VRPorn Scraper")
type Results struct {
Result string
}
3 changes: 2 additions & 1 deletion pkg/api/tasks.go
@@ -120,6 +120,7 @@ func (i TaskResource) exportNewFunscripts(req *restful.Request, resp *restful.Response)

func (i TaskResource) backupBundle(req *restful.Request, resp *restful.Response) {
inclAllSites, _ := strconv.ParseBool(req.QueryParameter("allSites"))
onlyIncludeOfficalSites, _ := strconv.ParseBool(req.QueryParameter("onlyIncludeOfficalSites"))
inclScenes, _ := strconv.ParseBool(req.QueryParameter("inclScenes"))
inclFileLinks, _ := strconv.ParseBool(req.QueryParameter("inclLinks"))
inclCuepoints, _ := strconv.ParseBool(req.QueryParameter("inclCuepoints"))
@@ -132,7 +133,7 @@ func (i TaskResource) backupBundle(req *restful.Request, resp *restful.Response)
playlistId := req.QueryParameter("playlistId")
download := req.QueryParameter("download")

bundle := tasks.BackupBundle(inclAllSites, inclScenes, inclFileLinks, inclCuepoints, inclHistory, inclPlaylists, inclActorAkas, inclVolumes, inclSites, inclActions, playlistId)
bundle := tasks.BackupBundle(inclAllSites, onlyIncludeOfficalSites, inclScenes, inclFileLinks, inclCuepoints, inclHistory, inclPlaylists, inclActorAkas, inclVolumes, inclSites, inclActions, playlistId, "", "")
if download == "true" {
resp.WriteHeaderAndEntity(http.StatusOK, ResponseBackupBundle{Response: "Ready to Download from http://xxx.xxx.xxx.xxx:9999/download/xbvr-content-bundle.json"})
} else {
159 changes: 159 additions & 0 deletions pkg/config/scraper_list.go
@@ -0,0 +1,159 @@
package config

import (
_ "embed"
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
"strings"
"sync"

"github.com/xbapps/xbvr/pkg/common"
"github.com/xbapps/xbvr/pkg/models"
)

//go:embed scrapers.json
var officalList []byte

type ScraperList struct {
Warnings []string `json:"warning"`
CustomScrapers CustomScrapers `json:"custom"`
XbvrScrapers XbvrScrapers `json:"xbvr"`
}
type XbvrScrapers struct {
PovrScrapers []ScraperConfig `json:"povr"`
SlrScrapers []ScraperConfig `json:"slr"`
VrpornScrapers []ScraperConfig `json:"vrporn"`
VrphubScrapers []ScraperConfig `json:"vrphub"`
}
type CustomScrapers struct {
PovrScrapers []ScraperConfig `json:"povr"`
SlrScrapers []ScraperConfig `json:"slr"`
VrpornScrapers []ScraperConfig `json:"vrporn"`
VrphubScrapers []ScraperConfig `json:"vrphub"`
}
type ScraperConfig struct {
ID string `json:"-"`
URL string `json:"url"`
Name string `json:"name"`
Company string `json:"company"`
AvatarUrl string `json:"avatar_url"`
FileID string `json:"id,omitempty"`
}

var loadLock sync.Mutex

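// Load reads the embedded official scraper list, merges in the user's
// scrapers.json from AppDir (seeding that file on first run), assigns site
// IDs, prunes custom entries that have since become official, and writes
// the merged result back to disk.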
func (o *ScraperList) Load() error {
loadLock.Lock()
defer loadLock.Unlock()

// load standard scraper config embedded in the distribution
var officalScrapers ScraperList
json.Unmarshal(officalList, &officalScrapers)

fName := filepath.Join(common.AppDir, "scrapers.json")
if _, err := os.Stat(fName); os.IsNotExist(err) {
// first run: seed the user's scrapers.json with the official list, then
// fall through so the official entries are merged and site IDs assigned
list, _ := json.MarshalIndent(officalScrapers, "", " ")
ioutil.WriteFile(fName, list, 0644)
} else {
b, err := ioutil.ReadFile(fName)
if err != nil {
o.XbvrScrapers = officalScrapers.XbvrScrapers
return err
}
json.Unmarshal(b, &o)
}

// overwrite the local file's official list with the embedded one
o.XbvrScrapers = officalScrapers.XbvrScrapers
o.Warnings = officalScrapers.Warnings

SetSiteId(&o.XbvrScrapers.PovrScrapers, "")
SetSiteId(&o.XbvrScrapers.SlrScrapers, "")
SetSiteId(&o.XbvrScrapers.VrphubScrapers, "")
SetSiteId(&o.XbvrScrapers.VrpornScrapers, "")
SetSiteId(&o.CustomScrapers.PovrScrapers, "povr")
SetSiteId(&o.CustomScrapers.SlrScrapers, "slr")
SetSiteId(&o.CustomScrapers.VrphubScrapers, "vrphub")
SetSiteId(&o.CustomScrapers.VrpornScrapers, "vrporn")

// remove custom sites that are now official for the same aggregation site
o.CustomScrapers.PovrScrapers = RemoveCustomListNowOffical(o.CustomScrapers.PovrScrapers, o.XbvrScrapers.PovrScrapers)
o.CustomScrapers.SlrScrapers = RemoveCustomListNowOffical(o.CustomScrapers.SlrScrapers, o.XbvrScrapers.SlrScrapers)
o.CustomScrapers.VrphubScrapers = RemoveCustomListNowOffical(o.CustomScrapers.VrphubScrapers, o.XbvrScrapers.VrphubScrapers)
o.CustomScrapers.VrpornScrapers = RemoveCustomListNowOffical(o.CustomScrapers.VrpornScrapers, o.XbvrScrapers.VrpornScrapers)

list, err := json.MarshalIndent(o, "", " ")
if err == nil {
ioutil.WriteFile(fName, list, 0644)
}

return nil
}

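// RemoveCustomListNowOffical filters out custom scrapers whose URL now
// matches an official scraper. Matched entries have their scenes flagged
// with needs_update and their Site record deleted, so the official scraper
// can take over.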
func RemoveCustomListNowOffical(customSiteList []ScraperConfig, officalSiteList []ScraperConfig) []ScraperConfig {
newList := []ScraperConfig{}
for _, customSite := range customSiteList {
if !CheckMatchingSite(customSite, officalSiteList) {
newList = append(newList, customSite)
} else {
db, _ := models.GetDB()
defer db.Close()
db.Model(&models.Scene{}).Where("scraper_id = ?", customSite.ID).Update("needs_update", true)
db.Delete(&models.Site{ID: customSite.ID})
common.Log.Infof("Studio %s is now an official studio and has been shifted from your custom list. Enable the official scraper and run it to update existing scenes", customSite.Name)
}
}
return newList
}

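// CheckMatchingSite reports whether findSite's URL appears in searchList,
// comparing case-insensitively and ignoring any trailing slash.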
func CheckMatchingSite(findSite ScraperConfig, searchList []ScraperConfig) bool {
for _, customSite := range searchList {
s1 := strings.ToLower(customSite.URL)
s2 := strings.ToLower(findSite.URL)
if !strings.HasSuffix(s1, "/") {
s1 += "/"
}
if !strings.HasSuffix(s2, "/") {
s2 += "/"
}
if s1 == s2 {
return true
}
}
return false
}
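
// GetMatchingSite returns the entry in searchList with exactly the same URL
// as findSite (no normalization), or a zero ScraperConfig if none matches.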
func GetMatchingSite(findSite ScraperConfig, searchList []ScraperConfig) ScraperConfig {
for _, site := range searchList {
if findSite.URL == site.URL {
return site
}
}
return ScraperConfig{}
}
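
// CheckMatchingSiteID reports whether findSite's ID appears in searchList.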
func CheckMatchingSiteID(findSite ScraperConfig, searchList []ScraperConfig) bool {
for _, customSite := range searchList {
if findSite.ID == customSite.ID {
return true
}
}
return false
}

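// SetSiteId assigns each scraper's runtime ID. Official scrapers keep the
// "id" from scrapers.json when present; otherwise the ID is the last path
// segment of the URL, lowercased. Custom scrapers always derive the ID from
// the URL and append "-<aggregator>", e.g. a URL ending in /example-studio
// with customId "slr" yields "example-studio-slr".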
func SetSiteId(configList *[]ScraperConfig, customId string) {
for idx, siteconfig := range *configList {
if siteconfig.FileID == "" || customId != "" {
id := strings.TrimRight(siteconfig.URL, "/")
siteconfig.ID = strings.ToLower(id[strings.LastIndex(id, "/")+1:])
} else {
siteconfig.ID = strings.ToLower(siteconfig.FileID)
}
if customId != "" {
siteconfig.ID = strings.ToLower(siteconfig.ID + "-" + customId)
}
(*configList)[idx] = siteconfig
}
}
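
For orientation, here is a minimal sketch of how a caller might consume the merged list. The registration hook XBVR actually uses is not part of this diff, so the loop bodies are purely illustrative:

package main

import (
    "fmt"
    "log"

    "github.com/xbapps/xbvr/pkg/config"
)

func main() {
    var list config.ScraperList
    if err := list.Load(); err != nil {
        log.Fatal(err)
    }
    // Official scrapers ship with the binary; custom ones come from the
    // user's scrapers.json and carry an aggregator suffix in their ID.
    for _, s := range list.XbvrScrapers.SlrScrapers {
        fmt.Printf("official: %s (%s) -> %s\n", s.ID, s.Name, s.URL)
    }
    for _, s := range list.CustomScrapers.SlrScrapers {
        fmt.Printf("custom:   %s (%s) -> %s\n", s.ID, s.Name, s.URL)
    }
}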