From 8664265027c05e93c87e4a9c307d5ea2363d90da Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 5 Mar 2025 13:26:48 -0500 Subject: [PATCH 01/46] delete (some) sharev2 api [ENG-7388] --- api/base/views.py | 3 - api/formattedmetadatarecords/serializers.py | 19 - api/formattedmetadatarecords/urls.py | 7 - api/formattedmetadatarecords/views.py | 15 - api/normalizeddata/serializers.py | 30 -- api/normalizeddata/urls.py | 7 - api/normalizeddata/views.py | 108 ------ api/schemas/urls.py | 7 - api/schemas/views.py | 13 - api/sourceregistrations/serializers.py | 18 - api/sourceregistrations/urls.py | 7 - api/sourceregistrations/views.py | 49 --- api/suids/serializers.py | 3 - api/urls.py | 5 - api/urls_v1.py | 9 - api/v1_schemas.py | 355 ----------------- api/views/__init__.py | 1 - api/views/workflow.py | 123 ------ project/urls.py | 1 - tests/api/test_generated_endpoints.py | 1 - tests/api/test_normalizeddata.py | 227 ----------- tests/api/test_providerregistration.py | 398 -------------------- tests/api/test_v1_push.py | 99 ----- tests/api/test_validator.py | 393 ------------------- 24 files changed, 1898 deletions(-) delete mode 100644 api/formattedmetadatarecords/serializers.py delete mode 100644 api/formattedmetadatarecords/urls.py delete mode 100644 api/formattedmetadatarecords/views.py delete mode 100644 api/normalizeddata/serializers.py delete mode 100644 api/normalizeddata/urls.py delete mode 100644 api/normalizeddata/views.py delete mode 100644 api/schemas/urls.py delete mode 100644 api/schemas/views.py delete mode 100644 api/sourceregistrations/serializers.py delete mode 100644 api/sourceregistrations/urls.py delete mode 100644 api/sourceregistrations/views.py delete mode 100644 api/urls_v1.py delete mode 100644 api/v1_schemas.py delete mode 100644 api/views/workflow.py delete mode 100644 tests/api/test_normalizeddata.py delete mode 100644 tests/api/test_providerregistration.py delete mode 100644 tests/api/test_v1_push.py delete mode 100644 tests/api/test_validator.py diff --git a/api/base/views.py b/api/base/views.py index 38b222bb1..6471c8d7d 100644 --- a/api/base/views.py +++ b/api/base/views.py @@ -45,12 +45,9 @@ def initial(self, request, *args, **kwargs): class RootView(views.APIView): def get(self, request): links = { - 'normalizeddata': 'api:normalizeddata-list', 'rawdata': 'api:rawdatum-list', - 'sourceregistrations': 'api:sourceregistration-list', 'sources': 'api:source-list', 'users': 'api:user-list', - 'schema': 'api:schema', 'status': 'api:status', 'rss': 'api:rss', 'atom': 'api:atom', diff --git a/api/formattedmetadatarecords/serializers.py b/api/formattedmetadatarecords/serializers.py deleted file mode 100644 index eabef161c..000000000 --- a/api/formattedmetadatarecords/serializers.py +++ /dev/null @@ -1,19 +0,0 @@ -from share import models - -from api.base import ShareSerializer -from api.fields import ShareIdentityField - - -class FormattedMetadataRecordSerializer(ShareSerializer): - # link to self - url = ShareIdentityField(view_name='api:formattedmetadatarecord-detail') - - class Meta: - model = models.FormattedMetadataRecord - fields = ( - 'suid', - 'record_format', - 'date_modified', - 'formatted_metadata', - 'url', - ) diff --git a/api/formattedmetadatarecords/urls.py b/api/formattedmetadatarecords/urls.py deleted file mode 100644 index a23eb6674..000000000 --- a/api/formattedmetadatarecords/urls.py +++ /dev/null @@ -1,7 +0,0 @@ -from rest_framework.routers import SimpleRouter -from api.formattedmetadatarecords import views - - -router = SimpleRouter() 
-router.register(r'formattedmetadatarecords', views.FormattedMetadataRecordViewSet, basename='formattedmetadatarecord') -urlpatterns = router.urls diff --git a/api/formattedmetadatarecords/views.py b/api/formattedmetadatarecords/views.py deleted file mode 100644 index 9df8a3a99..000000000 --- a/api/formattedmetadatarecords/views.py +++ /dev/null @@ -1,15 +0,0 @@ -from rest_framework import viewsets - -from api.formattedmetadatarecords.serializers import FormattedMetadataRecordSerializer -from api.base import ShareViewSet - -from share.models import FormattedMetadataRecord - - -class FormattedMetadataRecordViewSet(ShareViewSet, viewsets.ReadOnlyModelViewSet): - serializer_class = FormattedMetadataRecordSerializer - - ordering = ('id', ) - - def get_queryset(self): - return FormattedMetadataRecord.objects.all() diff --git a/api/normalizeddata/serializers.py b/api/normalizeddata/serializers.py deleted file mode 100644 index c00ccbd99..000000000 --- a/api/normalizeddata/serializers.py +++ /dev/null @@ -1,30 +0,0 @@ -from rest_framework_json_api import serializers - -from share import models - -from api import fields - - -class FullNormalizedDataSerializer(serializers.ModelSerializer): - # link to self - url = fields.ShareIdentityField(view_name='api:normalizeddata-detail') - - tasks = serializers.PrimaryKeyRelatedField(many=True, read_only=False, queryset=models.CeleryTaskResult.objects.all()) - source = serializers.HiddenField(default=serializers.CurrentUserDefault()) - suid = serializers.CharField(write_only=True, required=False) - - class Meta: - model = models.NormalizedData - fields = ('data', 'source', 'raw', 'tasks', 'url', 'suid') - - -class BasicNormalizedDataSerializer(serializers.ModelSerializer): - # link to self - url = fields.ShareIdentityField(view_name='api:normalizeddata-detail') - - source = serializers.HiddenField(default=serializers.CurrentUserDefault()) - suid = serializers.CharField(write_only=True, required=False) - - class Meta: - model = models.NormalizedData - fields = ('data', 'source', 'url', 'suid') diff --git a/api/normalizeddata/urls.py b/api/normalizeddata/urls.py deleted file mode 100644 index 7e209eda3..000000000 --- a/api/normalizeddata/urls.py +++ /dev/null @@ -1,7 +0,0 @@ -from rest_framework.routers import SimpleRouter -from api.normalizeddata import views - - -router = SimpleRouter() -router.register(r'normalizeddata', views.NormalizedDataViewSet, basename='normalizeddata') -urlpatterns = router.urls diff --git a/api/normalizeddata/views.py b/api/normalizeddata/views.py deleted file mode 100644 index 4db5aa857..000000000 --- a/api/normalizeddata/views.py +++ /dev/null @@ -1,108 +0,0 @@ -import logging -import json - -from django.urls import reverse -from rest_framework import status -from rest_framework import generics -from rest_framework.exceptions import ValidationError -from rest_framework.response import Response -import sentry_sdk - -from share import models as share_db -from share.util.graph import MutableGraph -from share.util.osf import guess_osf_guid -from api.base.views import ShareViewSet -from api.normalizeddata.serializers import BasicNormalizedDataSerializer -from api.normalizeddata.serializers import FullNormalizedDataSerializer -from api.pagination import CursorPagination -from api.permissions import ReadOnlyOrTokenHasScopeOrIsAuthenticated -from trove import digestive_tract - - -logger = logging.getLogger(__name__) - - -class NormalizedDataViewSet(ShareViewSet, generics.ListCreateAPIView, generics.RetrieveAPIView): - """View showing all 
normalized data in the SHARE Dataset. - - ## Submitting changes to the SHARE dataset - Changes, whether they are additions or modifications, are submitted as a subset of [JSON-LD graphs](https://www.w3.org/TR/json-ld/#named-graphs). - Each [node](https://www.w3.org/TR/json-ld/#dfn-node) of the graph MUST contain both an `@id` and `@type` key. - - Method: POST - Body (JSON): { - 'data': { - 'type': 'NormalizedData' - 'attributes': { - 'data': { - '@graph': [{ - '@type': , - '@id': <_:random>, - : , - : { - '@type': , - '@id': - } - }] - } - } - } - } - Success: 200 OK - """ - ordering = ('-id', ) - pagination_class = CursorPagination - permission_classes = (ReadOnlyOrTokenHasScopeOrIsAuthenticated, ) - required_scopes = ('upload_normalized_manuscript', ) - resource_name = 'NormalizedData' - - def get_serializer_class(self): - if not self.request.user.is_authenticated: - return BasicNormalizedDataSerializer - elif self.request.user.is_robot: - return FullNormalizedDataSerializer - return BasicNormalizedDataSerializer - - def get_queryset(self): - return share_db.NormalizedData.objects.all() - - def create(self, request, *args, **kwargs): - if share_db.FeatureFlag.objects.flag_is_up(share_db.FeatureFlag.IGNORE_SHAREV2_INGEST): - return Response({ - 'errors': [ - {'detail': ( - 'this route was deprecated and has been removed' - f' (use {reverse("trove:ingest-rdf")} instead)' - )}, - ], - }, status=status.HTTP_410_GONE) - try: - return self._do_create(request, *args, **kwargs) - except Exception: - sentry_sdk.capture_exception() # get some insight into common validation errors - raise - - def _do_create(self, request, *args, **kwargs): - serializer = self.get_serializer_class()(data=request.data, context={'request': request}) - serializer.is_valid(raise_exception=True) - - data = serializer.validated_data['data'] - suid = serializer.validated_data.get('suid', None) - if not suid: - # HACK: try for an osf guid -- may still be None tho - suid = guess_osf_guid(MutableGraph.from_jsonld(data)) - if not suid: - raise ValidationError("'suid' is a required attribute") - _task_id = digestive_tract.swallow__sharev2_legacy( - from_user=request.user, - record=json.dumps(data, sort_keys=True), - record_identifier=suid, - transformer_key='v2_push', - urgent=True, - ) - return Response({ - 'type': 'NormalizedData', - 'attributes': { - 'task': _task_id, - }, - }, status=status.HTTP_202_ACCEPTED) diff --git a/api/schemas/urls.py b/api/schemas/urls.py deleted file mode 100644 index 3e71428f5..000000000 --- a/api/schemas/urls.py +++ /dev/null @@ -1,7 +0,0 @@ -from django.urls import re_path as url - -from api.schemas import views - -urlpatterns = [ - url(r'^$', views.SchemaView.as_view(), name='schema'), -] diff --git a/api/schemas/views.py b/api/schemas/views.py deleted file mode 100644 index 20db28d26..000000000 --- a/api/schemas/views.py +++ /dev/null @@ -1,13 +0,0 @@ -from rest_framework import views -from rest_framework.response import Response - -from share.models.validators import JSONLDValidator - - -__all__ = ('SchemaView',) - - -class SchemaView(views.APIView): - def get(self, request, *args, **kwargs): - schema = JSONLDValidator.jsonld_schema.schema - return Response(schema) diff --git a/api/sourceregistrations/serializers.py b/api/sourceregistrations/serializers.py deleted file mode 100644 index b0eb8ca8c..000000000 --- a/api/sourceregistrations/serializers.py +++ /dev/null @@ -1,18 +0,0 @@ -from rest_framework_json_api import serializers - -from share import models - -from api.base import 
ShareSerializer - - -class ProviderRegistrationSerializer(ShareSerializer): - status = serializers.SerializerMethodField() - submitted_at = serializers.DateTimeField(read_only=True) - submitted_by = serializers.HiddenField(default=serializers.CurrentUserDefault()) - - def get_status(self, obj): - return models.ProviderRegistration.STATUS[obj.status] - - class Meta: - model = models.ProviderRegistration - fields = '__all__' diff --git a/api/sourceregistrations/urls.py b/api/sourceregistrations/urls.py deleted file mode 100644 index 16678ea20..000000000 --- a/api/sourceregistrations/urls.py +++ /dev/null @@ -1,7 +0,0 @@ -from rest_framework.routers import SimpleRouter -from api.sourceregistrations import views - - -router = SimpleRouter() -router.register(r'sourceregistrations', views.ProviderRegistrationViewSet, basename='sourceregistration') -urlpatterns = router.urls diff --git a/api/sourceregistrations/views.py b/api/sourceregistrations/views.py deleted file mode 100644 index 2f62d8d13..000000000 --- a/api/sourceregistrations/views.py +++ /dev/null @@ -1,49 +0,0 @@ -from rest_framework import generics -from rest_framework.permissions import IsAuthenticated - -from share.models import ProviderRegistration - -from api.base.views import ShareViewSet -from api.deprecation import deprecate -from api.pagination import CursorPagination -from api.sourceregistrations.serializers import ProviderRegistrationSerializer - - -@deprecate(pls_hide=True) -class ProviderRegistrationViewSet(ShareViewSet, generics.ListCreateAPIView, generics.RetrieveAPIView): - """View showing all registration data in the SHARE Dataset. - - ## Submit Registration. - - Create - - Method: POST - Body (JSON): { - "data": { - "type": "ProviderRegistration", - "attributes": { - "contact_name": "John Doe", - "contact_email": "email@email.com", - "contact_affiliation": "Organization affliation", - "direct_source": true, - "source_name": "Organization Name", - "source_description": "Organization description.", - "source_rate_limit": "(Optional) 1 request/second", - "source_documentation": "(Optional)", - "source_preferred_metadata_prefix": "(Optional)", - "source_oai": false, - "source_base_url": "(Optional)", - "source_disallowed_sets": "(Optional)", - "source_additional_info": "(Optional)" - } - } - } - - Success: 201 CREATED - """ - pagination_class = CursorPagination - permission_classes = (IsAuthenticated, ) - serializer_class = ProviderRegistrationSerializer - - def get_queryset(self): - return ProviderRegistration.objects.filter(submitted_by_id=self.request.user.pk) diff --git a/api/suids/serializers.py b/api/suids/serializers.py index b3d99758b..af332fb09 100644 --- a/api/suids/serializers.py +++ b/api/suids/serializers.py @@ -3,13 +3,11 @@ from api.base import ShareSerializer from api.fields import ShareIdentityField from api.sourceconfigs.serializers import SourceConfigSerializer -from api.formattedmetadatarecords.serializers import FormattedMetadataRecordSerializer class SuidSerializer(ShareSerializer): included_serializers = { 'source_config': SourceConfigSerializer, - 'formattedmetadatarecord_set': FormattedMetadataRecordSerializer, } # link to self @@ -21,5 +19,4 @@ class Meta: 'identifier', 'source_config', 'url', - 'formattedmetadatarecord_set', ) diff --git a/api/urls.py b/api/urls.py index 81500862c..de3dcdea0 100644 --- a/api/urls.py +++ b/api/urls.py @@ -9,19 +9,14 @@ urlpatterns = [ url('^$', RootView.as_view()), url('^', include('api.banners.urls')), - url('^', include('api.formattedmetadatarecords.urls')), - 
url('^', include('api.normalizeddata.urls')), url('^', include('api.rawdata.urls')), - url('^', include('api.sourceregistrations.urls')), url('^', include('api.sourceconfigs.urls')), url('^', include('api.sources.urls')), url('^', include('api.suids.urls')), url('^', include('api.users.urls')), - url('^schemas?/', include('api.schemas.urls'), name='schema'), url('^search/', include('api.search.urls'), name='search'), - # TODO refactor non-viewset endpoints to conform to new structure url(r'^status/?', views.ServerStatusView.as_view(), name='status'), url(r'^rss/?', views.LegacyCreativeWorksRSS(), name='rss'), url(r'^atom/?', views.LegacyCreativeWorksAtom(), name='atom'), diff --git a/api/urls_v1.py b/api/urls_v1.py deleted file mode 100644 index 4b112829b..000000000 --- a/api/urls_v1.py +++ /dev/null @@ -1,9 +0,0 @@ -from django.urls import re_path as url - -from api import views - -app_name = 'api' - -urlpatterns = [ - url(r'share/data/?', views.V1DataView.as_view(), name='v1data') -] diff --git a/api/v1_schemas.py b/api/v1_schemas.py deleted file mode 100644 index be7dcda43..000000000 --- a/api/v1_schemas.py +++ /dev/null @@ -1,355 +0,0 @@ -v1_push_schema = { - "definitions": { - "person": { - "required": [ - "name" - ], - "type": "object", - "description": "A person that is a contributor to the research object.", - "properties": { - "affiliation": { - "items": { - "$ref": "#/definitions/organization" - }, - "type": "array", - "description": "The organization(s) that this person is affiliated with. For example, a school/university." - }, - "givenName": { - "type": "string", - "description": "Also called the \"first name\", this element is preferred over using the combined \"name\" field." - }, - "additionalName": { - "type": "string", - "description": "Also called the \"middle name\", this element will be derived from the creator.name by SHARE if not supplied by the source." - }, - "name": { - "type": "string", - "description": "The name of the person if familyName, givenName, and/or additionalName." - }, - "sameAs": { - "items": { - "type": "string", - "description": "An HTTP URI that describes the person.", - "format": "uri" - }, - "type": "array", - "description": "An array of identifiers expressed as HTTP URIs that describe the person. For example, an ORCID, ResearcherID, arXiv author ID, ScopusID, ISNI, or other unique identifier expressed as an HTTP URI." - }, - "familyName": { - "type": "string", - "description": "Also called the \"last name\", this element is preferred over using the combined \"name\" field." - }, - "email": { - "type": "string", - "description": "The email address for this person.", - "format": "email" - } - } - }, - "sponsor": { - "required": [ - "sponsorName" - ], - "type": "object", - "description": "This describes the sponsor of the resource.", - "properties": { - "sponsorName": { - "type": "string", - "description": "The name of the entity responsible for sponsoring the resource, recorded here as text." - }, - "sponsorIdentifier": { - "type": "string", - "description": "A globally unique identifier for the sponsor of the resource should be recorded here.", - "format": "uri" - } - } - }, - "license": { - "required": [ - "uri" - ], - "type": "object", - "properties": { - "startDate": { - "type": "string", - "description": "The date and time at which the license will apply to this object. 
If the resource was always licensed this way, then this date can be omitted.", - "format": "date-time" - }, - "endDate": { - "type": "string", - "description": "The date and time at which this resource will no longer be licensed in this way.", - "format": "date-time" - }, - "uri": { - "type": "string", - "description": "The HTTP URI of the license of the object or--if startDate and endDate are included--in effect during the period listed.", - "format": "uri" - }, - "description": { - "type": "string", - "description": "Text describing to what aspect of the object the license is applied." - } - } - }, - "organization": { - "required": [ - "name" - ], - "type": "object", - "description": "An organization or institution.", - "properties": { - "sameAs": { - "items": { - "type": "string", - "description": "A single HTTP URI that describes this organization", - "format": "uri" - }, - "type": "array", - "description": "Identifiers that describe this organization" - }, - "name": { - "type": "string", - "description": "The name of the organization." - }, - "email": { - "type": "string", - "description": "An email address for this organization", - "format": "uri" - } - } - }, - "otherProperties": { - "required": [ - "name", - "properties" - ], - "type": "object", - "properties": { - "description": { - "type": "string", - "description": "A description of this collection of properties." - }, - "properties": { - "type": "object", - "description": "The collection of key/value pair properties." - }, - "name": { - "type": "string", - "description": "A name that describes this collection of properties." - }, - "uri": { - "type": "string", - "description": "A URI that points to the definition, schema, and/or vocabulary of this entry.", - "format": "uri" - } - } - }, - "award": { - "required": [ - "awardName" - ], - "type": "object", - "description": "The award made in support of the object.", - "properties": { - "awardIdentifier": { - "type": "string", - "description": "An HTTP URI for the award.", - "format": "uri" - }, - "awardName": { - "type": "string", - "description": "The textual representation of the award identifier as issued by the sponsor." - } - } - }, - "sponsorship": { - "required": [ - "sponsor" - ], - "type": "object", - "description": "A sponsorship associated with the resource.", - "properties": { - "sponsor": { - "$ref": "#/definitions/sponsor" - }, - "award": { - "$ref": "#/definitions/award" - } - } - } - }, - "$schema": "http://json-schema.org/draft-04/schema#", - "required": ["jsonData"], - "additionalproperties": False, - "type": "object", - "properties": { - "jsonData": { - "required": [ - "title", - "contributors", - "uris", - "providerUpdatedDateTime" - ], - "type": "object", - "properties": { - "publisher": { - "type": "object", - "anyOf": [ - { - "$ref": "#/definitions/person" - }, - { - "$ref": "#/definitions/organization" - } - ], - "description": "This element contains the name of the entity, typically a 'publisher', responsible for making the version of record of the resource available. This could be a person, organisation or service" - }, - "description": { - "type": "string", - "description": "A textual description of the resource." - }, - "contributors": { - "items": { - "anyOf": [ - { - "$ref": "#/definitions/person" - }, - { - "$ref": "#/definitions/organization" - } - ] - }, - "type": "array", - "description": "The people or organizations responsible for making contributions to an object." 
- }, - "title": { - "type": "string", - "description": "The title and any sub-titles of the resource." - }, - "otherProperties": { - "items": { - "$ref": "#/definitions/otherProperties" - }, - "type": "array", - "description": "Any structured or unstructured properties (properties that do or do not include URIs to definitions) that do not fall into the schema provided." - }, - "tags": { - "items": { - "type": "string" - }, - "type": "array", - "description": "Non-hierarchical terms or keywords assigned to an object to aid browsing or searching." - }, - "uris": { - "type": "object", - "required": ["canonicalUri", "providerUris"], - "properties": { - "canonicalUri": { - "type": "string", - "description": "The preferred persistent HTTP URI that represents the research object. This should be repeated in exactly one other field in the uris object.", - "format": "uri" - }, - "anyOf": { - "providerUris": { - "items": { - "type": "string", - "format": "uri" - }, - "type": "array", - "description": "The persistent HTTP URI that points to the object's record at the SHARE provider regardless of format." - }, - "descriptorUris": { - "items": { - "type": "string", - "format": "uri" - }, - "type": "array", - "description": "A persistent HTTP URI that points to a description of the research object." - }, - "objectUris": { - "items": { - "type": "string", - "format": "uri" - }, - "type": "array", - "description": "A persistent HTTP URI that points directly to the research object." - } - } - } - }, - "languages": { - "items": { - "pattern": "[a-z][a-z]?[a-z]", - "type": ["string", "null"] - }, - "type": "array", - "description": "The primary languages in which the content of the resource is presented. Values used for this element MUST conform to ISO 639\u20133. This offers two and three letter tags e.g. \"en\" or \"eng\" for English and \"en-GB\" for English used in the UK." - }, - "providerUpdatedDateTime": { - "type": "string", - "description": "The date and time the provider describing the object has been updated about either the creation or update of an object by its contributors. E.g., this may be the date a manuscript is published, but not necessarily the date the manuscript was written.", - "format": "date-time" - }, - "sponsorships": { - "items": { - "$ref": "#/definitions/sponsorship" - }, - "type": "array", - "description": "Sponsorships associated with the object" - }, - "version": { - "type": "object", - "description": "Infomation about this version of the object.", - "properties": { - "versionId": { - "description": "The name or number representing this version of the object." - }, - "versionDateTime": { - "type": "string", - "description": "The date and time the object was created or updated by its contributors. If the data for the object describes the first version of that object, it will be the date and time of object creation, otherwise it will be considered the date and time the object was updated.", - "format": "date-time" - }, - "versionOf": { - "type": "string", - "description": "If the object is an update, the HTTP URI of the object the content is updating. Depending upon what the upate is relative to, that could be the object at creation or a previous version.", - "format": "uri" - } - } - }, - "freeToRead": { - "required": [ - "startDate" - ], - "type": "object", - "description": "A date range specifying when this research object will be accessible, without restrictsions such as fee or registration). 
If the object is free to read, then only the startDate is required.", - "properties": { - "startDate": { - "type": ["string", "null"], - "description": "The date and time at which the object will be accessible. If the resource was always free to read, then the date the object was created should be used.", - "format": "date" - }, - "endDate": { - "type": "string", - "description": "The date and time at which restrictions such as fees or registrations will be in place limiting accessibility.", - "format": "date" - } - } - }, - "licenses": { - "items": { - "$ref": "#/definitions/license" - }, - "type": "array", - "description": "The licenses under which the object has been released." - }, - "subjects": { - "items": { - "type": "string" - }, - "type": "array", - "description": "The topic or domain of the object. Follows recommendations of http://dublincore.org/documents/2012/06/14/dcmi-terms/?v=elements#terms-subject" - } - } - } - } -} diff --git a/api/views/__init__.py b/api/views/__init__.py index f9313083f..5c4a15d25 100644 --- a/api/views/__init__.py +++ b/api/views/__init__.py @@ -1,3 +1,2 @@ -from .workflow import * # noqa from .share import * # noqa from .feeds import * # noqa diff --git a/api/views/workflow.py b/api/views/workflow.py deleted file mode 100644 index 464c0e0dc..000000000 --- a/api/views/workflow.py +++ /dev/null @@ -1,123 +0,0 @@ -import jsonschema - -from django.db import transaction - -from rest_framework import views, status -from rest_framework.exceptions import ParseError -from rest_framework.parsers import JSONParser -from rest_framework.renderers import JSONRenderer -from rest_framework.response import Response - -from trove import digestive_tract - -from api import v1_schemas -from api.authentication import APIV1TokenBackPortAuthentication -from api.deprecation import deprecate -from api.permissions import ReadOnlyOrTokenHasScopeOrIsAuthenticated -from api.normalizeddata.serializers import BasicNormalizedDataSerializer - - -__all__ = ('V1DataView', ) - - -@deprecate(pls_hide=False) -class V1DataView(views.APIView): - """View allowing sources to post SHARE v1 formatted metadata directly to the SHARE Dataset. - - ## Submit Data in SHARE v1 Format - Please note that this endpoint is to ease the transition from SHARE v1 to SHARE v2 and sources - are encouraged to transition to submitting metadata in the SHARE v2 format. - - Submitting data through the normalizeddata endpoint is strongly preferred as support for - the v1 format will not be continued. 
- - v1 Format - - For the full format please see https://github.com/erinspace/shareregistration/blob/master/push_endpoint/schemas.py - - Required Fields: [ - "title", - "contributors", - "uris", - "providerUpdatedDateTime" - ], - - Create - - Method: POST - Body (JSON): { - { - "jsonData": { - "publisher":{ - "name": , - "uri": - }, - "description": , - "contributors":[ - { - "name":, - "email": , - "sameAs": - }, - { - "name": - } - ], - "title": , - "tags":[ - <tag>, - <tag> - ], - "languages":[ - <language> - ], - "providerUpdatedDateTime": <time submitted>, - "uris": { - "canonicalUri": <uri>, - "providerUris":[ - <uri> - ] - } - } - } - } - Success: 200 OK - """ - authentication_classes = (APIV1TokenBackPortAuthentication, ) - permission_classes = (ReadOnlyOrTokenHasScopeOrIsAuthenticated, ) - serializer_class = BasicNormalizedDataSerializer - renderer_classes = (JSONRenderer, ) - parser_classes = (JSONParser,) - - def post(self, request, *args, **kwargs): - - try: - jsonschema.validate(request.data, v1_schemas.v1_push_schema) - except (jsonschema.exceptions.ValidationError) as error: - raise ParseError(detail=error.message) - - try: - prelim_data = request.data['jsonData'] - except ParseError as error: - return Response( - 'Invalid JSON - {0}'.format(error.message), - status=status.HTTP_400_BAD_REQUEST - ) - - # store raw data, assuming you can only submit one at a time - with transaction.atomic(): - try: - doc_id = prelim_data['uris']['canonicalUri'] - except KeyError: - return Response({'errors': 'Canonical URI not found in uris.', 'data': prelim_data}, status=status.HTTP_400_BAD_REQUEST) - - _task_id = digestive_tract.swallow__sharev2_legacy( - from_user=request.user, - record=prelim_data, - record_identifier=doc_id, - transformer_key='v1_push', - urgent=True, - ) - return Response({ - 'task_id': _task_id, - }, status=status.HTTP_202_ACCEPTED) diff --git a/project/urls.py b/project/urls.py index 1cbb18a24..54aa68120 100644 --- a/project/urls.py +++ b/project/urls.py @@ -23,7 +23,6 @@ path('vocab/2023/trove/<path:vocab_term>', view=TroveVocabView.as_view(), name='trove-vocab'), url(r'^api/v2/', include('api.urls', namespace='api')), url(r'^api/(?P<path>(?!v\d+).*)', APIVersionRedirectView.as_view()), - url(r'^api/v1/', include('api.urls_v1', namespace='api_v1')), url(r'^oai-pmh/', OAIPMHView.as_view(), name='oai-pmh'), url(r'^o/', include('oauth2_provider.urls', namespace='oauth2_provider')), url(r'^accounts/social/login/cancelled/', osf_oauth2_adapter_views.login_errored_cancelled), diff --git a/tests/api/test_generated_endpoints.py b/tests/api/test_generated_endpoints.py index 4ac5af8fe..d0605f6c8 100644 --- a/tests/api/test_generated_endpoints.py +++ b/tests/api/test_generated_endpoints.py @@ -6,7 +6,6 @@ # TODO these tests belong somewhere else @pytest.mark.django_db @pytest.mark.parametrize('endpoint, factory', [ - ('normalizeddata', factories.NormalizedDataFactory), ('rawdata', factories.RawDatumFactory), ]) class TestPagination: diff --git a/tests/api/test_normalizeddata.py b/tests/api/test_normalizeddata.py deleted file mode 100644 index 97d4dfa44..000000000 --- a/tests/api/test_normalizeddata.py +++ /dev/null @@ -1,227 +0,0 @@ -from unittest import mock -import json -import pytest -import requests - -from share.util import IDObfuscator - -from tests import factories - - -class Response: - def __init__(self, status_code=200, json=None, keys=None): - self.status_code = status_code - self._json = json or {} - self._keys = keys - - def json(self): - return self._json - - def 
__eq__(self, other): - assert other.status_code == self.status_code - - if self._keys: - assert set(other.json().keys()) == self._keys - else: - assert other.json() == self.json() - - return True - - -class TestPostNormalizedData: - - POST_CASES = [{ - 'authorized': False, - 'out': Response(401, json={'errors': [{ - 'code': 'not_authenticated', - 'detail': 'Authentication credentials were not provided.', - 'source': {'pointer': '/data'}, - 'status': '401' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={'data': {'type': 'NormalizedData'}}) - }, { - 'authorized': False, - 'out': Response(401, json={'errors': [{ - 'code': 'not_authenticated', - 'detail': 'Authentication credentials were not provided.', - 'source': {'pointer': '/data'}, - 'status': '401' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - '@graph': [{'@type': 'person', 'given_name': 'Jim'}] - } - } - }) - }, { - 'authorized': False, - 'out': Response(401, json={'errors': [{ - 'code': 'not_authenticated', - 'detail': 'Authentication credentials were not provided.', - 'source': {'pointer': '/data'}, - 'status': '401' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json', 'Authorization': 'Foo'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - '@graph': [{'@type': 'person', 'given_name': 'Jim'}] - } - } - }) - }, { - 'out': Response(400, json={'errors': [{ - 'code': 'parse_error', - 'detail': 'Received document does not contain primary data', - 'source': {'pointer': '/data'}, - 'status': '400' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={}) - }, { - 'out': Response(400, json={'errors': [{ - 'code': 'parse_error', - 'detail': 'JSON parse error - Expecting value: line 1 column 1 (char 0)', - 'source': {'pointer': '/data'}, - 'status': '400' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, data='<html!>') - }, { - 'out': Response(400, json={ - 'errors': [ - { - 'code': 'invalid', - 'detail': '@graph may not be empty', - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - } - ] - }), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'data': {'@graph': []} - } - } - }) - }, { - 'out': Response(202, keys={'data'}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'jim', - 'data': { - '@graph': [{ - '@id': '_:100', - '@type': 'Person', - 'given_name': 'Jim', - }] - } - } - } - }) - }, { - 'out': Response(202, keys={'data'}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'data': { - '@graph': [{ - '@id': '_:100', - '@type': 'CreativeWork', - 'title': 'Jim', - }, { - '@id': '_:101', - '@type': 'WorkIdentifier', - 'creative_work': {'@type': 'CreativeWork', '@id': '_:100'}, - # a recognizable OSF guid means no suid is required - 'uri': 'https://osf.io/jimbo', - }] - } - } - } - }) - }, { - 'out': Response(400, json={ - 'errors': [ - { - 'code': 'invalid', - 'detail': "'suid' is a required attribute", - 'source': {'pointer': '/data'}, - 'status': '400' - } - ] - }), - 'in': 
requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'data': { - '@graph': [{ - '@id': '_:100', - '@type': 'CreativeWork', - 'title': 'Jim', - }] - } - } - } - }) - }, { - 'out': Response(400, json={ - 'errors': [ - { - 'code': 'invalid', - 'detail': "'@id' is a required property at /@graph/0", - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - } - ] - }), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'jim', - 'data': { - '@graph': [{ - '@type': 'Person', - 'given_name': 'Jim', - }] - } - } - } - }) - }] - - @pytest.mark.django_db - @pytest.mark.parametrize('_request, response, authorized', [(case['in'], case['out'], case.get('authorized', True)) for case in POST_CASES]) - def test_post_data(self, trusted_user, client, _request, response, authorized): - args, kwargs = (), {'content_type': 'application/vnd.api+json'} - - if _request.data: - kwargs['data'] = _request.data - elif _request.json is not None: - kwargs['data'] = json.dumps(_request.json) - - if authorized: - kwargs['HTTP_AUTHORIZATION'] = 'Bearer {}'.format(trusted_user.oauth2_provider_accesstoken.first()) - - with mock.patch('api.normalizeddata.views.digestive_tract') as mock_digestive_tract: - mock_digestive_tract.swallow__sharev2_legacy.return_value = '123' - assert response == client.post('/api/v2/normalizeddata/', *args, **kwargs) - - -@pytest.mark.django_db -class TestGetNormalizedData: - - def test_by_id(self, client): - nd = factories.NormalizedDataFactory(data={'@graph': []}) - resp = client.get('/api/v2/normalizeddata/{}/'.format(IDObfuscator.encode(nd))) - assert resp.status_code == 200 - assert resp.json()['data']['id'] == IDObfuscator.encode(nd) - assert resp.json()['data']['type'] == 'NormalizedData' - assert resp.json()['data']['attributes']['data'] == {'@graph': []} diff --git a/tests/api/test_providerregistration.py b/tests/api/test_providerregistration.py deleted file mode 100644 index 33b5efa25..000000000 --- a/tests/api/test_providerregistration.py +++ /dev/null @@ -1,398 +0,0 @@ -import json -import pytest -import requests - -from share import models -from share.util import IDObfuscator - - -class Response: - def __init__(self, status_code=200, json=None, keys=None): - self.status_code = status_code - self._json = json or {} - self._keys = keys - - def json(self): - return self._json - - def __eq__(self, other): - assert other.status_code == self.status_code - - if self._keys: - assert set(other.json().keys()) == self._keys - else: - assert other.json() == self.json() - - return True - - -class TestPostProviderRegistration: - - # 301 characters - LONG_MESSAGE = ( - '3Hc0q7ZkM10seNA8lt2h20ggq8S7NxbBKvAtkAI7S2J4RJeiij' - '8m3TMfv4TI7AXzHI8jrGnK7TlI95z75yRFvmLBfNV4KCaj6acv' - 'g22Vca8DIhmFyFrxkfRTW88WHqUvaTPDl4646AHSo7kkXrMNS2' - 'eZGfjKa8G7kcVX2OyZp2Fv3zvHiKFH9UeEnEnZAqrRTC3QSTjK' - 'DztGEwvsfQsjLzTp42pLXeFLw91y4eJ0jkgQC4KqFLwgtaD5FX' - 'RrNkZDG8HqKoQbevHbMXTYRtMjYfuJZisyv7BuWb5EhZGU7yTv' - 'a' - ) - - # 1001 characters - LONGER_MESSAGE = ( - 'OZvaNw9SZ5Z1aCSTqntvaAUEhAXEOCbcz3wYQ3c8KLTKPG6iWg' - 'q1PVZllPA0KlkfACtLvKq1s7aB61ITWJDpuBFXv9KGhMzh8WgZ' - 'jjhohpb56sOES6iTMGKuylDXKrsgVhj6EWPDWgHFCRBpFVLRYn' - 'ZHsle8s4EsqVqpOzw5mEGgmGxes8vjgoYRExRkmblu4lWMpbNy' - 'jgCpUl5oQ3l4NWpZRCHF4SuXVucZxaB8CTC9MUustkz8weEYEG' - 'xiGB7h2N1z6ZqyuLq8Y0zyXL2BmyRAuc8yHLpqBg7ncTCLuqfy' - 
'iiFEu2hINuTaSjBFHv6V90MgJMq4C0HpnTMJYTMhG4cpuS767a' - 'Eu6FlirlnZvaZQaGVGiHnSj73nHj2C0SKQy8NEIX3UQbmJQ0X6' - 'sCKhSkQPHjn75InaZlNLHRUSUI1O555ITFJwJIlX0SRcYvWVy9' - 'v5LMxquRbOExuEAqczqnABQJeDRkQnVbcL2wXc6DT2PcNk0Vjc' - 'OC5xk3R47Z7eG2wllb18YZ7gAvjhL7lNAMe8xPJnloW77XCrR0' - 'OumiIvXohWLlRy36oYsQgitoUqCl94By6Z8mo5Gn7tSe6Knc6G' - 'gfz8Ym6agWnrEWjO05VLZX5x933P1vgZeu73Vs0BY1OQ5R6gff' - 'k6Rl6nSvMA5qPT4RJJClprCQSRTCANpUwoniWh4Zhp2fWxQ1Mk' - 'Jnuwv725r7HJYgEel0BIDU8kipqty2f8yQZrEDslGnc3CBk6vJ' - 'BifwaCLHhAsILbMUaLGP6T3uInH9SThPkFpvE0jo5iGYHEj3xk' - '2TAil3ibrmDrVYw2FnWZDBWNRQbCHwMXWNQZmWkgukPPfC4m5Z' - 'uXp8f8RfKI09oI3S7ZYyGLxab83fJJgmvsIcRW3necJMyG4Qz6' - 'c5yvRkkBmuCf4cLiRJHJtfiF1MlOo5auAUbbuXFyA4foqqUbwX' - 'q6XKFR2H2U2sQQNgBtgksfPfAfT2kM8czRQHb2qBOqYus6hIgP' - 'a' - ) - - POST_CASES = [{ - 'authorized': False, - 'out': Response(401, json={'errors': [{ - 'code': 'not_authenticated', - 'detail': 'Authentication credentials were not provided.', - 'source': {'pointer': '/data'}, - 'status': '401' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={'data': {'type': 'ProviderRegistration'}}) - }, { - 'authorized': False, - 'out': Response(401, json={'errors': [{ - 'code': 'not_authenticated', - 'detail': 'Authentication credentials were not provided.', - 'source': {'pointer': '/data'}, - 'status': '401' - }]}), - 'in': requests.Request('POST', headers={'Content-Type': 'application/vnd.api+json'}, json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': { - 'contactName': 'Test' - } - } - }) - }, { - 'authorized': False, - 'out': Response(401, json={'errors': [{ - 'code': 'not_authenticated', - 'detail': 'Authentication credentials were not provided.', - 'source': {'pointer': '/data'}, - 'status': '401' - }]}), - 'in': requests.Request( - 'POST', - json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': {'contactName': 'Test'} - } - }, - headers={'Authorization': 'Foo'} - ) - }, { - 'out': Response(400, json={'errors': [{ - 'code': 'parse_error', - 'detail': 'Received document does not contain primary data', - 'source': {'pointer': '/data'}, 'status': '400'} - ]}), - 'in': requests.Request('POST', json={}) - }, { - 'out': Response(409, json={'errors': [{ - 'code': 'error', - 'detail': 'The resource object\'s type (None) is not the type that constitute the collection represented by the endpoint (ProviderRegistration).', - 'source': {'pointer': '/data'}, - 'status': '409' - }]}), - 'in': requests.Request('POST', json={'data': {}}) - }, { - 'out': Response(409, json={'errors': [{ - 'code': 'error', - 'detail': 'The resource object\'s type (None) is not the type that constitute the collection represented by the endpoint (ProviderRegistration).', - 'source': {'pointer': '/data'}, - 'status': '409' - }]}), - 'in': requests.Request('POST', json={ - 'data': { - 'attributes': {} - } - }) - }, { - 'out': Response(400, json={ - 'errors': [ - { - 'code': 'required', - 'detail': 'This field is required.', - 'source': {'pointer': '/data/attributes/contactAffiliation'}, - 'status': '400' - }, - { - 'code': 'required', - 'detail': 'This field is required.', - 'source': {'pointer': '/data/attributes/contactEmail'}, - 'status': '400'}, - { - 'code': 'required', - 'detail': 'This field is required.', - 'source': {'pointer': '/data/attributes/contactName'}, - 'status': '400' - }, - { - 'code': 'required', - 'detail': 'This field is required.', - 'source': {'pointer': '/data/attributes/sourceDescription'}, - 'status': '400' - }, - { - 'code': 
'required', - 'detail': 'This field is required.', - 'source': {'pointer': '/data/attributes/sourceName'}, - 'status': '400' - } - ] - }), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': {} - } - }) - }, { - 'out': Response(400, json={'errors': [ - { - 'code': 'parse_error', - 'detail': 'JSON parse error - Expecting value: line 1 column 1 (char 0)', - 'source': {'pointer': '/data'}, - 'status': '400' - } - ]}), - 'in': requests.Request('POST', data='<html!>') - }, { - 'out': Response(400, json={'errors': [ - { - 'code': 'invalid', - 'detail': 'Enter a valid email address.', - 'source': {'pointer': '/data/attributes/contactEmail'}, - 'status': '400' - }, - ]}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': { - 'contact_affiliation': 'Test', - 'contact_email': 'Bad email', - 'contact_name': 'Test', - 'source_description': 'Test', - 'source_name': 'Test' - } - } - }) - }, { - 'out': Response(201, keys={'data'}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': { - 'contact_affiliation': 'Test', - 'contact_email': 'good@email.com', - 'contact_name': 'Test', - 'source_description': 'Test', - 'source_name': 'Test' - } - } - }) - }, { - 'out': Response(400, json={'errors': [ - { - 'code': 'invalid', - 'detail': 'Enter a valid URL.', - 'source': {'pointer': '/data/attributes/sourceBaseUrl'}, - 'status': '400' - } - ]}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': { - 'contact_affiliation': 'Test', - 'contact_email': 'good@email.com', - 'contact_name': 'Test', - 'source_description': 'Test', - 'source_name': 'Test', - 'source_base_url': 'bad url' - } - } - }) - }, { - 'out': Response(201, keys={'data'}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': { - 'contact_affiliation': 'Test', - 'contact_email': 'good@email.com', - 'contact_name': 'Test', - 'source_description': 'Test', - 'source_name': 'Test', - 'source_base_url': 'https://www.goodurl.com' - } - } - }) - }, { - 'out': Response(400, json={'errors': [ - { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 'source': {'pointer': '/data/attributes/contactAffiliation'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 'source': {'pointer': '/data/attributes/contactName'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 1000 characters.', - 'source': {'pointer': '/data/attributes/sourceAdditionalInfo'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 1000 characters.', - 'source': {'pointer': '/data/attributes/sourceDescription'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 'source': {'pointer': '/data/attributes/sourceDisallowedSets'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 'source': {'pointer': '/data/attributes/sourceDocumentation'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 'source': {'pointer': '/data/attributes/sourceName'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 
'source': {'pointer': '/data/attributes/sourcePreferredMetadataPrefix'}, - 'status': '400' - }, { - 'code': 'max_length', - 'detail': 'Ensure this field has no more than 300 characters.', - 'source': {'pointer': '/data/attributes/sourceRateLimit'}, - 'status': '400' - } - ]}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'ProviderRegistration', - 'attributes': { - 'contact_affiliation': LONG_MESSAGE, - 'contact_email': 'good@email.com', - 'contact_name': LONG_MESSAGE, - 'source_description': LONGER_MESSAGE, - 'source_name': LONG_MESSAGE, - 'source_rate_limit': LONG_MESSAGE, - 'source_documentation': LONG_MESSAGE, - 'source_preferred_metadata_prefix': LONG_MESSAGE, - 'source_disallowed_sets': LONG_MESSAGE, - 'source_additional_info': LONGER_MESSAGE - } - } - }) - }] - - @pytest.mark.django_db - @pytest.mark.parametrize('_request, response, authorized', [(case['in'], case['out'], case.get('authorized', True)) for case in POST_CASES]) - def test_post_data(self, trusted_user, client, _request, response, authorized): - args, kwargs = (), {'content_type': 'application/vnd.api+json'} - - if _request.data: - kwargs['data'] = _request.data - elif _request.json is not None: - kwargs['data'] = json.dumps(_request.json) - - if authorized: - kwargs['HTTP_AUTHORIZATION'] = 'Bearer {}'.format(trusted_user.oauth2_provider_accesstoken.first()) - - assert response == client.post('/api/v2/sourceregistrations/', *args, **kwargs) - - @pytest.mark.django_db - def test_get_data(self, client): - assert client.get('/api/v2/sourceregistrations/').status_code == 401 - - @pytest.mark.django_db - def test_get_data_authorized(self, trusted_user, client): - resp = client.get( - '/api/v2/sourceregistrations/', - content_type='application/vnd.api+json', - HTTP_AUTHORIZATION='Bearer {}'.format(trusted_user.oauth2_provider_accesstoken.first()), - ) - - assert resp.status_code == 200 - assert resp.json() == { - 'data': [], - 'links': { - 'prev': None, - 'next': None, - }, - } - - @pytest.mark.django_db - def test_get_by_id(self, trusted_user, client): - reg = models.ProviderRegistration.objects.create( - contact_name='Tester Testington', - contact_email='Some@thi.ng', - contact_affiliation='Just some person', - source_name='PlsWerk', - submitted_by=trusted_user, - ) - - resp = client.get( - '/api/v2/sourceregistrations/{}/'.format(IDObfuscator.encode(reg)), - content_type='application/vnd.api+json', - HTTP_AUTHORIZATION='Bearer {}'.format(trusted_user.oauth2_provider_accesstoken.first()), - ) - - assert resp.status_code == 200 - assert resp.json()['data']['id'] == IDObfuscator.encode(reg) - assert resp.json()['data']['type'] == 'ProviderRegistration' - assert resp.json()['data']['attributes'] == { - 'contactAffiliation': 'Just some person', - 'contactEmail': 'Some@thi.ng', - 'contactName': 'Tester Testington', - 'directSource': False, - 'sourceAdditionalInfo': '', - 'sourceBaseUrl': '', - 'sourceDescription': '', - 'sourceDisallowedSets': '', - 'sourceDocumentation': '', - 'sourceName': 'PlsWerk', - 'sourceOai': False, - 'sourcePreferredMetadataPrefix': '', - 'sourceRateLimit': '', - 'status': 'pending', - 'submittedAt': resp.json()['data']['attributes']['submittedAt'] - } diff --git a/tests/api/test_v1_push.py b/tests/api/test_v1_push.py deleted file mode 100644 index 9a5d0319b..000000000 --- a/tests/api/test_v1_push.py +++ /dev/null @@ -1,99 +0,0 @@ -import json -import pytest -from unittest import mock - - -@pytest.mark.django_db -class TestV1PushProxy: - - @pytest.fixture - def mock_ingest(self): - with 
mock.patch('api.views.workflow.digestive_tract') as mock_digestive_tract: - mock_digestive_tract.swallow__sharev2_legacy.return_value = '123' - yield mock_digestive_tract.swallow__sharev2_legacy - - valid_data = { - "jsonData": { - "providerUpdatedDateTime": "2016-08-25T11:37:40Z", - "uris": { - "canonicalUri": "https://provider.domain/files/7d2792031", - "providerUris": ["https://provider.domain/files/7d2792031"] - }, - "contributors": [ - {"name": "Person1", "email": "one@provider.domain"}, - {"name": "Person2", "email": "two@provider.domain"}, - {"name": "Person3", "email": "three@provider.domain"}, - {"name": "Person4", "email": "four@provider.domain"} - ], - "title": "Title" - } - } - - @pytest.mark.parametrize('data', [{ - "jsonData": { - "providerUpdatedDateTime": "2016-08-25T11:37:40Z", - "uris": { - "providerUris": ["https://provider.domain/files/7d2792031"] - }, - "contributors": [ - {"name": "Person1", "email": "one@provider.domain"}, - {"name": "Person2", "email": "two@provider.domain"}, - {"name": "Person3", "email": "three@provider.domain"}, - {"name": "Person4", "email": "four@provider.domain"} - ], - "title": "Title" - } - }, { - }, { - "providerUpdatedDateTime": "2016-08-25T11:37:40Z", - "uris": { - "providerUris": ["https://provider.domain/files/7d2792031"] - }, - "contributors": [ - {"name": "Person1", "email": "one@provider.domain"}, - {"name": "Person2", "email": "two@provider.domain"}, - {"name": "Person3", "email": "three@provider.domain"}, - {"name": "Person4", "email": "four@provider.domain"} - ], - "title": "Title" - } - ]) - def test_invalid_data(self, client, trusted_user, data, mock_ingest): - assert client.post( - '/api/v1/share/data/', - json.dumps(data), - content_type='application/json', - HTTP_AUTHORIZATION='Bearer ' + trusted_user.oauth2_provider_accesstoken.first().token - ).status_code == 400 - assert not mock_ingest.called - - def test_valid_data(self, client, trusted_user, mock_ingest): - assert client.post( - '/api/v1/share/data/', - json.dumps(self.valid_data), - content_type='application/json', - HTTP_AUTHORIZATION='Bearer ' + trusted_user.oauth2_provider_accesstoken.first().token - ).status_code == 202 - - assert mock_ingest.called - - def test_unauthorized(self, client, mock_ingest): - assert client.post( - '/api/v1/share/data/', - json.dumps(self.valid_data), - content_type='application/json' - ).status_code == 401 - assert not mock_ingest.called - - def test_get(self, client, mock_ingest): - assert client.get('/api/v1/share/data/').status_code == 405 - assert not mock_ingest.called - - def test_token_auth(self, client, trusted_user, mock_ingest): - assert client.post( - '/api/v1/share/data/', - json.dumps({}), - content_type='application/json', - HTTP_AUTHORIZATION='Token ' + trusted_user.oauth2_provider_accesstoken.first().token - ).status_code == 400 - assert not mock_ingest.called diff --git a/tests/api/test_validator.py b/tests/api/test_validator.py deleted file mode 100644 index 722fc7ea8..000000000 --- a/tests/api/test_validator.py +++ /dev/null @@ -1,393 +0,0 @@ -import json -import pytest -import requests -from unittest import mock - -from tests import factories - -invalid_work = { - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'whatever', - 'data': { - '@graph': [ - { - '@type': 'InvalidWorkType', - 'title': 'Abstract Work', - '@id': '_:1bf1bf86939d433d96402090c33251d6', - } - ] - } - } - } -} - -invalid_proxy_work = { - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'whatever', - 'data': { - 
'@graph': [ - { - '@type': 'AbstractCreativeWork', - 'title': 'Abstract Work', - '@id': '_:1bf1bf86939d433d96402090c33251d6', - } - ] - } - } - } -} - -valid_work_valid_agent = { - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'whatever', - 'data': { - '@graph': [ - { - '@type': 'Organization', - '@id': '_:697f809c05ea4a6fba7cff3beb1ad316', - 'name': 'Publishing Group' - }, - { - 'agent': { - '@id': '_:697f809c05ea4a6fba7cff3beb1ad316', - '@type': 'Organization' - }, - 'creative_work': { - '@id': '_:1bf1bf86939d433d96402090c33251d6', - '@type': 'Article' - }, - '@id': '_:76c520ec6fe54d5097c2413886ff027e', - '@type': 'Publisher' - }, - { - '@type': 'Article', - 'title': 'Published article', - 'related_agents': [{ - '@id': '_:76c520ec6fe54d5097c2413886ff027e', - '@type': 'Publisher' - }], - '@id': '_:1bf1bf86939d433d96402090c33251d6', - } - ] - } - } - } -} - -valid_work_invalid_agent = { - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'whatever', - 'data': { - '@graph': [ - { - '@type': 'Organization', - '@id': '_:697f809c05ea4a6fba7cff3beb1ad316', - 'name': 'Publishing Group' - }, - { - 'agent': { - '@id': '_:697f809c05ea4a6fba7cff3beb1ad316', - '@type': 'AbstractAgent', - }, - 'creative_work': { - '@id': '_:1bf1bf86939d433d96402090c33251d6', - '@type': 'Article' - }, - '@id': '_:76c520ec6fe54d5097c2413886ff027e', - '@type': 'Publisher' - }, - { - '@type': 'Article', - 'title': 'Publisher', - 'related_agents': [{ - '@id': '_:76c520ec6fe54d5097c2413886ff027e', - '@type': 'Organization' - }], - '@id': '_:1bf1bf86939d433d96402090c33251d6', - } - ] - } - } - } -} - -valid_work_invalid_agent_field = { - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'whatever', - 'data': { - '@graph': [ - { - '@type': 'Organization', - '@id': '_:697f809c05ea4a6fba7cff3beb1ad316', - 'name': 'Publishing Group', - 'family_name': 'Person Field' - }, - { - 'agent': { - '@id': '_:697f809c05ea4a6fba7cff3beb1ad316', - '@type': 'Organization' - }, - 'creative_work': { - '@id': '_:1bf1bf86939d433d96402090c33251d6', - '@type': 'Article' - }, - '@id': '_:76c520ec6fe54d5097c2413886ff027e', - '@type': 'Publisher' - }, - { - '@type': 'Article', - 'title': 'Published', - 'publishers': [{ - '@id': '_:76c520ec6fe54d5097c2413886ff027e', - '@type': 'Publisher' - }], - '@id': '_:1bf1bf86939d433d96402090c33251d6', - } - ] - } - } - } -} - - -class Response: - def __init__(self, status_code=200, json=None, keys=None): - self.status_code = status_code - self._json = json or {} - self._keys = keys - - def json(self): - return self._json - - def __eq__(self, other): - assert other.status_code == self.status_code - - if self._keys: - assert set(other.json().keys()) == self._keys - else: - assert other.json() == self.json() - - return True - - -class TestValidator: - - POST_CASES = [{ - 'out': Response(400, json={ - 'errors': [{ - 'code': 'required', - 'detail': 'This field is required.', - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': {} - } - }) - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'parse_error', - 'detail': 'JSON parse error - Expecting value: line 1 column 1 (char 0)', - 'source': {'pointer': '/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', data='<html!>') - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'invalid', - 'detail': '@graph may not be empty', - 'source': {'pointer': 
'/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'data': { - '@graph': [] - } - } - } - }) - }, { - 'out': Response(202, keys={'data'}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'jim', - 'data': { - '@graph': [{ - '@id': '_:100', - '@type': 'Person', - 'given_name': 'Jim', - }] - } - } - } - }) - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'invalid', - 'detail': "'@id' is a required property at /@graph/0", - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'suid': 'jim', - 'data': { - '@graph': [{ - '@type': 'Person', - 'given_name': 'Jim', - }] - } - } - } - }) - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'invalid', - 'detail': "'AbstractAgent' is not one of [" - "'AGENT', 'Agent', 'CONSORTIUM', 'Consortium', " - "'DEPARTMENT', 'Department', " - "'INSTITUTION', 'Institution', 'ORGANIZATION', " - "'Organization', 'PERSON', 'Person', 'agent', " - "'consortium', 'department', 'institution', 'organization', 'person'" - "] at /@graph/1/agent/@type", - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json=valid_work_invalid_agent) - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'invalid', - 'detail': "'AbstractCreativeWork' is not a valid type", - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json=invalid_proxy_work) - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'invalid', - 'detail': "'InvalidWorkType' is not a valid type", - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json=invalid_work) - }, { - 'out': Response(202, keys={'data'}), - 'in': requests.Request('POST', json=valid_work_valid_agent) - }, { - 'out': Response(400, json={ - 'errors': [{ - 'code': 'invalid', - 'detail': "Additional properties are not allowed ('publishers' was unexpected) at /@graph/2", - 'source': {'pointer': '/data/attributes/data'}, - 'status': '400' - }] - }), - 'in': requests.Request('POST', json=valid_work_invalid_agent_field) - }, { - # does not break because the raw information is not processed - 'out': Response(202, keys={'data'}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'raw': {'type': 'RawData', 'id': 'invalid_id'}, - 'suid': 'whatever', - 'data': valid_work_valid_agent['data']['attributes']['data'] - } - } - }) - }, { - # does not break because the task information is not processed - 'out': Response(202, keys={'data'}), - 'in': requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'tasks': ['invalid_task'], - 'suid': 'whatever', - 'data': valid_work_valid_agent['data']['attributes']['data'] - } - } - }) - }] - - @pytest.mark.django_db - @pytest.mark.parametrize('_request, response', [(case['in'], case['out']) for case in POST_CASES]) - def test_validator(self, trusted_user, client, _request, response): - args, kwargs = (), {'content_type': 'application/vnd.api+json'} - - if _request.data: - kwargs['data'] = _request.data - elif _request.json is not None: - kwargs['data'] = json.dumps(_request.json) - - kwargs['HTTP_AUTHORIZATION'] = 'Bearer 
{}'.format(trusted_user.oauth2_provider_accesstoken.first()) - - with mock.patch('api.normalizeddata.views.digestive_tract') as mock_digestive_tract: - mock_digestive_tract.swallow__sharev2_legacy.return_value = '123' - assert response == client.post('/api/v2/normalizeddata/', *args, **kwargs) - - @pytest.mark.django_db - def test_robot_validator(self, robot_user, raw_data_id, client): - args, kwargs = (), {'content_type': 'application/vnd.api+json'} - - normalizer_task = factories.CeleryTaskResultFactory() - - _request = requests.Request('POST', json={ - 'data': { - 'type': 'NormalizedData', - 'attributes': { - 'tasks': [normalizer_task.id], - 'raw': {'type': 'RawData', 'id': raw_data_id}, - 'suid': 'whatever', - 'data': valid_work_valid_agent['data']['attributes']['data'] - } - } - }) - - if _request.data: - kwargs['data'] = _request.data - elif _request.json is not None: - kwargs['data'] = json.dumps(_request.json) - - kwargs['HTTP_AUTHORIZATION'] = 'Bearer {}'.format(robot_user.oauth2_provider_accesstoken.first()) - - with mock.patch('api.normalizeddata.views.digestive_tract') as mock_digestive_tract: - mock_digestive_tract.swallow__sharev2_legacy.return_value = '123' - response = client.post('/api/v2/normalizeddata/', *args, **kwargs) - - assert response.status_code == 202 - assert response.json()['data']['type'] == 'NormalizedData' - assert response.json()['data']['attributes'].keys() == {'task'} From f83fa725b71ee9af20a34637df93a6dee146e9ac Mon Sep 17 00:00:00 2001 From: abram axel booth <boothaa@gmail.com> Date: Wed, 5 Mar 2025 14:19:40 -0500 Subject: [PATCH 02/46] delete sharev2 ingest, models [ENG-7389][ENG-7390] --- project/settings.py | 4 - share/harvest/__init__.py | 1 - share/harvest/base.py | 265 - share/harvest/exceptions.py | 6 - share/harvest/ratelimit.py | 39 - share/harvest/scheduler.py | 125 - share/harvest/serialization.py | 59 - share/harvesters/__init__.py | 0 share/harvesters/ca_lwbin.py | 35 - share/harvesters/com_biomedcentral.py | 53 - share/harvesters/com_figshare.py | 49 - share/harvesters/com_figshare_v2.py | 48 - share/harvesters/com_mendeley_data.py | 83 - share/harvesters/com_peerj.py | 42 - share/harvesters/com_researchregistry.py | 48 - share/harvesters/com_springer.py | 52 - share/harvesters/edu_ageconsearch.py | 117 - share/harvesters/edu_gwu.py | 68 - share/harvesters/edu_harvarddataverse.py | 42 - share/harvesters/gov_clinicaltrials.py | 54 - share/harvesters/gov_doepages.py | 46 - share/harvesters/gov_nih.py | 143 - share/harvesters/gov_nsfawards.py | 100 - share/harvesters/gov_scitech.py | 39 - share/harvesters/gov_usgs.py | 40 - share/harvesters/io_osf.py | 127 - share/harvesters/oai.py | 114 - share/harvesters/org_arxiv.py | 68 - share/harvesters/org_biorxiv.py | 85 - share/harvesters/org_biorxiv_html.py | 82 - share/harvesters/org_biorxiv_rss.py | 61 - share/harvesters/org_crossref.py | 35 - share/harvesters/org_dataone.py | 43 - share/harvesters/org_elife.py | 104 - share/harvesters/org_ncar.py | 63 - share/harvesters/org_neurovault.py | 23 - share/harvesters/org_plos.py | 50 - share/harvesters/org_socialscienceregistry.py | 54 - share/harvesters/org_swbiodiversity.py | 64 - .../management/commands/populate_osf_suids.py | 83 - share/management/commands/reloadsynonyms.py | 32 - share/metadata_formats/__init__.py | 0 share/metadata_formats/base.py | 20 - share/metadata_formats/oai_dc.py | 119 - share/metadata_formats/sharev2_elastic.py | 240 - share/models/core.py | 108 +- share/models/feature_flag.py | 1 - share/models/index_backfill.py | 56 +- 
share/models/jobs.py | 394 - share/models/jsonld-schema.json | 52 - share/models/registration.py | 43 - share/models/sources.py | 34 - share/models/synonyms.json | 7002 ----------------- share/models/validators.py | 173 - share/regulate/__init__.py | 3 - share/regulate/regulator.py | 110 - share/regulate/steps/__init__.py | 107 - share/regulate/steps/block_extra_values.py | 45 - share/regulate/steps/cited_as.py | 17 - share/regulate/steps/deduplicate.py | 70 - share/regulate/steps/normalize_agent_names.py | 83 - share/regulate/steps/normalize_iris.py | 79 - share/regulate/steps/tokenize_tags.py | 47 - share/regulate/steps/trim_cycles.py | 57 - share/regulate/steps/validate.py | 12 - share/regulate/steps/whitespace.py | 30 - share/schema/__init__.py | 75 - share/schema/exceptions.py | 9 - share/schema/loader.py | 176 - share/schema/schema-spec.yaml | 330 - share/schema/shapes.py | 39 - share/search/index_messenger.py | 17 +- .../search/index_strategy/sharev2_elastic8.py | 30 +- share/sources/au.uow/icon.ico | Bin 1799 -> 0 bytes share/sources/au.uow/source.yaml | 20 - share/sources/be.ghent/icon.ico | Bin 159 -> 0 bytes share/sources/be.ghent/source.yaml | 33 - share/sources/br.pcurio/icon.ico | Bin 894 -> 0 bytes share/sources/br.pcurio/source.yaml | 19 - share/sources/ca.lwbin/icon.ico | Bin 538 -> 0 bytes share/sources/ca.lwbin/source.yaml | 15 - share/sources/ca.umontreal/icon.ico | Bin 574 -> 0 bytes share/sources/ca.umontreal/source.yaml | 35 - share/sources/ca.uwo/icon.ico | Bin 895 -> 0 bytes share/sources/ca.uwo/source.yaml | 20 - share/sources/ch.cern/icon.ico | Bin 525 -> 0 bytes share/sources/ch.cern/source.yaml | 19 - .../com.arizona.openrepository/icon.ico | Bin 126 -> 0 bytes .../com.arizona.openrepository/source.yaml | 60 - share/sources/com.biomedcentral/icon.ico | Bin 384 -> 0 bytes share/sources/com.biomedcentral/source.yaml | 15 - share/sources/com.dailyssrn/icon.ico | Bin 366 -> 0 bytes share/sources/com.dailyssrn/source.yaml | 12 - share/sources/com.figshare/icon.ico | Bin 1000 -> 0 bytes share/sources/com.figshare/source.yaml | 25 - share/sources/com.mendeley.data/icon.ico | Bin 6518 -> 0 bytes share/sources/com.mendeley.data/source.yaml | 15 - share/sources/com.nature/icon.ico | Bin 374 -> 0 bytes share/sources/com.nature/source.yaml | 19 - share/sources/com.peerj/icon.ico | Bin 1150 -> 0 bytes share/sources/com.peerj/source.yaml | 55 - share/sources/com.researchregistry/icon.ico | Bin 1150 -> 0 bytes .../sources/com.researchregistry/source.yaml | 15 - share/sources/com.springer/icon.ico | Bin 384 -> 0 bytes share/sources/com.springer/source.yaml | 15 - share/sources/edu.ageconsearch/icon.ico | Bin 18934 -> 0 bytes share/sources/edu.ageconsearch/source.yaml | 15 - share/sources/edu.asu/icon.ico | Bin 412 -> 0 bytes share/sources/edu.asu/source.yaml | 46 - share/sources/edu.boise_state/icon.ico | Bin 528 -> 0 bytes share/sources/edu.boise_state/source.yaml | 20 - share/sources/edu.bu.open/icon.ico | Bin 318 -> 0 bytes share/sources/edu.bu.open/source.yaml | 16 - share/sources/edu.calhoun/icon.ico | Bin 453 -> 0 bytes share/sources/edu.calhoun/source.yaml | 34 - share/sources/edu.calpoly/icon.ico | Bin 449 -> 0 bytes share/sources/edu.calpoly/source.yaml | 32 - share/sources/edu.caltech/icon.ico | Bin 511 -> 0 bytes share/sources/edu.caltech/source.yaml | 20 - share/sources/edu.chapman/icon.ico | Bin 339 -> 0 bytes share/sources/edu.chapman/source.yaml | 20 - share/sources/edu.citeseerx/icon.ico | Bin 630 -> 0 bytes share/sources/edu.citeseerx/source.yaml | 19 - 
share/sources/edu.cmu/icon.ico | Bin 630 -> 0 bytes share/sources/edu.cmu/source.yaml | 20 - share/sources/edu.colostate/icon.ico | Bin 661 -> 0 bytes share/sources/edu.colostate/source.yaml | 34 - share/sources/edu.columbia/icon.ico | Bin 526 -> 0 bytes share/sources/edu.columbia/source.yaml | 32 - share/sources/edu.cornell/icon.ico | Bin 1350 -> 0 bytes share/sources/edu.cornell/source.yaml | 34 - share/sources/edu.csuohio/icon.ico | Bin 795 -> 0 bytes share/sources/edu.csuohio/source.yaml | 20 - share/sources/edu.cuny/icon.ico | Bin 530 -> 0 bytes share/sources/edu.cuny/source.yaml | 20 - share/sources/edu.cuscholar/icon.ico | Bin 800 -> 0 bytes share/sources/edu.cuscholar/source.yaml | 20 - share/sources/edu.dash/icon.ico | Bin 403 -> 0 bytes share/sources/edu.dash/source.yaml | 19 - share/sources/edu.digitalhoward/icon.ico | Bin 719 -> 0 bytes share/sources/edu.digitalhoward/source.yaml | 20 - share/sources/edu.duke/icon.ico | Bin 815 -> 0 bytes share/sources/edu.duke/source.yaml | 20 - share/sources/edu.fit/icon.ico | Bin 815 -> 0 bytes share/sources/edu.fit/source.yaml | 62 - share/sources/edu.gwu/icon.ico | Bin 3262 -> 0 bytes share/sources/edu.gwu/source.yaml | 15 - share/sources/edu.harvarddataverse/icon.ico | Bin 419 -> 0 bytes .../sources/edu.harvarddataverse/source.yaml | 15 - share/sources/edu.huskiecommons/icon.ico | Bin 419 -> 0 bytes share/sources/edu.huskiecommons/source.yaml | 34 - share/sources/edu.iastate/icon.ico | Bin 599 -> 0 bytes share/sources/edu.iastate/source.yaml | 162 - share/sources/edu.icpsr/icon.ico | Bin 166 -> 0 bytes share/sources/edu.icpsr/source.yaml | 19 - share/sources/edu.iowaresearch/icon.ico | Bin 494 -> 0 bytes share/sources/edu.iowaresearch/source.yaml | 20 - share/sources/edu.iu/icon.ico | Bin 326 -> 0 bytes share/sources/edu.iu/source.yaml | 34 - share/sources/edu.iwu_commons/icon.ico | Bin 710 -> 0 bytes share/sources/edu.iwu_commons/source.yaml | 38 - share/sources/edu.jmu/icon.ico | Bin 1150 -> 0 bytes share/sources/edu.jmu/source.yaml | 19 - share/sources/edu.kent/icon.ico | Bin 323 -> 0 bytes share/sources/edu.kent/source.yaml | 20 - share/sources/edu.krex/icon.ico | Bin 388 -> 0 bytes share/sources/edu.krex/source.yaml | 34 - share/sources/edu.mason/icon.ico | Bin 671 -> 0 bytes share/sources/edu.mason/source.yaml | 34 - share/sources/edu.mit/icon.ico | Bin 188 -> 0 bytes share/sources/edu.mit/source.yaml | 192 - share/sources/edu.mizzou/icon.ico | Bin 450 -> 0 bytes share/sources/edu.mizzou/source.yaml | 34 - share/sources/edu.nau.openknowledge/icon.ico | Bin 2238 -> 0 bytes .../sources/edu.nau.openknowledge/source.yaml | 23 - share/sources/edu.nku/icon.ico | Bin 422 -> 0 bytes share/sources/edu.nku/source.yaml | 34 - share/sources/edu.oaktrust/icon.ico | Bin 453 -> 0 bytes share/sources/edu.oaktrust/source.yaml | 34 - share/sources/edu.opensiuc/icon.ico | Bin 583 -> 0 bytes share/sources/edu.opensiuc/source.yaml | 36 - share/sources/edu.pcom/icon.ico | Bin 572 -> 0 bytes share/sources/edu.pcom/source.yaml | 21 - share/sources/edu.pdxscholar/icon.ico | Bin 736 -> 0 bytes share/sources/edu.pdxscholar/source.yaml | 30 - share/sources/edu.purdue.epubs/icon.ico | Bin 894 -> 0 bytes share/sources/edu.purdue.epubs/source.yaml | 66 - share/sources/edu.purdue/icon.ico | Bin 453 -> 0 bytes share/sources/edu.purdue/source.yaml | 19 - share/sources/edu.richmond/icon.ico | Bin 1150 -> 0 bytes share/sources/edu.richmond/source.yaml | 20 - share/sources/edu.scholarsarchiveosu/icon.ico | Bin 453 -> 0 bytes .../edu.scholarsarchiveosu/source.yaml | 48 
- share/sources/edu.scholarsbank/icon.ico | Bin 726 -> 0 bytes share/sources/edu.scholarsbank/source.yaml | 34 - .../sources/edu.scholarscompass_vcu/icon.ico | Bin 574 -> 0 bytes .../edu.scholarscompass_vcu/source.yaml | 47 - .../sources/edu.scholarworks_montana/icon.ico | Bin 7406 -> 0 bytes .../edu.scholarworks_montana/source.yaml | 64 - share/sources/edu.scholarworks_umass/icon.ico | Bin 630 -> 0 bytes .../edu.scholarworks_umass/source.yaml | 20 - share/sources/edu.smithsonian/icon.ico | Bin 453 -> 0 bytes share/sources/edu.smithsonian/source.yaml | 55 - share/sources/edu.stcloud/icon.ico | Bin 780 -> 0 bytes share/sources/edu.stcloud/source.yaml | 23 - share/sources/edu.texasstate/icon.ico | Bin 660 -> 0 bytes share/sources/edu.texasstate/source.yaml | 50 - share/sources/edu.triceratops/icon.ico | Bin 389 -> 0 bytes share/sources/edu.triceratops/source.yaml | 19 - share/sources/edu.trinity/icon.ico | Bin 433 -> 0 bytes share/sources/edu.trinity/source.yaml | 26 - share/sources/edu.u_south_fl/icon.ico | Bin 652 -> 0 bytes share/sources/edu.u_south_fl/source.yaml | 53 - share/sources/edu.ucf/icon.ico | Bin 1150 -> 0 bytes share/sources/edu.ucf/source.yaml | 20 - share/sources/edu.udc/icon.ico | Bin 419 -> 0 bytes share/sources/edu.udc/source.yaml | 33 - share/sources/edu.udel/icon.ico | Bin 726 -> 0 bytes share/sources/edu.udel/source.yaml | 34 - share/sources/edu.uhawaii/icon.ico | Bin 558 -> 0 bytes share/sources/edu.uhawaii/source.yaml | 70 - share/sources/edu.uiucideals/icon.ico | Bin 231 -> 0 bytes share/sources/edu.uiucideals/source.yaml | 34 - share/sources/edu.ukansas/icon.ico | Bin 583 -> 0 bytes share/sources/edu.ukansas/source.yaml | 64 - share/sources/edu.uky/icon.ico | Bin 566 -> 0 bytes share/sources/edu.uky/source.yaml | 20 - share/sources/edu.umassmed/icon.ico | Bin 386 -> 0 bytes share/sources/edu.umassmed/source.yaml | 20 - share/sources/edu.umd/icon.ico | Bin 766 -> 0 bytes share/sources/edu.umd/source.yaml | 69 - share/sources/edu.umich/icon.ico | Bin 726 -> 0 bytes share/sources/edu.umich/source.yaml | 82 - share/sources/edu.uncg/icon.ico | Bin 794 -> 0 bytes share/sources/edu.uncg/source.yaml | 19 - share/sources/edu.unl_digitalcommons/icon.ico | Bin 726 -> 0 bytes .../edu.unl_digitalcommons/source.yaml | 20 - share/sources/edu.upennsylvania/icon.ico | Bin 432 -> 0 bytes share/sources/edu.upennsylvania/source.yaml | 20 - share/sources/edu.ut_chattanooga/icon.ico | Bin 386 -> 0 bytes share/sources/edu.ut_chattanooga/source.yaml | 20 - share/sources/edu.utah/icon.ico | Bin 318 -> 0 bytes share/sources/edu.utah/source.yaml | 27 - share/sources/edu.utahstate/icon.ico | Bin 4671 -> 0 bytes share/sources/edu.utahstate/source.yaml | 20 - share/sources/edu.utaustin/icon.ico | Bin 453 -> 0 bytes share/sources/edu.utaustin/source.yaml | 50 - share/sources/edu.utktrace/icon.ico | Bin 386 -> 0 bytes share/sources/edu.utktrace/source.yaml | 33 - share/sources/edu.utuskegee/icon.ico | Bin 632 -> 0 bytes share/sources/edu.utuskegee/source.yaml | 33 - share/sources/edu.uwashington/icon.ico | Bin 527 -> 0 bytes share/sources/edu.uwashington/source.yaml | 35 - share/sources/edu.valposcholar/icon.ico | Bin 386 -> 0 bytes share/sources/edu.valposcholar/source.yaml | 25 - share/sources/edu.vtech/icon.ico | Bin 574 -> 0 bytes share/sources/edu.vtech/source.yaml | 34 - share/sources/edu.wash_state_u/icon.ico | Bin 625 -> 0 bytes share/sources/edu.wash_state_u/source.yaml | 34 - share/sources/edu.waynestate/icon.ico | Bin 622 -> 0 bytes share/sources/edu.waynestate/source.yaml | 32 - 
share/sources/edu.wisconsin/icon.ico | Bin 4286 -> 0 bytes share/sources/edu.wisconsin/source.yaml | 120 - share/sources/edu.wm/icon.ico | Bin 3638 -> 0 bytes share/sources/edu.wm/source.yaml | 24 - .../sources/edu.wustlopenscholarship/icon.ico | Bin 796 -> 0 bytes .../edu.wustlopenscholarship/source.yaml | 22 - share/sources/es.csic/icon.ico | Bin 1150 -> 0 bytes share/sources/es.csic/source.yaml | 32 - share/sources/et.edu.addis_ababa/icon.ico | Bin 453 -> 0 bytes share/sources/et.edu.addis_ababa/source.yaml | 34 - share/sources/eu.econstor/icon.ico | Bin 316 -> 0 bytes share/sources/eu.econstor/source.yaml | 20 - .../sources/fr.archives-ouvertes.hal/icon.ico | Bin 4286 -> 0 bytes .../fr.archives-ouvertes.hal/source.yaml | 29 - share/sources/gov.clinicaltrials/icon.ico | Bin 388 -> 0 bytes share/sources/gov.clinicaltrials/source.yaml | 15 - share/sources/gov.doepages/icon.ico | Bin 657 -> 0 bytes share/sources/gov.doepages/source.yaml | 19 - share/sources/gov.nih/icon.ico | Bin 726 -> 0 bytes share/sources/gov.nih/source.yaml | 16 - share/sources/gov.nist/icon.ico | Bin 726 -> 0 bytes share/sources/gov.nist/source.yaml | 34 - share/sources/gov.nodc/icon.ico | Bin 672 -> 0 bytes share/sources/gov.nodc/source.yaml | 19 - share/sources/gov.nsfawards/icon.ico | Bin 789 -> 0 bytes share/sources/gov.nsfawards/source.yaml | 15 - share/sources/gov.pubmedcentral/icon.ico | Bin 465 -> 0 bytes share/sources/gov.pubmedcentral/source.yaml | 37 - share/sources/gov.scitech/icon.ico | Bin 657 -> 0 bytes share/sources/gov.scitech/source.yaml | 19 - share/sources/gov.usgs/icon.ico | Bin 631 -> 0 bytes share/sources/gov.usgs/source.yaml | 15 - share/sources/info.spdataverse/icon.ico | Bin 548 -> 0 bytes share/sources/info.spdataverse/source.yaml | 19 - share/sources/info.ssoar/icon.ico | Bin 534 -> 0 bytes share/sources/info.ssoar/source.yaml | 24 - share/sources/io.osf/icon.ico | Bin 811 -> 0 bytes share/sources/io.osf/source.yaml | 53 - share/sources/org.arxiv/icon.ico | Bin 530 -> 0 bytes share/sources/org.arxiv/source.yaml | 33 - share/sources/org.bhl/icon.ico | Bin 642 -> 0 bytes share/sources/org.bhl/source.yaml | 33 - share/sources/org.biorxiv/icon.ico | Bin 522 -> 0 bytes share/sources/org.biorxiv/source.yaml | 38 - share/sources/org.cogprints/icon.ico | Bin 511 -> 0 bytes share/sources/org.cogprints/source.yaml | 19 - share/sources/org.crossref/icon.ico | Bin 629 -> 0 bytes share/sources/org.crossref/source.yaml | 15 - share/sources/org.datacite/icon.ico | Bin 726 -> 0 bytes share/sources/org.datacite/source.yaml | 39 - share/sources/org.dataone/icon.ico | Bin 791 -> 0 bytes share/sources/org.dataone/source.yaml | 15 - share/sources/org.dryad/icon.ico | Bin 801 -> 0 bytes share/sources/org.dryad/source.yaml | 20 - share/sources/org.elife/icon.ico | Bin 824 -> 0 bytes share/sources/org.elife/source.yaml | 15 - share/sources/org.elis/icon.ico | Bin 1406 -> 0 bytes share/sources/org.elis/source.yaml | 19 - share/sources/org.engrxiv/icon.ico | Bin 15086 -> 0 bytes share/sources/org.engrxiv/source.yaml | 15 - share/sources/org.erudit/icon.ico | Bin 506 -> 0 bytes share/sources/org.erudit/source.yaml | 19 - share/sources/org.mblwhoilibrary/icon.ico | Bin 453 -> 0 bytes share/sources/org.mblwhoilibrary/source.yaml | 34 - share/sources/org.mla/icon.ico | Bin 706 -> 0 bytes share/sources/org.mla/source.yaml | 19 - share/sources/org.mpra/icon.ico | Bin 894 -> 0 bytes share/sources/org.mpra/source.yaml | 19 - share/sources/org.ncar/icon.ico | Bin 690 -> 0 bytes share/sources/org.ncar/source.yaml | 18 - 
share/sources/org.neurovault/icon.ico | Bin 352 -> 0 bytes share/sources/org.neurovault/source.yaml | 15 - share/sources/org.newprairiepress/icon.ico | Bin 386 -> 0 bytes share/sources/org.newprairiepress/source.yaml | 20 - share/sources/org.philpapers/icon.ico | Bin 1406 -> 0 bytes share/sources/org.philpapers/source.yaml | 19 - share/sources/org.plos/icon.ico | Bin 379 -> 0 bytes share/sources/org.plos/source.yaml | 15 - share/sources/org.preprints/icon.ico | Bin 200438 -> 0 bytes share/sources/org.preprints/source.yaml | 19 - share/sources/org.psyarxiv/icon.ico | Bin 1150 -> 0 bytes share/sources/org.psyarxiv/source.yaml | 15 - share/sources/org.repec/icon.ico | Bin 894 -> 0 bytes share/sources/org.repec/source.yaml | 19 - share/sources/org.seafdec/icon.ico | Bin 16958 -> 0 bytes share/sources/org.seafdec/source.yaml | 44 - share/sources/org.shareok/icon.ico | Bin 453 -> 0 bytes share/sources/org.shareok/source.yaml | 42 - share/sources/org.sldr/icon.ico | Bin 315 -> 0 bytes share/sources/org.sldr/source.yaml | 23 - share/sources/org.socarxiv/icon.ico | Bin 894 -> 0 bytes share/sources/org.socarxiv/source.yaml | 15 - .../org.socialscienceregistry/icon.ico | Bin 1150 -> 0 bytes .../org.socialscienceregistry/source.yaml | 14 - share/sources/org.stepic/icon.ico | Bin 726 -> 0 bytes share/sources/org.stepic/source.yaml | 19 - share/sources/org.swbiodiversity/icon.ico | Bin 726 -> 0 bytes share/sources/org.swbiodiversity/source.yaml | 16 - share/sources/org.tdar/icon.ico | Bin 726 -> 0 bytes share/sources/org.tdar/source.yaml | 33 - share/sources/org.ttu/icon.ico | Bin 1406 -> 0 bytes share/sources/org.ttu/source.yaml | 34 - share/sources/org.ucescholarship/icon.ico | Bin 732 -> 0 bytes share/sources/org.ucescholarship/source.yaml | 19 - share/sources/org.zenodo/icon.ico | Bin 814 -> 0 bytes share/sources/org.zenodo/source.yaml | 37 - share/sources/pe.upc/icon.ico | Bin 318 -> 0 bytes share/sources/pe.upc/source.yaml | 34 - share/sources/pt.rcaap/icon.ico | Bin 721 -> 0 bytes share/sources/pt.rcaap/source.yaml | 19 - share/sources/ru.cyberleninka/icon.ico | Bin 664 -> 0 bytes share/sources/ru.cyberleninka/source.yaml | 19 - share/sources/tr.edu.hacettepe/icon.ico | Bin 726 -> 0 bytes share/sources/tr.edu.hacettepe/source.yaml | 19 - share/sources/uk.cambridge/icon.ico | Bin 714 -> 0 bytes share/sources/uk.cambridge/source.yaml | 38 - share/sources/uk.lshtm/icon.ico | Bin 926 -> 0 bytes share/sources/uk.lshtm/source.yaml | 19 - share/sources/za.csir/icon.ico | Bin 453 -> 0 bytes share/sources/za.csir/source.yaml | 34 - share/tasks/__init__.py | 124 - share/tasks/jobs.py | 219 - share/transform/__init__.py | 1 - share/transform/base.py | 63 - share/transform/chain/__init__.py | 12 - share/transform/chain/exceptions.py | 32 - share/transform/chain/links.py | 1102 --- share/transform/chain/parsers.py | 146 - share/transform/chain/soup.py | 71 - share/transform/chain/transformer.py | 87 - share/transform/chain/utils.py | 150 - share/transformers/__init__.py | 0 share/transformers/ca_lwbin.py | 113 - share/transformers/com_biomedcentral.py | 79 - share/transformers/com_dailyssrn.py | 17 - share/transformers/com_figshare.py | 86 - share/transformers/com_figshare_v2.py | 76 - share/transformers/com_mendeley_data.py | 441 -- share/transformers/com_peerj.py | 91 - share/transformers/com_peerj_xml.py | 161 - share/transformers/com_researchregistry.py | 134 - share/transformers/com_springer.py | 82 - share/transformers/edu_ageconsearch.py | 138 - share/transformers/edu_gwu.py | 102 - 
share/transformers/edu_harvarddataverse.py | 36 - share/transformers/gov_clinicaltrials.py | 111 - share/transformers/gov_nih.py | 312 - share/transformers/gov_nsfawards.py | 262 - share/transformers/gov_pubmedcentral_pmc.py | 367 - share/transformers/gov_scitech.py | 137 - share/transformers/gov_usgs.py | 146 - share/transformers/io_osf.py | 131 - share/transformers/io_osf_preprints.py | 106 - share/transformers/io_osf_registrations.py | 32 - share/transformers/mods.py | 526 -- share/transformers/oai.py | 325 - share/transformers/org_arxiv.py | 94 - share/transformers/org_biorxiv.py | 109 - share/transformers/org_biorxiv_html.py | 115 - share/transformers/org_biorxiv_rss.py | 61 - share/transformers/org_crossref.py | 143 - share/transformers/org_datacite.py | 679 -- share/transformers/org_dataone.py | 123 - share/transformers/org_elife.py | 161 - share/transformers/org_engrxiv.py | 12 - share/transformers/org_ncar.py | 127 - share/transformers/org_neurovault.py | 47 - share/transformers/org_plos.py | 64 - share/transformers/org_psyarxiv.py | 16 - share/transformers/org_socarxiv.py | 16 - .../transformers/org_socialscienceregistry.py | 224 - share/transformers/org_swbiodiversity.py | 128 - share/transformers/v1_push.py | 198 - share/transformers/v2_push.py | 14 - share/util/extensions.py | 49 - share/util/graph.py | 581 -- share/util/iris.py | 47 - share/util/nameparser.py | 13 - share/util/names.py | 28 - share/util/osf.py | 39 - share/util/source_stat.py | 181 - tests/share/test_subject_synonyms.py | 17 - 443 files changed, 70 insertions(+), 26149 deletions(-) delete mode 100644 share/harvest/__init__.py delete mode 100644 share/harvest/base.py delete mode 100644 share/harvest/exceptions.py delete mode 100644 share/harvest/ratelimit.py delete mode 100644 share/harvest/scheduler.py delete mode 100644 share/harvest/serialization.py delete mode 100644 share/harvesters/__init__.py delete mode 100644 share/harvesters/ca_lwbin.py delete mode 100644 share/harvesters/com_biomedcentral.py delete mode 100644 share/harvesters/com_figshare.py delete mode 100644 share/harvesters/com_figshare_v2.py delete mode 100644 share/harvesters/com_mendeley_data.py delete mode 100644 share/harvesters/com_peerj.py delete mode 100644 share/harvesters/com_researchregistry.py delete mode 100644 share/harvesters/com_springer.py delete mode 100644 share/harvesters/edu_ageconsearch.py delete mode 100644 share/harvesters/edu_gwu.py delete mode 100644 share/harvesters/edu_harvarddataverse.py delete mode 100644 share/harvesters/gov_clinicaltrials.py delete mode 100644 share/harvesters/gov_doepages.py delete mode 100644 share/harvesters/gov_nih.py delete mode 100644 share/harvesters/gov_nsfawards.py delete mode 100644 share/harvesters/gov_scitech.py delete mode 100644 share/harvesters/gov_usgs.py delete mode 100644 share/harvesters/io_osf.py delete mode 100644 share/harvesters/oai.py delete mode 100644 share/harvesters/org_arxiv.py delete mode 100644 share/harvesters/org_biorxiv.py delete mode 100644 share/harvesters/org_biorxiv_html.py delete mode 100644 share/harvesters/org_biorxiv_rss.py delete mode 100644 share/harvesters/org_crossref.py delete mode 100644 share/harvesters/org_dataone.py delete mode 100644 share/harvesters/org_elife.py delete mode 100644 share/harvesters/org_ncar.py delete mode 100644 share/harvesters/org_neurovault.py delete mode 100644 share/harvesters/org_plos.py delete mode 100644 share/harvesters/org_socialscienceregistry.py delete mode 100644 share/harvesters/org_swbiodiversity.py delete mode 
100644 share/management/commands/populate_osf_suids.py delete mode 100644 share/management/commands/reloadsynonyms.py delete mode 100644 share/metadata_formats/__init__.py delete mode 100644 share/metadata_formats/base.py delete mode 100644 share/metadata_formats/oai_dc.py delete mode 100644 share/metadata_formats/sharev2_elastic.py delete mode 100644 share/models/jobs.py delete mode 100644 share/models/jsonld-schema.json delete mode 100644 share/models/registration.py delete mode 100644 share/models/sources.py delete mode 100644 share/models/synonyms.json delete mode 100644 share/models/validators.py delete mode 100644 share/regulate/__init__.py delete mode 100644 share/regulate/regulator.py delete mode 100644 share/regulate/steps/__init__.py delete mode 100644 share/regulate/steps/block_extra_values.py delete mode 100644 share/regulate/steps/cited_as.py delete mode 100644 share/regulate/steps/deduplicate.py delete mode 100644 share/regulate/steps/normalize_agent_names.py delete mode 100644 share/regulate/steps/normalize_iris.py delete mode 100644 share/regulate/steps/tokenize_tags.py delete mode 100644 share/regulate/steps/trim_cycles.py delete mode 100644 share/regulate/steps/validate.py delete mode 100644 share/regulate/steps/whitespace.py delete mode 100644 share/schema/__init__.py delete mode 100644 share/schema/exceptions.py delete mode 100644 share/schema/loader.py delete mode 100644 share/schema/schema-spec.yaml delete mode 100644 share/schema/shapes.py delete mode 100644 share/sources/au.uow/icon.ico delete mode 100644 share/sources/au.uow/source.yaml delete mode 100644 share/sources/be.ghent/icon.ico delete mode 100644 share/sources/be.ghent/source.yaml delete mode 100644 share/sources/br.pcurio/icon.ico delete mode 100644 share/sources/br.pcurio/source.yaml delete mode 100644 share/sources/ca.lwbin/icon.ico delete mode 100644 share/sources/ca.lwbin/source.yaml delete mode 100644 share/sources/ca.umontreal/icon.ico delete mode 100644 share/sources/ca.umontreal/source.yaml delete mode 100644 share/sources/ca.uwo/icon.ico delete mode 100644 share/sources/ca.uwo/source.yaml delete mode 100644 share/sources/ch.cern/icon.ico delete mode 100644 share/sources/ch.cern/source.yaml delete mode 100644 share/sources/com.arizona.openrepository/icon.ico delete mode 100644 share/sources/com.arizona.openrepository/source.yaml delete mode 100644 share/sources/com.biomedcentral/icon.ico delete mode 100644 share/sources/com.biomedcentral/source.yaml delete mode 100644 share/sources/com.dailyssrn/icon.ico delete mode 100644 share/sources/com.dailyssrn/source.yaml delete mode 100644 share/sources/com.figshare/icon.ico delete mode 100644 share/sources/com.figshare/source.yaml delete mode 100644 share/sources/com.mendeley.data/icon.ico delete mode 100644 share/sources/com.mendeley.data/source.yaml delete mode 100644 share/sources/com.nature/icon.ico delete mode 100644 share/sources/com.nature/source.yaml delete mode 100644 share/sources/com.peerj/icon.ico delete mode 100644 share/sources/com.peerj/source.yaml delete mode 100644 share/sources/com.researchregistry/icon.ico delete mode 100644 share/sources/com.researchregistry/source.yaml delete mode 100644 share/sources/com.springer/icon.ico delete mode 100644 share/sources/com.springer/source.yaml delete mode 100644 share/sources/edu.ageconsearch/icon.ico delete mode 100644 share/sources/edu.ageconsearch/source.yaml delete mode 100644 share/sources/edu.asu/icon.ico delete mode 100644 share/sources/edu.asu/source.yaml delete mode 100644 
share/sources/edu.boise_state/icon.ico delete mode 100644 share/sources/edu.boise_state/source.yaml delete mode 100644 share/sources/edu.bu.open/icon.ico delete mode 100644 share/sources/edu.bu.open/source.yaml delete mode 100644 share/sources/edu.calhoun/icon.ico delete mode 100644 share/sources/edu.calhoun/source.yaml delete mode 100644 share/sources/edu.calpoly/icon.ico delete mode 100644 share/sources/edu.calpoly/source.yaml delete mode 100644 share/sources/edu.caltech/icon.ico delete mode 100644 share/sources/edu.caltech/source.yaml delete mode 100644 share/sources/edu.chapman/icon.ico delete mode 100644 share/sources/edu.chapman/source.yaml delete mode 100644 share/sources/edu.citeseerx/icon.ico delete mode 100644 share/sources/edu.citeseerx/source.yaml delete mode 100644 share/sources/edu.cmu/icon.ico delete mode 100644 share/sources/edu.cmu/source.yaml delete mode 100644 share/sources/edu.colostate/icon.ico delete mode 100644 share/sources/edu.colostate/source.yaml delete mode 100644 share/sources/edu.columbia/icon.ico delete mode 100644 share/sources/edu.columbia/source.yaml delete mode 100644 share/sources/edu.cornell/icon.ico delete mode 100644 share/sources/edu.cornell/source.yaml delete mode 100644 share/sources/edu.csuohio/icon.ico delete mode 100644 share/sources/edu.csuohio/source.yaml delete mode 100644 share/sources/edu.cuny/icon.ico delete mode 100644 share/sources/edu.cuny/source.yaml delete mode 100644 share/sources/edu.cuscholar/icon.ico delete mode 100644 share/sources/edu.cuscholar/source.yaml delete mode 100644 share/sources/edu.dash/icon.ico delete mode 100644 share/sources/edu.dash/source.yaml delete mode 100644 share/sources/edu.digitalhoward/icon.ico delete mode 100644 share/sources/edu.digitalhoward/source.yaml delete mode 100644 share/sources/edu.duke/icon.ico delete mode 100644 share/sources/edu.duke/source.yaml delete mode 100644 share/sources/edu.fit/icon.ico delete mode 100644 share/sources/edu.fit/source.yaml delete mode 100644 share/sources/edu.gwu/icon.ico delete mode 100644 share/sources/edu.gwu/source.yaml delete mode 100644 share/sources/edu.harvarddataverse/icon.ico delete mode 100644 share/sources/edu.harvarddataverse/source.yaml delete mode 100644 share/sources/edu.huskiecommons/icon.ico delete mode 100644 share/sources/edu.huskiecommons/source.yaml delete mode 100644 share/sources/edu.iastate/icon.ico delete mode 100644 share/sources/edu.iastate/source.yaml delete mode 100644 share/sources/edu.icpsr/icon.ico delete mode 100644 share/sources/edu.icpsr/source.yaml delete mode 100644 share/sources/edu.iowaresearch/icon.ico delete mode 100644 share/sources/edu.iowaresearch/source.yaml delete mode 100644 share/sources/edu.iu/icon.ico delete mode 100644 share/sources/edu.iu/source.yaml delete mode 100644 share/sources/edu.iwu_commons/icon.ico delete mode 100644 share/sources/edu.iwu_commons/source.yaml delete mode 100644 share/sources/edu.jmu/icon.ico delete mode 100644 share/sources/edu.jmu/source.yaml delete mode 100644 share/sources/edu.kent/icon.ico delete mode 100644 share/sources/edu.kent/source.yaml delete mode 100644 share/sources/edu.krex/icon.ico delete mode 100644 share/sources/edu.krex/source.yaml delete mode 100644 share/sources/edu.mason/icon.ico delete mode 100644 share/sources/edu.mason/source.yaml delete mode 100644 share/sources/edu.mit/icon.ico delete mode 100644 share/sources/edu.mit/source.yaml delete mode 100644 share/sources/edu.mizzou/icon.ico delete mode 100644 share/sources/edu.mizzou/source.yaml delete mode 100644 
share/sources/edu.nau.openknowledge/icon.ico delete mode 100644 share/sources/edu.nau.openknowledge/source.yaml delete mode 100644 share/sources/edu.nku/icon.ico delete mode 100644 share/sources/edu.nku/source.yaml delete mode 100644 share/sources/edu.oaktrust/icon.ico delete mode 100644 share/sources/edu.oaktrust/source.yaml delete mode 100644 share/sources/edu.opensiuc/icon.ico delete mode 100644 share/sources/edu.opensiuc/source.yaml delete mode 100644 share/sources/edu.pcom/icon.ico delete mode 100644 share/sources/edu.pcom/source.yaml delete mode 100644 share/sources/edu.pdxscholar/icon.ico delete mode 100644 share/sources/edu.pdxscholar/source.yaml delete mode 100644 share/sources/edu.purdue.epubs/icon.ico delete mode 100644 share/sources/edu.purdue.epubs/source.yaml delete mode 100644 share/sources/edu.purdue/icon.ico delete mode 100644 share/sources/edu.purdue/source.yaml delete mode 100644 share/sources/edu.richmond/icon.ico delete mode 100644 share/sources/edu.richmond/source.yaml delete mode 100644 share/sources/edu.scholarsarchiveosu/icon.ico delete mode 100644 share/sources/edu.scholarsarchiveosu/source.yaml delete mode 100644 share/sources/edu.scholarsbank/icon.ico delete mode 100644 share/sources/edu.scholarsbank/source.yaml delete mode 100644 share/sources/edu.scholarscompass_vcu/icon.ico delete mode 100644 share/sources/edu.scholarscompass_vcu/source.yaml delete mode 100644 share/sources/edu.scholarworks_montana/icon.ico delete mode 100644 share/sources/edu.scholarworks_montana/source.yaml delete mode 100644 share/sources/edu.scholarworks_umass/icon.ico delete mode 100644 share/sources/edu.scholarworks_umass/source.yaml delete mode 100644 share/sources/edu.smithsonian/icon.ico delete mode 100644 share/sources/edu.smithsonian/source.yaml delete mode 100644 share/sources/edu.stcloud/icon.ico delete mode 100644 share/sources/edu.stcloud/source.yaml delete mode 100644 share/sources/edu.texasstate/icon.ico delete mode 100644 share/sources/edu.texasstate/source.yaml delete mode 100644 share/sources/edu.triceratops/icon.ico delete mode 100644 share/sources/edu.triceratops/source.yaml delete mode 100644 share/sources/edu.trinity/icon.ico delete mode 100644 share/sources/edu.trinity/source.yaml delete mode 100644 share/sources/edu.u_south_fl/icon.ico delete mode 100644 share/sources/edu.u_south_fl/source.yaml delete mode 100644 share/sources/edu.ucf/icon.ico delete mode 100644 share/sources/edu.ucf/source.yaml delete mode 100644 share/sources/edu.udc/icon.ico delete mode 100644 share/sources/edu.udc/source.yaml delete mode 100644 share/sources/edu.udel/icon.ico delete mode 100644 share/sources/edu.udel/source.yaml delete mode 100644 share/sources/edu.uhawaii/icon.ico delete mode 100644 share/sources/edu.uhawaii/source.yaml delete mode 100644 share/sources/edu.uiucideals/icon.ico delete mode 100644 share/sources/edu.uiucideals/source.yaml delete mode 100644 share/sources/edu.ukansas/icon.ico delete mode 100644 share/sources/edu.ukansas/source.yaml delete mode 100644 share/sources/edu.uky/icon.ico delete mode 100644 share/sources/edu.uky/source.yaml delete mode 100644 share/sources/edu.umassmed/icon.ico delete mode 100644 share/sources/edu.umassmed/source.yaml delete mode 100644 share/sources/edu.umd/icon.ico delete mode 100644 share/sources/edu.umd/source.yaml delete mode 100644 share/sources/edu.umich/icon.ico delete mode 100644 share/sources/edu.umich/source.yaml delete mode 100644 share/sources/edu.uncg/icon.ico delete mode 100644 share/sources/edu.uncg/source.yaml delete mode 
100644 share/sources/edu.unl_digitalcommons/icon.ico delete mode 100644 share/sources/edu.unl_digitalcommons/source.yaml delete mode 100644 share/sources/edu.upennsylvania/icon.ico delete mode 100644 share/sources/edu.upennsylvania/source.yaml delete mode 100644 share/sources/edu.ut_chattanooga/icon.ico delete mode 100644 share/sources/edu.ut_chattanooga/source.yaml delete mode 100644 share/sources/edu.utah/icon.ico delete mode 100644 share/sources/edu.utah/source.yaml delete mode 100644 share/sources/edu.utahstate/icon.ico delete mode 100644 share/sources/edu.utahstate/source.yaml delete mode 100644 share/sources/edu.utaustin/icon.ico delete mode 100644 share/sources/edu.utaustin/source.yaml delete mode 100644 share/sources/edu.utktrace/icon.ico delete mode 100644 share/sources/edu.utktrace/source.yaml delete mode 100644 share/sources/edu.utuskegee/icon.ico delete mode 100644 share/sources/edu.utuskegee/source.yaml delete mode 100644 share/sources/edu.uwashington/icon.ico delete mode 100644 share/sources/edu.uwashington/source.yaml delete mode 100644 share/sources/edu.valposcholar/icon.ico delete mode 100644 share/sources/edu.valposcholar/source.yaml delete mode 100644 share/sources/edu.vtech/icon.ico delete mode 100644 share/sources/edu.vtech/source.yaml delete mode 100644 share/sources/edu.wash_state_u/icon.ico delete mode 100644 share/sources/edu.wash_state_u/source.yaml delete mode 100644 share/sources/edu.waynestate/icon.ico delete mode 100644 share/sources/edu.waynestate/source.yaml delete mode 100644 share/sources/edu.wisconsin/icon.ico delete mode 100644 share/sources/edu.wisconsin/source.yaml delete mode 100644 share/sources/edu.wm/icon.ico delete mode 100644 share/sources/edu.wm/source.yaml delete mode 100644 share/sources/edu.wustlopenscholarship/icon.ico delete mode 100644 share/sources/edu.wustlopenscholarship/source.yaml delete mode 100644 share/sources/es.csic/icon.ico delete mode 100644 share/sources/es.csic/source.yaml delete mode 100644 share/sources/et.edu.addis_ababa/icon.ico delete mode 100644 share/sources/et.edu.addis_ababa/source.yaml delete mode 100644 share/sources/eu.econstor/icon.ico delete mode 100644 share/sources/eu.econstor/source.yaml delete mode 100644 share/sources/fr.archives-ouvertes.hal/icon.ico delete mode 100644 share/sources/fr.archives-ouvertes.hal/source.yaml delete mode 100644 share/sources/gov.clinicaltrials/icon.ico delete mode 100644 share/sources/gov.clinicaltrials/source.yaml delete mode 100644 share/sources/gov.doepages/icon.ico delete mode 100644 share/sources/gov.doepages/source.yaml delete mode 100644 share/sources/gov.nih/icon.ico delete mode 100644 share/sources/gov.nih/source.yaml delete mode 100644 share/sources/gov.nist/icon.ico delete mode 100644 share/sources/gov.nist/source.yaml delete mode 100644 share/sources/gov.nodc/icon.ico delete mode 100644 share/sources/gov.nodc/source.yaml delete mode 100644 share/sources/gov.nsfawards/icon.ico delete mode 100644 share/sources/gov.nsfawards/source.yaml delete mode 100644 share/sources/gov.pubmedcentral/icon.ico delete mode 100644 share/sources/gov.pubmedcentral/source.yaml delete mode 100644 share/sources/gov.scitech/icon.ico delete mode 100644 share/sources/gov.scitech/source.yaml delete mode 100644 share/sources/gov.usgs/icon.ico delete mode 100644 share/sources/gov.usgs/source.yaml delete mode 100644 share/sources/info.spdataverse/icon.ico delete mode 100644 share/sources/info.spdataverse/source.yaml delete mode 100644 share/sources/info.ssoar/icon.ico delete mode 100644 
share/sources/info.ssoar/source.yaml delete mode 100644 share/sources/io.osf/icon.ico delete mode 100644 share/sources/io.osf/source.yaml delete mode 100644 share/sources/org.arxiv/icon.ico delete mode 100644 share/sources/org.arxiv/source.yaml delete mode 100644 share/sources/org.bhl/icon.ico delete mode 100644 share/sources/org.bhl/source.yaml delete mode 100644 share/sources/org.biorxiv/icon.ico delete mode 100644 share/sources/org.biorxiv/source.yaml delete mode 100644 share/sources/org.cogprints/icon.ico delete mode 100644 share/sources/org.cogprints/source.yaml delete mode 100644 share/sources/org.crossref/icon.ico delete mode 100644 share/sources/org.crossref/source.yaml delete mode 100644 share/sources/org.datacite/icon.ico delete mode 100644 share/sources/org.datacite/source.yaml delete mode 100644 share/sources/org.dataone/icon.ico delete mode 100644 share/sources/org.dataone/source.yaml delete mode 100644 share/sources/org.dryad/icon.ico delete mode 100644 share/sources/org.dryad/source.yaml delete mode 100644 share/sources/org.elife/icon.ico delete mode 100644 share/sources/org.elife/source.yaml delete mode 100644 share/sources/org.elis/icon.ico delete mode 100644 share/sources/org.elis/source.yaml delete mode 100644 share/sources/org.engrxiv/icon.ico delete mode 100644 share/sources/org.engrxiv/source.yaml delete mode 100644 share/sources/org.erudit/icon.ico delete mode 100644 share/sources/org.erudit/source.yaml delete mode 100644 share/sources/org.mblwhoilibrary/icon.ico delete mode 100644 share/sources/org.mblwhoilibrary/source.yaml delete mode 100644 share/sources/org.mla/icon.ico delete mode 100644 share/sources/org.mla/source.yaml delete mode 100644 share/sources/org.mpra/icon.ico delete mode 100644 share/sources/org.mpra/source.yaml delete mode 100644 share/sources/org.ncar/icon.ico delete mode 100644 share/sources/org.ncar/source.yaml delete mode 100644 share/sources/org.neurovault/icon.ico delete mode 100644 share/sources/org.neurovault/source.yaml delete mode 100644 share/sources/org.newprairiepress/icon.ico delete mode 100644 share/sources/org.newprairiepress/source.yaml delete mode 100644 share/sources/org.philpapers/icon.ico delete mode 100644 share/sources/org.philpapers/source.yaml delete mode 100644 share/sources/org.plos/icon.ico delete mode 100644 share/sources/org.plos/source.yaml delete mode 100644 share/sources/org.preprints/icon.ico delete mode 100644 share/sources/org.preprints/source.yaml delete mode 100644 share/sources/org.psyarxiv/icon.ico delete mode 100644 share/sources/org.psyarxiv/source.yaml delete mode 100644 share/sources/org.repec/icon.ico delete mode 100644 share/sources/org.repec/source.yaml delete mode 100644 share/sources/org.seafdec/icon.ico delete mode 100644 share/sources/org.seafdec/source.yaml delete mode 100644 share/sources/org.shareok/icon.ico delete mode 100644 share/sources/org.shareok/source.yaml delete mode 100644 share/sources/org.sldr/icon.ico delete mode 100644 share/sources/org.sldr/source.yaml delete mode 100644 share/sources/org.socarxiv/icon.ico delete mode 100644 share/sources/org.socarxiv/source.yaml delete mode 100644 share/sources/org.socialscienceregistry/icon.ico delete mode 100644 share/sources/org.socialscienceregistry/source.yaml delete mode 100644 share/sources/org.stepic/icon.ico delete mode 100644 share/sources/org.stepic/source.yaml delete mode 100644 share/sources/org.swbiodiversity/icon.ico delete mode 100644 share/sources/org.swbiodiversity/source.yaml delete mode 100644 share/sources/org.tdar/icon.ico 
delete mode 100644 share/sources/org.tdar/source.yaml delete mode 100644 share/sources/org.ttu/icon.ico delete mode 100644 share/sources/org.ttu/source.yaml delete mode 100644 share/sources/org.ucescholarship/icon.ico delete mode 100644 share/sources/org.ucescholarship/source.yaml delete mode 100644 share/sources/org.zenodo/icon.ico delete mode 100644 share/sources/org.zenodo/source.yaml delete mode 100644 share/sources/pe.upc/icon.ico delete mode 100644 share/sources/pe.upc/source.yaml delete mode 100644 share/sources/pt.rcaap/icon.ico delete mode 100644 share/sources/pt.rcaap/source.yaml delete mode 100644 share/sources/ru.cyberleninka/icon.ico delete mode 100644 share/sources/ru.cyberleninka/source.yaml delete mode 100644 share/sources/tr.edu.hacettepe/icon.ico delete mode 100644 share/sources/tr.edu.hacettepe/source.yaml delete mode 100644 share/sources/uk.cambridge/icon.ico delete mode 100644 share/sources/uk.cambridge/source.yaml delete mode 100644 share/sources/uk.lshtm/icon.ico delete mode 100644 share/sources/uk.lshtm/source.yaml delete mode 100644 share/sources/za.csir/icon.ico delete mode 100644 share/sources/za.csir/source.yaml delete mode 100644 share/tasks/__init__.py delete mode 100644 share/tasks/jobs.py delete mode 100644 share/transform/__init__.py delete mode 100644 share/transform/base.py delete mode 100644 share/transform/chain/__init__.py delete mode 100644 share/transform/chain/exceptions.py delete mode 100644 share/transform/chain/links.py delete mode 100644 share/transform/chain/parsers.py delete mode 100644 share/transform/chain/soup.py delete mode 100644 share/transform/chain/transformer.py delete mode 100644 share/transform/chain/utils.py delete mode 100644 share/transformers/__init__.py delete mode 100644 share/transformers/ca_lwbin.py delete mode 100644 share/transformers/com_biomedcentral.py delete mode 100644 share/transformers/com_dailyssrn.py delete mode 100644 share/transformers/com_figshare.py delete mode 100644 share/transformers/com_figshare_v2.py delete mode 100644 share/transformers/com_mendeley_data.py delete mode 100644 share/transformers/com_peerj.py delete mode 100644 share/transformers/com_peerj_xml.py delete mode 100644 share/transformers/com_researchregistry.py delete mode 100644 share/transformers/com_springer.py delete mode 100644 share/transformers/edu_ageconsearch.py delete mode 100644 share/transformers/edu_gwu.py delete mode 100644 share/transformers/edu_harvarddataverse.py delete mode 100644 share/transformers/gov_clinicaltrials.py delete mode 100644 share/transformers/gov_nih.py delete mode 100644 share/transformers/gov_nsfawards.py delete mode 100644 share/transformers/gov_pubmedcentral_pmc.py delete mode 100644 share/transformers/gov_scitech.py delete mode 100644 share/transformers/gov_usgs.py delete mode 100644 share/transformers/io_osf.py delete mode 100644 share/transformers/io_osf_preprints.py delete mode 100644 share/transformers/io_osf_registrations.py delete mode 100644 share/transformers/mods.py delete mode 100644 share/transformers/oai.py delete mode 100644 share/transformers/org_arxiv.py delete mode 100644 share/transformers/org_biorxiv.py delete mode 100644 share/transformers/org_biorxiv_html.py delete mode 100644 share/transformers/org_biorxiv_rss.py delete mode 100644 share/transformers/org_crossref.py delete mode 100644 share/transformers/org_datacite.py delete mode 100644 share/transformers/org_dataone.py delete mode 100644 share/transformers/org_elife.py delete mode 100644 share/transformers/org_engrxiv.py delete mode 
100644 share/transformers/org_ncar.py delete mode 100644 share/transformers/org_neurovault.py delete mode 100644 share/transformers/org_plos.py delete mode 100644 share/transformers/org_psyarxiv.py delete mode 100644 share/transformers/org_socarxiv.py delete mode 100644 share/transformers/org_socialscienceregistry.py delete mode 100644 share/transformers/org_swbiodiversity.py delete mode 100644 share/transformers/v1_push.py delete mode 100644 share/transformers/v2_push.py delete mode 100644 share/util/extensions.py delete mode 100644 share/util/graph.py delete mode 100644 share/util/iris.py delete mode 100644 share/util/nameparser.py delete mode 100644 share/util/names.py delete mode 100644 share/util/osf.py delete mode 100644 share/util/source_stat.py delete mode 100644 tests/share/test_subject_synonyms.py diff --git a/project/settings.py b/project/settings.py index 7b092fc10..06a8c4aab 100644 --- a/project/settings.py +++ b/project/settings.py @@ -488,10 +488,6 @@ def route_urgent_task(name, args, kwargs, options, task=None, **kw): SUBJECTS_CENTRAL_TAXONOMY = os.environ.get('SUBJECTS_CENTRAL_TAXONOMY', 'bepress') -# TODO why are these in different locations and formats?? -SUBJECTS_YAML = 'share/subjects.yaml' -SUBJECT_SYNONYMS_JSON = 'share/models/synonyms.json' - HIDE_DEPRECATED_VIEWS = strtobool(os.environ.get('HIDE_DEPRECATED_VIEWS', 'False')) # Regulator pipeline, names of setuptools entry points diff --git a/share/harvest/__init__.py b/share/harvest/__init__.py deleted file mode 100644 index 6bee901df..000000000 --- a/share/harvest/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from share.harvest.base import BaseHarvester # noqa diff --git a/share/harvest/base.py b/share/harvest/base.py deleted file mode 100644 index 83e59b5eb..000000000 --- a/share/harvest/base.py +++ /dev/null @@ -1,265 +0,0 @@ -from hashlib import sha256 -import abc -import datetime -import logging -import types -import warnings - -import pendulum -import requests - -from django.conf import settings -from django.utils import timezone - -from share.harvest.ratelimit import RateLimittedProxy -from share.harvest.serialization import DeprecatedDefaultSerializer -from share.models import RawDatum - - -logger = logging.getLogger(__name__) - - -# TODO: remove `FetchResult` (and the rest of the non-rdf legacy pipeline) -class FetchResult: - __slots__ = ('identifier', 'datum', 'datestamp', '_sha256') - - @property - def sha256(self): - if not self._sha256: - self._sha256 = sha256(self.datum.encode('utf-8')).hexdigest() - return self._sha256 - - def __init__(self, identifier, datum, datestamp=None): - self._sha256 = None - self.datestamp = datestamp - self.datum = datum - self.identifier = identifier - - def __repr__(self): - return '<{}({}, {}...)>'.format(self.__class__.__name__, self.identifier, self.sha256[:10]) - - -class BaseHarvester(metaclass=abc.ABCMeta): - """ - - Fetch: - Aquire and serialize data from a remote source, respecting rate limits. - fetch* methods return a generator that yield FetchResult objects - - Harvest: - Fetch and store data, respecting global rate limits. 
- harvest* methods return a generator that yield RawDatum objects - - """ - - SERIALIZER_CLASS = DeprecatedDefaultSerializer - - network_read_timeout = 30 - network_connect_timeout = 31 - - @property - def request_timeout(self): - """The timeout tuple for requests (connect, read) - """ - return (self.network_connect_timeout, self.network_read_timeout) - - def __init__(self, source_config, pretty=False, network_read_timeout=None, network_connect_timeout=None): - """ - - Args: - source_config (SourceConfig): - pretty (bool, optional): Defaults to False. - - """ - self.config = source_config - self.serializer = self.SERIALIZER_CLASS(pretty) - - self.session = requests.Session() - self.session.headers.update({'User-Agent': settings.SHARE_USER_AGENT}) - # TODO Make rate limit apply across threads - self.requests = RateLimittedProxy(self.session, self.config.rate_limit_allowance, self.config.rate_limit_period) - - self.network_read_timeout = (network_read_timeout or self.network_read_timeout) - self.network_connect_timeout = (network_connect_timeout or self.network_connect_timeout) - - def fetch_by_id(self, identifier, **kwargs): - datum = self._do_fetch_by_id(identifier, **self._get_kwargs(**kwargs)) - return FetchResult(identifier, self.serializer.serialize(datum)) - - def _do_fetch_by_id(self, identifier, **kwargs): - """Fetch a document by provider ID. - - Optional to implement, intended for dev and manual testing. - - Args: - identifier (str): Unique ID the provider uses to identify works. - - Returns: - FetchResult - - """ - raise NotImplementedError('{!r} does not support fetching by ID'.format(self)) - - def fetch(self, today=False, **kwargs): - """Fetch data from today. - - Yields: - FetchResult - - """ - return self.fetch_date_range(datetime.date.today() - datetime.timedelta(days=1), datetime.date.today(), **kwargs) - - def fetch_date(self, date: datetime.date, **kwargs): - """Fetch data from the specified date. - - Yields: - FetchResult - """ - return self.fetch_date_range(date - datetime.timedelta(days=1), date, **kwargs) - - def fetch_date_range(self, start, end, limit=None, **kwargs): - """Fetch data from the specified date range. - - Yields: - FetchResult - - """ - if not isinstance(start, datetime.date): - raise TypeError('start must be a datetime.date. Got {!r}'.format(start)) - - if not isinstance(end, datetime.date): - raise TypeError('end must be a datetime.date. Got {!r}'.format(end)) - - if start >= end: - raise ValueError('start must be before end. {!r} > {!r}'.format(start, end)) - - if limit == 0: - return # No need to do anything - - # Cast to datetimes for compat reasons - start = pendulum.instance(datetime.datetime.combine(start, datetime.time(0, 0, 0, 0, timezone.utc))) - end = pendulum.instance(datetime.datetime.combine(end, datetime.time(0, 0, 0, 0, timezone.utc))) - - if hasattr(self, 'shift_range'): - warnings.warn( - '{!r} implements a deprecated interface. ' - 'Handle date transforms in _do_fetch. 
' - 'shift_range will no longer be called in SHARE 2.9.0'.format(self), - DeprecationWarning - ) - start, end = self.shift_range(start, end) - - data_gen = self._do_fetch(start, end, **self._get_kwargs(**kwargs)) - - if not isinstance(data_gen, types.GeneratorType) and len(data_gen) != 0: - raise TypeError('{!r}._do_fetch must return a GeneratorType for optimal performance and memory usage'.format(self)) - - for i, blob in enumerate(data_gen): - result = FetchResult(blob[0], self.serializer.serialize(blob[1]), *blob[2:]) - - if result.datestamp is None: - result.datestamp = start - elif (result.datestamp.date() < start.date() or result.datestamp.date() > end.date()): - if (start - result.datestamp) > pendulum.Duration(hours=24) or (result.datestamp - end) > pendulum.Duration(hours=24): - raise ValueError( - 'result.datestamp is outside of the requested date range. ' - '{} from {} is not within [{} - {}]'.format(result.datestamp, result.identifier, start, end) - ) - logger.warning( - 'result.datestamp is within 24 hours of the requested date range. ' - 'This is probably a timezone conversion error and will be accepted. ' - '{} from {} is within 24 hours of [{} - {}]'.format(result.datestamp, result.identifier, start, end) - ) - - yield result - - if limit is not None and i >= limit: - break - - def harvest_id(self, identifier, **kwargs): - """Harvest a document by ID. - - Note: - Dependent on whether fetch_by_id is implemented. - - Args: - identifier (str): Unique ID the provider uses to identify works. - - Returns: - RawDatum - - """ - datum = self.fetch_by_id(identifier, **kwargs) - return RawDatum.objects.store_data(self.config, datum) - - def harvest(self, **kwargs): - """Fetch data from yesterday. - - Yields: - RawDatum - - """ - return self.harvest_date(datetime.date.today(), **kwargs) - - def harvest_date(self, date, **kwargs): - """Harvest data from the specified date. - - Yields: - RawDatum - - """ - return self.harvest_date_range(date - datetime.timedelta(days=1), date, **kwargs) - - def harvest_date_range(self, start, end, limit=None, force=False, **kwargs): - """Fetch data from the specified date range. - - Args: - start (date): - end (date): - limit (int, optional): The maximum number of unique data to harvest. Defaults to None. - Uniqueness is determined by the SHA-256 of the raw data - force (bool, optional): Disable all safety checks, unexpected exceptions will still be raised. Defaults to False. - **kwargs: Forwarded to _do_fetch. Overrides values in the source config's harvester_kwargs - - Yields: - RawDatum - - """ - if self.serializer.pretty: - raise ValueError('To ensure that data is optimally deduplicated, harvests may not occur while using a pretty serializer.') - - with self.config.acquire_lock(required=not force): - logger.info('Harvesting %s - %s from %r', start, end, self.config) - yield from RawDatum.objects.store_chunk(self.config, self.fetch_date_range(start, end, **kwargs), limit=limit) - - def _do_fetch(self, start, end, **kwargs): - """Fetch data from this source inside of the given date range. - - The given date range should be treated as [start, end) - - Any HTTP[S] requests MUST be sent using the self.requests client. - It will automatically enforce rate limits - - Args: - start_date (datetime): Date to start fetching data from, inclusively. - end_date (datetime): Date to fetch data up to, exclusively. - **kwargs: Arbitrary kwargs passed to subclasses, used to customize harvesting. Overrides values in the source config's harvester_kwargs. 
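Example:
    A minimal sketch of an implementation, assuming a paged JSON endpoint
    (the `base_url` attribute and the 'records', 'id', 'from', and 'until'
    keys below are illustrative assumptions, not taken from any real
    harvester):

        def _do_fetch(self, start, end, **kwargs):
            # All HTTP goes through self.requests, the rate-limited
            # proxy around this harvester's requests.Session.
            response = self.requests.get(
                self.config.base_url,
                params={'from': start.isoformat(), 'until': end.isoformat()},
                timeout=self.request_timeout,
            )
            response.raise_for_status()
            # Yield (identifier, datum[, datestamp]) tuples lazily;
            # fetch_date_range wraps each one in a FetchResult and
            # serializes the datum, so this must remain a generator.
            for record in response.json().get('records', []):
                yield (record['id'], record)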
- - Returns: - Iterator<FetchResult>: The fetched data. - - """ - if hasattr(self, 'do_harvest'): - warnings.warn( - '{!r} implements a deprecated interface. ' - 'do_harvest has been replaced by _do_fetch for clarity. ' - 'do_harvest will no longer be called in SHARE 2.11.0'.format(self), - DeprecationWarning - ) - logger.warning('%r implements a deprecated interface. ', self) - return self.do_harvest(start, end, **kwargs) - - raise NotImplementedError() - - def _get_kwargs(self, **kwargs): - return {**(self.config.harvester_kwargs or {}), **kwargs} diff --git a/share/harvest/exceptions.py b/share/harvest/exceptions.py deleted file mode 100644 index 01aae08a7..000000000 --- a/share/harvest/exceptions.py +++ /dev/null @@ -1,6 +0,0 @@ -from share.exceptions import HarvestError - - -# TODO replace with a more generic ConcurrencyError, or delete (SHARE-1026) -class HarvesterConcurrencyError(HarvestError): - pass diff --git a/share/harvest/ratelimit.py b/share/harvest/ratelimit.py deleted file mode 100644 index f33b9b894..000000000 --- a/share/harvest/ratelimit.py +++ /dev/null @@ -1,39 +0,0 @@ -import time -import logging - - -logger = logging.getLogger(__name__) - - -class RateLimittedProxy: - - def __init__(self, proxy_to, calls, per_second): - self._proxy_to = proxy_to - self._allowance = calls - self._calls = calls - self._last_call = 0 - self._per_second = per_second - self._cache = {} - - def _check_limit(self): - if self._allowance > 1: - return - wait = self._per_second - (time.time() - self._last_call) - if wait > 0: - logger.debug('Rate limitting %s. Sleeping for %s', self._proxy_to, wait) - time.sleep(wait) - self._allowance = self._calls - logger.debug('Access granted for %s', self._proxy_to) - - def _called(self): - self._allowance -= 1 - self._last_call = time.time() - - def __call__(self, *args, **kwargs): - self._check_limit() - ret = self._proxy_to(*args, **kwargs) - self._called() - return ret - - def __getattr__(self, name): - return self._cache.setdefault(name, self.__class__(getattr(self._proxy_to, name), self._calls, self._per_second)) diff --git a/share/harvest/scheduler.py b/share/harvest/scheduler.py deleted file mode 100644 index c8d6f7efb..000000000 --- a/share/harvest/scheduler.py +++ /dev/null @@ -1,125 +0,0 @@ -import pendulum - -from django.db import models - -from share.models import HarvestJob - - -class HarvestScheduler: - """Utility class for creating HarvestJobs - - All date ranges are treated as [start, end) - - """ - - def __init__(self, source_config, claim_jobs=False): - self.source_config = source_config - self.claim_jobs = claim_jobs - - def all(self, cutoff=None, allow_full_harvest=True, **kwargs): - """ - Args: - cutoff (date, optional): The upper bound to schedule harvests to. Default to today. - allow_full_harvest (bool, optional): Allow a SourceConfig to generate a full harvest. Defaults to True. - The SourceConfig.full_harvest must be marked True and have earliest_date set. 
- **kwargs: Forwarded to .range - - Returns: - A list of harvest jobs - - """ - if cutoff is None: - cutoff = pendulum.now().date() - - # TODO take harvest/sourceconfig version into account here - if hasattr(self.source_config, 'latest'): - latest_date = self.source_config.latest - else: - latest_date = self.source_config.harvest_jobs.aggregate(models.Max('end_date'))['end_date__max'] - - # If we can build full harvests and the earliest job that would be generated does NOT exist - # Go ahead and reset the latest_date to the earliest_date - if allow_full_harvest and self.source_config.earliest_date and self.source_config.full_harvest: - if not self.source_config.harvest_jobs.filter(start_date=self.source_config.earliest_date).exists(): - latest_date = self.source_config.earliest_date - - # If nothing sets latest_date, default to the soonest possible harvest - if not latest_date: - latest_date = cutoff - self.source_config.harvest_interval - - return self.range(latest_date, cutoff, **kwargs) - - def today(self, **kwargs): - """ - Functionally the same as calling .range(today, tomorrow)[0]. - You probably want to use .yesterday rather than .today. - - Args: - **kwargs: Forwarded to .date - - Returns: - A single Harvest job that *includes* today. - - """ - return self.date(pendulum.today().date(), **kwargs) - - def yesterday(self, **kwargs): - """ - Functionally the same as calling .range(yesterday, today)[0]. - - Args: - **kwargs: Forwarded to .date - - Returns: - A single Harvest job that *includes* yesterday. - - """ - return self.date(pendulum.yesterday().date(), **kwargs) - - def date(self, date, **kwargs): - """ - Args: - date (date): - **kwargs: Forwarded to .range - - Returns: - A single Harvest job that *includes* date. - - """ - return self.range(date, date.add(days=1), **kwargs)[0] - - def range(self, start, end, save=True): - """ - - Args: - start (date): - end (date): - save (bool, optional): If True, attempt to save the created HarvestJobs. Defaults to True. - - Returns: - A list of HarvestJobs within [start, end). - - """ - jobs = [] - - job_kwargs = { - 'source_config': self.source_config, - 'source_config_version': self.source_config.version, - 'harvester_version': getattr(self.source_config.get_harvester_class(), 'VERSION', 1), - } - if self.claim_jobs: - job_kwargs['claimed'] = True - - start = pendulum.datetime(start.year, start.month, start.day) - end = pendulum.datetime(end.year, end.month, end.day) - - sd, ed = start, start - - while ed + self.source_config.harvest_interval <= end: - sd, ed = ed, ed + self.source_config.harvest_interval - jobs.append(HarvestJob(start_date=sd, end_date=ed, **job_kwargs)) - - if jobs and save: - return HarvestJob.objects.bulk_get_or_create(jobs) - - return jobs diff --git a/share/harvest/serialization.py b/share/harvest/serialization.py deleted file mode 100644 index 0979b83fd..000000000 --- a/share/harvest/serialization.py +++ /dev/null @@ -1,59 +0,0 @@ -import json -import logging -import warnings - -logger = logging.getLogger(__name__) - - -class RawDatumSerializer: - """A deterministic serializer for harvested data. 
- """ - - def __init__(self, pretty): - self.pretty = pretty - - def serialize(self, value): - raise NotImplementedError() - - -class DictSerializer(RawDatumSerializer): - - def serialize(self, value): - return json.dumps(value, sort_keys=True, indent=4 if self.pretty else None) - - -class StringLikeSerializer(RawDatumSerializer): - - def serialize(self, value): - if isinstance(value, bytes): - return value.decode('utf-8') - - if isinstance(value, str): - return value - - raise TypeError('Expected str or bytes, got {!r}'.format(type(value))) - - -class DeprecatedDefaultSerializer(RawDatumSerializer): - def __init__(self, pretty=False): - super().__init__(pretty=pretty) - self.warned = False - self.dict_serializer = DictSerializer(pretty=pretty) - warnings.warn('{!r} is deprecated. Use a serializer meant for the data returned'.format(self), DeprecationWarning) - - def serialize(self, data, pretty=False): - if isinstance(data, str): - return data - if isinstance(data, bytes): - if not self.warned: - self.warned = True - warnings.warn( - '{!r}.encode_data got a bytes instance. ' - 'do_harvest should be returning str types as only the harvester will know how to properly encode the bytes ' - 'defaulting to decoding as utf-8'.format(self), - DeprecationWarning - ) - return data.decode('utf-8') - if isinstance(data, dict): - return self.dict_serializer.serialize(data) - raise Exception('Unable to properly encode data blob {!r}. Data should be a dict, bytes, or str objects.'.format(data)) diff --git a/share/harvesters/__init__.py b/share/harvesters/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/share/harvesters/ca_lwbin.py b/share/harvesters/ca_lwbin.py deleted file mode 100644 index 4d9b49115..000000000 --- a/share/harvesters/ca_lwbin.py +++ /dev/null @@ -1,35 +0,0 @@ -import pendulum - -from furl import furl - -from share.harvest.base import BaseHarvester - - -class LWBINHarvester(BaseHarvester): - VERSION = 1 - - # LWBIN does not have an HTTPS URL - limit = 100 - - def do_harvest(self, start_date, end_date): - page = 0 - - while True: - # Searching by time is not permitted by the LWBIN CKAN API. All records must be scanned each time. 
- page += 1 - response = self.requests.get(furl(self.config.base_url).set(query_params={ - 'limit': self.limit, - 'page': page, - }).url, timeout=self.request_timeout) - - for record in response.json()['result']: - date = pendulum.parse(record['metadata_modified']) - if date < start_date: - return # We're all caught up - if date > end_date: - continue # Reaching too far back - - yield record['id'], record - - if len(response.json()['result']) != self.limit: - break diff --git a/share/harvesters/com_biomedcentral.py b/share/harvesters/com_biomedcentral.py deleted file mode 100644 index fa0c9e4f0..000000000 --- a/share/harvesters/com_biomedcentral.py +++ /dev/null @@ -1,53 +0,0 @@ -from datetime import timedelta - -from furl import furl - -from django.conf import settings - -from share.harvest.base import BaseHarvester - - -class BiomedCentralHarvester(BaseHarvester): - VERSION = 1 - - def __init__(self, app_config): - super().__init__(app_config) - self.offset = 1 - self.page_size = 100 - self.url = 'https://api.springer.com/meta/v1/json' - - def do_harvest(self, start_date, end_date): - if not settings.SPRINGER_API_KEY: - raise Exception('SPRINGER_API_KEY not provided') - - end_date = end_date.date() - start_date = start_date.date() - - # BioMed Central API only accepts a specific date, not a date range, for retrieving articles - # so we must create our own list of dates - dates = [start_date + timedelta(n) for n in range((end_date - start_date).days + 1)] - - for date in dates: - yield from self.fetch_records(date) - - def fetch_records(self, date): - self.offset = 0 - resp = self.requests.get(self.build_url(date), timeout=self.request_timeout) - total = int(resp.json()['result'][0]['total']) - - while self.offset < total: - records = resp.json()['records'] - - for record in records: - yield (record['identifier'], record) - - self.offset += len(records) - resp = self.requests.get(self.build_url(date)) - - def build_url(self, date): - return furl(self.url).set(query_params={ - 'api_key': settings.SPRINGER_API_KEY, - 'q': 'date:{}'.format(date), - 'p': self.page_size, - 's': self.offset - }).url diff --git a/share/harvesters/com_figshare.py b/share/harvesters/com_figshare.py deleted file mode 100644 index 7b2949b21..000000000 --- a/share/harvesters/com_figshare.py +++ /dev/null @@ -1,49 +0,0 @@ -from datetime import timedelta - -from furl import furl - -from share.harvest import BaseHarvester - - -class FigshareHarvester(BaseHarvester): - VERSION = 1 - - # Other harvesters should not have to implement this method - def shift_range(self, start_date, end_date): - """Figshare should always have a 24 hour delay because they - manually go through and check for test projects. Most of them - are removed within 24 hours. - So, we will shift everything back a day with harvesting to ensure - nothing is harvested on the day of. 
- """ - return (start_date - timedelta(days=1)), (end_date - timedelta(days=1)) - - def do_harvest(self, start_date, end_date): - # Inputs are a DateTime object, many APIs only accept dates - end_date = end_date.date() - start_date = start_date.date() - - # Fetch records is a separate function for readability - # Ends up returning a list of tuples with provider given id and the document itself - return self.fetch_records(furl(self.config.base_url).set(query_params={ - 'search_for': '*', - 'to_date': end_date.isoformat(), - 'from_date': start_date.isoformat(), - }).url) - - def fetch_records(self, url): - count, page = 0, 1 - - resp = self.requests.get(url) - total = resp.json()['items_found'] - - while True: - if count >= total: - break - - for item in resp.json()['items']: - count += 1 - yield (item['article_id'], item) - - page += 1 - resp = self.requests.get(furl(url).add(query_params={'page': page}).url) diff --git a/share/harvesters/com_figshare_v2.py b/share/harvesters/com_figshare_v2.py deleted file mode 100644 index 98642a6b4..000000000 --- a/share/harvesters/com_figshare_v2.py +++ /dev/null @@ -1,48 +0,0 @@ -import pendulum - -from furl import furl - -from share.harvest import BaseHarvester - - -class FigshareHarvester(BaseHarvester): - VERSION = 1 - - page_size = 50 - - def _do_fetch(self, start_date, end_date): - url = furl(self.config.base_url).set(query_params={ - 'order_direction': 'asc', - 'order': 'modified_date', - 'page_size': self.page_size, - 'modified_since': start_date.date().isoformat(), - }) - return self.fetch_records(url, end_date.date()) - - def fetch_records(self, url, end_day): - page = 1 - last_seen_day = None - - while True: - page += 1 - url.args['page'] = page - resp = self.requests.get(url.url) - - if last_seen_day and resp.status_code == 422: - # We've asked for too much. Time to readjust date range - url.args['modified_since'] = last_seen_day.isoformat() - page = 0 - continue - - for item in resp.json(): - resp = self.requests.get(item['url']) - detail = resp.json() - last_seen_day = pendulum.parse(detail['modified_date']).date() - - if last_seen_day > end_day: - return - - yield item['url'], detail - - if len(resp.json()) < self.page_size: - return # We've hit the end of our results diff --git a/share/harvesters/com_mendeley_data.py b/share/harvesters/com_mendeley_data.py deleted file mode 100644 index a86b776c2..000000000 --- a/share/harvesters/com_mendeley_data.py +++ /dev/null @@ -1,83 +0,0 @@ -from furl import furl -import pendulum - -from django.conf import settings - -from share.harvest import BaseHarvester - - -class MendeleyHarvester(BaseHarvester): - VERSION = 1 - MENDELEY_OAUTH_URL = 'https://api.mendeley.com/oauth/token' - - def get_token(self): - """ Mendeley gives tokens that last for one hour. A new token will be - requested everytime the harvester is run to ensure the access token is - valid. - """ - data = {'grant_type': 'client_credentials', 'scope': 'all'} - headers = {'Content-Type': 'application/x-www-form-urlencoded'} - - r = self.requests.post( - self.MENDELEY_OAUTH_URL, - headers=headers, - data=data, - auth=(settings.MENDELEY_API_CLIENT_ID, settings.MENDELEY_API_CLIENT_SECRET), - ) - if r.status_code != 200: - raise Exception('Access token not granted. 
Stopping harvest.') - return r.json()['access_token'] - - def do_harvest(self, start_date, end_date): - if not settings.MENDELEY_API_CLIENT_ID or not settings.MENDELEY_API_CLIENT_SECRET: - raise Exception('Mendeley authorization information not provided') - - self.requests.headers.update({'Authorization': 'Bearer ' + self.get_token()}) - - ACCEPT_HEADER = 'application/vnd.mendeley-public-dataset.1+json' - headers = {'Accept': ACCEPT_HEADER} - - # Inputs are a DateTime object, many APIs only accept dates - start_date = start_date.date() - - # Fetch records is a separate function for readability - # Ends up returning a list of tuples with provider given id and the document itself - return self.fetch_records(furl(self.config.base_url).set(query_params={ - 'modified_since': start_date.isoformat(), - 'fields': 'results.*', - 'limit': '100', # chance of timing out with larger requests - 'sort': 'publish_date', - 'order': 'asc', - }).url, headers, end_date) - - def fetch_records(self, url, headers, end_date): - - resp = self.requests.get(url, headers=headers) - - while True: - for dataset in resp.json()['results']: - # modified_since filters on publish_date - if pendulum.parse(dataset['publish_date']) >= end_date: - break - # Send another request to get useful contributor information - if 'contributors' in dataset: - for contributor in dataset['contributors']: - try: - profile_resp = self.get_contributor_profile(headers, contributor['profile_id']) - contributor['full_profile'] = profile_resp.json() - except KeyError: - continue - yield (dataset['id'], dataset) - - try: - resp = self.requests.get(resp.links['next']['url'], headers=headers) - except KeyError: - break - - def get_contributor_profile(self, headers, contributor_uuid): - ACCEPT_HEADER = 'application/vnd.mendeley-profiles.1+json' - BASE_PROFILE_URL = 'https://api.mendeley.com/profiles/' - - contributor_headers = {'Accept': ACCEPT_HEADER} - profile_url = furl(BASE_PROFILE_URL).join(contributor_uuid).url - return self.requests.get(profile_url, headers=contributor_headers) diff --git a/share/harvesters/com_peerj.py b/share/harvesters/com_peerj.py deleted file mode 100644 index a2eb7753e..000000000 --- a/share/harvesters/com_peerj.py +++ /dev/null @@ -1,42 +0,0 @@ -import pendulum - -import logging -from share.harvest import BaseHarvester - -logger = logging.getLogger(__name__) - - -class PeerJHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date: pendulum.DateTime, end_date: pendulum.DateTime, identifier_prefix='', fetch_xml=False): - url = self.config.base_url - while True: - logger.debug('Fetching page %s', url) - resp = self.requests.get(url) - resp.raise_for_status() - resp_data = resp.json() - - for record in resp_data['_items']: - if pendulum.parse(record['date']) < start_date: - logger.info('%s is before %s, ending harvest', record['date'], start_date) - return - - if pendulum.parse(record['date']) > end_date: - logger.debug('%s is after %s, skipping', record['date'], end_date) - continue - - doc_id = identifier_prefix + record['identifiers']['peerj'] - - if fetch_xml: - logger.debug('Fetching article %s', record['_links']['alternate']['xml']['href']) - details = self.requests.get(record['_links']['alternate']['xml']['href']) - details.raise_for_status() - yield doc_id, details.content - else: - yield doc_id, record - - if 'next' not in resp_data['_links']: - logger.info('No "next" key found, ending harvest') - return - url = resp_data['_links']['next']['href'] diff --git 
a/share/harvesters/com_researchregistry.py b/share/harvesters/com_researchregistry.py deleted file mode 100644 index e065a029c..000000000 --- a/share/harvesters/com_researchregistry.py +++ /dev/null @@ -1,48 +0,0 @@ -import json -from furl import furl - -from django.conf import settings - -from share.harvest import BaseHarvester - - -# Built by inspecting http://www.researchregistry.com/browse-the-registry.html -class ResearchRegistryHarvester(BaseHarvester): - VERSION = 1 - - HEADERS = { - 'X-Knack-Application-Id': settings.RESEARCHREGISTRY_APPLICATION_ID, - 'X-Knack-REST-API-Key': settings.RESEARCHREGISTRY_API_KEY - } - DATE_FIELD = 'field_2' # Registration Date - ID_FIELD = 'field_21' # Research Registry UIN - - def fetch_page(self, page, start_date, end_date): - url = furl(self.config.base_url) - url.args['page'] = page - url.args['rows_per_page'] = 1000 - url.args['format'] = 'raw' - url.args['filters'] = json.dumps({ - 'match': 'and', - 'rules': [ - # date filters are strictly less/greater than, not equal to - {'field': self.DATE_FIELD, 'operator': 'is after', 'value': start_date.subtract(days=1).to_date_string()}, - {'field': self.DATE_FIELD, 'operator': 'is before', 'value': end_date.add(days=1).to_date_string()}, - ] - }) - response = self.requests.get(url.url, headers=self.HEADERS) - if response.status_code // 100 != 2: - raise ValueError('Malformed response ({}) from {}. Got {}'.format(response, url.url, response.content)) - return response.json() - - def do_harvest(self, start_date, end_date): - page = 1 - while True: - data = self.fetch_page(page, start_date, end_date) - total_pages = data['total_pages'] - records = data['records'] - for r in records: - yield r[self.ID_FIELD], r - if page >= total_pages: - break - page += 1 diff --git a/share/harvesters/com_springer.py b/share/harvesters/com_springer.py deleted file mode 100644 index c01089a74..000000000 --- a/share/harvesters/com_springer.py +++ /dev/null @@ -1,52 +0,0 @@ -from datetime import timedelta - -from furl import furl - -from django.conf import settings - -from share.harvest import BaseHarvester - - -class SpringerHarvester(BaseHarvester): - VERSION = 1 - - def __init__(self, app_config): - super().__init__(app_config) - self.offset = 1 - self.page_size = 100 - - def do_harvest(self, start_date, end_date): - if not settings.SPRINGER_API_KEY: - raise Exception('SPRINGER_API_KEY not provided') - - end_date = end_date.date() - start_date = start_date.date() - - # BioMed Central API only accepts a specific date, not a date range, for retrieving articles - # so we must create our own list of dates - dates = [start_date + timedelta(n) for n in range((end_date - start_date).days + 1)] - - for date in dates: - yield from self.fetch_records(date) - - def fetch_records(self, date): - self.offset = 0 - resp = self.requests.get(self.build_url(date)) - total = int(resp.json()['result'][0]['total']) - - while self.offset < total: - records = resp.json()['records'] - - for record in records: - yield (record['identifier'], record) - - self.offset += len(records) - resp = self.requests.get(self.build_url(date)) - - def build_url(self, date): - return furl(self.config.base_url).set(query_params={ - 'api_key': settings.SPRINGER_API_KEY, - 'q': 'date:{}'.format(date), - 'p': self.page_size, - 's': self.offset - }).url diff --git a/share/harvesters/edu_ageconsearch.py b/share/harvesters/edu_ageconsearch.py deleted file mode 100644 index b924b5f39..000000000 --- a/share/harvesters/edu_ageconsearch.py +++ /dev/null @@ -1,117 +0,0 @@ 
-import logging
-import dateutil.parser
-
-from bs4 import BeautifulSoup
-from furl import furl
-import pendulum
-
-from share.harvest import BaseHarvester
-
-logger = logging.getLogger(__name__)
-
-
-class AgEconHarvester(BaseHarvester):
-    """
-    Query Parameters:
-        month (MM)
-        year (YYYY)
-        order (oldestFirst or None)
-        starts_with (YYYY-MM-DD) they don't always have a day
-        top (page number)
-
-    Returns:
-        Page with nearest date
-        20 records/page
-    """
-    VERSION = 1
-
-    fields = {
-        'title': 'title',
-        'other titles': 'other_titles',
-        'authors': 'authors',
-        'editors': 'editors',
-        'editors (email)': 'editors_email',
-        'authors (email)': 'authors_email',
-        'keywords': 'keywords',
-        'jel codes': 'jel_codes',
-        'issue date': 'issue_date',
-        'series/report no.': 'series_report_number',
-        'abstract': 'abstract',
-        'uri': 'uri',
-        'institution/association': 'institution_association',
-        'identifiers': 'identifiers',
-        'total pages': 'total_pages',
-        'from page': 'from_page',
-        'to page': 'to_page',
-        'notes': 'notes',
-        'collections:': 'collections',
-    }
-
-    # Request page with nearest date
-    def do_harvest(self, start_date: pendulum.DateTime, end_date: pendulum.DateTime):
-        return self.fetch_records(start_date, end_date)
-
-    # Fetch the list of work urls on a single result page and return results within date range
-    def fetch_records(self, start_date, end_date):
-        logger.info('Harvesting %s - %s', start_date, end_date)
-        logger.debug('Fetching page %s', self.config.base_url)
-
-        url = furl(self.config.base_url)
-        url.args['starts_with'] = start_date
-        r = self.requests.get(url.url)
-
-        r.raise_for_status()
-        within_date_range = True
-        while within_date_range:
-            document = BeautifulSoup(r.text, 'html.parser')
-            results = document.select('a[href^="/handle/"]')[1:]
-            for result in results:
-                url = 'http://ageconsearch.umn.edu{}'.format(result.attrs['href'])
-                work = self.fetch_work(url)
-                date_status = self.check_record_date(work['issue_date'], start_date, end_date)
-
-                # if date is > start_date continue and skip
-                if date_status == 'after':
-                    continue
-                elif date_status == 'before':
-                    within_date_range = False
-                    return
-                yield work['primary_identifier'], work
-
-            r = self.requests.get('http://ageconsearch.umn.edu/{}'.format(document.find('a', string='Next page').attrs['href']))
-
-    def check_record_date(self, issue_date, start_date, end_date):
-        date_object = dateutil.parser.parse(issue_date, default=pendulum.datetime(2016, 1, 1))
-
-        if date_object < start_date.start_of('day'):
-            return 'before'
-        if date_object > end_date.end_of('day'):
-            return 'after'
-
-        return 'within'
-
-    # Pull data out of html
-    def fetch_work(self, url):
-        r = self.requests.get(url)
-        r.raise_for_status()
-        soup = BeautifulSoup(r.text, 'lxml')
-        data = {}
-
-        data['primary_identifier'] = soup.find('code').text
-        display_table = soup.find(class_='itemDisplayTable').find_all('tr')
-
-        for row in display_table:
-            label = row.find(class_='metadataFieldLabel').text.replace(':\xa0', '').lower()
-            value_object = row.find(class_='metadataFieldValue')
-            if value_object.string:
-                value = value_object.string
-            else:
-                contents = []
-                for content in value_object.contents:
-                    contents.append(content.string or content)
-                # Feels a little hacky
-                value = [val for val in contents if val != BeautifulSoup('<br/>', 'lxml').br]
-
-            data[self.fields[label]] = value
-
-        return data
diff --git a/share/harvesters/edu_gwu.py b/share/harvesters/edu_gwu.py
deleted file mode 100644
index cb3645c0f..000000000
--- a/share/harvesters/edu_gwu.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import logging
-import pendulum
-
-from furl import furl
-from bs4 import BeautifulSoup
-
-from share.harvest import BaseHarvester
-
-logger = logging.getLogger(__name__)
-
-
-class GWScholarSpaceHarvester(BaseHarvester):
-    VERSION = 1
-
-    def do_harvest(self, start_date, end_date):
-        end_date = end_date.date()
-        start_date = start_date.date()
-
-        # There is no apparent way to filter by date range, just sort by date.
-        url = furl(self.config.base_url + '/catalog')
-        url.args['per_page'] = 10  # If it gets more active, consider upping to 50 or 100
-        url.args['sort'] = 'system_modified_dtsi+desc'
-
-        # Fetch records is a separate function for readability
-        # Ends up returning a list of tuples with provider given id and the document itself
-        return self.fetch_records(url, start_date, end_date)
-
-    def fetch_records(self, url, start_date, end_date):
-        count, page = 0, 1
-        resp = self.requests.get(furl(url).set(query_params={'page': page}), timeout=self.request_timeout)
-        soup = BeautifulSoup(resp.content, 'lxml')
-        try:
-            total = int(soup.select('#sortAndPerPage .page_entries strong')[-1].text.replace(',', ''))
-        except IndexError:
-            total = 0
-
-        logger.info('Found %d results from GW ScholarSpace', total)
-
-        while count < total:
-            links = [a['href'] for a in soup.select('#search-results li h2 > a')]
-
-            if not links:
-                break
-
-            logger.info('On document %d of %d (%d%%)', count, total, (count / total) * 100)
-            for link in links:
-                item_response = self.requests.get(self.config.base_url + link)
-                if item_response.status_code // 100 != 2:
-                    logger.warning('Got non-200 status %s from %s', item_response, link)
-                    continue
-                item_response.raise_for_status()
-                soup = BeautifulSoup(item_response.content, 'lxml')
-
-                # Skip records outside the date range
-                date_modified = pendulum.parse(soup.find(itemprop='dateModified').text).date()
-                if date_modified > end_date:
-                    continue
-                if date_modified < start_date:
-                    return
-
-                item = soup.find(id='content').find(itemscope=True)
-
-                count += 1
-                yield link, str(item)
-
-            page += 1
-            resp = self.requests.get(furl(url).set(query_params={'page': page}))
-            soup = BeautifulSoup(resp.content, 'lxml')
diff --git a/share/harvesters/edu_harvarddataverse.py b/share/harvesters/edu_harvarddataverse.py
deleted file mode 100644
index dfacf661c..000000000
--- a/share/harvesters/edu_harvarddataverse.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from django.conf import settings
-
-from furl import furl
-
-from share.harvest import BaseHarvester
-
-
-class HarvardDataverseHarvester(BaseHarvester):
-    VERSION = 1
-
-    type = 'dataset'
-    MAX_ITEMS_PER_REQUEST = 1000
-
-    def do_harvest(self, start_date, end_date):
-        end_date = end_date.date()
-        start_date = start_date.date()
-
-        return self.fetch_records(furl(self.config.base_url).set(query_params={
-            'q': '*',
-            'type': self.type,
-            'per_page': self.MAX_ITEMS_PER_REQUEST,
-            'key': settings.DATAVERSE_API_KEY,
-            'sort': 'date',
-            'order': 'asc',
-            'fq': 'dateSort:[{}T00:00:00Z TO {}T00:00:00Z]'.format(start_date.isoformat(), end_date.isoformat())
-        }).url)
-
-    def fetch_records(self, url):
-        response = self.requests.get(url, timeout=self.request_timeout)
-        total_num = response.json()['data']['total_count']
-        num_processed = 0
-
-        while num_processed < total_num:
-            records = response.json()['data']['items']
-            num_processed += len(records)
-
-            for record in records:
-                yield (record['global_id'], record)
-
-            response = self.requests.get(furl(url).add(query_params={
-                'start': str(num_processed)
-            }).url,
timeout=self.request_timeout) diff --git a/share/harvesters/gov_clinicaltrials.py b/share/harvesters/gov_clinicaltrials.py deleted file mode 100644 index 74b87ed8e..000000000 --- a/share/harvesters/gov_clinicaltrials.py +++ /dev/null @@ -1,54 +0,0 @@ -import logging - -from furl import furl - -from lxml import etree - -from share.harvest import BaseHarvester - - -logger = logging.getLogger(__name__) - - -class ClinicalTrialsHarvester(BaseHarvester): - VERSION = 2 - - def do_harvest(self, start_date, end_date): - end_date = end_date.date() - start_date = start_date.date() - - return self.fetch_records(furl(self.config.base_url).set(query_params={ - 'displayxml': 'true', - 'lup_s': start_date.strftime('%m/%d/%Y'), - 'lup_e': end_date.strftime('%m/%d/%Y') - }).url) - - def fetch_records(self, url): - resp = self.requests.get(url) - resp_xml = etree.XML(resp.content) - num_records = int(resp_xml.xpath('//search_results/@count')[0]) - - if num_records > 0: - # create a new URL to request all results - url = furl(url).add(query_params={ - 'count': num_records - }).url - - all_records_resp = self.requests.get(url) - all_records_doc = etree.XML(all_records_resp.content) - - # retrieve the URLs for each document to make requests for their full content - record_urls = [ - furl(record.xpath('url/node()')[0]).set(query_params={ - 'displayxml': 'true' - }).url - for record in all_records_doc.xpath('//clinical_study') - ] - - total = len(record_urls) - for i, url in enumerate(record_urls): - logger.debug('[%d / %d] Requesting %s', i, total, url) - record_resp = self.requests.get(url) - - doc = etree.fromstring(record_resp.content, parser=etree.XMLParser(recover=True)) - yield doc.xpath('//nct_id/node()')[0], etree.tostring(doc, encoding=str) diff --git a/share/harvesters/gov_doepages.py b/share/harvesters/gov_doepages.py deleted file mode 100644 index 5a1c40970..000000000 --- a/share/harvesters/gov_doepages.py +++ /dev/null @@ -1,46 +0,0 @@ -from lxml import etree - -from furl import furl - -from share.harvest import BaseHarvester - - -class DoepagesHarvester(BaseHarvester): - VERSION = 1 - - namespaces = { - 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', - 'dc': 'http://purl.org/dc/elements/1.1/', - 'dcq': 'http://purl.org/dc/terms/' - } - - def do_harvest(self, start_date, end_date): - end_date = end_date.date() - start_date = start_date.date() - - resp = self.requests.get(furl(self.config.base_url).set(query_params={ - 'nrows': 1, - 'EntryDateFrom': start_date.strftime('%m/%d/%Y'), - 'EntryDateTo': end_date.strftime('%m/%d/%Y'), - }).url) - - initial_doc = etree.XML(resp.content) - num_results = int(initial_doc.xpath('//records/@count', namespaces=self.namespaces)[0]) - - records_url = furl(self.config.base_url).set(query_params={ - 'nrows': num_results, - 'EntryDateFrom': start_date.strftime('%m/%d/%Y'), - 'EntryDateTo': end_date.strftime('%m/%d/%Y'), - }).url - - return self.fetch_records(records_url) - - def fetch_records(self, url): - resp = self.requests.get(url) - xml = etree.XML(resp.content) - records = xml.xpath('records/record') - - for record in records: - doc_id = record.xpath('dc:ostiId/node()', namespaces=self.namespaces)[0] - doc = etree.tostring(record) - yield (doc_id, doc) diff --git a/share/harvesters/gov_nih.py b/share/harvesters/gov_nih.py deleted file mode 100644 index 90d82d6e5..000000000 --- a/share/harvesters/gov_nih.py +++ /dev/null @@ -1,143 +0,0 @@ -import re -import logging - -from bs4 import BeautifulSoup -from datetime import date, timedelta -from dateutil.parser 
import parse -from io import BytesIO -from lxml import etree -from zipfile import ZipFile - -from share.harvest import BaseHarvester - - -logger = logging.getLogger(__name__) - - -class NIHHarvester(BaseHarvester): - """ - h/t to @fabianvf for this harvester. - """ - VERSION = 1 - - namespaces = {'xsi': "http://www.w3.org/2001/XMLSchema-instance"} - - def do_harvest(self, start_date, end_date, table_url): - end_date = end_date.date() - start_date = start_date.date() - logger.info('Harvesting NIH %s - %s', start_date, end_date) - - # get ExPORTER page html and rows storing records - html = self.requests.get(table_url).content - soup = BeautifulSoup(html, 'lxml') - table = soup.find('table', id="ContentPlaceHolder1_ProjectData_dgProjectData") - rows = table.find_all('tr', class_="row_bg") - urls = [i for i in self.construct_urls(self.config.base_url, start_date, end_date, rows)] - logger.debug('Found %d urls to grab', len(urls)) - records = self.xml_records(self.get_xml_files(urls)) - - for record in records: - doc = etree.tostring(record) - doc_id = record.xpath('.//APPLICATION_ID/node()', namespaces=self.namespaces)[0] - yield (doc_id, doc) - - def daterange(self, start_date, end_date): - """ - Get all the dates between the start_date and the end_date - """ - for ordinal in range(start_date.toordinal(), end_date.toordinal()): - yield date.fromordinal(ordinal) - - def get_days_of_week(self, start_date, end_date, day_of_week): - """ - First convert start_date and end_date to have the day of week we require. - Then get all the dates of the specified day of week between the start_date and end_date. - """ - start_date = start_date - timedelta(days=(start_date.weekday() - day_of_week)) - end_date = end_date - timedelta(days=(end_date.weekday() - day_of_week)) - - for ordinal in range(start_date.toordinal(), end_date.toordinal() + 1): - if date.fromordinal(ordinal).weekday() == day_of_week: - yield date.fromordinal(ordinal) - - def get_fiscal_year(self, mydate=date.today()): - """ - Return the current fiscal year. 
Each fiscal year starts on October 1 - """ - if mydate.month < 10: - return mydate.year - return mydate.year + 1 - - def get_fiscal_years(self, dates): - """ - Given a range of dates, get unique fiscal years - """ - return tuple(set(map(self.get_fiscal_year, dates))) - - def parse_month_column(self, month_column, day_of_week): - """ - Given a month column string, return the date of a day (Monday by default) of that week - An example of a month column string: September 2015, WEEK 1 - """ - month_year, week = iter(map(lambda x: x.strip(), month_column.split(','))) - first_day = parse('1 ' + month_year) - first_day -= timedelta(days=(first_day.weekday() - day_of_week + 7 * (1 if first_day.weekday() - day_of_week <= 0 else 0))) - week = int(re.search('.*([0-9]{1,2})', week).group(1)) - mydate = first_day + timedelta(week * 7) - return mydate.date() - - def parse_row(self, row, day_of_week): - """ - Get a row of the ExPORTER table, return the date of a day (Monday by default) of that week, the fiscal year, - and the url of the xml file - To keep the format consistent, if the record is from previous fiscal years, None is returned - """ - row_text = list(map(lambda x: x.text.strip('\t\n\r').strip('</td>'), row)) - row_text = list(map(lambda x: x.strip(), row_text)) - month_column = row_text[1] - fiscal_year = int(row_text[2]) - url = row[3].find('a').get('href') - - if month_column.lower() == u"all": - return (None, fiscal_year, url) - elif re.match(r'[A-Za-z\s]* [0-9]{4}, WEEK \d+', month_column): - date = self.parse_month_column(month_column, day_of_week) - return (date, fiscal_year, url) - else: - raise ValueError('Unrecognized month column format: "{}"'.format(month_column)) - - def parse_rows(self, rows, day_of_week): - """ - A generator to parse all the rows - """ - for row in rows: - yield self.parse_row(row('td'), day_of_week) - - def construct_urls(self, url, start_date, end_date, rows, day_of_week=0): - """ - Given date range, constructs urls of corresponded XML files. - """ - dates = [i for i in self.get_days_of_week(start_date, end_date, day_of_week)] - fiscal_years = self.get_fiscal_years(dates) - for data in self.parse_rows(rows, day_of_week): - if data[0] in dates or (data[0] is None and data[1] in fiscal_years): - yield ''.join([self.config.base_url, data[2]]) - - def xml_records(self, files): - for xml_file in files: - # Avoids eating all available memory. Read one row at a time. - for _, record in etree.iterparse(xml_file, tag='row'): - yield record - # Saw this on SO. claims to save more memory. - # Probably does, lxml might be building one giant tree. - record.clear() - - def get_xml_files(self, urls): - for zip_url in urls: - logger.info('Fetching URL %s', zip_url) - data = self.requests.get(zip_url) - zipfile = ZipFile(BytesIO(data.content)) - with zipfile.open(zipfile.namelist()[0], 'r') as f: - # NOTE: reading the entire file in at once can - # take up A LOT of memory. Use lxml's iterparse. 
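# For reference, the consumer of these file handles (xml_records above) keeps
# memory flat by streaming rows and clearing each element as it goes -- a
# minimal sketch of that pattern, assuming a file-like object f open on the
# XML and a hypothetical per-record callback:
#
#     for _, row in etree.iterparse(f, tag='row'):
#         handle(row)   # process one record at a time
#         row.clear()   # drop the parsed subtree so the partial tree stays small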
- yield f diff --git a/share/harvesters/gov_nsfawards.py b/share/harvesters/gov_nsfawards.py deleted file mode 100644 index 384b7ec8b..000000000 --- a/share/harvesters/gov_nsfawards.py +++ /dev/null @@ -1,100 +0,0 @@ -import pendulum -import logging - -from furl import furl -from typing import Tuple -from typing import Union -from typing import Iterator - -from share.harvest import BaseHarvester - -logger = logging.getLogger(__name__) - - -PAGE_SIZE = 25 - -NSF_FIELDS = [ - 'id', - 'agency', - 'awardeeCity', - 'awardeeCountryCode', - 'awardeeCounty', - 'awardeeDistrictCode', - 'awardeeName', - 'awardeeStateCode', - 'awardeeZipCode', - 'cfdaNumber', - 'coPDPI', - 'date', - 'startDate', - 'expDate', - 'estimatedTotalAmt', - 'fundsObligatedAmt', - 'dunsNumber', - 'fundProgramName', - 'parentDunsNumber', - 'pdPIName', - 'perfCity', - 'perfCountryCode', - 'perfCounty', - 'perfDistrictCode', - 'perfLocation', - 'perfStateCode', - 'perfZipCode', - 'poName', - 'primaryProgram', - 'transType', - 'title', - 'awardee', - 'poPhone', - 'poEmail', - 'awardeeAddress', - 'perfAddress', - 'publicationResearch', - 'publicationConference', - 'fundAgencyCode', - 'awardAgencyCode', - 'projectOutComesReport', - 'abstractText', - 'piFirstName', - 'piMiddeInitial', - 'piLastName', - 'piPhone', - 'piEmail' -] - - -class NSFAwardsHarvester(BaseHarvester): - VERSION = 2 - - def shift_range(self, start_date: pendulum.DateTime, end_date: pendulum.DateTime) -> Tuple[pendulum.DateTime, pendulum.DateTime]: - # HACK: Records are made available one business day *after* their "date". - # Accounting for holidays, they might be delayed over a 4-day weekend. - # When harvesting yesterday's data, actually reach back farther... - if end_date.is_same_day(pendulum.now()): - start_date = start_date.subtract(days=5) - return start_date, end_date - - def _do_fetch(self, start_date: pendulum.DateTime, end_date: pendulum.DateTime) -> Iterator[Tuple[str, Union[str, dict, bytes]]]: - url = furl(self.config.base_url) - - url.args['dateStart'] = start_date.date().strftime('%m/%d/%Y') - url.args['dateEnd'] = end_date.date().strftime('%m/%d/%Y') - url.args['offset'] = 0 - url.args['printFields'] = ','.join(NSF_FIELDS) - url.args['rpp'] = PAGE_SIZE - - return self.fetch_records(url) - - def fetch_records(self, url: furl) -> Iterator[Tuple[str, Union[str, dict, bytes], pendulum.DateTime]]: - while True: - logger.info('Fetching %s', url.url) - records = self.requests.get(url.url).json()['response'].get('award', []) - - for record in records: - yield (record['id'], record, pendulum.from_format(record['date'], '%m/%d/%Y')) - - if len(records) < PAGE_SIZE: - break - - url.args['offset'] += PAGE_SIZE diff --git a/share/harvesters/gov_scitech.py b/share/harvesters/gov_scitech.py deleted file mode 100644 index 269e68310..000000000 --- a/share/harvesters/gov_scitech.py +++ /dev/null @@ -1,39 +0,0 @@ -from furl import furl - -from lxml import etree - -from share.harvest import BaseHarvester - - -class SciTechHarvester(BaseHarvester): - VERSION = 1 - - namespaces = { - 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', - 'dc': 'http://purl.org/dc/elements/1.1/', - 'dcq': 'http://purl.org/dc/terms/' - } - - def do_harvest(self, start_date, end_date): - end_date = end_date.date() - start_date = start_date.date() - - page = 0 - more_pages = True - - while more_pages: - response = self.requests.get(furl(self.config.base_url).set(query_params={ - 'page': page, - 'EntryDateTo': end_date.strftime('%m/%d/%Y'), - 'EntryDateFrom': 
start_date.strftime('%m/%d/%Y'), - }).url) - - xml = etree.XML(response.content) - records = xml.xpath('records/record') - for record in records: - doc_id = record.xpath('dc:ostiId/node()', namespaces=self.namespaces)[0] - doc = etree.tostring(record) - yield (doc_id, doc) - - page += 1 - more_pages = xml.xpath('//records/@morepages')[0] == 'true' diff --git a/share/harvesters/gov_usgs.py b/share/harvesters/gov_usgs.py deleted file mode 100644 index 76afebff1..000000000 --- a/share/harvesters/gov_usgs.py +++ /dev/null @@ -1,40 +0,0 @@ -from furl import furl - -from datetime import date - -from share.harvest import BaseHarvester - - -class USGSHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date, end_date): - today = date.today() - end_date = end_date.date() - start_date = start_date.date() - - end_days_back = (today - end_date).days - start_days_back = (today - start_date).days - - # The USGS API does not support date ranges - for days_back in range(end_days_back, start_days_back): - page = 1 - page_size = 100 - - while True: - resp = self.requests.get(furl(self.config.base_url).set(query_params={ - 'mod_x_days': days_back + 1, - 'page_number': page, - 'page_size': page_size - }).url) - - records = resp.json()['records'] - - for record in records: - record_id = record['id'] - yield (record_id, record) - - if len(records) < page_size: - break - - page += 1 diff --git a/share/harvesters/io_osf.py b/share/harvesters/io_osf.py deleted file mode 100644 index 0915dcbb2..000000000 --- a/share/harvesters/io_osf.py +++ /dev/null @@ -1,127 +0,0 @@ -import logging -import datetime - -from django.conf import settings - -from furl import furl - -from share.exceptions import HarvestError -from share.harvest import BaseHarvester - - -QA_TAG = 'qatest' -logger = logging.getLogger(__name__) - - -class NodeSuddenlyUnavailable(HarvestError): - # A node was deleted or made private after it was seen at /v2/nodes, - # but before we could fetch its embeds - pass - - -class OSFHarvester(BaseHarvester): - VERSION = 1 - - # override BaseHarvester._do_fetch - def _do_fetch(self, start_date, end_date, path, query_params=None, embed_attrs=None): - return self._fetch_records(self._build_url(start_date, end_date, path, query_params), embed_attrs) - - # override BaseHarvester._do_fetch_by_id - def _do_fetch_by_id(self, guid, path, query_params=None, embed_attrs=None): - url = self._build_guid_url(guid, path, query_params).url - response = self.requests.get(url) - - if response.status_code // 100 != 2: - raise ValueError('Malformed response ({}) from {}. 
Got {}'.format(response, url, response.content)) - - logger.debug('Fetched record "%s"', guid) - - record = response.json()['data'] - return self._populate_embeds(record, embed_attrs) - - def _setup_session(self): - # so prod SHARE doesn't get throttled - if settings.OSF_BYPASS_THROTTLE_TOKEN: - self.session.headers.update({'X-THROTTLE-TOKEN': settings.OSF_BYPASS_THROTTLE_TOKEN}) - - def _build_url(self, start_date, end_date, path, query_params): - self._setup_session() - - url = furl(settings.OSF_API_URL + path) - url.args['page[size]'] = 100 - # url.args['filter[public]'] = 'true' - # OSF turns dates into date @ midnight so we have to go ahead one more day - url.args['filter[date_modified][gte]'] = start_date.date().isoformat() - url.args['filter[date_modified][lte]'] = (end_date + datetime.timedelta(days=2)).date().isoformat() - for param, value in (query_params or {}).items(): - url.args[param] = value - return url - - def _build_guid_url(self, guid, path, query_params): - self._setup_session() - - url = furl(settings.OSF_API_URL) - url.path.add(path).add(guid) - for param, value in (query_params or {}).items(): - url.args[param] = value - return url - - def _fetch_records(self, url, embed_attrs): - while True: - records, next_page = self._fetch_page(url) - - for record in records.json()['data']: - if record['attributes'].get('tags') and QA_TAG in record['attributes']['tags']: - continue - - try: - record = self._populate_embeds(record, embed_attrs) - except NodeSuddenlyUnavailable: - continue - - yield record['id'], record - - if not next_page: - break - - def _fetch_page(self, url, next_page=None): - logger.debug('Making request to {}'.format(url.url)) - - records = self.requests.get(url.url) - - if records.status_code in (401, 410): - raise NodeSuddenlyUnavailable('Node unharvestable ({}) at {}. Got {}'.format(records, url.url, records.content)) - if records.status_code // 100 != 2: - raise ValueError('Malformed response ({}) from {}. 
Got {}'.format(records, url.url, records.content)) - - next_page = records.json()['links'].get('next') - next_page = furl(next_page) if next_page else None - - logger.debug('Found {} records.'.format(len(records.json()['data']))) - - return records, next_page - - def _populate_embeds(self, record, embed_attrs): - for attr, key in (embed_attrs or {}).items(): - embedded = record - try: - for key in key.split('.'): - embedded = embedded[key] - except KeyError: - logger.warning('Could not access attribute %s at %s', attr, key) - continue - - logger.info('Populating embedded attribute "{}" for "{}"'.format(attr, record['id'])) - - data = [] - url = furl(embedded).add(args={'page[size]': 100}) - - while True: - resp, url = self._fetch_page(url) - data.extend(resp.json()['data']) - - if not url: - break - - record[attr] = data - return record diff --git a/share/harvesters/oai.py b/share/harvesters/oai.py deleted file mode 100644 index 9ea67828f..000000000 --- a/share/harvesters/oai.py +++ /dev/null @@ -1,114 +0,0 @@ -import logging -import time - -import pendulum -from furl import furl -from lxml import etree - -from share.harvest import BaseHarvester -from share.harvest.serialization import StringLikeSerializer - -logger = logging.getLogger(__name__) - - -class OAIHarvestException(Exception): - pass - - -class OAIHarvester(BaseHarvester): - VERSION = 1 - - namespaces = { - 'dc': 'http://purl.org/dc/elements/1.1/', - 'ns0': 'http://www.openarchives.org/OAI/2.0/', - 'oai_dc': 'http://www.openarchives.org/OAI/2.0/', - } - SERIALIZER_CLASS = StringLikeSerializer - - def _do_fetch(self, start_date, end_date, metadata_prefix, time_granularity=True, set_spec=None): - url = furl(self.config.base_url) - - if set_spec: - url.args['set'] = set_spec - url.args['verb'] = 'ListRecords' - url.args['metadataPrefix'] = metadata_prefix - - if time_granularity: - url.args['from'] = start_date.format('YYYY-MM-DD') + 'T00:00:00Z' - url.args['until'] = end_date.format('YYYY-MM-DD') + 'T00:00:00Z' - else: - url.args['from'] = start_date.date().isoformat() - url.args['until'] = end_date.date().isoformat() - - return self.fetch_records(url) - - def fetch_records(self, url: furl) -> list: - token, used_tokens = None, set() - - while True: - records, token = self.fetch_page(url, token=token) - - if token in used_tokens: - raise ValueError('Found duplicate resumption token "{}" from {!r}'.format(token, self)) - used_tokens.add(token) - - for record in records: - datestamp = record.xpath('ns0:header/ns0:datestamp', namespaces=self.namespaces)[0].text - if datestamp: - datestamp = pendulum.parse(datestamp) - else: - datestamp = None - - yield ( - record.xpath('ns0:header/ns0:identifier', namespaces=self.namespaces)[0].text, - etree.tostring(record, encoding=str), - datestamp, - ) - - if not token or not records: - break - - def fetch_page(self, url: furl, token: str = None) -> (list, str): - if token: - url.args = {'resumptionToken': token, 'verb': 'ListRecords'} - - while True: - logger.info('Making request to {}'.format(url.url)) - resp = self.requests.get(url.url, timeout=self.request_timeout) - if resp.ok: - break - if resp.status_code == 503: - sleep = int(resp.headers.get('retry-after', 5)) + 2 # additional 2 seconds for good measure - logger.warning('Server responded with %s. 
Waiting %s seconds.', resp, sleep) - time.sleep(sleep) - continue - resp.raise_for_status() - - parsed = etree.fromstring(resp.content, parser=etree.XMLParser(recover=True)) - - error = parsed.xpath('//ns0:error', namespaces=self.namespaces) - if error and (len(error) > 1 or error[0].get('code') != 'noRecordsMatch'): - raise OAIHarvestException(error[0].get('code'), error[0].text) - - records = parsed.xpath('//ns0:record', namespaces=self.namespaces) - token = (parsed.xpath('//ns0:resumptionToken/node()', namespaces=self.namespaces) + [None])[0] - - logger.info('Found {} records. Continuing with token {}'.format(len(records), token)) - - return records, token - - def _do_fetch_by_id(self, identifier, metadata_prefix, **kwargs): - url = furl(self.config.base_url) - url.args['verb'] = 'GetRecord' - url.args['metadataPrefix'] = metadata_prefix - url.args['identifier'] = identifier - return etree.tostring(self.fetch_page(url)[0][0], encoding=str) - - def metadata_formats(self): - url = furl(self.config.base_url) - url.args['verb'] = 'ListMetadataFormats' - resp = self.requests.get(url.url, timeout=self.request_timeout) - resp.raise_for_status() - parsed = etree.fromstring(resp.content) - formats = parsed.xpath('//ns0:metadataPrefix', namespaces=self.namespaces) - return [f.text for f in formats] diff --git a/share/harvesters/org_arxiv.py b/share/harvesters/org_arxiv.py deleted file mode 100644 index d85a3594b..000000000 --- a/share/harvesters/org_arxiv.py +++ /dev/null @@ -1,68 +0,0 @@ -import logging -import datetime - -from furl import furl -from lxml import etree - -from share.harvest import BaseHarvester - -logger = logging.getLogger(__name__) - - -class ArxivHarvester(BaseHarvester): - VERSION = 1 - - namespaces = { - 'ns0': 'http://www.w3.org/2005/Atom' - } - - def get_next_url(self): - return (furl(self.config.base_url).set(query_params={ - 'search_query': 'all', - 'max_results': '100', - 'sortBy': 'lastUpdatedDate', - 'sortOrder': 'descending', - 'start': self.start_page_num - }).url) - - def do_harvest(self, start_date, end_date): - # Arxiv does not have filter dates; can sort by last updated - start_date = start_date.date() - self.start_page_num = 0 - # Fetch records is a separate function for readability - # Ends up returning a list of tuples with provider given id and the document itself - return self.fetch_records(start_date) - - def fetch_records(self, start_date): - records = self.fetch_page(self.get_next_url()) - - while True: - for index, record in enumerate(records): - # '2016-06-28T19:54:40Z' - updated = datetime.datetime.strptime(record.xpath('ns0:updated', namespaces=self.namespaces)[0].text, '%Y-%m-%dT%H:%M:%SZ').date() - if updated < start_date: - logger.info('Record index {}: Updated record date {} is less than start date {}.'.format(index, updated, start_date)) - return - - yield ( - record.xpath('ns0:id', namespaces=self.namespaces)[0].text, - etree.tostring(record), - ) - - self.start_page_num += 100 - records = self.fetch_page(self.get_next_url()) - - if not records: - break - - def fetch_page(self, url): - logger.info('Making request to {}'.format(url)) - - resp = self.requests.get(url, verify=False) - parsed = etree.fromstring(resp.content) - - records = parsed.xpath('//ns0:entry', namespaces=self.namespaces) - - logger.info('Found {} records.'.format(len(records))) - - return records diff --git a/share/harvesters/org_biorxiv.py b/share/harvesters/org_biorxiv.py deleted file mode 100644 index 0d036d6cf..000000000 --- a/share/harvesters/org_biorxiv.py +++ /dev/null 
@@ -1,85 +0,0 @@ -import logging -import re - -from furl import furl -from bs4 import BeautifulSoup - -from share.harvest import BaseHarvester - -logger = logging.getLogger(__name__) - - -class BiorxivHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date, end_date): - - end_date = end_date.date() - start_date = start_date.date() - - # I wish I was wrong, the url is just "<key1>:<value> <key2>:<value>" and so on - url = furl(self.config.base_url).add( - path=' '.join([ - 'limit_from:{}'.format(start_date), - 'limit_to:{}'.format(end_date), - 'numresults:100', - 'sort:publication-date', - 'direction:descending', - 'format_result:standard', - ]) - ).url - - # Fetch records is a separate function for readability - # Ends up returning a list of tuples with provider given id and the document itself - return self.fetch_records(url, start_date, end_date) - - def fetch_records(self, url, start_date, end_date): - count, page = 0, 0 - resp = self.requests.get(furl(url).set(query_params={'page': page})) - total = BeautifulSoup(resp.content, 'html.parser').find(id='page-title').text.split(' ')[0].strip().replace(',', '') - - try: - total = int(total) - except ValueError: - # Handle the case of "No" results - assert total == 'No' - total = 0 - - logging.info('Found %d results from biorxiv', total) - - while count < total: - links = re.findall(b'href="(/content/early/[^"]+?/[^"]+)"', resp.content) - - logger.info('On document %d of %d (%d%%)', count, total, (count / total) * 100) - - for link in links: - article = self.requests.get('http://biorxiv.org' + link.decode()) - if article.status_code // 100 != 2: - logger.warning('Got non-200 status %s from %s', article, link) - continue - article.raise_for_status() - soup = BeautifulSoup(article.content, 'lxml') - - data = { - 'subject-areas': [ - subject.a.text.strip() - for subject in - soup.find_all(**{'class': 'highwire-article-collection-term'}) - ] - } - - for meta in BeautifulSoup(article.content, 'lxml').find_all('meta'): - if 'name' not in meta.attrs: - continue - if meta.attrs['name'] in data: - if not isinstance(data[meta.attrs['name']], list): - data[meta.attrs['name']] = [data[meta.attrs['name']]] - data[meta.attrs['name']].append(meta.attrs['content']) - else: - data[meta.attrs['name']] = meta.attrs['content'] - - count += 1 - yield link.decode(), data - - page += 1 - resp = self.requests.get(furl(url).set(query_params={'page': page})) diff --git a/share/harvesters/org_biorxiv_html.py b/share/harvesters/org_biorxiv_html.py deleted file mode 100644 index 30eeef39d..000000000 --- a/share/harvesters/org_biorxiv_html.py +++ /dev/null @@ -1,82 +0,0 @@ -import itertools -import logging -import re - -from furl import furl -from bs4 import BeautifulSoup -from bs4 import Comment - -from share.harvest import BaseHarvester - - -logger = logging.getLogger(__name__) - - -class BiorxivHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date, end_date): - - end_date = end_date.date() - start_date = start_date.date() - - # I wish I was wrong, the url is just "<key1>:<value> <key2>:<value>" and so on - url = furl(self.config.base_url).add( - path=' '.join([ - 'limit_from:{}'.format(start_date), - 'limit_to:{}'.format(end_date), - 'numresults:100', - 'sort:publication-date', - 'direction:descending', - 'format_result:standard', - ]) - ).url - - # Fetch records is a separate function for readability - # Ends up returning a list of tuples with provider given id and the document itself - return self.fetch_records(url, 
start_date, end_date) - - def fetch_records(self, url, start_date, end_date): - count, page = 0, 0 - resp = self.requests.get(furl(url).set(query_params={'page': page})) - total = BeautifulSoup(resp.content, 'html.parser').find(id='page-title').text.split(' ')[0].strip().replace(',', '') - - if total == 'No': - total = 0 - else: - total = int(total) - - logging.info('Found %d results from biorxiv', total) - - while count < total: - links = re.findall(b'href="(/content/early/[^"]+?/[^"]+)"', resp.content) - - logger.info('On document %d of %d (%d%%)', count, total, (count / total) * 100) - - for link in links: - url = 'http://biorxiv.org' + link.decode() - logger.debug('[%d/%d] Requesting %s', count, total, url) - article = self.requests.get(url) - article.raise_for_status() - - soup = BeautifulSoup(article.content, 'lxml') - - # Peel out script tags and css things to minimize size of HTML - for el in itertools.chain( - soup('img'), - soup('link', rel=('stylesheet', 'dns-prefetch')), - soup('link', {'type': re.compile('.')}), - soup('noscript'), - soup('script'), - soup(string=lambda x: isinstance(x, Comment)), - ): - el.extract() - - # Links have PKs and dates in them. /content/early/YYYY/MM/DD/PK or /content/early/YYYY/MM/DD/PK.REV - identifier = re.match(r'/content/early/\d{4}/\d{2}/\d{2}/(\d+)(?:\.\d+)?$', link.decode()).group(1) - - yield identifier, str(soup) - - count += 1 - page += 1 - resp = self.requests.get(furl(url).set(query_params={'page': page})) diff --git a/share/harvesters/org_biorxiv_rss.py b/share/harvesters/org_biorxiv_rss.py deleted file mode 100644 index a5a66156e..000000000 --- a/share/harvesters/org_biorxiv_rss.py +++ /dev/null @@ -1,61 +0,0 @@ -import datetime -import logging - -from furl import furl -from lxml import etree - -from share.harvest import BaseHarvester - -logger = logging.getLogger(__name__) - - -class BiorxivHarvester(BaseHarvester): - VERSION = 1 - - namespaces = { - 'dc': 'http://purl.org/dc/elements/1.1/', - 'syn': 'http://purl.org/rss/1.0/modules/syndication/', - 'content': 'http://purl.org/rss/1.0/modules/content/', - 'admin': 'http://webns.net/mvcb/', - 'prism': 'http://purl.org/rss/1.0/modules/prism/', - 'taxo': 'http://purl.org/rss/1.0/modules/taxonomy/', - 'ns0': 'http://purl.org/rss/1.0/', - 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' - } - - def do_harvest(self, start_date, end_date): - # BioRxiv does not have filter dates; returns 30 most recent - start_date = start_date.date() - url = furl(self.config.base_url).set(query_params={ - 'subject': 'all' - }).url - # Fetch records is a separate function for readability - # Ends up returning a list of tuples with provider given id and the document itself - return self.fetch_records(url, start_date) - - def fetch_records(self, url, start_date): - records = self.fetch_page(url) - - for index, record in enumerate(records): - # '2016-06-30' - updated = datetime.datetime.strptime(record.xpath('dc:date', namespaces=self.namespaces)[0].text, '%Y-%m-%d').date() - if updated < start_date: - logger.info('Record index {}: Record date {} is less than start date {}.'.format(index, updated, start_date)) - return - - yield ( - record.xpath('dc:identifier', namespaces=self.namespaces)[0].text, - etree.tostring(record), - ) - - def fetch_page(self, url): - logger.info('Making request to {}'.format(url)) - - resp = self.requests.get(url, verify=False) - parsed = etree.fromstring(resp.content) - - records = parsed.xpath('//ns0:item', namespaces=self.namespaces) - - logger.info('Found {} 
records.'.format(len(records))) - - return records diff --git a/share/harvesters/org_crossref.py b/share/harvesters/org_crossref.py deleted file mode 100644 index fda60ee9f..000000000 --- a/share/harvesters/org_crossref.py +++ /dev/null @@ -1,35 +0,0 @@ -from furl import furl - -from share.harvest import BaseHarvester - - -class CrossRefHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date, end_date): - start_date = start_date.date() - end_date = end_date.date() - - return self.fetch_records(furl(self.config.base_url).set(query_params={ - 'filter': 'from-update-date:{},until-update-date:{}'.format( - start_date.isoformat(), - end_date.isoformat() - ), - 'rows': 1000, - })) - - def fetch_records(self, url: furl): - cursor = '*' - - while True: - url.args['cursor'] = cursor - resp = self.requests.get(url.url) - resp.raise_for_status() - message = resp.json()['message'] - records = message['items'] - cursor = message['next-cursor'] - - if not records: - break - for record in records: - yield (record['DOI'], record) diff --git a/share/harvesters/org_dataone.py b/share/harvesters/org_dataone.py deleted file mode 100644 index 1cac47222..000000000 --- a/share/harvesters/org_dataone.py +++ /dev/null @@ -1,43 +0,0 @@ -from furl import furl - -from lxml import etree - -from share.harvest import BaseHarvester - - -class DataOneHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date, end_date): - end_date = end_date.format('YYYY-MM-DD') + 'T00:00:00Z' - start_date = start_date.format('YYYY-MM-DD') + 'T00:00:00Z' - - url = furl(self.config.base_url).set(query_params={ - 'q': 'dateModified:[{} TO {}]'.format(start_date, end_date), - 'start': 0, - 'rows': 1 - }).url - - return self.fetch_records(url, start_date, end_date) - - def fetch_records(self, url, start_date, end_date): - resp = self.requests.get(url) - doc = etree.XML(resp.content) - - total_records = int(doc.xpath("//result/@numFound")[0]) - records_processed = 0 - - while records_processed < total_records: - response = self.requests.get(furl(url).set(query_params={ - 'q': 'dateModified:[{} TO {}]'.format(start_date, end_date), - 'start': records_processed, - 'rows': 1000 - }).url) - - docs = etree.XML(response.content).xpath('//doc') - for doc in docs: - doc_id = doc.xpath("str[@name='id']")[0].text - doc = etree.tostring(doc) - yield (doc_id, doc) - - records_processed += len(docs) diff --git a/share/harvesters/org_elife.py b/share/harvesters/org_elife.py deleted file mode 100644 index c0851f074..000000000 --- a/share/harvesters/org_elife.py +++ /dev/null @@ -1,104 +0,0 @@ -import time -import logging -import requests - -from django.conf import settings - -from furl import furl - -from lxml import etree - -from share.harvest import BaseHarvester - - -logger = logging.getLogger(__name__) - - -class ELifeHarvester(BaseHarvester): - VERSION = 1 - - BASE_DATA_URL = 'https://raw.githubusercontent.com/elifesciences/elife-article-xml/master/{}' - BASE_URL = 'https://api.github.com/repos/elifesciences/elife-article-xml/commits{}' - - def request(self, *args, **kwargs): - if settings.GITHUB_API_KEY: - kwargs.setdefault('headers', {})['Authorization'] = 'token {}'.format(settings.GITHUB_API_KEY) - - while True: - response = self.requests.get(*args, **kwargs) - - if int(response.headers.get('X-RateLimit-Remaining', 0)) == 0: - reset = int(response.headers.get('X-RateLimit-Reset', time.time())) - time.time() - logger.warning('Hit GitHub ratelimit sleeping for %s seconds', reset) - time.sleep(reset) - - if 
response.status_code != 403:
-                response.raise_for_status()
-                return response
-
-    def do_harvest(self, start_date, end_date):
-        end_date = end_date.date()
-        start_date = start_date.date()
-
-        logger.info("The data for each record must be requested individually - this may take a while... ")
-
-        for sha in self.fetch_commits(start_date, end_date):
-            for file_name in self.fetch_file_names(sha):
-                if not file_name.endswith('.xml'):
-                    continue
-                record = self.fetch_xml(file_name)
-                # skip files that could not be fetched; only emit records we actually have
-                if record is None:
-                    continue
-                doc = etree.tostring(record)
-                doc_id = record.xpath('//article-id[@*]')[0].text
-                yield (doc_id, doc)
-
-    def fetch_commits(self, start_date, end_date):
-        page = -1
-        url = self.BASE_URL.format('?')
-
-        while True:
-            page += 1
-            response = self.request(furl(url).set(query_params={
-                'since': start_date.isoformat(),
-                'until': end_date.isoformat(),
-                'page': page,
-                'per_page': 100
-            }).url)
-
-            commits = response.json()
-            for commit in commits:
-                if commit.get('sha'):
-                    yield commit['sha']
-
-            if len(commits) != 100:
-                break
-
-    def fetch_file_names(self, sha):
-        page = -1
-        url = self.BASE_URL.format('/{}'.format(sha))
-
-        while True:
-            page += 1
-            response = self.request(furl(url).set(query_params={
-                'page': page,
-                'per_page': 100
-            }).url)
-
-            files = response.json()['files']
-            for f in files:
-                yield f['filename']
-
-            if len(files) != 100:
-                break
-
-    def fetch_xml(self, file_name):
-        file_url = furl(self.BASE_DATA_URL.format(file_name))
-        # Not using self.requests when getting the file contents because the eLife rate limit (1, 60) does not apply
-        resp = requests.get(file_url.url)
-        if resp.status_code == 404:
-            logger.warning('Could not download file %s', file_name)
-            return None
-        resp.raise_for_status()
-        xml = etree.XML(resp.content)
-        return xml
diff --git a/share/harvesters/org_ncar.py b/share/harvesters/org_ncar.py
deleted file mode 100644
index 6e5e94d67..000000000
--- a/share/harvesters/org_ncar.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import logging
-
-from furl import furl
-from lxml import etree
-
-from share.harvest import BaseHarvester
-
-logger = logging.getLogger(__name__)
-
-
-# TODO Could we use the OAI harvester instead, or is there something non-standard about NCAR?
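The TODO above is fair: the paging NCARHarvester implements below is plain OAI-PMH ListRecords. For reference, a minimal sketch of the generic resumption-token flow it duplicates, assuming only `requests` and `lxml`; the `oai_list_records` helper and its parameters are illustrative, not part of this codebase:

    import requests
    from lxml import etree

    OAI_NS = {'oai': 'http://www.openarchives.org/OAI/2.0/'}

    def oai_list_records(endpoint, metadata_prefix, from_datestamp, until_datestamp):
        # the first request carries the full query; follow-up requests carry only the token
        params = {
            'verb': 'ListRecords',
            'metadataPrefix': metadata_prefix,
            'from': from_datestamp,
            'until': until_datestamp,
        }
        while True:
            resp = requests.get(endpoint, params=params)
            resp.raise_for_status()
            root = etree.fromstring(resp.content)
            yield from root.xpath('//oai:record', namespaces=OAI_NS)
            token = root.xpath('//oai:resumptionToken/text()', namespaces=OAI_NS)
            if not token or not token[0].strip():
                return
            params = {'verb': 'ListRecords', 'resumptionToken': token[0]}

fetch_page below hand-rolls the same token handling by mutating a furl object in place.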
-class NCARHarvester(BaseHarvester): - VERSION = 1 - - namespaces = { - 'OAI-PMH': 'http://www.openarchives.org/OAI/2.0/', - 'dif': 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/' - } - url = 'https://www.earthsystemgrid.org/oai/repository' - - def do_harvest(self, start_date, end_date): - url = furl(self.url).set(query_params={ - 'verb': 'ListRecords', - 'metadataPrefix': 'dif', - 'from': start_date.format('YYYY-MM-DD') + 'T00:00:00Z', - 'until': end_date.format('YYYY-MM-DD') + 'T00:00:00Z', - }) - - return self.fetch_records(url) - - def fetch_records(self, url): - records, token = self.fetch_page(url, token=None) - - while True: - for record in records: - yield ( - record.xpath('./OAI-PMH:header/OAI-PMH:identifier/node()', namespaces=self.namespaces)[0], - etree.tostring(record), - ) - - records, token = self.fetch_page(url, token=token) - - if not token or not records: - break - - def fetch_page(self, url, token): - if token: - url.remove('from') - url.remove('until') - url.remove('metadataPrefix') - url.args['resumptionToken'] = token - - logger.info('Making request to {}'.format(url)) - - resp = self.requests.get(url.url) - parsed = etree.fromstring(resp.content) - - records = parsed.xpath('//OAI-PMH:record', namespaces=self.namespaces) - token = (parsed.xpath('//OAI-PMH:resumptionToken/node()', namespaces=self.namespaces) + [None])[0] - - logger.info('Found {} records. Continuing with token {}'.format(len(records), token)) - - return records, token diff --git a/share/harvesters/org_neurovault.py b/share/harvesters/org_neurovault.py deleted file mode 100644 index 4e8e6d226..000000000 --- a/share/harvesters/org_neurovault.py +++ /dev/null @@ -1,23 +0,0 @@ -import pendulum - -from share.harvest import BaseHarvester - - -class NeuroVaultHarvester(BaseHarvester): - VERSION = 1 - - def do_harvest(self, start_date, end_date): - api_url = self.config.base_url - while api_url: - response = self.requests.get(api_url) - records = response.json() - for record in records['results']: - date = pendulum.parse(record['modify_date']) - if date < start_date: - return # We're all caught up - if date > end_date: - continue # Reaching too far back - - yield record['id'], record - - api_url = records['next'] diff --git a/share/harvesters/org_plos.py b/share/harvesters/org_plos.py deleted file mode 100644 index ab505ffec..000000000 --- a/share/harvesters/org_plos.py +++ /dev/null @@ -1,50 +0,0 @@ -from furl import furl -from lxml import etree - -from django.conf import settings - -from share.harvest import BaseHarvester - - -class PLOSHarvester(BaseHarvester): - VERSION = 1 - - MAX_ROWS_PER_REQUEST = 999 - - def do_harvest(self, start_date, end_date): - - if not settings.PLOS_API_KEY: - raise Exception('PLOS api key not defined.') - - start_date = start_date.isoformat().split('.')[0] + 'Z' - end_date = end_date.isoformat().split('.')[0] + 'Z' - - return self.fetch_rows(furl(self.config.base_url).set(query_params={ - 'q': 'publication_date:[{} TO {}]'.format(start_date, end_date), - 'rows': '0', - 'api_key': settings.PLOS_API_KEY - }).url, start_date, end_date) - - def fetch_rows(self, url, start_date, end_date): - resp = self.requests.get(url) - - total_rows = etree.XML(resp.content).xpath('//result/@numFound') - total_rows = int(total_rows[0]) if total_rows else 0 - - current_row = 0 - while current_row < total_rows: - response = self.requests.get(furl(self.config.base_url).set(query_params={ - 'q': 'publication_date:[{} TO {}]'.format(start_date, end_date), - 'start': current_row, - 'api_key': 
settings.PLOS_API_KEY,
-                'rows': self.MAX_ROWS_PER_REQUEST
-            }).url)
-
-            docs = etree.XML(response.content).xpath('//doc')
-            for doc in docs:
-                if doc.xpath("arr[@name='abstract']") or doc.xpath("str[@name='author_display']"):
-                    doc_id = doc.xpath("str[@name='id']")[0].text
-                    doc = etree.tostring(doc)
-                    yield (doc_id, doc)
-
-            current_row += len(docs)
diff --git a/share/harvesters/org_socialscienceregistry.py b/share/harvesters/org_socialscienceregistry.py
deleted file mode 100644
index 59302c519..000000000
--- a/share/harvesters/org_socialscienceregistry.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import csv
-import datetime
-import logging
-
-
-from share.harvest import BaseHarvester
-
-logger = logging.getLogger(__name__)
-
-
-class SCHarvester(BaseHarvester):
-    """
-    """
-    VERSION = 1
-
-    def _do_fetch(self, start, end, **kwargs):
-        end_date = end.date()
-        start_date = start.date()
-        logger.info('Harvesting the social science registry %s - %s', start_date, end_date)
-        return self.fetch_records(start_date, end_date)
-
-    def fetch_records(self, start_date, end_date):
-
-        csv_response = self.requests.get(self.config.base_url + '/trials/search.csv')
-        csv_response.raise_for_status()
-
-        record_list = list(csv.reader(csv_response.text.split('\n')))
-        record_list = record_list[1:]  # drop the CSV header row
-        total_records = len(record_list)
-
-        logger.info('Found total %d results from the social science registry', total_records)
-        standard_size = len(record_list[0])
-        records_ignored = 0
-        records_harvested = 0
-
-        for i, record in enumerate(record_list):
-            logger.info('On trial %d of %d (%d%%)', i, total_records, (i / total_records) * 100)
-
-            if len(record) != standard_size or len(record) == 0:
-                records_ignored += 1
-                continue
-
-            last_updated = datetime.datetime.strptime(record[2], '%B %d, %Y').date()
-
-            if last_updated < start_date:
-                logger.info('Trial {}: Trial date {} is earlier than start date {}.'.format(i, last_updated, start_date))
-            else:
-                yield (
-                    record[5],
-                    {'record': record}
-                )
-                records_harvested += 1
-        logger.info('Total records harvested %d for date range %s - %s', records_harvested, start_date, end_date)
-        logger.info('Total records ignored %d for incorrect csv formatting', records_ignored)
diff --git a/share/harvesters/org_swbiodiversity.py b/share/harvesters/org_swbiodiversity.py
deleted file mode 100644
index 014d35071..000000000
--- a/share/harvesters/org_swbiodiversity.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import itertools
-import logging
-import re
-
-from bs4 import BeautifulSoup, Comment
-from furl import furl
-
-from share.harvest import BaseHarvester
-
-
-logger = logging.getLogger(__name__)
-
-
-class SWHarvester(BaseHarvester):
-    """
-
-    """
-    VERSION = 1
-
-    def _do_fetch(self, start, end, list_url):
-        end_date = end.date()
-        start_date = start.date()
-        logger.info('Harvesting swbiodiversity %s - %s', start_date, end_date)
-        return self.fetch_records(list_url)
-
-    def fetch_records(self, list_url):
-        response = self.requests.get(list_url)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.content, 'lxml')
-        records = soup.find_all('a')
-
-        record_list = []
-        for record in records:
-            # anchors without an href return None; guard before the regex
-            record_content = re.findall(r'collid=(\d+)', record.get('href') or '')
-            if record_content and record_content[0] not in record_list:
-                record_list.append(record_content[0])
-        total = len(record_list)
-
-        logger.info('Found %d results from swbiodiversity', total)
-
-        for count, identifier in enumerate(record_list):
-
-            logger.info('On collection %d of %d (%d%%)', count, total, (count / total) * 100)
-
-            
collection_page = furl(list_url) - collection_page.args['collid'] = identifier - response = self.requests.get(collection_page.url) - response.raise_for_status() - - raw_data = BeautifulSoup(response.content, 'html.parser') - # Peel out script tags and css things to minimize size of HTML - for el in itertools.chain( - raw_data('img'), - raw_data('link', rel=('stylesheet', 'dns-prefetch')), - raw_data('link', {'type': re.compile('.')}), - raw_data('noscript'), - raw_data('script'), - raw_data(string=lambda x: isinstance(x, Comment)), - ): - el.extract() - - record = raw_data.find(id='innertext') - - yield collection_page.url, str(record) diff --git a/share/management/commands/populate_osf_suids.py b/share/management/commands/populate_osf_suids.py deleted file mode 100644 index 2b17d465e..000000000 --- a/share/management/commands/populate_osf_suids.py +++ /dev/null @@ -1,83 +0,0 @@ -from share.management.commands import BaseShareCommand -from share.models import NormalizedData, RawDatum, ShareUser, SourceUniqueIdentifier -from share.util.graph import MutableGraph -from share.util.osf import osf_sources, guess_osf_guid - - -CHUNK_SIZE = 2000 - - -def update_suid(normalized_datum, new_suid_identifier): - raw_datum = normalized_datum.raw - if not raw_datum: - print(f'normd {normalized_datum.id}: skip, no raw') - return 0 - - existing_suid = raw_datum.suid - new_suid, created = SourceUniqueIdentifier.objects.get_or_create( - identifier=new_suid_identifier, - source_config_id=existing_suid.source_config_id, - ) - - if new_suid == existing_suid: - print(f'normd {normalized_datum.id}: skip, already has correct suid {existing_suid.id}') - return 0 - - print(f'normd {normalized_datum.id}: updating suid from {existing_suid.id} to {new_suid.id}...') - update_old_suid_raws(existing_suid, new_suid) - existing_suid.delete() - return 1 - - -def update_old_suid_raws(old_suid, new_suid): - for raw_datum in list(RawDatum.objects.defer('datum').filter(suid=old_suid)): - # RawDatum is unique on (suid, sha256), so there will be 0 or 1 duplicates - duplicate_raw = RawDatum.objects.filter(suid=new_suid, sha256=raw_datum.sha256).first() - - if duplicate_raw: - if duplicate_raw == raw_datum: - raise Exception(f'wtf the duplicate is the same one (rawd:{raw_datum}, old_suid:{old_suid}, new_suid:{new_suid})') - print(f'> rawd {raw_datum.id}: deleting in favor of dupe => {duplicate_raw.id}') - NormalizedData.objects.filter(raw=raw_datum).update(raw=duplicate_raw) - raw_datum.delete() - else: - print(f'> rawd {raw_datum.id}: update suid ({old_suid.id} => {new_suid.id})') - raw_datum.suid = new_suid - raw_datum.save(update_fields=['suid']) - - -def get_normd_ids(start_id): - print(f'-- getting {CHUNK_SIZE} normd ids starting with {start_id} --') - normd_id_qs = NormalizedData.objects.filter( - id__gte=start_id, - raw__isnull=False, - source__in=ShareUser.objects.filter(source__in=osf_sources()) - ).order_by('id').values_list('id', flat=True) - return list(normd_id_qs[:CHUNK_SIZE]) - - -class Command(BaseShareCommand): - def add_arguments(self, parser): - parser.add_argument('--commit', action='store_true', help='Should the script actually commit?') - parser.add_argument('--start-id', type=int, default=0, help='NormalizedData id to begin at') - - def handle(self, *args, **options): - commit = options.get('commit') - start_id = options['start_id'] - - total_seen = 0 - total_updated = 0 - - normd_ids = get_normd_ids(start_id) - while normd_ids: - total_seen += len(normd_ids) - for normd_id in normd_ids: - normd = 
NormalizedData.objects.get(id=normd_id) - mgraph = MutableGraph.from_jsonld(normd.data) - guid = guess_osf_guid(mgraph) - if guid: - with self.rollback_unless_commit(commit=commit): - total_updated += update_suid(normd, guid) - next_start_id = int(normd_ids[-1]) + 1 - normd_ids = get_normd_ids(next_start_id) - print(f'-- done! looked at {total_seen} NormalizedData; updated {total_updated} --') diff --git a/share/management/commands/reloadsynonyms.py b/share/management/commands/reloadsynonyms.py deleted file mode 100644 index 1193b02e3..000000000 --- a/share/management/commands/reloadsynonyms.py +++ /dev/null @@ -1,32 +0,0 @@ -import glob -import json - -from django.conf import settings -from django.core.management.base import BaseCommand - - -class Command(BaseCommand): - - def handle(self, *args, **options): - self.stdout.write('Loading synonyms...') - - count = 0 - - synonyms = {} - with open(settings.SUBJECT_SYNONYMS_JSON) as fobj: - for subject in json.load(fobj): - synonyms[subject['name'].lower().strip()] = [subject['name']] - - for filename in glob.glob('providers/**/subject-mapping.json', recursive=True): - self.stdout.write('Loading {}...'.format(filename)) - - with open(filename, 'r') as fobj: - for key, value in json.load(fobj).items(): - for syn in value: - synonyms.setdefault(syn.lower().strip(), []).append(key) - count += 1 - - with open('share/models/synonyms.json', 'w') as fobj: - json.dump(synonyms, fobj, indent=4) - - self.stdout.write('Loaded {} synonyms into synonyms.json'.format(count)) diff --git a/share/metadata_formats/__init__.py b/share/metadata_formats/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/share/metadata_formats/base.py b/share/metadata_formats/base.py deleted file mode 100644 index 6f124bc5d..000000000 --- a/share/metadata_formats/base.py +++ /dev/null @@ -1,20 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Optional - - -class MetadataFormatter(ABC): - @abstractmethod - def format(self, normalized_data) -> Optional[str]: - """return a string representation of the given metadata in the formatter's format - """ - raise NotImplementedError - - def format_as_deleted(self, suid) -> Optional[str]: - """return a string representation of a deleted suid - - if returns None, the corresponding FormattedMetadataRecord will be deleted - if returns a string, the FMR will not be deleted -- this is for situations - like sharev2_elastic, where an FMR with `is_deleted: true` is required - to trigger deletion from the elasticsearch index - """ - return None diff --git a/share/metadata_formats/oai_dc.py b/share/metadata_formats/oai_dc.py deleted file mode 100644 index ae1492fcc..000000000 --- a/share/metadata_formats/oai_dc.py +++ /dev/null @@ -1,119 +0,0 @@ -from lxml import etree - -from share.util.graph import MutableGraph -from share.util.names import get_related_agent_name - -from share.oaipmh.util import format_datetime, ns, nsmap, SubEl, OAI_DC -from share.metadata_formats.base import MetadataFormatter - - -class OaiDcFormatter(MetadataFormatter): - """builds an XML fragment in dublin core format, meant to be included within the - <metadata> element of an OAI-PMH `listRecords` or `showRecord` response - - see https://www.openarchives.org/OAI/openarchivesprotocol.html for more details - """ - FORMAT_IRI = str(OAI_DC) - - def format(self, normalized_datum): - mgraph = MutableGraph.from_jsonld(normalized_datum.data) - central_work = mgraph.get_central_node(guess=True) - - if ( - not central_work - or 
central_work.concrete_type != 'abstractcreativework' - or central_work['is_deleted'] - ): - return self.format_as_deleted(None) - - dc_formatted = self.build_dublin_core(central_work) - return etree.tostring(dc_formatted, encoding='unicode') - - def build_dublin_core(self, work_node): - dc_element = etree.Element( - ns('oai_dc', 'dc'), - attrib={ - ns('xsi', 'schemaLocation'): 'http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd' - }, - nsmap=nsmap('oai_dc', 'dc', 'xsi'), - ) - SubEl(dc_element, ns('dc', 'title'), work_node['title']) - - for creator_name in self._get_related_agent_names(work_node, {'creator'}): - SubEl(dc_element, ns('dc', 'creator'), creator_name) - - subject_names = { - subject_node['name'] - for subject_node in work_node['subjects'] - } - for subject_name in sorted(subject_names): - SubEl(dc_element, ns('dc', 'subject'), subject_name) - - description = work_node['description'] - if description: - SubEl(dc_element, ns('dc', 'description'), description) - - for publisher_name in sorted(self._get_related_agent_names(work_node, {'publisher'})): - SubEl(dc_element, ns('dc', 'publisher'), publisher_name) - - for contributor_name in sorted(self._get_related_agent_names(work_node, {'contributor', 'principalinvestigator', 'principalinvestigatorcontact'})): - SubEl(dc_element, ns('dc', 'contributor'), contributor_name) - - date = work_node['date_published'] or work_node['date_updated'] - if date: - SubEl(dc_element, ns('dc', 'date'), format_datetime(date)) - - SubEl(dc_element, ns('dc', 'type'), work_node.type) - - identifier_uris = { - identifier_node['uri'] - for identifier_node in work_node['identifiers'] - } - for identifier_uri in sorted(identifier_uris): - SubEl(dc_element, ns('dc', 'identifier'), identifier_uri) - - language = work_node['language'] - if language: - SubEl(dc_element, ns('dc', 'language'), language) - - for related_uri in self._get_related_uris(work_node): - SubEl(dc_element, ns('dc', 'relation'), related_uri) - - if work_node['rights']: - SubEl(dc_element, ns('dc', 'rights'), work_node['rights']) - - if work_node['free_to_read_type']: - SubEl(dc_element, ns('dc', 'rights'), work_node['free_to_read_type']) - - return dc_element - - def _get_related_agent_names(self, work_node, relation_types): - def sort_key(relation_node): - order_cited = relation_node['order_cited'] - if order_cited is None: - return 9999999 # those without order_cited go last - return int(order_cited) - - relation_nodes = sorted( - [ - relation_node - for relation_node in work_node['agent_relations'] - if relation_node.type in relation_types - ], - key=sort_key, - ) - - # remove falsy values - return filter(None, [ - get_related_agent_name(relation) - for relation in relation_nodes - ]) - - def _get_related_uris(self, work_node): - related_work_uris = set() - for related_work_node in work_node['related_works']: - related_work_uris.update( - identifier['uri'] - for identifier in related_work_node['identifiers'] - ) - return sorted(related_work_uris) diff --git a/share/metadata_formats/sharev2_elastic.py b/share/metadata_formats/sharev2_elastic.py deleted file mode 100644 index b00fd5a1f..000000000 --- a/share/metadata_formats/sharev2_elastic.py +++ /dev/null @@ -1,240 +0,0 @@ -import json -import re - -from django.conf import settings - -from share.util.graph import MutableGraph -from share.util.names import get_related_agent_name -from share.util import IDObfuscator -from trove.vocab.namespaces import SHAREv2 - -from .base import MetadataFormatter - 
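The helpers below turn schema type names like `CreativeWork` into the lowercase, space-separated strings stored in the index. A quick sketch of what the regex does, with illustrative values (not taken from this codebase):

    import re

    def pascal_to_words(type_name):
        # insert a space before each capital letter that is not at a word start, then lowercase
        return re.sub(r'\B([A-Z])', r' \1', type_name).lower()

    assert pascal_to_words('CreativeWork') == 'creative work'
    assert pascal_to_words('DataSet') == 'data set'
    assert pascal_to_words('Preprint') == 'preprint'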
- -def format_type(type_name): - # convert from PascalCase to lower case with spaces between words - return re.sub(r'\B([A-Z])', r' \1', type_name).lower() - - -def format_node_type(node): - return format_type(node.schema_type.name) - - -def format_node_type_lineage(node): - return [format_type(t) for t in node.schema_type.type_lineage] - - -# values that, for the purpose of indexing in elasticsearch, are equivalent to absence -EMPTY_VALUES = (None, '') - - -def strip_empty_values(thing): - if isinstance(thing, dict): - return { - k: strip_empty_values(v) - for k, v in thing.items() - if v not in EMPTY_VALUES - } - if isinstance(thing, list): - return [ - strip_empty_values(v) - for v in thing - if v not in EMPTY_VALUES - ] - if isinstance(thing, tuple): - return tuple( - strip_empty_values(v) - for v in thing - if v not in EMPTY_VALUES - ) - return thing - - -class ShareV2ElasticFormatter(MetadataFormatter): - FORMAT_IRI = SHAREv2.sharev2_elastic - - def format_as_deleted(self, suid): - # a document with is_deleted:True will be deleted from the elastic index - # TODO handle deletion better -- maybe put a `deleted` field on suids and actually delete the FormattedMetadataRecord - return json.dumps({ - 'id': IDObfuscator.encode(suid), - 'is_deleted': True, - }) - - def format(self, normalized_datum): - mgraph = MutableGraph.from_jsonld(normalized_datum.data) - central_work = mgraph.get_central_node(guess=True) - - if not central_work or central_work.concrete_type != 'abstractcreativework': - return None - - suid = normalized_datum.raw.suid - - if central_work['is_deleted']: - return self.format_as_deleted(suid) - - source_name = suid.source_config.source.long_title - return json.dumps(strip_empty_values({ - 'id': IDObfuscator.encode(suid), - 'sources': [source_name], - 'source_config': suid.source_config.label, - 'source_unique_id': suid.identifier, - - 'type': format_node_type(central_work), - 'types': format_node_type_lineage(central_work), - - # attributes: - 'date_created': suid.get_date_first_seen().isoformat(), - 'date_modified': normalized_datum.created_at.isoformat(), - 'date_published': central_work['date_published'], - 'date_updated': central_work['date_updated'], - 'description': central_work['description'] or '', - 'justification': central_work['justification'], - 'language': central_work['language'], - 'registration_type': central_work['registration_type'], - 'retracted': bool(central_work['withdrawn']), - 'title': central_work['title'], - 'withdrawn': central_work['withdrawn'], - - 'date': ( - central_work['date_published'] - or central_work['date_updated'] - or normalized_datum.created_at.isoformat() - ), - - # agent relations: - 'affiliations': self._get_related_agent_names(central_work, ['agentworkrelation']), - 'contributors': self._get_related_agent_names(central_work, [ - 'contributor', - 'creator', - 'principalinvestigator', - 'principalinvestigatorcontact', - ]), - 'funders': self._get_related_agent_names(central_work, ['funder']), - 'publishers': self._get_related_agent_names(central_work, ['publisher']), - 'hosts': self._get_related_agent_names(central_work, ['host']), - - # other relations: - 'identifiers': [ - identifier_node['uri'] - for identifier_node in central_work['identifiers'] - ], - 'tags': [ - tag_node['name'] - for tag_node in central_work['tags'] - ], - 'subjects': self._get_subjects(central_work, source_name), - 'subject_synonyms': self._get_subject_synonyms(central_work), - - # osf-specific extra - 'osf_related_resource_types': (central_work['extra'] 
or {}).get('osf_related_resource_types'), - - # a bunch of nested data because reasons -- used mostly for rendering search results - 'lists': { - 'affiliations': self._build_related_agent_list(central_work, ['agentworkrelation']), - 'contributors': self._build_related_agent_list(central_work, [ - 'contributor', - 'creator', - 'principalinvestigator', - 'principalinvestigatorcontact', - ]), - 'funders': self._build_related_agent_list(central_work, ['funder']), - 'publishers': self._build_related_agent_list(central_work, ['publisher']), - 'hosts': self._build_related_agent_list(central_work, ['host']), - 'lineage': self._build_work_lineage(central_work), - }, - })) - - def _get_related_agent_names(self, work_node, relation_types): - return [ - get_related_agent_name(relation_node) - for relation_node in work_node['agent_relations'] - if relation_node.type in relation_types - ] - - def _get_subjects(self, work_node, source_name): - return [ - self._serialize_subject(through_subject['subject'], source_name) - for through_subject in work_node['subject_relations'] - if ( - not through_subject['is_deleted'] - and not through_subject['subject']['is_deleted'] - ) - ] - - def _get_subject_synonyms(self, work_node): - return [ - self._serialize_subject(through_subject['subject']['central_synonym']) - for through_subject in work_node['subject_relations'] - if ( - not through_subject['is_deleted'] - and not through_subject['subject']['is_deleted'] - and through_subject['subject']['central_synonym'] - ) - ] - - def _serialize_subject(self, subject_node, source_name=None): - subject_lineage = [subject_node['name']] - next_subject = subject_node['parent'] - while next_subject: - subject_lineage.insert(0, next_subject['name']) - next_subject = next_subject['parent'] - - if source_name and subject_node['central_synonym']: - taxonomy_name = source_name - else: - taxonomy_name = settings.SUBJECTS_CENTRAL_TAXONOMY - - subject_lineage.insert(0, taxonomy_name) - return '|'.join(subject_lineage) - - def _build_list_agent(self, relation_node): - agent_node = relation_node['agent'] - return { - 'type': format_node_type(agent_node), - 'types': format_node_type_lineage(agent_node), - 'name': agent_node['name'] or get_related_agent_name(relation_node), - 'given_name': agent_node['given_name'], - 'family_name': agent_node['family_name'], - 'additional_name': agent_node['additional_name'], - 'suffix': agent_node['suffix'], - 'identifiers': [ - identifier_node['uri'] - for identifier_node in agent_node['identifiers'] - ], - 'relation': format_node_type(relation_node), - 'order_cited': relation_node['order_cited'], - 'cited_as': relation_node['cited_as'], - } - - def _build_related_agent_list(self, work_node, relation_types): - return [ - self._build_list_agent(relation_node) - for relation_node in work_node['agent_relations'] - if relation_node.type in relation_types - ] - - def _build_work_lineage(self, work_node): - try: - parent_work = next( - relation_node['related'] - for relation_node in work_node['outgoing_creative_work_relations'] - if relation_node.type == 'ispartof' - ) - except StopIteration: - return () - - parent_lineage = self._build_work_lineage(parent_work) - parent_data = { - 'type': format_node_type(parent_work), - 'types': format_node_type_lineage(parent_work), - 'title': parent_work['title'], - 'identifiers': [ - identifier_node['uri'] - for identifier_node in parent_work['identifiers'] - ], - } - return ( - *parent_lineage, - parent_data, - ) diff --git a/share/models/core.py b/share/models/core.py 
index 2a4260682..d95d5937e 100644 --- a/share/models/core.py +++ b/share/models/core.py @@ -3,8 +3,6 @@ import random import string -from model_utils import Choices - from django.conf import settings from django.contrib.auth.base_user import AbstractBaseUser, BaseUserManager from django.contrib.auth.models import PermissionsMixin, Group @@ -19,13 +17,11 @@ from osf_oauth2_adapter.apps import OsfOauth2AdapterConfig -from share.models.fields import DateTimeAwareJSONField, ShareURLField -from share.models.validators import JSONLDValidator +from share.models.fields import ShareURLField from share.util import BaseJSONAPIMeta -from share.util.extensions import Extensions logger = logging.getLogger(__name__) -__all__ = ('ShareUser', 'NormalizedData', 'FormattedMetadataRecord',) +__all__ = ('ShareUser',) class ShareUserManager(BaseUserManager): @@ -195,103 +191,3 @@ def _setup_user_token_and_groups(share_user): ) if not is_robot: share_user.groups.add(Group.objects.get(name=OsfOauth2AdapterConfig.humans_group_name)) - - -class NormalizedData(models.Model): - id = models.AutoField(primary_key=True) - created_at = models.DateTimeField(null=True, auto_now_add=True) - raw = models.ForeignKey('RawDatum', null=True, on_delete=models.CASCADE) - data = DateTimeAwareJSONField(validators=[JSONLDValidator(), ]) - source = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) - tasks = models.ManyToManyField('CeleryTaskResult') - - class JSONAPIMeta(BaseJSONAPIMeta): - pass - - def __str__(self): - return '<{}({}, {}, {})>'.format(self.__class__.__name__, self.id, self.source.get_short_name(), self.created_at) - - __repr__ = __str__ - - -class FormattedMetadataRecordManager(models.Manager): - def get_or_create_formatted_record(self, suid_id, record_format): - try: - return self.get(suid=suid_id, record_format=record_format) - except self.model.DoesNotExist: - (_record,) = self.save_formatted_records( - suid_id=suid_id, - record_formats=[record_format], - ) - return _record - - def delete_formatted_records(self, suid): - records = [] - for record_format in Extensions.get_names('share.metadata_formats'): - formatter = Extensions.get('share.metadata_formats', record_format)() - formatted_record = formatter.format_as_deleted(suid) - record = self._save_formatted_record(suid.id, record_format, formatted_record) - if record is not None: - records.append(record) - return records - - def save_formatted_records(self, suid=None, record_formats=None, normalized_datum=None, suid_id=None): - if suid is None: - _suid_id = suid_id - else: - assert suid_id is None, 'expected suid or suid_id, not both' - _suid_id = suid.id - if normalized_datum is None: - normalized_datum = NormalizedData.objects.filter(raw__suid_id=_suid_id).order_by('-created_at').first() - if record_formats is None: - record_formats = Extensions.get_names('share.metadata_formats') - - records = [] - for record_format in record_formats: - formatter = Extensions.get('share.metadata_formats', record_format)() - formatted_record = formatter.format(normalized_datum) - record = self._save_formatted_record(_suid_id, record_format, formatted_record) - if record is not None: - records.append(record) - return records - - def _save_formatted_record(self, suid_id, record_format, formatted_record): - if formatted_record: - record, _ = self.update_or_create( - suid_id=suid_id, - record_format=record_format, - defaults={ - 'formatted_metadata': formatted_record, - }, - ) - else: - self.filter(suid_id=suid_id, record_format=record_format).delete() - record 
= None - return record - - -class FormattedMetadataRecord(models.Model): - RECORD_FORMAT = Choices(*Extensions.get_names('share.metadata_formats')) - - objects = FormattedMetadataRecordManager() - - id = models.AutoField(primary_key=True) - suid = models.ForeignKey('SourceUniqueIdentifier', on_delete=models.CASCADE) - record_format = models.TextField(choices=RECORD_FORMAT) - date_modified = models.DateTimeField(auto_now=True) - formatted_metadata = models.TextField() # could be JSON, XML, or whatever - - class JSONAPIMeta(BaseJSONAPIMeta): - pass - - class Meta: - unique_together = ('suid', 'record_format') - indexes = [ - models.Index(fields=['date_modified'], name='fmr_date_modified_index') - ] - - def __repr__(self): - return f'<{self.__class__.__name__}({self.id}, {self.record_format}, suid:{self.suid_id})>' - - def __str__(self): - return repr(self) diff --git a/share/models/feature_flag.py b/share/models/feature_flag.py index efd9edc76..a1ea95022 100644 --- a/share/models/feature_flag.py +++ b/share/models/feature_flag.py @@ -28,7 +28,6 @@ def _flag_cache_key(self, flag_name) -> str: class FeatureFlag(models.Model): # flag name constants ELASTIC_EIGHT_DEFAULT = 'elastic_eight_default' - IGNORE_SHAREV2_INGEST = 'ignore_sharev2_ingest' SUGGEST_CREATOR_FACET = 'suggest_creator_facet' FORBID_UNTRUSTED_FEED = 'forbid_untrusted_feed' TROVESEARCH_DENORMILY = 'trovesearch_denormily' diff --git a/share/models/index_backfill.py b/share/models/index_backfill.py index 47dff03c7..5a2e6d35f 100644 --- a/share/models/index_backfill.py +++ b/share/models/index_backfill.py @@ -2,6 +2,8 @@ import traceback import typing +import celery +from django.conf import settings from django.db import models, transaction from share import exceptions @@ -32,9 +34,9 @@ def get_with_mutex(self, **backfill_filter_kwargs): class IndexBackfill(models.Model): INITIAL = 'initial' # default state; nothing else happen - WAITING = 'waiting' # "schedule_index_backfill" enqueued - SCHEDULING = 'scheduling' # "schedule_index_backfill" running (indexer daemon going) - INDEXING = 'indexing' # "schedule_index_backfill" finished (indexer daemon continuing) + WAITING = 'waiting' # "task__schedule_index_backfill" enqueued + SCHEDULING = 'scheduling' # "task__schedule_index_backfill" running (indexer daemon going) + INDEXING = 'indexing' # "task__schedule_index_backfill" finished (indexer daemon continuing) COMPLETE = 'complete' # admin confirmed backfill complete ERROR = 'error' # something wrong (check error_* fields) BACKFILL_STATUS_CHOICES = ( @@ -97,8 +99,7 @@ def pls_start(self, index_strategy): locked_self.backfill_status = IndexBackfill.INITIAL locked_self.__update_error(None) try: - import share.tasks - share.tasks.schedule_index_backfill.apply_async((locked_self.pk,)) + task__schedule_index_backfill.apply_async((locked_self.pk,)) except Exception as error: locked_self.__update_error(error) else: @@ -142,3 +143,48 @@ def __update_error(self, error): self.error_context = '' else: raise NotImplementedError(f'expected Exception or None (got {error})') + + +@celery.shared_task(bind=True) +def task__schedule_index_backfill(self, index_backfill_pk): + from share import models as db + from share.search.index_messenger import IndexMessenger + from share.search import index_strategy + from share.search.messages import MessageType + from trove import models as trove_db + + _index_backfill = db.IndexBackfill.objects.get(pk=index_backfill_pk) + _index_backfill.pls_note_scheduling_has_begun() + try: + _index_strategy = 
index_strategy.get_strategy(_index_backfill.index_strategy_name) + _messenger = IndexMessenger(celery_app=self.app, index_strategys=[_index_strategy]) + _messagetype = _index_strategy.backfill_message_type + assert _messagetype in _index_strategy.supported_message_types + if _messagetype == MessageType.BACKFILL_INDEXCARD: + _targetid_queryset = ( + trove_db.Indexcard.objects + .exclude(source_record_suid__source_config__disabled=True) + .exclude(source_record_suid__source_config__source__is_deleted=True) + .values_list('id', flat=True) + ) + elif _messagetype == MessageType.BACKFILL_SUID: + _targetid_queryset = ( + db.SourceUniqueIdentifier.objects + .exclude(source_config__disabled=True) + .exclude(source_config__source__is_deleted=True) + .values_list('id', flat=True) + ) + else: + raise ValueError(f'unknown backfill messagetype {_messagetype}') + _chunk_size = settings.ELASTICSEARCH['CHUNK_SIZE'] + _messenger.stream_message_chunks( + _messagetype, + _targetid_queryset.iterator(chunk_size=_chunk_size), + chunk_size=_chunk_size, + urgent=False, + ) + except Exception as error: + _index_backfill.pls_mark_error(error) + raise error + else: + _index_backfill.pls_note_scheduling_has_finished() diff --git a/share/models/jobs.py b/share/models/jobs.py deleted file mode 100644 index 2a3795781..000000000 --- a/share/models/jobs.py +++ /dev/null @@ -1,394 +0,0 @@ -import re -import signal -import threading -import enum -import logging -import traceback -from contextlib import contextmanager - -from celery.exceptions import Retry -from model_utils import Choices - -from django.conf import settings -from django.db import connections -from django.db import models -from django.db import transaction -from django.db.models.expressions import RawSQL -from django.utils import timezone -from django.utils.translation import gettext_lazy as _ - -from share.util import chunked, BaseJSONAPIMeta - - -__all__ = ('HarvestJob',) -logger = logging.getLogger(__name__) - - -def get_share_version(): - return settings.VERSION - - -class AbstractJobManager(models.Manager): - def get_queryset(self): - return LockableQuerySet(self.model, using=self._db) - - def bulk_get_or_create(self, - objs, - defaults=None, - using='default', - update_fields=None, - defer_fields=None, - chunk_size=500, - ): - if len(self.model._meta.unique_together) != 1: - raise ValueError('Cannot determine the constraint to use for ON CONFLICT') - - def col(field_name): - return self.model._meta.get_field(field_name).column - - columns = [] - field_names = [] - defaults = defaults or {} - - for field in self.model._meta.concrete_fields: - if field is not self.model._meta.pk: - columns.append(field.column) - field_names.append(field.attname) - if field in defaults: - continue - if field.default is not models.NOT_PROVIDED or field.null: - defaults[field.attname] = field._get_default() - elif isinstance(field, models.DateField) and (field.auto_now or field.auto_now_add): - defaults[field.attname] = timezone.now() - - constraint = ', '.join( - '"{}"'.format(col(f)) - for f in self.model._meta.unique_together[0] - ) - - if update_fields: - update = [ - '"{0}" = EXCLUDED."{0}"'.format(col(f)) - for f in update_fields - ] - else: - update = ['id = "{}".id'.format(self.model._meta.db_table)] - - returning = '*' - if defer_fields: - defer_columns = {col(f) for f in defer_fields} - returning = ', '.join(['id'] + [c for c in columns if c not in defer_columns]) - - loaded = [] - with transaction.atomic(using): - for chunk in chunked(objs, chunk_size): - if not 
chunk:
-                    break
-                loaded.extend(self.raw('''
-                    INSERT INTO "{model._meta.db_table}"
-                        ({columns})
-                    VALUES
-                        {values}
-                    ON CONFLICT
-                        ({constraint})
-                    DO UPDATE SET
-                        {update}
-                    RETURNING
-                        {returning}
-                '''.format(
-                    model=self.model,
-                    columns=', '.join(columns),
-                    constraint=constraint,
-                    values=', '.join(['%s'] * len(chunk)),
-                    update=', '.join(update),
-                    returning=returning,
-                ), [
-                    tuple(getattr(obj, f, None) or defaults[f] for f in field_names)
-                    for obj in chunk
-                ]))
-        return loaded
-
-
-class AbstractBaseJob(models.Model):
-    STATUS = Choices(
-        (0, 'created', _('Created')),
-        (1, 'started', _('Started')),
-        (2, 'failed', _('Failed')),
-        (3, 'succeeded', _('Succeeded')),
-        (4, 'rescheduled', _('Rescheduled')),
-        # Used to be "defunct", which turned out to be defunct
-        # Removed to avoid confusion but the number has been left the same for backwards compatibility
-        (6, 'forced', _('Forced')),
-        (7, 'skipped', _('Skipped')),
-        (8, 'retried', _('Retrying')),
-        (9, 'cancelled', _('Cancelled')),
-    )
-
-    READY_STATUSES = (
-        STATUS.created,
-        STATUS.started,
-        STATUS.rescheduled,
-        STATUS.cancelled,
-    )
-
-    class SkipReasons(enum.Enum):
-        duplicated = 'Previously Succeeded'
-        encompassed = 'Encompassing task succeeded'
-        comprised = 'Comprised of succeeded tasks'
-        pointless = 'Any effects will be overwritten by another queued job'
-        obsolete = 'Uses an old version of a dependency'
-        disabled = 'Source(Config) disabled/deleted'
-
-    task_id = models.UUIDField(null=True)
-    status = models.IntegerField(db_index=True, choices=STATUS, default=STATUS.created)
-
-    claimed = models.BooleanField(null=True)
-
-    error_type = models.TextField(blank=True, null=True, db_index=True)
-    error_message = models.TextField(blank=True, null=True, db_column='message')
-    error_context = models.TextField(blank=True, default='', db_column='context')
-    completions = models.IntegerField(default=0)
-
-    date_started = models.DateTimeField(null=True, blank=True)
-    date_created = models.DateTimeField(auto_now_add=True, editable=False)
-    date_modified = models.DateTimeField(auto_now=True, editable=False, db_index=True)
-
-    share_version = models.TextField(default=get_share_version, editable=False)
-
-    objects = AbstractJobManager()
-
-    class JSONAPIMeta(BaseJSONAPIMeta):
-        pass
-
-    class Meta:
-        abstract = True
-        ordering = ('-date_modified', )
-
-    def start(self, claim=False):
-        # TODO double check existing values to make sure everything lines up.
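        # (context, not in the original source: start() flips created -> started and
        #  stamps date_started; handle(), further down, relies on this, treating a job
        #  still marked "started" when its block exits cleanly as an implicit success.)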
- stamp = timezone.now() - logger.debug('Setting %r to started at %r', self, stamp) - self.status = self.STATUS.started - self.claimed = claim - self.date_started = stamp - self.save(update_fields=('status', 'claimed', 'date_started', 'date_modified')) - - return True - - def fail(self, exception): - logger.debug('Setting %r to failed due to %r', self, exception) - - self.error_message = exception - if isinstance(exception, Exception): - self.error_type = type(exception).__name__ - tb = traceback.TracebackException.from_exception(exception) - self.error_context = '\n'.join(tb.format(chain=True)) - else: - self.error_type = None - self.error_context = '' - - self.status = self.STATUS.failed - self.claimed = False - self.save(update_fields=('status', 'error_type', 'error_message', 'error_context', 'claimed', 'date_modified')) - - return True - - def succeed(self): - self.error_type = None - self.error_message = None - self.error_context = '' - self.claimed = False - self.completions += 1 - self.status = self.STATUS.succeeded - logger.debug('Setting %r to succeeded with %d completions', self, self.completions) - self.save(update_fields=('status', 'error_type', 'error_message', 'error_context', 'completions', 'claimed', 'date_modified')) - - return True - - def reschedule(self, claim=False): - self.status = self.STATUS.rescheduled - self.claimed = claim - self.save(update_fields=('status', 'claimed', 'date_modified')) - - return True - - def forced(self, exception): - logger.debug('Setting %r to forced with error_context %r', self, exception) - - self.error_message = exception - if isinstance(exception, Exception): - self.error_type = type(exception).__name__ - tb = traceback.TracebackException.from_exception(exception) - self.error_context = '\n'.join(tb.format(chain=True)) - else: - self.error_type = None - self.error_context = '' - - self.status = self.STATUS.forced - self.claimed = False - self.save(update_fields=('status', 'error_type', 'error_message', 'error_context', 'claimed', 'date_modified')) - - return True - - def skip(self, reason): - logger.debug('Setting %r to skipped with context %r', self, reason) - - self.completions += 1 - self.error_context = reason.value - self.status = self.STATUS.skipped - self.claimed = False - self.save(update_fields=('status', 'error_context', 'completions', 'claimed', 'date_modified')) - - return True - - def cancel(self): - logger.debug('Setting %r to cancelled', self) - - self.status = self.STATUS.cancelled - self.claimed = False - self.save(update_fields=('status', 'claimed', 'date_modified')) - - return True - - @contextmanager - def handle(self): - # Flush any pending changes. 
Any updates
-        # beyond here will be field specific
-        self.save()
-
-        is_main_thread = threading.current_thread() == threading.main_thread()
-
-        if is_main_thread:
-            # Protect ourselves from SIGTERM
-            def on_sigterm(sig, frame):
-                self.cancel()
-            prev_handler = signal.signal(signal.SIGTERM, on_sigterm)
-
-        self.start()
-        try:
-            yield
-        except Retry as e:
-            self.fail(e)
-            self.reschedule(claim=True)
-        except Exception as e:
-            self.fail(e)
-            raise
-        else:
-            # If the handler didn't handle setting a status, assume success
-            if self.status == self.STATUS.started:
-                self.succeed()
-        finally:
-            if is_main_thread:
-                # Detach from SIGTERM, restoring the previous handler
-                signal.signal(signal.SIGTERM, prev_handler)
-
-    def __repr__(self):
-        return '<{} {} ({})>'.format(self.__class__.__name__, self.id, self.STATUS[self.status])
-
-
-class PGLock(models.Model):
-    """A wrapper around Postgres' pg_locks system table.
-    managed = False stops this model from doing anything strange to the table
-    but allows us to safely query this table.
-    """
-
-    pid = models.IntegerField(primary_key=True)
-    locktype = models.TextField()
-    objid = models.IntegerField()
-    classid = models.IntegerField()
-
-    class Meta:
-        managed = False
-        db_table = 'pg_locks'
-
-
-class LockableQuerySet(models.QuerySet):
-    LOCK_ACQUIRED = re.sub(r'\s\s+', ' ', '''
-        pg_try_advisory_lock(%s::REGCLASS::INTEGER, "{0.model._meta.db_table}"."{0.column}")
-    ''').strip()
-
-    def unlocked(self, relation):
-        """Filter out any rows that have an advisory lock on the related field.
-
-        Args:
-            relation: (str): The related object to check for an advisory lock on.
-
-        """
-        field = self.model._meta.get_field(relation)
-
-        if not field.is_relation:
-            raise ValueError('Field "{}" of "{}" is not a relation'.format(relation, self.model))
-
-        return self.select_related(relation).annotate(
-            is_locked=models.Exists(PGLock.objects.filter(
-                locktype='advisory',
-                objid=models.OuterRef(field.column),
-                classid=RawSQL('%s::REGCLASS::INTEGER', [field.related_model._meta.db_table])
-            ))
-        ).exclude(is_locked=True)
-
-    @contextmanager
-    def lock_first(self, relation):
-        item = None
-        field = self.model._meta.get_field(relation)
-
-        if not field.is_relation:
-            raise ValueError('Field "{}" of "{}" is not a relation'.format(relation, self.model))
-
-        try:
-            item = type(self)(self.model, using=self.db).select_related(relation).filter(
-                id__in=self.values('id')[:1]
-            ).annotate(
-                lock_acquired=RawSQL(self.LOCK_ACQUIRED.format(field), [field.related_model._meta.db_table])
-            ).first()
-
-            yield item
-        finally:
-            if item and item.lock_acquired:
-                with connections[self.db].cursor() as cursor:
-                    cursor.execute('SELECT pg_advisory_unlock(%s::REGCLASS::INTEGER, %s)', [field.related_model._meta.db_table, getattr(item, field.attname)])
-
-    def acquire_lock(self, relation):
-        """Attempts to acquire an advisory lock for ALL rows returned by this queryset.
-
-        Note:
-            Locks do not take effect until the queryset is evaluated.
-            It will, however, affect everything if you use .all().
-
-        Args:
-            relation: (str): The related object to attempt to acquire an advisory lock on.
- - """ - field = self.model._meta.get_field(relation) - - if not field.is_relation: - raise ValueError('Field "{}" of "{}" is not a relation'.format(relation, self.model)) - - return self.select_related(relation).annotate( - lock_acquired=RawSQL(self.LOCK_ACQUIRED.format(field), [field.related_model._meta.db_table]) - ) - - -class HarvestJob(AbstractBaseJob): - # May want to look into using DateRange in the future - end_date = models.DateField(db_index=True) - start_date = models.DateField(db_index=True) - - source_config = models.ForeignKey('SourceConfig', editable=False, related_name='harvest_jobs', on_delete=models.CASCADE) - source_config_version = models.PositiveIntegerField() - harvester_version = models.PositiveIntegerField() - - class Meta: - unique_together = ('source_config', 'start_date', 'end_date', 'harvester_version', 'source_config_version', ) - # Used to be inaccurately named - db_table = 'share_harvestlog' - - def __repr__(self): - return '<{type}({id}, {status}, {source}, {start_date}, {end_date})>'.format( - type=type(self).__name__, - id=self.id, - source=self.source_config.label, - status=self.STATUS[self.status], - end_date=self.end_date.isoformat(), - start_date=self.start_date.isoformat(), - ) diff --git a/share/models/jsonld-schema.json b/share/models/jsonld-schema.json deleted file mode 100644 index b73d23ed8..000000000 --- a/share/models/jsonld-schema.json +++ /dev/null @@ -1,52 +0,0 @@ -{ - "title": "SHARE ChangeSet JSONLD", - "description": "A sub-set of JSONLD", - "$schema": "http://json-schema.org/draft-04/schema#", - - "definitions": { - "primitives": { - "type": ["string", "number", "boolean", "null"] - }, - "reference": { - "type": "object", - "required": ["@id", "@type"], - "additionalProperties": false, - "properties": { - "@type": {"type": "string"}, - "@id": {"type": "string"} - } - } - }, - - "required": ["@graph"], - "additionalProperties": false, - "properties": { - "@context": {"type": "object"}, - - "@graph": { - "description": "Used to express a graph.", - "type": "array", - "items": { - "type": "object", - "required": ["@id", "@type"], - "properties": { - "extra": {"type": "object"}, - "@type": {"type": "string"}, - "@id": {"type": ["string", "integer"]} - }, - "additionalProperties": { - "oneOf": [ - {"$ref": "#/definitions/reference"}, - {"$ref": "#/definitions/primitives"}, - {"type": "array", "items": {"$ref": "#/definitions/reference"}} - ] - } - } - }, - - "central_node_id": { - "description": "@id of the central node", - "type": ["string", "null"] - } - } -} diff --git a/share/models/registration.py b/share/models/registration.py deleted file mode 100644 index c78062c98..000000000 --- a/share/models/registration.py +++ /dev/null @@ -1,43 +0,0 @@ -from model_utils import Choices - -from django.conf import settings -from django.db import models -from django.utils.translation import gettext as _ - -from share import util - - -class ProviderRegistration(models.Model): - - STATUS = Choices( - (0, 'pending', _('pending')), - (1, 'accepted', _('accepted')), - (2, 'implemented', _('implemented')), - (3, 'rejected', _('rejected')) - ) - - status = models.IntegerField(choices=STATUS, default=STATUS.pending) - submitted_at = models.DateTimeField(auto_now_add=True, db_index=True) - submitted_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE) - - contact_name = models.TextField(max_length=300) - contact_email = models.EmailField() - contact_affiliation = models.TextField(max_length=300) - - direct_source = 
models.BooleanField(default=False) - - source_name = models.TextField(max_length=300) - source_description = models.TextField(max_length=1000) - source_rate_limit = models.TextField(blank=True, default='', max_length=300) - source_documentation = models.TextField(blank=True, default='', max_length=300) - source_preferred_metadata_prefix = models.TextField(blank=True, default='', max_length=300) - source_oai = models.BooleanField(default=False) - source_base_url = models.URLField(blank=True, default='') - source_disallowed_sets = models.TextField(blank=True, default='', max_length=300) - source_additional_info = models.TextField(blank=True, default='', max_length=1000) - - class Meta: - ordering = ['-submitted_at'] - - class JSONAPIMeta(util.BaseJSONAPIMeta): - pass diff --git a/share/models/sources.py b/share/models/sources.py deleted file mode 100644 index 1706e1f43..000000000 --- a/share/models/sources.py +++ /dev/null @@ -1,34 +0,0 @@ -import logging - -from django.db import models - -from share.models.ingest import SourceConfig - -logger = logging.getLogger(__name__) -__all__ = ('SourceStat',) - - -class SourceStat(models.Model): - config = models.ForeignKey(SourceConfig, on_delete=models.CASCADE) - is_deleted = models.BooleanField(default=False) - date_created = models.DateTimeField(auto_now_add=True) - response_status_code = models.SmallIntegerField(blank=True, null=True) - response_elapsed_time = models.FloatField(blank=True, null=True) - response_exception = models.TextField(blank=True, null=True) - earliest_datestamp_config = models.DateField(blank=True, null=True) - base_url_config = models.TextField() - admin_note = models.TextField(blank=True) - grade = models.FloatField() - - # OAI specific - earliest_datestamp_source = models.DateField(blank=True, null=True) - earliest_datestamps_match = models.BooleanField(default=False) - - base_url_source = models.TextField(blank=True, null=True) - base_urls_match = models.BooleanField(default=False) - - def __repr__(self): - return '<{}({}, {})>'.format(self.__class__.__name__, self.pk, self.config.label) - - def __str__(self): - return '{}: {}'.format(self.config.source.long_title, self.config.label) diff --git a/share/models/synonyms.json b/share/models/synonyms.json deleted file mode 100644 index 562af8a94..000000000 --- a/share/models/synonyms.json +++ /dev/null @@ -1,7002 +0,0 @@ -{ - "l43": [ - "Industrial Organization" - ], - "obstetrics and gynecology": [ - "Obstetrics and Gynecology" - ], - "medicine and health sciences": [ - "Medicine and Health Sciences" - ], - "d57": [ - "Political Economy" - ], - "d1": [ - "Behavioral Economics" - ], - "b00": [ - "Economic History" - ], - "f02": [ - "International Economics" - ], - "multi-vehicle systems and air traffic control": [ - "Multi-Vehicle Systems and Air Traffic Control" - ], - "m51": [ - "Economics" - ], - "l94": [ - "Industrial Organization" - ], - "indo-european linguistics and philology": [ - "Indo-European Linguistics and Philology" - ], - "e00": [ - "Macroeconomics" - ], - "z1": [ - "Economics" - ], - "public relations and advertising": [ - "Public Relations and Advertising" - ], - "legal issues": [ - "Law" - ], - "y5": [ - "Economics" - ], - "n61": [ - "Economic History" - ], - "vocational rehabilitation counseling": [ - "Vocational Rehabilitation Counseling" - ], - "logic": [ - "Logic and Foundations of Mathematics" - ], - "z22": [ - "Economics" - ], - "f45": [ - "Macroeconomics", - "International Economics" - ], - "oceanography and atmospheric sciences and meteorology": [ - 
"Oceanography and Atmospheric Sciences and Meteorology" - ], - "d45": [ - "Political Economy" - ], - "m2": [ - "Business Administration, Management, and Operations" - ], - "k29": [ - "Law and Economics" - ], - "critical care nursing": [ - "Critical Care Nursing" - ], - "d51": [ - "Political Economy" - ], - "medical humanities": [ - "Medical Humanities" - ], - "g32": [ - "Finance" - ], - "q19": [ - "Agricultural and Resource Economics" - ], - "d10": [ - "Behavioral Economics" - ], - "other kinesiology": [ - "Other Kinesiology" - ], - "q11": [ - "Agricultural and Resource Economics" - ], - "n56": [ - "Economic History" - ], - "n44": [ - "Economic History" - ], - "new religious movements": [ - "New Religious Movements" - ], - "statistical models": [ - "Statistical Models", - "Numerical Analysis and Scientific Computing" - ], - "mental disorders": [ - "Mental Disorders" - ], - "d79": [ - "Political Economy" - ], - "social psychology": [ - "Social Psychology" - ], - "growth and development": [ - "Growth and Development" - ], - "f3": [ - "International Economics" - ], - "engineering physics": [ - "Engineering Physics" - ], - "other psychology": [ - "Other Psychology" - ], - "mormon studies": [ - "Mormon Studies" - ], - "h29": [ - "Public Economics" - ], - "m54": [ - "Economics" - ], - "collective bargaining": [ - "Collective Bargaining" - ], - "other theatre and performance studies": [ - "Other Theatre and Performance Studies" - ], - "occupational and environmental health nursing": [ - "Occupational and Environmental Health Nursing" - ], - "b59": [ - "Economics" - ], - "systems neuroscience": [ - "Systems Neuroscience" - ], - "east asian languages and societies": [ - "East Asian Languages and Societies" - ], - "p12": [ - "Political Economy" - ], - "ceramic arts": [ - "Ceramic Arts" - ], - "d83": [ - "Political Economy" - ], - "interior architecture": [ - "Interior Architecture" - ], - "first and second language acquisition": [ - "First and Second Language Acquisition" - ], - "other french and francophone language and literature": [ - "Other French and Francophone Language and Literature" - ], - "radiochemistry": [ - "Radiochemistry" - ], - "european law": [ - "European Law" - ], - "comparative and laboratory animal medicine": [ - "Comparative and Laboratory Animal Medicine" - ], - "m30": [ - "Marketing" - ], - "design of experiments and sample surveys": [ - "Design of Experiments and Sample Surveys" - ], - "pharmacoeconomics and pharmaceutical economics": [ - "Pharmacoeconomics and Pharmaceutical Economics" - ], - "history of philosophy": [ - "History of Philosophy" - ], - "f21": [ - "International Economics" - ], - "computational neuroscience": [ - "Computational Neuroscience" - ], - "i32": [ - "Income Distribution" - ], - "aviation safety and security": [ - "Aviation Safety and Security" - ], - "d91": [ - "Economics" - ], - "c50": [ - "Econometrics" - ], - "missions and world christianity": [ - "Missions and World Christianity" - ], - "g02": [ - "Finance" - ], - "l70": [ - "Industrial Organization" - ], - "music practice": [ - "Music Practice" - ], - "biology": [ - "Biology" - ], - "e6": [ - "Macroeconomics" - ], - "j24": [ - "Labor Economics" - ], - "german language and literature": [ - "German Language and Literature" - ], - "m29": [ - "Business Administration, Management, and Operations" - ], - "n63": [ - "Economic History" - ], - "q58": [ - "Agricultural and Resource Economics" - ], - "p46": [ - "Political Economy" - ], - "health and medical physics": [ - "Health and Medical Physics" - ], 
- "n83": [ - "Economic History" - ], - "f37": [ - "International Economics" - ], - "n35": [ - "Economic History" - ], - "g12": [ - "Finance" - ], - "elder law": [ - "Elder Law" - ], - "h39": [ - "Public Economics" - ], - "n23": [ - "Economic History" - ], - "philosophy of language": [ - "Philosophy of Language" - ], - "earth sciences": [ - "Earth Sciences" - ], - "interpersonal and small group communication": [ - "Interpersonal and Small Group Communication" - ], - "d47": [ - "Political Economy" - ], - "power and energy": [ - "Power and Energy" - ], - "f36": [ - "International Economics" - ], - "l95": [ - "Industrial Organization" - ], - "biblical studies": [ - "Biblical Studies" - ], - "i22": [ - "Education Economics" - ], - "online and distance education": [ - "Online and Distance Education" - ], - "p27": [ - "Political Economy" - ], - "developmental psychology": [ - "Developmental Psychology" - ], - "g24": [ - "Finance" - ], - "endocrine system diseases": [ - "Endocrine System Diseases" - ], - "l17": [ - "Industrial Organization" - ], - "n52": [ - "Economic History" - ], - "digital humanities": [ - "Digital Humanities" - ], - "o56": [ - "Growth and Development" - ], - "j20": [ - "Labor Economics" - ], - "vlsi and circuits, embedded and hardware systems": [ - "VLSI and Circuits, Embedded and Hardware Systems" - ], - "animal cognition": [ - "Behavior and Ethology" - ], - "decision making": [ - "Philosophy" - ], - "health psychology": [ - "Health Psychology" - ], - "classics": [ - "Classics" - ], - "primatology": [ - "Animal Sciences" - ], - "marketing": [ - "Marketing" - ], - "animal studies": [ - "Animal Studies" - ], - "viticulture and oenology": [ - "Viticulture and Oenology" - ], - "psychophysiology": [ - "Psychology" - ], - "philosophy of mind": [ - "Philosophy of Mind" - ], - "environmental health and protection": [ - "Environmental Health and Protection" - ], - "urban, community and regional planning": [ - "Urban, Community and Regional Planning" - ], - "c87": [ - "Econometrics" - ], - "estates and trusts": [ - "Estates and Trusts" - ], - "f23": [ - "International Economics" - ], - "b19": [ - "Economic History" - ], - "surgical procedures, operative": [ - "Surgical Procedures, Operative" - ], - "c88": [ - "Economics" - ], - "complexity theory": [ - "Theory and Algorithms" - ], - "logic and foundations of mathematics": [ - "Logic and Foundations of Mathematics" - ], - "p1": [ - "Political Economy" - ], - "f42": [ - "Macroeconomics", - "International Economics" - ], - "c7": [ - "Economic Theory" - ], - "graphic design": [ - "Graphic Design" - ], - "other nursing": [ - "Other Nursing" - ], - "graphic communications": [ - "Graphic Communications" - ], - "i24": [ - "Education Economics" - ], - "state and local government law": [ - "State and Local Government Law" - ], - "forest biology": [ - "Forest Biology" - ], - "instrumentation": [ - "Instrumentation" - ], - "microarrays": [ - "Microarrays" - ], - "plasm-ph": [ - "Plasma and Beam Physics" - ], - "biological and physical anthropology": [ - "Biological and Physical Anthropology" - ], - "d12": [ - "Behavioral Economics" - ], - "urban studies and planning": [ - "Urban Studies and Planning" - ], - "mathematics": [ - "Mathematics" - ], - "other oceanography and atmospheric sciences and meteorology": [ - "Other Oceanography and Atmospheric Sciences and Meteorology" - ], - "other mechanical engineering": [ - "Other Mechanical Engineering" - ], - "b50": [ - "Economics" - ], - "m4": [ - "Accounting" - ], - "z38": [ - "Economics" - ], - 
"admiralty": [ - "Admiralty" - ], - "construction engineering": [ - "Construction Engineering" - ], - "polynesian studies": [ - "Polynesian Studies" - ], - "apiculture": [ - "Apiculture" - ], - "h55": [ - "Public Economics" - ], - "o1": [ - "Growth and Development" - ], - "y4": [ - "Economics" - ], - "non-linear dynamics": [ - "Non-linear Dynamics" - ], - "applied mathematics": [ - "Applied Mathematics" - ], - "musculoskeletal diseases": [ - "Musculoskeletal Diseases" - ], - "j3": [ - "Labor Economics" - ], - "statistical theory": [ - "Statistical Theory" - ], - "n6": [ - "Economic History" - ], - "j70": [ - "Labor Economics" - ], - "social control, law, crime, and deviance": [ - "Social Control, Law, Crime, and Deviance" - ], - "harmonic analysis and representation": [ - "Harmonic Analysis and Representation" - ], - "h60": [ - "Public Economics" - ], - "syntax": [ - "Syntax" - ], - "human geography": [ - "Human Geography" - ], - "z3": [ - "Economics" - ], - "preventive medicine": [ - "Preventive Medicine" - ], - "disorders of environmental origin": [ - "Disorders of Environmental Origin" - ], - "jewish studies": [ - "Jewish Studies" - ], - "scholarly publishing": [ - "Scholarly Publishing" - ], - "molecular and cellular neuroscience": [ - "Molecular and Cellular Neuroscience" - ], - "d59": [ - "Political Economy" - ], - "f65": [ - "Finance", - "International Economics" - ], - "e22": [ - "Macroeconomics" - ], - "j64": [ - "Labor Economics" - ], - "plant sciences": [ - "Plant Sciences" - ], - "c83": [ - "Economics" - ], - "other medical sciences": [ - "Other Medical Sciences" - ], - "bioethics and medical ethics": [ - "Bioethics and Medical Ethics" - ], - "agency": [ - "Agency" - ], - "g13": [ - "Finance" - ], - "computational biology": [ - "Computational Biology" - ], - "d2": [ - "Industrial Organization" - ], - "h21": [ - "Public Economics" - ], - "other biochemistry, biophysics, and structural biology": [ - "Other Biochemistry, Biophysics, and Structural Biology" - ], - "international humanitarian law": [ - "International Humanitarian Law" - ], - "ancient history, greek and roman through late antiquity": [ - "Ancient History, Greek and Roman through Late Antiquity" - ], - "z00": [ - "Economics" - ], - "health law and policy": [ - "Health Law and Policy" - ], - "o29": [ - "Growth and Development" - ], - "music pedagogy": [ - "Music Pedagogy" - ], - "n64": [ - "Economic History" - ], - "other environmental sciences": [ - "Other Environmental Sciences" - ], - "biotechnology": [ - "Biotechnology" - ], - "psychiatric and mental health": [ - "Psychiatric and Mental Health" - ], - "h69": [ - "Public Economics" - ], - "engineering science and materials": [ - "Engineering Science and Materials" - ], - "geomorphology": [ - "Geomorphology" - ], - "clinical epidemiology": [ - "Clinical Epidemiology" - ], - "screenwriting": [ - "Screenwriting" - ], - "k13": [ - "Law and Economics" - ], - "b27": [ - "Economic History" - ], - "orthotics and prosthetics": [ - "Orthotics and Prosthetics" - ], - "national security law": [ - "National Security Law" - ], - "n27": [ - "Economic History" - ], - "z21": [ - "Economics" - ], - "portfolio and security analysis": [ - "Portfolio and Security Analysis" - ], - "music performance": [ - "Music Performance" - ], - "m11": [ - "Business Administration, Management, and Operations" - ], - "r29": [ - "Regional Economics" - ], - "osteopathic medicine and osteopathy": [ - "Osteopathic Medicine and Osteopathy" - ], - "other social and behavioral sciences": [ - "Other Social and 
Behavioral Sciences" - ], - "labor relations": [ - "Labor Relations" - ], - "semantics and pragmatics": [ - "Semantics and Pragmatics" - ], - "systems engineering": [ - "Systems Engineering" - ], - "urology": [ - "Urology" - ], - "science and technology law": [ - "Science and Technology Law" - ], - "q40": [ - "Agricultural and Resource Economics" - ], - "disaster law": [ - "Disaster Law" - ], - "e5": [ - "Macroeconomics" - ], - "modern literature": [ - "Modern Literature" - ], - "political theory": [ - "Political Theory" - ], - "classical archaeology and art history": [ - "Classical Archaeology and Art History" - ], - "surgery": [ - "Surgery" - ], - "acting": [ - "Acting" - ], - "c67": [ - "Economic Theory" - ], - "i15": [ - "Health Economics" - ], - "b32": [ - "Economic History" - ], - "labor and employment law": [ - "Labor and Employment Law" - ], - "weed science": [ - "Weed Science" - ], - "accessibility": [ - "Accessibility" - ], - "radiology and medical imaging": [ - "Radiology" - ], - "n32": [ - "Economic History" - ], - "q-bio": [ - "Biology" - ], - "c19": [ - "Econometrics" - ], - "transportation engineering": [ - "Transportation Engineering" - ], - "f31": [ - "International Economics" - ], - "biomedical and dental materials": [ - "Biomedical and Dental Materials" - ], - "f12": [ - "International Economics" - ], - "d80": [ - "Political Economy" - ], - "l2": [ - "Industrial Organization" - ], - "r1": [ - "Regional Economics" - ], - "sales and merchandising": [ - "Sales and Merchandising" - ], - "performance studies": [ - "Performance Studies" - ], - "e12": [ - "Macroeconomics" - ], - "c92": [ - "Economics" - ], - "l22": [ - "Industrial Organization" - ], - "volcanology": [ - "Volcanology" - ], - "radio": [ - "Radio" - ], - "neuropharmacology": [ - "Neurology" - ], - "other rehabilitation and therapy": [ - "Other Rehabilitation and Therapy" - ], - "o39": [ - "Growth and Development" - ], - "computer engineering": [ - "Computer Engineering" - ], - "e24": [ - "Macroeconomics" - ], - "agricultural education": [ - "Agricultural Education" - ], - "g23": [ - "Finance" - ], - "p2": [ - "Political Economy" - ], - "animal law": [ - "Animal Law" - ], - "y3": [ - "Economics" - ], - "mathematical biology": [ - "Biology" - ], - "n42": [ - "Economic History" - ], - "l99": [ - "Industrial Organization" - ], - "d50": [ - "Political Economy" - ], - "litigation": [ - "Litigation" - ], - "c43": [ - "Econometrics" - ], - "f66": [ - "Labor Economics", - "International Economics" - ], - "j26": [ - "Labor Economics" - ], - "l32": [ - "Industrial Organization" - ], - "soil science": [ - "Soil Science" - ], - "a13": [ - "Economics" - ], - "n22": [ - "Economic History" - ], - "other business": [ - "Other Business" - ], - "psycholinguistics and neurolinguistics": [ - "Psycholinguistics and Neurolinguistics" - ], - "n41": [ - "Economic History" - ], - "community-based learning": [ - "Community-based Learning" - ], - "n14": [ - "Economic History" - ], - "a1": [ - "Economics" - ], - "children's and young adult literature": [ - "Children's and Young Adult Literature" - ], - "energy policy": [ - "Energy Policy" - ], - "h68": [ - "Public Economics" - ], - "l64": [ - "Industrial Organization" - ], - "lesbian, gay, bisexual, and transgender studies": [ - "Lesbian, Gay, Bisexual, and Transgender Studies" - ], - "h11": [ - "Public Economics" - ], - "funct-an": [ - "Mathematics" - ], - "folklore": [ - "Folklore" - ], - "sports sciences": [ - "Sports Sciences" - ], - "computational science": [ - "Numerical Analysis and 
Computation" - ], - "other religion": [ - "Other Religion" - ], - "construction engineering and management": [ - "Construction Engineering and Management" - ], - "genetics": [ - "Genetics" - ], - "e44": [ - "Macroeconomics" - ], - "f10": [ - "International Economics" - ], - "m53": [ - "Economics" - ], - "survival analysis": [ - "Survival Analysis" - ], - "e2": [ - "Macroeconomics" - ], - "digital communications and networking": [ - "Digital Communications and Networking" - ], - "environmental public health": [ - "Environmental Public Health" - ], - "pediatric nursing": [ - "Pediatric Nursing" - ], - "physical processes": [ - "Physical Processes" - ], - "i30": [ - "Income Distribution" - ], - "o41": [ - "Growth and Development" - ], - "education law": [ - "Education Law" - ], - "legal theory": [ - "Legal Theory" - ], - "numerical analysis and scientific computing": [ - "Numerical Analysis and Scientific Computing" - ], - "i31": [ - "Income Distribution" - ], - "g10": [ - "Finance" - ], - "n57": [ - "Economic History" - ], - "other neuroscience and neurobiology": [ - "Other Neuroscience and Neurobiology" - ], - "n93": [ - "Economic History" - ], - "social and behavioral sciences": [ - "Social and Behavioral Sciences" - ], - "p13": [ - "Political Economy" - ], - "agricultural science": [ - "Agricultural Science", - "Agriculture" - ], - "remote sensing": [ - "Remote Sensing" - ], - "h51": [ - "Public Economics", - "Health Economics" - ], - "l21": [ - "Industrial Organization" - ], - "j16": [ - "Labor Economics" - ], - "j11": [ - "Labor Economics" - ], - "energy systems": [ - "Energy Systems" - ], - "r40": [ - "Regional Economics" - ], - "e19": [ - "Macroeconomics" - ], - "ethology": [ - "Behavior and Ethology" - ], - "r51": [ - "Finance", - "Regional Economics" - ], - "income distribution": [ - "Income Distribution" - ], - "n2": [ - "Economic History" - ], - "gerontology": [ - "Gerontology" - ], - "d72": [ - "Political Economy" - ], - "ophthalmology": [ - "Ophthalmology" - ], - "k14": [ - "Law and Economics" - ], - "d40": [ - "Political Economy" - ], - "d00": [ - "Economics" - ], - "finance": [ - "Finance" - ], - "performance management": [ - "Performance Management" - ], - "evolutionary psychology": [ - "Psychology" - ], - "other american studies": [ - "Other American Studies" - ], - "comparative literature": [ - "Comparative Literature" - ], - "sports studies": [ - "Sports Studies" - ], - "zoology": [ - "Zoology" - ], - "p23": [ - "Political Economy" - ], - "f47": [ - "Macroeconomics", - "International Economics" - ], - "law enforcement and corrections": [ - "Law Enforcement and Corrections" - ], - "influenza virus vaccines": [ - "Influenza Virus Vaccines" - ], - "b1": [ - "Economic History" - ], - "p26": [ - "Political Economy" - ], - "body regions": [ - "Body Regions" - ], - "social history": [ - "Social History" - ], - "medical genetics": [ - "Medical Genetics" - ], - "p44": [ - "Political Economy" - ], - "automotive engineering": [ - "Automotive Engineering" - ], - "agricultural economics": [ - "Agricultural Economics" - ], - "learnability": [ - "Linguistics" - ], - "theory and algorithms": [ - "Theory and Algorithms" - ], - "computer sciences": [ - "Computer Sciences" - ], - "e25": [ - "Macroeconomics" - ], - "d73": [ - "Political Economy" - ], - "d89": [ - "Political Economy" - ], - "p00": [ - "Political Economy" - ], - "f33": [ - "International Economics" - ], - "d29": [ - "Industrial Organization" - ], - "n15": [ - "Economic History" - ], - "neuroanatomy": [ - "Neuroscience and 
Neurobiology" - ], - "r10": [ - "Regional Economics" - ], - "sociology of culture": [ - "Sociology of Culture" - ], - "social welfare law": [ - "Social Welfare Law" - ], - "h22": [ - "Public Economics" - ], - "j40": [ - "Labor Economics" - ], - "stomatognathic diseases": [ - "Stomatognathic Diseases" - ], - "o2": [ - "Growth and Development" - ], - "n76": [ - "Economic History" - ], - "developmental neuroscience": [ - "Developmental Neuroscience" - ], - "nucleic acids, nucleotides, and nucleosides": [ - "Nucleic Acids, Nucleotides, and Nucleosides" - ], - "technical and professional writing": [ - "Technical and Professional Writing" - ], - "discrete mathematics and combinatorics": [ - "Discrete Mathematics and Combinatorics" - ], - "b51": [ - "Economics" - ], - "rheumatology": [ - "Rheumatology" - ], - "i19": [ - "Health Economics" - ], - "curriculum and instruction": [ - "Curriculum and Instruction" - ], - "o11": [ - "Growth and Development" - ], - "glaciology": [ - "Glaciology" - ], - "law and politics": [ - "Law and Politics" - ], - "latin american literature": [ - "Latin American Literature" - ], - "recreational therapy": [ - "Recreational Therapy" - ], - "h7": [ - "Public Economics" - ], - "air and space law": [ - "Air and Space Law" - ], - "clinical trials": [ - "Clinical Trials", - "Translational Medical Research" - ], - "alg-geom": [ - "Algebraic Geometry" - ], - "collection development and management": [ - "Collection Development and Management" - ], - "horticulture": [ - "Horticulture" - ], - "e66": [ - "Macroeconomics" - ], - "b4": [ - "Economics" - ], - "small or companion animal medicine": [ - "Small or Companion Animal Medicine" - ], - "sports medicine": [ - "Sports Medicine" - ], - "petroleum engineering": [ - "Petroleum Engineering" - ], - "c89": [ - "Economics" - ], - "m59": [ - "Economics" - ], - "l49": [ - "Industrial Organization" - ], - "o38": [ - "Growth and Development" - ], - "c63": [ - "Economic Theory" - ], - "d63": [ - "Income Distribution" - ], - "b14": [ - "Economic History" - ], - "taxation-state and local": [ - "Taxation-State and Local" - ], - "b40": [ - "Economics" - ], - "islamic world and near east history": [ - "Islamic World and Near East History" - ], - "antitrust and trade regulation": [ - "Antitrust and Trade Regulation" - ], - "o24": [ - "Growth and Development" - ], - "electronic devices and semiconductor manufacturing": [ - "Electronic Devices and Semiconductor Manufacturing" - ], - "m16": [ - "Business Administration, Management, and Operations" - ], - "n67": [ - "Economic History" - ], - "diagnosis": [ - "Diagnosis" - ], - "anatomy and physiology": [ - "Anatomy" - ], - "cond-mat.supr-con": [ - "Condensed Matter Physics" - ], - "ergonomics": [ - "Ergonomics" - ], - "photography": [ - "Photography" - ], - "international and comparative labor relations": [ - "International and Comparative Labor Relations" - ], - "n82": [ - "Economic History" - ], - "african languages and societies": [ - "African Languages and Societies" - ], - "student counseling and personnel services": [ - "Student Counseling and Personnel Services" - ], - "h42": [ - "Public Economics" - ], - "landscape architecture": [ - "Landscape Architecture" - ], - "evolution": [ - "Evolution" - ], - "indian and aboriginal law": [ - "Indian and Aboriginal Law" - ], - "law and economics": [ - "Law and Economics" - ], - "n91": [ - "Economic History" - ], - "fourteenth amendment": [ - "Fourteenth Amendment" - ], - "respiratory therapy": [ - "Respiratory Therapy" - ], - "k25": [ - "Law and 
Economics" - ], - "i21": [ - "Education Economics" - ], - "q5": [ - "Agricultural and Resource Economics" - ], - "legal biography": [ - "Legal Biography" - ], - "j29": [ - "Labor Economics" - ], - "o53": [ - "Growth and Development" - ], - "cultural history": [ - "Cultural History" - ], - "pain management": [ - "Pain Management" - ], - "macroeconomics": [ - "Macroeconomics" - ], - "j17": [ - "Labor Economics" - ], - "c24": [ - "Economic Theory" - ], - "legal education": [ - "Legal Education" - ], - "n4": [ - "Economic History" - ], - "educational sociology": [ - "Educational Sociology" - ], - "social and cultural anthropology": [ - "Social and Cultural Anthropology" - ], - "h3": [ - "Public Economics" - ], - "longitudinal data analysis and time series": [ - "Longitudinal Data Analysis and Time Series" - ], - "c01": [ - "Econometrics" - ], - "i20": [ - "Education Economics" - ], - "rural sociology": [ - "Rural Sociology" - ], - "n75": [ - "Economic History" - ], - "economic theory": [ - "Economic Theory" - ], - "q39": [ - "Agricultural and Resource Economics" - ], - "archaeological anthropology": [ - "Archaeological Anthropology" - ], - "social work": [ - "Social Work" - ], - "agronomy and crop sciences life sciences": [ - "Agronomy and Crop Sciences Life Sciences" - ], - "q-fin": [ - "Finance" - ], - "agriculture law": [ - "Agriculture Law" - ], - "fungi": [ - "Fungi" - ], - "k23": [ - "Law and Economics" - ], - "plant biology": [ - "Plant Biology" - ], - "psychobiology": [ - "Biological Psychology" - ], - "r52": [ - "Regional Economics" - ], - "j15": [ - "Labor Economics" - ], - "cell anatomy": [ - "Cell Anatomy" - ], - "b23": [ - "Economic History" - ], - "other history of art, architecture, and archaeology": [ - "Other History of Art, Architecture, and Archaeology" - ], - "f6": [ - "International Economics" - ], - "neuroscience and neurobiology": [ - "Neuroscience and Neurobiology" - ], - "comparative philosophy": [ - "Comparative Philosophy" - ], - "nonfiction": [ - "Nonfiction" - ], - "r22": [ - "Regional Economics" - ], - "bilingual, multilingual, and multicultural education": [ - "Bilingual, Multilingual, and Multicultural Education" - ], - "art and materials conservation": [ - "Art and Materials Conservation" - ], - "n45": [ - "Economic History" - ], - "physics.chem-ph": [ - "Biological and Chemical Physics" - ], - "poultry or avian science": [ - "Poultry or Avian Science" - ], - "c44": [ - "Econometrics" - ], - "h12": [ - "Public Economics" - ], - "o21": [ - "Growth and Development" - ], - "g01": [ - "Finance" - ], - "h84": [ - "Public Economics" - ], - "j22": [ - "Labor Economics" - ], - "b12": [ - "Economic History" - ], - "l97": [ - "Industrial Organization" - ], - "y20": [ - "Economics" - ], - "b21": [ - "Economic History" - ], - "behavior and ethology": [ - "Behavior and Ethology" - ], - "o57": [ - "Growth and Development" - ], - "q59": [ - "Agricultural and Resource Economics" - ], - "p5": [ - "Political Economy" - ], - "institutional and historical": [ - "Institutional and Historical" - ], - "c58": [ - "Econometrics" - ], - "computer law": [ - "Computer Law" - ], - "italian literature": [ - "Italian Literature" - ], - "medical anatomy": [ - "Medical Anatomy" - ], - "structural materials": [ - "Structural Materials" - ], - "h89": [ - "Public Economics" - ], - "c61": [ - "Economic Theory" - ], - "i38": [ - "Income Distribution" - ], - "laboratory and basic science research life sciences": [ - "Laboratory and Basic Science Research Life Sciences" - ], - "islamic studies": [ - 
"Islamic Studies" - ], - "c35": [ - "Economic Theory" - ], - "spatial science": [ - "Spatial Science" - ], - "other physiology": [ - "Other Physiology" - ], - "kinesiotherapy": [ - "Kinesiotherapy" - ], - "k16": [ - "Law and Economics" - ], - "other animal sciences": [ - "Other Animal Sciences" - ], - "medical biochemistry": [ - "Medical Biochemistry" - ], - "tribology": [ - "Tribology" - ], - "geotechnical engineering": [ - "Geotechnical Engineering" - ], - "social statistics": [ - "Social Statistics" - ], - "j23": [ - "Labor Economics" - ], - "meat science": [ - "Meat Science" - ], - "telemedicine": [ - "Telemedicine" - ], - "l66": [ - "Industrial Organization" - ], - "anesthesia and analgesia": [ - "Anesthesia and Analgesia" - ], - "q24": [ - "Agricultural and Resource Economics" - ], - "bioengineering": [ - "Biomedical Engineering and Bioengineering" - ], - "physics.space-ph": [ - "Physics" - ], - "h2": [ - "Public Economics" - ], - "h24": [ - "Public Economics" - ], - "communication sciences and disorders": [ - "Communication Sciences and Disorders" - ], - "theory and criticism": [ - "Theory and Criticism" - ], - "meteorology": [ - "Meteorology" - ], - "f24": [ - "International Economics" - ], - "science and medical education": [ - "Science and Mathematics Education" - ], - "nanotechnology": [ - "Nanotechnology" - ], - "medieval history": [ - "Medieval History" - ], - "l38": [ - "Industrial Organization" - ], - "geology": [ - "Geology" - ], - "f64": [ - "International Economics" - ], - "p28": [ - "Political Economy" - ], - "cosmochemistry": [ - "Cosmochemistry" - ], - "b17": [ - "Economic History" - ], - "j82": [ - "Labor Economics" - ], - "k19": [ - "Law and Economics" - ], - "sociology": [ - "Sociology" - ], - "c38": [ - "Economic Theory" - ], - "applied linguistics": [ - "Applied Linguistics" - ], - "educational leadership": [ - "Educational Leadership" - ], - "basque studies": [ - "Basque Studies" - ], - "otorhinolaryngologic diseases": [ - "Otorhinolaryngologic Diseases" - ], - "n7": [ - "Economic History" - ], - "rhetoric and composition": [ - "Rhetoric and Composition" - ], - "leisure studies": [ - "Leisure Studies" - ], - "p3": [ - "Political Economy" - ], - "d24": [ - "Industrial Organization" - ], - "medical cell biology": [ - "Medical Cell Biology" - ], - "f55": [ - "Political Economy", - "International Economics" - ], - "statistical, nonlinear, and soft matter physics": [ - "Statistical, Nonlinear, and Soft Matter Physics" - ], - "commercial law": [ - "Commercial Law" - ], - "chemical actions and uses": [ - "Chemical Actions and Uses" - ], - "polymer chemistry": [ - "Polymer Chemistry" - ], - "ecology and evolutionary biology": [ - "Ecology and Evolutionary Biology" - ], - "environmental sciences": [ - "Environmental Sciences" - ], - "p35": [ - "Political Economy" - ], - "c93": [ - "Economics" - ], - "prosthodontics and prosthodontology": [ - "Prosthodontics and Prosthodontology" - ], - "physics.med-ph": [ - "Health and Medical Physics" - ], - "d42": [ - "Political Economy" - ], - "food biotechnology": [ - "Food Biotechnology" - ], - "y92": [ - "Economics" - ], - "music therapy": [ - "Music Therapy" - ], - "models and methods": [ - "Models and Methods" - ], - "australian studies": [ - "Australian Studies" - ], - "c23": [ - "Economic Theory" - ], - "speech and hearing science": [ - "Speech and Hearing Science" - ], - "q23": [ - "Agricultural and Resource Economics" - ], - "graphics and human computer interfaces": [ - "Graphics and Human Computer Interfaces" - ], - 
"palliative care": [ - "Palliative Care" - ], - "k38": [ - "Law and Economics" - ], - "radiology": [ - "Radiology" - ], - "european history": [ - "European History" - ], - "nuclear engineering": [ - "Nuclear Engineering" - ], - "legal history": [ - "Legal History" - ], - "exercise physiology": [ - "Exercise Physiology" - ], - "portuguese literature": [ - "Portuguese Literature" - ], - "q49": [ - "Agricultural and Resource Economics" - ], - "medical pathology": [ - "Medical Pathology" - ], - "law and society": [ - "Law and Society" - ], - "health services administration": [ - "Health Services Administration" - ], - "other mathematics": [ - "Other Mathematics" - ], - "scholarly communication": [ - "Scholarly Communication" - ], - "applied mechanics": [ - "Applied Mechanics" - ], - "philosophy of science": [ - "Philosophy of Science" - ], - "o33": [ - "Growth and Development" - ], - "other history": [ - "Other History" - ], - "musculoskeletal system": [ - "Musculoskeletal System" - ], - "z10": [ - "Economics" - ], - "e32": [ - "Macroeconomics" - ], - "health and medical administration": [ - "Health and Medical Administration" - ], - "ceramic materials": [ - "Ceramic Materials" - ], - "o19": [ - "Growth and Development" - ], - "r59": [ - "Regional Economics" - ], - "b52": [ - "Economics" - ], - "physical and environmental geography": [ - "Physical and Environmental Geography" - ], - "french and francophone literature": [ - "French and Francophone Literature" - ], - "land use law": [ - "Land Use Law" - ], - "f63": [ - "Growth and Development", - "International Economics" - ], - "environmental law": [ - "Environmental Law" - ], - "r48": [ - "Economic Policy", - "Regional Economics" - ], - "ecology": [ - "Ecology and Evolutionary Biology" - ], - "allergy and immunology": [ - "Allergy and Immunology" - ], - "neural nets": [ - "OS and Networks" - ], - "russian linguistics": [ - "Russian Linguistics" - ], - "medical pharmacology": [ - "Medical Pharmacology" - ], - "d58": [ - "Political Economy" - ], - "i18": [ - "Health Economics" - ], - "virus diseases": [ - "Virus Diseases" - ], - "publishing": [ - "Publishing" - ], - "other economics": [ - "Other Economics" - ], - "c46": [ - "Econometrics" - ], - "human-computer interaction": [ - "Graphics and Human Computer Interfaces" - ], - "community-based research": [ - "Community-based Research" - ], - "e52": [ - "Macroeconomics" - ], - "r23": [ - "Regional Economics" - ], - "public health education and promotion": [ - "Public Health Education and Promotion" - ], - "physics.ed-ph": [ - "Science and Mathematics Education" - ], - "parasitology": [ - "Parasitology" - ], - "k12": [ - "Law and Economics" - ], - "i10": [ - "Health Economics" - ], - "h00": [ - "Public Economics" - ], - "systems and integrative physiology life sciences": [ - "Systems and Integrative Physiology Life Sciences" - ], - "hardware systems": [ - "Hardware Systems" - ], - "engineering education": [ - "Engineering Education" - ], - "genealogy": [ - "Genealogy" - ], - "neurogenetics": [ - "Neuroscience and Neurobiology" - ], - "databases and information systems": [ - "Databases and Information Systems" - ], - "h6": [ - "Public Economics" - ], - "optics": [ - "Optics" - ], - "q48": [ - "Agricultural and Resource Economics" - ], - "a29": [ - "Education" - ], - "i00": [ - "Education Economics" - ], - "j39": [ - "Labor Economics" - ], - "american material culture": [ - "American Material Culture" - ], - "geography": [ - "Geography" - ], - "e65": [ - "Macroeconomics" - ], - "h54": [ - "Public 
Economics" - ], - "biochemistry, biophysics, and structural biology": [ - "Biochemistry, Biophysics, and Structural Biology" - ], - "alternative and complementary medicine": [ - "Alternative and Complementary Medicine" - ], - "other rhetoric and composition": [ - "Other Rhetoric and Composition" - ], - "constitutional law": [ - "Constitutional Law" - ], - "k30": [ - "Law and Economics" - ], - "teacher education and professional development": [ - "Teacher Education and Professional Development" - ], - "k39": [ - "Law and Economics" - ], - "e61": [ - "Macroeconomics" - ], - "arts management": [ - "Arts Management" - ], - "j68": [ - "Labor Economics" - ], - "food chemistry": [ - "Food Chemistry" - ], - "gender and sexuality": [ - "Gender and Sexuality" - ], - "m49": [ - "Accounting" - ], - "japanese studies": [ - "Japanese Studies" - ], - "a23": [ - "Higher Education and Teaching" - ], - "business intelligence": [ - "Business Intelligence" - ], - "n25": [ - "Economic History" - ], - "other international and area studies": [ - "Other International and Area Studies" - ], - "stars, interstellar medium and the galaxy": [ - "Stars, Interstellar Medium and the Galaxy" - ], - "f41": [ - "Macroeconomics", - "International Economics" - ], - "j45": [ - "Labor Economics" - ], - "medical neurobiology": [ - "Medical Neurobiology" - ], - "physics.atom-ph": [ - "Atomic, Molecular and Optical Physics" - ], - "geographic information sciences": [ - "Geographic Information Sciences" - ], - "conflict of laws": [ - "Conflict of Laws" - ], - "anthropological linguistics and sociolinguistics": [ - "Anthropological Linguistics and Sociolinguistics" - ], - "juvenile law": [ - "Juvenile Law" - ], - "q17": [ - "Agricultural and Resource Economics" - ], - "immunology": [ - "Medical Immunology" - ], - "neuropsychology": [ - "Neurology" - ], - "q3": [ - "Agricultural and Resource Economics" - ], - "ancient, medieval, renaissance and baroque art and architecture": [ - "Ancient, Medieval, Renaissance and Baroque Art and Architecture" - ], - "c55": [ - "Econometrics" - ], - "lipids": [ - "Lipids" - ], - "p34": [ - "Political Economy" - ], - "comparative and foreign law": [ - "Comparative and Foreign Law" - ], - "race and ethnicity": [ - "Race and Ethnicity" - ], - "algebraic geometry": [ - "Algebraic Geometry" - ], - "z30": [ - "Economics" - ], - "american studies": [ - "American Studies" - ], - "n8": [ - "Economic History" - ], - "medical physiology": [ - "Medical Physiology" - ], - "m10": [ - "Business Administration, Management, and Operations" - ], - "otolaryngology": [ - "Otolaryngology" - ], - "h53": [ - "Public Economics" - ], - "large or food animal and equine medicine": [ - "Large or Food Animal and Equine Medicine" - ], - "taxation": [ - "Taxation" - ], - "f68": [ - "International Economics" - ], - "c25": [ - "Economic Theory" - ], - "r41": [ - "Regional Economics" - ], - "n92": [ - "Economic History" - ], - "f22": [ - "International Economics" - ], - "l73": [ - "Industrial Organization" - ], - "environmental policy": [ - "Environmental Policy" - ], - "solv-int": [ - "Physics" - ], - "elementary particles and fields and string theory": [ - "Elementary Particles and Fields and String Theory" - ], - "religion law": [ - "Religion Law" - ], - "legal": [ - "Legal" - ], - "molecular biology": [ - "Molecular Biology" - ], - "other physics": [ - "Other Physics" - ], - "influenza humans": [ - "Influenza Humans" - ], - "l15": [ - "Industrial Organization" - ], - "physical chemistry": [ - "Physical Chemistry" - ], - 
"taxation-transnational": [ - "Taxation-Transnational" - ], - "r21": [ - "Regional Economics" - ], - "operations and supply chain management": [ - "Operations and Supply Chain Management" - ], - "q12": [ - "Agricultural and Resource Economics" - ], - "aviation": [ - "Aviation" - ], - "y7": [ - "Economics" - ], - "m39": [ - "Marketing" - ], - "n13": [ - "Economic History" - ], - "public health": [ - "Public Health" - ], - "organizational communication": [ - "Organizational Communication" - ], - "h30": [ - "Public Economics" - ], - "l41": [ - "Industrial Organization" - ], - "german literature": [ - "German Literature" - ], - "j48": [ - "Labor Economics" - ], - "supreme court of the united states": [ - "Supreme Court of the United States" - ], - "o34": [ - "Growth and Development" - ], - "k35": [ - "Law and Economics" - ], - "service learning": [ - "Service Learning" - ], - "marriage and family therapy and counseling": [ - "Marriage and Family Therapy and Counseling" - ], - "systems biology": [ - "Systems Biology" - ], - "j28": [ - "Labor Economics" - ], - "l93": [ - "Industrial Organization" - ], - "health services research": [ - "Health Services Research" - ], - "j42": [ - "Labor Economics" - ], - "l42": [ - "Industrial Organization" - ], - "viruses": [ - "Viruses" - ], - "latin american languages and societies": [ - "Latin American Languages and Societies" - ], - "l26": [ - "Industrial Organization" - ], - "z28": [ - "Economics" - ], - "strategic management policy": [ - "Strategic Management Policy" - ], - "entomology": [ - "Entomology" - ], - "secured transactions": [ - "Secured Transactions" - ], - "tourism": [ - "Tourism" - ], - "n26": [ - "Economic History" - ], - "melanesian studies": [ - "Melanesian Studies" - ], - "feminist philosophy": [ - "Feminist Philosophy" - ], - "organizations law": [ - "Organizations Law" - ], - "other educational administration and supervision": [ - "Other Educational Administration and Supervision" - ], - "pragmatics": [ - "Semantics and Pragmatics" - ], - "r32": [ - "Regional Economics" - ], - "j19": [ - "Labor Economics" - ], - "r12": [ - "Regional Economics" - ], - "m19": [ - "Business Administration, Management, and Operations" - ], - "o31": [ - "Growth and Development" - ], - "hemic and immune systems": [ - "Hemic and Immune Systems" - ], - "p49": [ - "Political Economy" - ], - "f69": [ - "International Economics" - ], - "race, ethnicity and post-colonial studies": [ - "Race, Ethnicity and Post-Colonial Studies" - ], - "d44": [ - "Political Economy" - ], - "spanish and portuguese language and literature": [ - "Spanish and Portuguese Language and Literature" - ], - "o20": [ - "Growth and Development" - ], - "comparative nutrition": [ - "Comparative Nutrition" - ], - "plant breeding and genetics life sciences": [ - "Plant Breeding and Genetics Life Sciences" - ], - "m42": [ - "Accounting" - ], - "d14": [ - "Finance", - "Behavioral Economics", - "Economics" - ], - "geochemistry": [ - "Geochemistry" - ], - "biochemistry": [ - "Biochemistry" - ], - "thermodynamics": [ - "Thermodynamics" - ], - "computer-aided engineering and design": [ - "Computer-Aided Engineering and Design" - ], - "h50": [ - "Public Economics" - ], - "l65": [ - "Industrial Organization" - ], - "pharmacy and pharmaceutical sciences": [ - "Pharmacy and Pharmaceutical Sciences" - ], - "environmental education": [ - "Environmental Education" - ], - "p48": [ - "Political Economy" - ], - "b22": [ - "Economic History" - ], - "e3": [ - "Macroeconomics" - ], - "other languages, societies, and 
cultures": [ - "Other Languages, Societies, and Cultures" - ], - "neural modeling": [ - "Neuroscience and Neurobiology" - ], - "n33": [ - "Economic History" - ], - "e69": [ - "Macroeconomics" - ], - "m5": [ - "Economics" - ], - "c68": [ - "Economic Theory" - ], - "c26": [ - "Economic Theory" - ], - "cells": [ - "Cells" - ], - "agribusiness": [ - "Agribusiness" - ], - "condensed matter physics": [ - "Condensed Matter Physics" - ], - "economic history": [ - "Economic History" - ], - "fruit science": [ - "Fruit Science" - ], - "q00": [ - "Agricultural and Resource Economics" - ], - "other philosophy": [ - "Other Philosophy" - ], - "psychology": [ - "Psychology" - ], - "r11": [ - "Regional Economics" - ], - "forest management": [ - "Forest Management" - ], - "biomedical": [ - "Biomedical" - ], - "biomedical devices and instrumentation": [ - "Biomedical Devices and Instrumentation" - ], - "applied statistics": [ - "Applied Statistics" - ], - "multivariate analysis": [ - "Multivariate Analysis" - ], - "ecosystem science": [ - "Ecology and Evolutionary Biology" - ], - "y9": [ - "Economics" - ], - "l44": [ - "Industrial Organization" - ], - "cognitive psychology": [ - "Cognitive Psychology" - ], - "inorganic chemicals": [ - "Inorganic Chemicals" - ], - "digestive system": [ - "Digestive System" - ], - "metal and jewelry arts": [ - "Metal and Jewelry Arts" - ], - "algebra": [ - "Algebra" - ], - "organizational behavior and theory": [ - "Organizational Behavior and Theory" - ], - "mental and social health": [ - "Mental and Social Health" - ], - "consumer protection law": [ - "Consumer Protection Law" - ], - "j10": [ - "Labor Economics" - ], - "psychophysics": [ - "Psychology" - ], - "other medicine and health sciences": [ - "Other Medicine and Health Sciences" - ], - "audio arts and acoustics": [ - "Audio Arts and Acoustics" - ], - "tourism and travel": [ - "Tourism and Travel" - ], - "architectural technology": [ - "Architectural Technology" - ], - "evidence based medicine": [ - "Medicine and Health Sciences" - ], - "jurisprudence": [ - "Jurisprudence" - ], - "electrical and computer engineering": [ - "Electrical and Computer Engineering" - ], - "infrastructure": [ - "Infrastructure" - ], - "architecture": [ - "Architecture" - ], - "legal writing and research": [ - "Legal Writing and Research" - ], - "l3": [ - "Industrial Organization" - ], - "journalism studies": [ - "Journalism Studies" - ], - "d90": [ - "Economics" - ], - "medical microbiology": [ - "Medical Microbiology" - ], - "intellectual history": [ - "Intellectual History" - ], - "other german language and literature": [ - "Other German Language and Literature" - ], - "bioelectrical and neuroengineering": [ - "Bioelectrical and Neuroengineering" - ], - "z23": [ - "Economics" - ], - "veterinary microbiology and immunobiology": [ - "Veterinary Microbiology and Immunobiology" - ], - "o54": [ - "Growth and Development" - ], - "j47": [ - "Labor Economics" - ], - "g30": [ - "Finance" - ], - "judges": [ - "Judges" - ], - "m52": [ - "Economics" - ], - "language": [ - "Programming Languages and Compilers" - ], - "administrative law": [ - "Administrative Law" - ], - "f11": [ - "International Economics" - ], - "ethnomusicology": [ - "Ethnomusicology" - ], - "f32": [ - "International Economics" - ], - "other feminist, gender, and sexuality studies": [ - "Other Feminist, Gender, and Sexuality Studies" - ], - "nuclear": [ - "Nuclear" - ], - "comparative and evolutionary physiology": [ - "Comparative and Evolutionary Physiology" - ], - "c13": [ - 
"Econometrics" - ], - "other life sciences": [ - "Other Life Sciences" - ], - "d5": [ - "Political Economy" - ], - "contracts": [ - "Contracts" - ], - "ethics and political philosophy": [ - "Ethics and Political Philosophy" - ], - "aquaculture, fisheries, and fish science": [ - "Aquaculture and Fisheries Life Sciences" - ], - "medical biomathematics and biometrics": [ - "Medical Biomathematics and Biometrics" - ], - "desert ecology": [ - "Desert Ecology" - ], - "dance": [ - "Dance" - ], - "family law": [ - "Family Law" - ], - "respiratory medicine": [ - "Medicine and Health Sciences" - ], - "tax law": [ - "Tax Law" - ], - "clinical and medical social work": [ - "Clinical and Medical Social Work" - ], - "american politics": [ - "American Politics" - ], - "e14": [ - "Macroeconomics" - ], - "politics and social change": [ - "Politics and Social Change" - ], - "categorical data analysis": [ - "Categorical Data Analysis" - ], - "genetic processes": [ - "Genetic Processes" - ], - "food science and technology": [ - "Food Science" - ], - "h32": [ - "Public Economics" - ], - "policy design, analysis, and evaluation": [ - "Policy Design, Analysis, and Evaluation" - ], - "cmp-lg": [ - "Physics", - "Programming Languages and Compilers" - ], - "other law": [ - "Other Law" - ], - "health communication": [ - "Health Communication" - ], - "disability and equity in education": [ - "Disability and Equity in Education" - ], - "gaming and casino operations management": [ - "Gaming and Casino Operations Management" - ], - "social psychology and interaction": [ - "Social Psychology and Interaction" - ], - "r30": [ - "Regional Economics" - ], - "c71": [ - "Economic Theory" - ], - "dg-ga": [ - "Geometry and Topology" - ], - "m37": [ - "Marketing" - ], - "veterinary pathology and pathobiology": [ - "Veterinary Pathology and Pathobiology" - ], - "b5": [ - "Economics" - ], - "f17": [ - "International Economics" - ], - "wood science and pulp, paper technology": [ - "Wood Science and Pulp, Paper Technology" - ], - "materials chemistry": [ - "Materials Chemistry" - ], - "appalachian studies": [ - "Appalachian Studies" - ], - "d52": [ - "Political Economy" - ], - "l71": [ - "Industrial Organization" - ], - "n86": [ - "Economic History" - ], - "neuroendocrinology": [ - "Neuroscience and Neurobiology" - ], - "n55": [ - "Economic History" - ], - "h74": [ - "Public Economics" - ], - "machine learning": [ - "Computer Sciences" - ], - "social welfare": [ - "Social Welfare" - ], - "c49": [ - "Econometrics" - ], - "d33": [ - "Income Distribution" - ], - "o43": [ - "Growth and Development" - ], - "acc-phys": [ - "Physics" - ], - "film and media studies": [ - "Film and Media Studies" - ], - "creative writing": [ - "Creative Writing" - ], - "r50": [ - "Regional Economics" - ], - "near and middle eastern studies": [ - "Near and Middle Eastern Studies" - ], - "h75": [ - "Public Economics", - "Health Economics", - "Education Economics" - ], - "l98": [ - "Industrial Organization" - ], - "nursing midwifery": [ - "Nursing Midwifery" - ], - "american literature": [ - "American Literature" - ], - "inorganic chemistry": [ - "Inorganic Chemistry" - ], - "l14": [ - "Industrial Organization" - ], - "e39": [ - "Macroeconomics" - ], - "e4": [ - "Macroeconomics" - ], - "archival science": [ - "Archival Science" - ], - "b25": [ - "Economic History" - ], - "heterocyclic compounds": [ - "Heterocyclic Compounds" - ], - "human and clinical nutrition": [ - "Human and Clinical Nutrition" - ], - "urban studies": [ - "Urban Studies" - ], - "archaea": [ - 
"Archaea" - ], - "c02": [ - "Economics" - ], - "other spanish and portuguese language and literature": [ - "Other Spanish and Portuguese Language and Literature" - ], - "d8": [ - "Political Economy" - ], - "f18": [ - "International Economics" - ], - "transportation": [ - "Transportation" - ], - "g28": [ - "Finance" - ], - "l74": [ - "Industrial Organization" - ], - "n37": [ - "Economic History" - ], - "real estate": [ - "Real Estate" - ], - "f19": [ - "International Economics" - ], - "cell biology": [ - "Cell Biology" - ], - "literature in english, anglophone outside british isles and north america": [ - "Literature in English, Anglophone outside British Isles and North America" - ], - "other linguistics": [ - "Other Linguistics" - ], - "pharmacy administration, policy and regulation": [ - "Pharmacy Administration, Policy and Regulation" - ], - "rehabilitation and therapy": [ - "Rehabilitation and Therapy" - ], - "behavior and behavior mechanisms": [ - "Behavior and Behavior Mechanisms" - ], - "e29": [ - "Macroeconomics" - ], - "speech pathology and audiology": [ - "Speech Pathology and Audiology" - ], - "cognitive neuroscience": [ - "Cognitive Neuroscience" - ], - "d62": [ - "Income Distribution" - ], - "management information systems": [ - "Management Information Systems" - ], - "cond-mat.mtrl-sci": [ - "Condensed Matter Physics" - ], - "human rights law": [ - "Human Rights Law" - ], - "immune system diseases": [ - "Immune System Diseases" - ], - "labor economics": [ - "Labor Economics" - ], - "quantum physics": [ - "Quantum Physics" - ], - "hematology": [ - "Hematology" - ], - "coupled natural and human systems": [ - "Ecology and Evolutionary Biology" - ], - "family medicine": [ - "Family Medicine" - ], - "history of gender": [ - "History of Gender" - ], - "number theory": [ - "Number Theory" - ], - "other microbiology": [ - "Other Microbiology" - ], - "n54": [ - "Economic History" - ], - "math-ph": [ - "Physics", - "Mathematics" - ], - "international and comparative education": [ - "International and Comparative Education" - ], - "broadcast and video studies": [ - "Broadcast and Video Studies" - ], - "physics.geo-ph": [ - "Geophysics and Seismology" - ], - "other classics": [ - "Other Classics" - ], - "c99": [ - "Economics" - ], - "legal ethics and professional responsibility": [ - "Legal Ethics and Professional Responsibility" - ], - "immunology and infectious disease": [ - "Immunology and Infectious Disease" - ], - "k20": [ - "Law and Economics" - ], - "cognitive behavioral therapy": [ - "Cognitive Behavioral Therapy" - ], - "polymer science": [ - "Polymer Science" - ], - "o25": [ - "Growth and Development" - ], - "l86": [ - "Industrial Organization" - ], - "pacific islands languages and societies": [ - "Pacific Islands Languages and Societies" - ], - "congenital, hereditary, and neonatal diseases and abnormalities": [ - "Congenital, Hereditary, and Neonatal Diseases and Abnormalities" - ], - "supr-con": [ - "Condensed Matter Physics" - ], - "carbohydrates": [ - "Carbohydrates" - ], - "nonprofit organizations law": [ - "Nonprofit Organizations Law" - ], - "l96": [ - "Industrial Organization" - ], - "eastern european studies": [ - "Eastern European Studies" - ], - "p30": [ - "Political Economy" - ], - "neoplasms": [ - "Neoplasms" - ], - "law": [ - "Law" - ], - "l50": [ - "Industrial Organization" - ], - "q47": [ - "Agricultural and Resource Economics" - ], - "immigration law": [ - "Immigration Law" - ], - "c54": [ - "Econometrics" - ], - "n24": [ - "Economic History" - ], - "j7": [ - 
"Labor Economics" - ], - "ocean engineering": [ - "Ocean Engineering" - ], - "f60": [ - "International Economics" - ], - "other pharmacy and pharmaceutical sciences": [ - "Other Pharmacy and Pharmaceutical Sciences" - ], - "multicultural psychology": [ - "Multicultural Psychology" - ], - "l7": [ - "Industrial Organization" - ], - "human ecology": [ - "Human Ecology" - ], - "nonprofit administration and management": [ - "Nonprofit Administration and Management" - ], - "d78": [ - "Political Economy" - ], - "l39": [ - "Industrial Organization" - ], - "biological engineering": [ - "Biological Engineering" - ], - "christian denominations and sects": [ - "Christian Denominations and Sects" - ], - "i26": [ - "Education Economics" - ], - "home economics": [ - "Home Economics" - ], - "disability law": [ - "Disability Law" - ], - "cs": [ - "Computer Sciences" - ], - "m12": [ - "Business Administration, Management, and Operations" - ], - "c70": [ - "Economic Theory" - ], - "marine biology": [ - "Marine Biology" - ], - "q53": [ - "Agricultural and Resource Economics" - ], - "m21": [ - "Business Administration, Management, and Operations" - ], - "h56": [ - "Public Economics" - ], - "liturgy and worship": [ - "Liturgy and Worship" - ], - "respiratory system": [ - "Respiratory System" - ], - "d21": [ - "Industrial Organization" - ], - "nucl-ex": [ - "Nuclear" - ], - "h63": [ - "Public Economics" - ], - "gaming law": [ - "Gaming Law" - ], - "health information technology": [ - "Health Information Technology" - ], - "systems and integrative engineering": [ - "Systems and Integrative Engineering" - ], - "human computer interaction": [ - "Graphics and Human Computer Interfaces" - ], - "stratigraphy": [ - "Stratigraphy" - ], - "h49": [ - "Public Economics" - ], - "o15": [ - "Growth and Development" - ], - "ethical issues": [ - "Ethics and Political Philosophy" - ], - "aeronautical vehicles": [ - "Aeronautical Vehicles" - ], - "linguistics": [ - "Linguistics" - ], - "organization development": [ - "Organization Development" - ], - "music theory": [ - "Music Theory" - ], - "disease modeling": [ - "Disease Modeling" - ], - "toxicology": [ - "Toxicology" - ], - "stomatognathic system": [ - "Stomatognathic System" - ], - "nursing administration": [ - "Nursing Administration" - ], - "o13": [ - "Growth and Development" - ], - "g3": [ - "Finance" - ], - "h77": [ - "Public Economics" - ], - "h19": [ - "Public Economics" - ], - "l81": [ - "Industrial Organization" - ], - "integrative biology": [ - "Integrative Biology" - ], - "p39": [ - "Political Economy" - ], - "g34": [ - "Finance" - ], - "c14": [ - "Econometrics" - ], - "o30": [ - "Growth and Development" - ], - "other statistics and probability": [ - "Other Statistics and Probability" - ], - "atom-ph": [ - "Atomic, Molecular and Optical Physics" - ], - "leadership studies": [ - "Leadership Studies" - ], - "p50": [ - "Political Economy" - ], - "y91": [ - "Economics" - ], - "labor history": [ - "Labor History" - ], - "n43": [ - "Economic History" - ], - "q-alg": [ - "Algebra" - ], - "brain imaging": [ - "Neuroscience and Neurobiology" - ], - "biosecurity": [ - "Biosecurity" - ], - "food processing": [ - "Food Processing" - ], - "g38": [ - "Finance" - ], - "maintenance technology": [ - "Maintenance Technology" - ], - "artificial intelligence": [ - "Artificial Intelligence and Robotics" - ], - "a20": [ - "Education" - ], - "unions": [ - "Unions" - ], - "h8": [ - "Public Economics" - ], - "c91": [ - "Economics" - ], - "taxonomy": [ - "Biology" - ], - "d3": [ - "Income 
Distribution" - ], - "other architecture": [ - "Other Architecture" - ], - "y50": [ - "Economics" - ], - "behavioral neuroscience": [ - "Neuroscience and Neurobiology" - ], - "k33": [ - "Law and Economics" - ], - "dentistry": [ - "Dentistry" - ], - "literature in english, north america, ethnic and cultural minority": [ - "Literature in English, North America, Ethnic and Cultural Minority" - ], - "y90": [ - "Economics" - ], - "d60": [ - "Income Distribution" - ], - "j38": [ - "Labor Economics" - ], - "r5": [ - "Regional Economics" - ], - "c69": [ - "Economic Theory" - ], - "p24": [ - "Political Economy" - ], - "defense and security studies": [ - "Defense and Security Studies" - ], - "physiology": [ - "Physiology" - ], - "chao-dyn": [ - "Physics" - ], - "b11": [ - "Economic History" - ], - "natural resources management and policy": [ - "Natural Resources Management and Policy" - ], - "securities law": [ - "Securities Law" - ], - "paleontology": [ - "Paleontology" - ], - "dental materials": [ - "Dental Materials" - ], - "taxation-federal": [ - "Taxation-Federal" - ], - "d31": [ - "Income Distribution" - ], - "psychological phenomena and processes": [ - "Psychological Phenomena and Processes" - ], - "m00": [ - "Business Administration, Management, and Operations" - ], - "b30": [ - "Economic History" - ], - "physics.flu-dyn": [ - "Fluid Dynamics" - ], - "l23": [ - "Industrial Organization" - ], - "o17": [ - "Growth and Development" - ], - "other chemical engineering": [ - "Other Chemical Engineering" - ], - "o10": [ - "Growth and Development" - ], - "econometrics": [ - "Econometrics" - ], - "medical biophysics": [ - "Medical Biophysics" - ], - "physics.pop-ph": [ - "Physics" - ], - "r00": [ - "Regional Economics" - ], - "social influence and political communication": [ - "Social Influence and Political Communication" - ], - "american art and architecture": [ - "American Art and Architecture" - ], - "historical linguistics": [ - "Comparative and Historical Linguistics" - ], - "behavioral biology": [ - "Behavior and Ethology" - ], - "j4": [ - "Labor Economics" - ], - "forest sciences": [ - "Forest Sciences" - ], - "legal profession": [ - "Legal Profession" - ], - "historic preservation and conservation": [ - "Historic Preservation and Conservation" - ], - "p20": [ - "Political Economy" - ], - "h82": [ - "Public Economics" - ], - "musculoskeletal, neural, and ocular physiology": [ - "Musculoskeletal, Neural, and Ocular Physiology" - ], - "j63": [ - "Labor Economics" - ], - "sociobiology": [ - "Biology" - ], - "b20": [ - "Economic History" - ], - "r2": [ - "Regional Economics" - ], - "fashion design": [ - "Fashion Design" - ], - "c51": [ - "Econometrics" - ], - "game design": [ - "Game Design" - ], - "legislation": [ - "Legislation" - ], - "latin american studies": [ - "Latin American Studies" - ], - "p22": [ - "Political Economy" - ], - "population biology": [ - "Population Biology" - ], - "aquaculture and fisheries life sciences": [ - "Aquaculture and Fisheries Life Sciences" - ], - "optometry": [ - "Optometry" - ], - "g31": [ - "Finance" - ], - "l6": [ - "Industrial Organization" - ], - "h25": [ - "Public Economics" - ], - "j61": [ - "Labor Economics" - ], - "pre-elementary, early childhood, kindergarten teacher education": [ - "Pre-Elementary, Early Childhood, Kindergarten Teacher Education" - ], - "numerical analysis and computation": [ - "Numerical Analysis and Computation" - ], - "c29": [ - "Economic Theory" - ], - "j1": [ - "Labor Economics" - ], - "l5": [ - "Industrial Organization" - ], - 
"operations research, systems engineering and industrial engineering": [ - "Operations Research, Systems Engineering and Industrial Engineering" - ], - "neurology": [ - "Neurology" - ], - "special education and teaching": [ - "Special Education and Teaching" - ], - "torts": [ - "Torts" - ], - "complex fluids": [ - "Complex Fluids" - ], - "n65": [ - "Economic History" - ], - "dynamical systems": [ - "Dynamical Systems" - ], - "d86": [ - "Economic Theory" - ], - "history of the pacific islands": [ - "History of the Pacific Islands" - ], - "music education": [ - "Music Education" - ], - "psychiatric and mental health nursing": [ - "Psychiatric and Mental Health Nursing" - ], - "k34": [ - "Law and Economics" - ], - "sheep and goat science": [ - "Sheep and Goat Science" - ], - "food security": [ - "Food Security" - ], - "trauma": [ - "Trauma" - ], - "bayes-an": [ - "Statistics and Probability" - ], - "caribbean languages and societies": [ - "Caribbean Languages and Societies" - ], - "pharmaceutics and drug design": [ - "Pharmaceutics and Drug Design" - ], - "other civil and environmental engineering": [ - "Other Civil and Environmental Engineering" - ], - "j50": [ - "Labor Economics" - ], - "composition": [ - "Composition" - ], - "y2": [ - "Economics" - ], - "k36": [ - "Law and Economics" - ], - "other communication": [ - "Other Communication" - ], - "mining engineering": [ - "Mining Engineering" - ], - "d39": [ - "Income Distribution" - ], - "k00": [ - "Law and Economics" - ], - "physics.atm-clus": [ - "Atomic, Molecular and Optical Physics" - ], - "paleobiology": [ - "Paleobiology" - ], - "critical and cultural studies": [ - "Critical and Cultural Studies" - ], - "communication": [ - "Communication" - ], - "e10": [ - "Macroeconomics" - ], - "developmental biology": [ - "Developmental Biology" - ], - "i3": [ - "Income Distribution" - ], - "quantitative psychology": [ - "Quantitative Psychology" - ], - "cardiovascular diseases": [ - "Cardiovascular Diseases" - ], - "q57": [ - "Agricultural and Resource Economics" - ], - "christianity": [ - "Christianity" - ], - "applied cognitive psychology": [ - "Cognitive Psychology" - ], - "g17": [ - "Finance" - ], - "data storage systems": [ - "Data Storage Systems" - ], - "f1": [ - "International Economics" - ], - "g00": [ - "Finance" - ], - "genetics and genomics": [ - "Genetics and Genomics" - ], - "european languages and societies": [ - "European Languages and Societies" - ], - "regional sociology": [ - "Regional Sociology" - ], - "process control and systems": [ - "Process Control and Systems" - ], - "d02": [ - "Economics" - ], - "f50": [ - "Political Economy", - "International Economics" - ], - "h10": [ - "Public Economics" - ], - "b26": [ - "Economic History" - ], - "food science": [ - "Food Science" - ], - "other veterinary medicine": [ - "Other Veterinary Medicine" - ], - "engineering": [ - "Engineering" - ], - "j81": [ - "Labor Economics" - ], - "virology": [ - "Virology" - ], - "j41": [ - "Labor Economics" - ], - "business law, public responsibility, and ethics business": [ - "Business Law, Public Responsibility, and Ethics Business" - ], - "e11": [ - "Macroeconomics" - ], - "business organizations law": [ - "Business Organizations Law" - ], - "j59": [ - "Labor Economics" - ], - "physiological psychology": [ - "Psychology" - ], - "j30": [ - "Labor Economics" - ], - "k41": [ - "Law and Economics" - ], - "y10": [ - "Economics" - ], - "oncology": [ - "Oncology" - ], - "z12": [ - "Economics" - ], - "g35": [ - "Finance" - ], - "slavic languages and 
societies": [ - "Slavic Languages and Societies" - ], - "j13": [ - "Labor Economics" - ], - "cataloging and metadata": [ - "Cataloging and Metadata" - ], - "other medical specialties": [ - "Other Medical Specialties" - ], - "geophysics and seismology": [ - "Geophysics and Seismology" - ], - "criminology and criminal justice": [ - "Criminology and Criminal Justice" - ], - "benefits and compensation": [ - "Benefits and Compensation" - ], - "n73": [ - "Economic History" - ], - "g29": [ - "Finance" - ], - "f53": [ - "Political Economy", - "International Economics" - ], - "r31": [ - "Regional Economics" - ], - "environmental design": [ - "Environmental Design" - ], - "continental philosophy": [ - "Continental Philosophy" - ], - "art therapy": [ - "Art Therapy" - ], - "e60": [ - "Macroeconomics" - ], - "sustainability": [ - "Sustainability" - ], - "plastic surgery": [ - "Plastic Surgery" - ], - "other dentistry": [ - "Other Dentistry" - ], - "natural resource economics": [ - "Natural Resource Economics" - ], - "l29": [ - "Industrial Organization" - ], - "f4": [ - "Macroeconomics", - "International Economics" - ], - "h4": [ - "Public Economics" - ], - "l62": [ - "Industrial Organization" - ], - "peace and conflict": [ - "Peace and Conflict" - ], - "behavioral disciplines and activities": [ - "Behavioral Disciplines and Activities" - ], - "q30": [ - "Agricultural and Resource Economics" - ], - "p32": [ - "Political Economy" - ], - "adult and continuing education administration": [ - "Adult and Continuing Education Administration" - ], - "chemical and pharmacologic phenomena": [ - "Chemical and Pharmacologic Phenomena" - ], - "a12": [ - "Economics" - ], - "j52": [ - "Labor Economics" - ], - "electrical and electronics": [ - "Electrical and Electronics" - ], - "h41": [ - "Public Economics" - ], - "medical education": [ - "Medical Education" - ], - "comp-gas": [ - "Physics" - ], - "american popular culture": [ - "American Popular Culture" - ], - "diplomatic history": [ - "Diplomatic History" - ], - "law and philosophy": [ - "Law and Philosophy" - ], - "other italian language and literature": [ - "Other Italian Language and Literature" - ], - "animal diseases": [ - "Animal Diseases" - ], - "other legal studies": [ - "Other Legal Studies" - ], - "o22": [ - "Growth and Development" - ], - "vision science": [ - "Vision Science" - ], - "natural resources law": [ - "Natural Resources Law" - ], - "periodontics and periodontology": [ - "Periodontics and Periodontology" - ], - "dairy science": [ - "Dairy Science" - ], - "sense organs": [ - "Sense Organs" - ], - "hospitality administration and management": [ - "Hospitality Administration and Management" - ], - "p14": [ - "Political Economy" - ], - "applied behavior analysis": [ - "Applied Behavior Analysis" - ], - "health economics": [ - "Health Economics" - ], - "infectious disease": [ - "Infectious Disease" - ], - "criminal procedure": [ - "Criminal Procedure" - ], - "information literacy": [ - "Information Literacy" - ], - "fluids and secretions": [ - "Fluids and Secretions" - ], - "industrial organization": [ - "Industrial Organization" - ], - "nature and society relations": [ - "Nature and Society Relations" - ], - "business": [ - "Business" - ], - "family, life course, and society": [ - "Family, Life Course, and Society" - ], - "public history": [ - "Public History" - ], - "h20": [ - "Public Economics" - ], - "f62": [ - "Macroeconomics", - "International Economics" - ], - "c10": [ - "Econometrics" - ], - "q2": [ - "Agricultural and Resource Economics" - 
], - "l9": [ - "Industrial Organization" - ], - "j14": [ - "Labor Economics" - ], - "gr-qc": [ - "Cosmology, Relativity, and Gravity" - ], - "c36": [ - "Economic Theory" - ], - "chemistry": [ - "Chemistry" - ], - "social policy": [ - "Social Policy" - ], - "q42": [ - "Agricultural and Resource Economics" - ], - "e20": [ - "Macroeconomics" - ], - "second amendment": [ - "Second Amendment" - ], - "k37": [ - "Law and Economics" - ], - "property law and real estate": [ - "Property Law and Real Estate" - ], - "z19": [ - "Economics" - ], - "other ecology and evolutionary biology": [ - "Other Ecology and Evolutionary Biology" - ], - "computer science": [ - "Computer Sciences" - ], - "physics.ins-det": [ - "Instrumentation" - ], - "q25": [ - "Agricultural and Resource Economics" - ], - "science policy": [ - "Science and Technology Law" - ], - "d41": [ - "Political Economy" - ], - "e03": [ - "Macroeconomics", - "Behavioral Economics" - ], - "other biomedical engineering and bioengineering": [ - "Other Biomedical Engineering and Bioengineering" - ], - "y6": [ - "Economics" - ], - "risk analysis": [ - "Risk Analysis" - ], - "inequality and stratification": [ - "Inequality and Stratification" - ], - "other engineering science and materials": [ - "Other Engineering Science and Materials" - ], - "o32": [ - "Growth and Development" - ], - "poetry": [ - "Poetry" - ], - "hep-th": [ - "Physics", - "Elementary Particles and Fields and String Theory" - ], - "d7": [ - "Political Economy" - ], - "comparative psychology": [ - "Psychology" - ], - "hydrology": [ - "Hydrology" - ], - "q01": [ - "Agricultural and Resource Economics" - ], - "computational engineering": [ - "Computational Engineering" - ], - "d04": [ - "Economic Policy" - ], - "n94": [ - "Economic History" - ], - "neurophysiology": [ - "Neurology" - ], - "first amendment": [ - "First Amendment" - ], - "organismal biological physiology": [ - "Organismal Biological Physiology" - ], - "j54": [ - "Labor Economics" - ], - "r53": [ - "Regional Economics" - ], - "n87": [ - "Economic History" - ], - "counseling": [ - "Counseling" - ], - "bioresource and agricultural engineering": [ - "Bioresource and Agricultural Engineering" - ], - "c22": [ - "Economic Theory" - ], - "c32": [ - "Economic Theory" - ], - "h57": [ - "Public Economics" - ], - "recreation business": [ - "Recreation Business" - ], - "veterinary preventive medicine, epidemiology, and public health": [ - "Veterinary Preventive Medicine, Epidemiology, and Public Health" - ], - "italian language and literature": [ - "Italian Language and Literature" - ], - "special functions": [ - "Special Functions" - ], - "m13": [ - "Business Administration, Management, and Operations" - ], - "d82": [ - "Political Economy" - ], - "economic policy": [ - "Economic Policy" - ], - "taxation-federal estate and gift": [ - "Taxation-Federal Estate and Gift" - ], - "j60": [ - "Labor Economics" - ], - "n47": [ - "Economic History" - ], - "equipment and supplies": [ - "Equipment and Supplies" - ], - "z11": [ - "Economics" - ], - "q35": [ - "Agricultural and Resource Economics" - ], - "q32": [ - "Agricultural and Resource Economics" - ], - "m50": [ - "Economics" - ], - "latina/o studies": [ - "Latina/o Studies" - ], - "international and intercultural communication": [ - "International and Intercultural Communication" - ], - "international and community nutrition": [ - "International and Community Nutrition" - ], - "podiatry": [ - "Podiatry" - ], - "g19": [ - "Finance" - ], - "genetic phenomena": [ - "Genetic Phenomena" - ], - 
"e13": [ - "Macroeconomics" - ], - "d84": [ - "Political Economy" - ], - "l83": [ - "Industrial Organization" - ], - "embryonic structures": [ - "Embryonic Structures" - ], - "astro-ph": [ - "Astrophysics and Astronomy" - ], - "cultural resource management and policy analysis": [ - "Cultural Resource Management and Policy Analysis" - ], - "o16": [ - "Growth and Development" - ], - "e47": [ - "Macroeconomics" - ], - "industrial engineering": [ - "Industrial Engineering" - ], - "sports management": [ - "Sports Management" - ], - "analysis": [ - "Analysis" - ], - "p47": [ - "Political Economy" - ], - "instructional media design": [ - "Instructional Media Design" - ], - "h61": [ - "Public Economics" - ], - "rule of law": [ - "Rule of Law" - ], - "medicine and health": [ - "Medicine and Health" - ], - "occupational health and industrial hygiene": [ - "Occupational Health and Industrial Hygiene" - ], - "p19": [ - "Political Economy" - ], - "q13": [ - "Agricultural and Resource Economics" - ], - "environmental indicators and impact assessment": [ - "Environmental Indicators and Impact Assessment" - ], - "animals": [ - "Animals" - ], - "n71": [ - "Economic History" - ], - "z13": [ - "Economics" - ], - "l90": [ - "Industrial Organization" - ], - "b16": [ - "Economic History" - ], - "atmospheric sciences": [ - "Atmospheric Sciences" - ], - "gifted education": [ - "Gifted Education" - ], - "environmental health life sciences": [ - "Environmental Health Life Sciences" - ], - "d61": [ - "Income Distribution" - ], - "management sciences and quantitative methods": [ - "Management Sciences and Quantitative Methods" - ], - "phonetics and phonology": [ - "Phonetics and Phonology" - ], - "m41": [ - "Accounting" - ], - "m31": [ - "Marketing" - ], - "mechanical engineering": [ - "Mechanical Engineering" - ], - "policy history, theory, and methosd": [ - "Policy History, Theory, and Methosd" - ], - "l00": [ - "Industrial Organization" - ], - "atomic, molecular and optical physics": [ - "Atomic, Molecular and Optical Physics" - ], - "metaphysics": [ - "Metaphysics" - ], - "fiber, textile, and weaving arts": [ - "Fiber, Textile, and Weaving Arts" - ], - "linguistic anthropology": [ - "Linguistic Anthropology" - ], - "secondary education and teaching": [ - "Secondary Education and Teaching" - ], - "operational research": [ - "Operational Research" - ], - "o42": [ - "Growth and Development" - ], - "o5": [ - "Growth and Development" - ], - "l33": [ - "Industrial Organization" - ], - "l13": [ - "Industrial Organization" - ], - "c53": [ - "Econometrics" - ], - "pulmonology": [ - "Pulmonology" - ], - "m40": [ - "Accounting" - ], - "community college leadership": [ - "Community College Leadership" - ], - "organic chemistry": [ - "Organic Chemistry" - ], - "j6": [ - "Labor Economics" - ], - "amino acids, peptides, and proteins": [ - "Amino Acids, Peptides, and Proteins" - ], - "agricultural and resource economics": [ - "Agricultural and Resource Economics" - ], - "c8": [ - "Economics" - ], - "systems engineering and multidisciplinary design optimization": [ - "Systems Engineering and Multidisciplinary Design Optimization" - ], - "f40": [ - "Macroeconomics", - "International Economics" - ], - "structural biology": [ - "Structural Biology" - ], - "membrane science": [ - "Membrane Science" - ], - "software engineering": [ - "Software Engineering" - ], - "z2": [ - "Economics" - ], - "o40": [ - "Growth and Development" - ], - "h44": [ - "Public Economics" - ], - "o49": [ - "Growth and Development" - ], - "l69": [ - "Industrial 
Organization" - ], - "r20": [ - "Regional Economics" - ], - "l78": [ - "Industrial Organization" - ], - "k21": [ - "Law and Economics" - ], - "bioinformatics": [ - "Bioinformatics" - ], - "n5": [ - "Economic History" - ], - "medical biotechnology": [ - "Medical Biotechnology" - ], - "l84": [ - "Industrial Organization" - ], - "law and race": [ - "Law and Race" - ], - "f00": [ - "International Economics" - ], - "d19": [ - "Economics" - ], - "mycology": [ - "Biology" - ], - "immunoprophylaxis and therapy": [ - "Immunoprophylaxis and Therapy" - ], - "r4": [ - "Regional Economics" - ], - "other chemistry": [ - "Other Chemistry" - ], - "bacterial infections and mycoses": [ - "Bacterial Infections and Mycoses" - ], - "signal processing": [ - "Signal Processing" - ], - "aerospace engineering": [ - "Aerospace Engineering" - ], - "industrial technology": [ - "Industrial Technology" - ], - "j46": [ - "Labor Economics" - ], - "q29": [ - "Agricultural and Resource Economics" - ], - "international and area studies": [ - "International and Area Studies" - ], - "oral history": [ - "Oral History" - ], - "algae": [ - "Algae" - ], - "b15": [ - "Economic History" - ], - "visual studies": [ - "Visual Studies" - ], - "l12": [ - "Industrial Organization" - ], - "electro-mechanical systems": [ - "Electro-Mechanical Systems" - ], - "q02": [ - "Agricultural and Resource Economics" - ], - "child psychology": [ - "Child Psychology" - ], - "recreation, parks and tourism administration": [ - "Recreation, Parks and Tourism Administration" - ], - "b24": [ - "Economic History" - ], - "environmental microbiology and microbial ecology life sciences": [ - "Environmental Microbiology and Microbial Ecology Life Sciences" - ], - "latin american history": [ - "Latin American History" - ], - "advertising and promotion management": [ - "Advertising and Promotion Management" - ], - "h81": [ - "Public Economics" - ], - "b29": [ - "Economic History" - ], - "n9": [ - "Economic History" - ], - "j51": [ - "Labor Economics" - ], - "l67": [ - "Industrial Organization" - ], - "domestic and intimate partner violence": [ - "Domestic and Intimate Partner Violence" - ], - "south and southeast asian languages and societies": [ - "South and Southeast Asian Languages and Societies" - ], - "corporate finance": [ - "Corporate Finance" - ], - "legal studies": [ - "Legal Studies" - ], - "near eastern languages and societies": [ - "Near Eastern Languages and Societies" - ], - "y30": [ - "Economics" - ], - "z33": [ - "Economics" - ], - "dramatic literature, criticism and theory": [ - "Dramatic Literature, Criticism and Theory" - ], - "social simulation": [ - "Social Psychology" - ], - "pathogenic microbiology": [ - "Pathogenic Microbiology" - ], - "physiological processes": [ - "Physiological Processes" - ], - "other computer engineering": [ - "Other Computer Engineering" - ], - "natural law": [ - "Natural Law" - ], - "o52": [ - "Growth and Development" - ], - "interdisciplinary arts and media": [ - "Interdisciplinary Arts and Media" - ], - "the sun and the solar system": [ - "The Sun and the Solar System" - ], - "other music": [ - "Other Music" - ], - "pharmacology, toxicology and environmental health": [ - "Pharmacology, Toxicology and Environmental Health" - ], - "other public health": [ - "Other Public Health" - ], - "d30": [ - "Income Distribution" - ], - "biochemical phenomena, metabolism, and nutrition": [ - "Biochemical Phenomena, Metabolism, and Nutrition" - ], - "modern languages": [ - "Modern Languages" - ], - "art education": [ - "Art 
Education" - ], - "d03": [ - "Behavioral Economics" - ], - "elementary education and teaching": [ - "Elementary Education and Teaching" - ], - "d69": [ - "Income Distribution" - ], - "physics": [ - "Physics" - ], - "comparative and historical linguistics": [ - "Comparative and Historical Linguistics" - ], - "television": [ - "Television" - ], - "bacteria": [ - "Bacteria" - ], - "korean studies": [ - "Korean Studies" - ], - "navigation, guidance, control and dynamics": [ - "Navigation, Guidance, Control and Dynamics" - ], - "cardiology": [ - "Cardiology" - ], - "ethnic studies": [ - "Ethnic Studies" - ], - "other earth sciences": [ - "Other Earth Sciences" - ], - "l80": [ - "Industrial Organization" - ], - "plant science": [ - "Plant Sciences" - ], - "comparative methodologies and theories": [ - "Comparative Methodologies and Theories" - ], - "rhetoric": [ - "Rhetoric" - ], - "asian studies": [ - "Asian Studies" - ], - "n21": [ - "Economic History" - ], - "sociology of religion": [ - "Sociology of Religion" - ], - "f49": [ - "Macroeconomics", - "International Economics" - ], - "law of the sea": [ - "Law of the Sea" - ], - "f20": [ - "International Economics" - ], - "transport phenomena": [ - "Transport Phenomena" - ], - "history of religion": [ - "History of Religion" - ], - "geometry and topology": [ - "Geometry and Topology" - ], - "h87": [ - "Public Economics" - ], - "p33": [ - "Political Economy" - ], - "j49": [ - "Labor Economics" - ], - "perioperative, operating room and surgical nursing": [ - "Perioperative, Operating Room and Surgical Nursing" - ], - "r14": [ - "Regional Economics" - ], - "f34": [ - "International Economics" - ], - "q26": [ - "Agricultural and Resource Economics" - ], - "p16": [ - "Political Economy" - ], - "l87": [ - "Industrial Organization" - ], - "e30": [ - "Macroeconomics" - ], - "p10": [ - "Political Economy" - ], - "q10": [ - "Agricultural and Resource Economics" - ], - "l79": [ - "Industrial Organization" - ], - "structural engineering": [ - "Structural Engineering" - ], - "p11": [ - "Political Economy" - ], - "l20": [ - "Industrial Organization" - ], - "catholic studies": [ - "Catholic Studies" - ], - "pediatric dentistry and pedodontics": [ - "Pediatric Dentistry and Pedodontics" - ], - "d70": [ - "Political Economy" - ], - "j21": [ - "Labor Economics" - ], - "political economy": [ - "Political Economy" - ], - "medieval studies": [ - "Medieval Studies" - ], - "e51": [ - "Macroeconomics" - ], - "c52": [ - "Econometrics" - ], - "j71": [ - "Labor Economics" - ], - "history of christianity": [ - "History of Christianity" - ], - "art direction": [ - "Art Direction" - ], - "structures and materials": [ - "Structures and Materials" - ], - "gender, race, sexuality, and ethnicity in communication": [ - "Gender, Race, Sexuality, and Ethnicity in Communication" - ], - "other electrical and computer engineering": [ - "Other Electrical and Computer Engineering" - ], - "p31": [ - "Political Economy" - ], - "native american studies": [ - "Indigenous Studies" - ], - "e37": [ - "Macroeconomics" - ], - "j88": [ - "Labor Economics" - ], - "q22": [ - "Agricultural and Resource Economics" - ], - "urban education": [ - "Urban Education" - ], - "dental public health and education": [ - "Dental Public Health and Education" - ], - "n00": [ - "Economic History" - ], - "f13": [ - "International Economics" - ], - "other operations research, systems engineering and industrial engineering": [ - "Other Operations Research, Systems Engineering and Industrial Engineering" - ], - 
"scandinavian studies": [ - "Scandinavian Studies" - ], - "veterinary medicine": [ - "Veterinary Medicine" - ], - "o23": [ - "Growth and Development" - ], - "entertainment, arts, and sports law": [ - "Entertainment, Arts, and Sports Law" - ], - "behavioral neurobiology": [ - "Behavioral Neurobiology" - ], - "n11": [ - "Economic History" - ], - "m55": [ - "Economics" - ], - "q1": [ - "Agricultural and Resource Economics" - ], - "i28": [ - "Education Economics" - ], - "c11": [ - "Econometrics" - ], - "medical nutrition": [ - "Medical Nutrition" - ], - "n53": [ - "Economic History" - ], - "c33": [ - "Economic Theory" - ], - "q37": [ - "Agricultural and Resource Economics" - ], - "management and operations": [ - "Management and Operations" - ], - "environmental chemistry": [ - "Environmental Chemistry" - ], - "z18": [ - "Economics" - ], - "playwriting": [ - "Playwriting" - ], - "motor control": [ - "Motor Control" - ], - "p45": [ - "Political Economy" - ], - "behavioral economics": [ - "Behavioral Economics" - ], - "f2": [ - "International Economics" - ], - "r33": [ - "Regional Economics" - ], - "m20": [ - "Business Administration, Management, and Operations" - ], - "n36": [ - "Economic History" - ], - "l24": [ - "Industrial Organization" - ], - "n1": [ - "Economic History" - ], - "school psychology": [ - "School Psychology" - ], - "c6": [ - "Economic Theory" - ], - "endodontics and endodontology": [ - "Endodontics and Endodontology" - ], - "e16": [ - "Macroeconomics" - ], - "c15": [ - "Econometrics" - ], - "humane education": [ - "Humane Education" - ], - "y80": [ - "Economics" - ], - "asian history": [ - "Asian History" - ], - "c4": [ - "Econometrics" - ], - "h83": [ - "Public Economics" - ], - "molecular, genetic, and biochemical nutrition": [ - "Molecular, Genetic, and Biochemical Nutrition" - ], - "movement and mind-body therapies": [ - "Movement and Mind-Body Therapies" - ], - "physiotherapy": [ - "Physiotherapy" - ], - "j80": [ - "Labor Economics" - ], - "c78": [ - "Economic Theory" - ], - "emergency and critical care": [ - "Emergency Medicine" - ], - "yiddish language and literature": [ - "Yiddish Language and Literature" - ], - "physics.gen-ph": [ - "Physics" - ], - "c31": [ - "Economic Theory" - ], - "n51": [ - "Economic History" - ], - "c62": [ - "Economic Theory" - ], - "n50": [ - "Economic History" - ], - "j89": [ - "Labor Economics" - ], - "z39": [ - "Economics" - ], - "metallurgy": [ - "Metallurgy" - ], - "african history": [ - "African History" - ], - "architectural history and criticism": [ - "Architectural History and Criticism" - ], - "p52": [ - "Political Economy" - ], - "other education": [ - "Other Education" - ], - "german linguistics": [ - "German Linguistics" - ], - "k4": [ - "Law and Economics" - ], - "p42": [ - "Political Economy" - ], - "m38": [ - "Marketing" - ], - "other geography": [ - "Other Geography" - ], - "other genetics and genomics": [ - "Other Genetics and Genomics" - ], - "n72": [ - "Economic History" - ], - "h23": [ - "Public Economics" - ], - "r15": [ - "Regional Economics" - ], - "d99": [ - "Economics" - ], - "reproductive and urinary physiology": [ - "Reproductive and Urinary Physiology" - ], - "g21": [ - "Finance" - ], - "i29": [ - "Education Economics" - ], - "k11": [ - "Law and Economics" - ], - "microbial physiology": [ - "Microbial Physiology" - ], - "tectonics and structure": [ - "Tectonics and Structure" - ], - "biology and biomimetic materials": [ - "Biology and Biomimetic Materials" - ], - "d74": [ - "Political Economy" - ], - "o4": [ - 
"Growth and Development" - ], - "e26": [ - "Macroeconomics" - ], - "anaesthesiology and pain management": [ - "Anesthesiology" - ], - "counselor education": [ - "Counselor Education" - ], - "hep-ex": [ - "Physics", - "Elementary Particles and Fields and String Theory" - ], - "l1": [ - "Industrial Organization" - ], - "other materials science and engineering": [ - "Other Materials Science and Engineering" - ], - "l11": [ - "Industrial Organization" - ], - "l63": [ - "Industrial Organization" - ], - "i2": [ - "Education Economics" - ], - "molecular genetics": [ - "Molecular Genetics" - ], - "dynamic systems": [ - "Dynamic Systems" - ], - "molecular, cellular, and tissue engineering": [ - "Molecular, Cellular, and Tissue Engineering" - ], - "president/executive department": [ - "President/Executive Department" - ], - "o44": [ - "Growth and Development" - ], - "ancient philosophy": [ - "Ancient Philosophy" - ], - "k31": [ - "Law and Economics" - ], - "military, war, and peace": [ - "Military, War, and Peace" - ], - "n95": [ - "Economic History" - ], - "quantitative, qualitative, comparative, and historical methodologies": [ - "Quantitative, Qualitative, Comparative, and Historical Methodologies" - ], - "art and design": [ - "Art and Design" - ], - "pediatrics": [ - "Pediatrics" - ], - "public affairs": [ - "Public Affairs" - ], - "k15": [ - "Law and Economics" - ], - "enzymes and coenzymes": [ - "Enzymes and Coenzymes" - ], - "l31": [ - "Industrial Organization" - ], - "q15": [ - "Agricultural and Resource Economics" - ], - "transportation law": [ - "Transportation Law" - ], - "health and physical education": [ - "Health and Physical Education" - ], - "other cell and developmental biology": [ - "Other Cell and Developmental Biology" - ], - "place and environment": [ - "Place and Environment" - ], - "e42": [ - "Macroeconomics" - ], - "biomedical engineering and bioengineering": [ - "Biomedical Engineering and Bioengineering" - ], - "h71": [ - "Public Economics" - ], - "esthetics": [ - "Esthetics" - ], - "technology and innovation": [ - "Technology and Innovation" - ], - "special education administration": [ - "Special Education Administration" - ], - "biomaterials": [ - "Biomaterials" - ], - "nutritional epidemiology": [ - "Nutritional Epidemiology" - ], - "ethics in religion": [ - "Ethics in Religion" - ], - "ethics": [ - "Ethics and Political Philosophy" - ], - "cancer biology": [ - "Cancer Biology" - ], - "z20": [ - "Economics" - ], - "political history": [ - "Political History" - ], - "celtic studies": [ - "Celtic Studies" - ], - "h1": [ - "Public Economics" - ], - "expeditionary education": [ - "Expeditionary Education" - ], - "e41": [ - "Macroeconomics" - ], - "d71": [ - "Political Economy" - ], - "chicana/o studies": [ - "Chicana/o Studies" - ], - "logic and foundations": [ - "Logic and Foundations" - ], - "h43": [ - "Public Economics" - ], - "public administration": [ - "Public Administration" - ], - "b49": [ - "Economics" - ], - "economics": [ - "Economics" - ], - "e1": [ - "Macroeconomics" - ], - "morphology": [ - "Morphology" - ], - "genetic structures": [ - "Genetic Structures" - ], - "energy and utilities law": [ - "Energy and Utilities Law" - ], - "ordinary differential equations and applied dynamics": [ - "Ordinary Differential Equations and Applied Dynamics" - ], - "physics.ao-ph": [ - "Physics" - ], - "industrial and product design": [ - "Industrial and Product Design" - ], - "neuroscience": [ - "Neuroscience and Neurobiology" - ], - "science and technology studies": [ - 
"Science and Technology Studies" - ], - "communications law": [ - "Communications Law" - ], - "cond-mat.soft": [ - "Condensed Matter Physics" - ], - "emergency and disaster management": [ - "Emergency and Disaster Management" - ], - "community college education administration": [ - "Community College Education Administration" - ], - "q56": [ - "Agricultural and Resource Economics" - ], - "q38": [ - "Agricultural and Resource Economics" - ], - "public law and legal theory": [ - "Public Law and Legal Theory" - ], - "evolutionary studies": [ - "Ecology and Evolutionary Biology" - ], - "bioimaging and biomedical optics": [ - "Bioimaging and Biomedical Optics" - ], - "l88": [ - "Industrial Organization" - ], - "training and development": [ - "Training and Development" - ], - "investigative techniques": [ - "Investigative Techniques" - ], - "k1": [ - "Law and Economics" - ], - "intellectual property law": [ - "Intellectual Property Law" - ], - "practical theology": [ - "Practical Theology" - ], - "o12": [ - "Growth and Development" - ], - "legal remedies": [ - "Legal Remedies" - ], - "n97": [ - "Economic History" - ], - "printmaking": [ - "Printmaking" - ], - "other psychiatry and psychology": [ - "Other Psychiatry and Psychology" - ], - "biological phenomena, cell phenomena, and immunity": [ - "Biological Phenomena, Cell Phenomena, and Immunity" - ], - "h13": [ - "Public Economics" - ], - "neurochemistry": [ - "Neuroscience and Neurobiology" - ], - "c80": [ - "Economics" - ], - "z31": [ - "Economics" - ], - "cognitive disorders": [ - "Psychiatry and Psychology" - ], - "diseases": [ - "Diseases" - ], - "n81": [ - "Economic History" - ], - "workers' compensation law": [ - "Workers' Compensation Law" - ], - "natural resources and conservation": [ - "Natural Resources and Conservation" - ], - "military history": [ - "Military History" - ], - "immunopathology": [ - "Immunopathology" - ], - "public affairs, public policy and public administration": [ - "Public Affairs, Public Policy and Public Administration" - ], - "eye diseases": [ - "Eye Diseases" - ], - "demography, population, and ecology": [ - "Demography, Population, and Ecology" - ], - "g22": [ - "Finance" - ], - "e62": [ - "Macroeconomics" - ], - "manufacturing": [ - "Manufacturing" - ], - "tissues": [ - "Tissues" - ], - "j2": [ - "Labor Economics" - ], - "l59": [ - "Industrial Organization" - ], - "animal structures": [ - "Animal Structures" - ], - "d53": [ - "Political Economy" - ], - "arts and humanities": [ - "Arts and Humanities" - ], - "science and technology policy": [ - "Science and Technology Policy" - ], - "q18": [ - "Agricultural and Resource Economics" - ], - "o50": [ - "Growth and Development" - ], - "metabolic sciences": [ - "Endocrinology, Diabetes, and Metabolism" - ], - "other engineering": [ - "Other Engineering" - ], - "community health and preventive medicine": [ - "Community Health and Preventive Medicine" - ], - "q34": [ - "Agricultural and Resource Economics" - ], - "plants": [ - "Plants" - ], - "n62": [ - "Economic History" - ], - "l89": [ - "Industrial Organization" - ], - "pharmaceutical preparations": [ - "Pharmaceutical Preparations" - ], - "other anthropology": [ - "Other Anthropology" - ], - "hemic and lymphatic diseases": [ - "Hemic and Lymphatic Diseases" - ], - "government contracts": [ - "Government Contracts" - ], - "o47": [ - "Growth and Development" - ], - "gynecology and obstetrics": [ - "Obstetrics and Gynecology" - ], - "d43": [ - "Political Economy" - ], - "hindu studies": [ - "Hindu Studies" - ], - 
"c82": [ - "Economics" - ], - "computational linguistics": [ - "Computational Linguistics" - ], - "education economics": [ - "Education Economics" - ], - "kinesiology": [ - "Kinesiology", - "Musculoskeletal, Neural, and Ocular Physiology" - ], - "adap-org": [ - "Physics" - ], - "macromolecular substances": [ - "Macromolecular Substances" - ], - "p36": [ - "Political Economy" - ], - "k49": [ - "Law and Economics" - ], - "neurolinguistics": [ - "Psycholinguistics and Neurolinguistics" - ], - "h52": [ - "Public Economics", - "Education Economics" - ], - "climate": [ - "Climate" - ], - "artificial intelligence and robotics": [ - "Artificial Intelligence and Robotics" - ], - "criminal law": [ - "Criminal Law" - ], - "criminology": [ - "Criminology" - ], - "anesthesiology": [ - "Anesthesiology" - ], - "micronesian studies": [ - "Micronesian Studies" - ], - "russian literature": [ - "Russian Literature" - ], - "bankruptcy law": [ - "Bankruptcy Law" - ], - "animal-assisted therapy": [ - "Animal-Assisted Therapy" - ], - "p4": [ - "Political Economy" - ], - "j08": [ - "Labor Economics" - ], - "h40": [ - "Public Economics" - ], - "asian american studies": [ - "Asian American Studies" - ], - "machine vision": [ - "Computer Sciences" - ], - "r38": [ - "Regional Economics" - ], - "otorhinolaryngology": [ - "Otorhinolaryngologic Diseases" - ], - "allergy and clinical immunology": [ - "Allergy and Immunology" - ], - "d49": [ - "Political Economy" - ], - "italian linguistics": [ - "Italian Linguistics" - ], - "history of science, technology, and medicine": [ - "History of Science, Technology, and Medicine" - ], - "d64": [ - "Income Distribution" - ], - "health policy": [ - "Health Policy", - "Health Law and Policy" - ], - "other immunology and infectious disease": [ - "Other Immunology and Infectious Disease" - ], - "dance movement therapy": [ - "Dance Movement Therapy" - ], - "military studies": [ - "Military and Veterans Studies" - ], - "fluid dynamics": [ - "Fluid Dynamics" - ], - "theory, knowledge and science": [ - "Theory, Knowledge and Science" - ], - "b54": [ - "Economics" - ], - "c73": [ - "Economic Theory" - ], - "anthropology": [ - "Anthropology", - "Biological and Physical Anthropology" - ], - "q54": [ - "Agricultural and Resource Economics" - ], - "other food science": [ - "Other Food Science" - ], - "digestive system diseases": [ - "Digestive System Diseases" - ], - "other arts and humanities": [ - "Other Arts and Humanities" - ], - "hormones, hormone substitutes, and hormone antagonists": [ - "Hormones, Hormone Substitutes, and Hormone Antagonists" - ], - "c9": [ - "Economics" - ], - "neuropsychiatry": [ - "Neurology" - ], - "q31": [ - "Agricultural and Resource Economics" - ], - "r49": [ - "Regional Economics" - ], - "polycyclic compounds": [ - "Polycyclic Compounds" - ], - "b31": [ - "Economic History" - ], - "byzantine and modern greek": [ - "Byzantine and Modern Greek" - ], - "language description and documentation": [ - "Language Description and Documentation" - ], - "medical sciences": [ - "Medical Sciences" - ], - "os and networks": [ - "OS and Networks" - ], - "c21": [ - "Economic Theory" - ], - "computer and systems architecture": [ - "Computer and Systems Architecture" - ], - "cellular and molecular physiology": [ - "Cellular and Molecular Physiology" - ], - "g15": [ - "Finance" - ], - "construction law": [ - "Construction Law" - ], - "typological linguistics and linguistic diversity": [ - "Typological Linguistics and Linguistic Diversity" - ], - "insurance": [ - "Insurance" - ], - 
"f30": [ - "International Economics" - ], - "analytical chemistry": [ - "Analytical Chemistry" - ], - "j79": [ - "Labor Economics" - ], - "f61": [ - "International Economics" - ], - "work, economy and organizations": [ - "Work, Economy and Organizations" - ], - "law and gender": [ - "Law and Gender" - ], - "b2": [ - "Economic History" - ], - "female urogenital diseases and pregnancy complications": [ - "Female Urogenital Diseases and Pregnancy Complications" - ], - "c41": [ - "Econometrics" - ], - "dermatology": [ - "Dermatology" - ], - "nervous system": [ - "Nervous System" - ], - "nursing": [ - "Nursing" - ], - "statistics and probability": [ - "Statistics and Probability" - ], - "insurance law": [ - "Insurance Law" - ], - "j8": [ - "Labor Economics" - ], - "catalysis and reaction engineering": [ - "Catalysis and Reaction Engineering" - ], - "p59": [ - "Political Economy" - ], - "p29": [ - "Political Economy" - ], - "education policy": [ - "Education Policy" - ], - "exercise science": [ - "Exercise Science" - ], - "gastroenterology and hepatology": [ - "Hepatology", - "Gastroenterology" - ], - "biogeochemistry": [ - "Biogeochemistry" - ], - "pharmacology": [ - "Pharmacology", - "Medical Pharmacology" - ], - "illustration": [ - "Illustration" - ], - "endocrine system": [ - "Endocrine System" - ], - "courts": [ - "Courts" - ], - "d9": [ - "Economics" - ], - "emergency medicine": [ - "Emergency Medicine" - ], - "p21": [ - "Political Economy" - ], - "e-commerce": [ - "E-Commerce" - ], - "human resources management": [ - "Human Resources Management" - ], - "g2": [ - "Finance" - ], - "other astrophysics and astronomy": [ - "Other Astrophysics and Astronomy" - ], - "liberal studies": [ - "Liberal Studies" - ], - "q27": [ - "Agricultural and Resource Economics" - ], - "d13": [ - "Behavioral Economics" - ], - "programming languages and compilers": [ - "Programming Languages and Compilers" - ], - "literature in english, north america": [ - "Literature in English, North America" - ], - "e49": [ - "Macroeconomics" - ], - "i13": [ - "Health Economics" - ], - "n12": [ - "Economic History" - ], - "n85": [ - "Economic History" - ], - "international economics": [ - "International Economics" - ], - "a22": [ - "Higher Education and Teaching" - ], - "experimental analysis of behavior": [ - "Experimental Analysis of Behavior" - ], - "pathological conditions, signs and symptoms": [ - "Pathological Conditions, Signs and Symptoms" - ], - "f15": [ - "International Economics" - ], - "j18": [ - "Labor Economics" - ], - "literature in english, british isles": [ - "Literature in English, British Isles" - ], - "r19": [ - "Regional Economics" - ], - "nervous system diseases": [ - "Nervous System Diseases" - ], - "physics.optics": [ - "Atomic, Molecular and Optical Physics" - ], - "c20": [ - "Economic Theory" - ], - "education": [ - "Education" - ], - "b13": [ - "Economic History" - ], - "buddhist studies": [ - "Buddhist Studies" - ], - "l40": [ - "Industrial Organization" - ], - "medical immunology": [ - "Medical Immunology" - ], - "speech and rhetorical studies": [ - "Speech and Rhetorical Studies" - ], - "n66": [ - "Economic History" - ], - "housing law": [ - "Housing Law" - ], - "o14": [ - "Growth and Development" - ], - "hep-lat": [ - "Physics", - "Elementary Particles and Fields and String Theory" - ], - "e31": [ - "Macroeconomics" - ], - "g11": [ - "Finance" - ], - "engineering mechanics": [ - "Engineering Mechanics" - ], - "social and philosophical foundations of education": [ - "Social and Philosophical 
Foundations of Education" - ], - "fourth amendment": [ - "Fourth Amendment" - ], - "social media": [ - "Social Media" - ], - "theory and philosophy": [ - "Theory and Philosophy" - ], - "religious thought, theology and philosophy of religion": [ - "Religious Thought, Theology and Philosophy of Religion" - ], - "computer security": [ - "Information Security" - ], - "hiv": [ - "Virus Diseases" - ], - "c60": [ - "Economic Theory" - ], - "architectural engineering": [ - "Architectural Engineering" - ], - "i1": [ - "Health Economics" - ], - "feminist, gender, and sexuality studies": [ - "Feminist, Gender, and Sexuality Studies" - ], - "natural products chemistry and pharmacognosy": [ - "Natural Products Chemistry and Pharmacognosy" - ], - "r42": [ - "Regional Economics" - ], - "civic and community engagement": [ - "Civic and Community Engagement" - ], - "women's studies": [ - "Women's Studies" - ], - "c18": [ - "Econometrics" - ], - "hydraulic engineering": [ - "Hydraulic Engineering" - ], - "bacteriology": [ - "Bacteriology" - ], - "environmental monitoring": [ - "Environmental Monitoring" - ], - "quant-ph": [ - "Physics" - ], - "cond-mat.mes-hall": [ - "Condensed Matter Physics" - ], - "e64": [ - "Macroeconomics" - ], - "canadian history": [ - "Canadian History" - ], - "public health and community nursing": [ - "Public Health and Community Nursing" - ], - "public policy": [ - "Public Policy" - ], - "j78": [ - "Labor Economics" - ], - "other forestry and forest sciences": [ - "Other Forestry and Forest Sciences" - ], - "d4": [ - "Political Economy" - ], - "c3": [ - "Economic Theory" - ], - "c45": [ - "Econometrics" - ], - "community health": [ - "Community Health" - ], - "physics.plasm-ph": [ - "Plasma and Beam Physics" - ], - "hawaiian studies": [ - "Hawaiian Studies" - ], - "n74": [ - "Economic History" - ], - "skin and connective tissue diseases": [ - "Skin and Connective Tissue Diseases" - ], - "french and francophone language and literature": [ - "French and Francophone Language and Literature" - ], - "r39": [ - "Regional Economics" - ], - "translational medical research": [ - "Translational Medical Research" - ], - "z29": [ - "Economics" - ], - "substance abuse and addiction": [ - "Substance Abuse and Addiction" - ], - "circulatory and respiratory physiology": [ - "Circulatory and Respiratory Physiology" - ], - "psycholinguistics": [ - "Psycholinguistics and Neurolinguistics" - ], - "phonology": [ - "Phonetics and Phonology" - ], - "e58": [ - "Macroeconomics" - ], - "q16": [ - "Agricultural and Resource Economics" - ], - "d87": [ - "Political Economy" - ], - "l68": [ - "Industrial Organization" - ], - "other analytical, diagnostic and therapeutic techniques and equipment": [ - "Other Analytical, Diagnostic and Therapeutic Techniques and Equipment" - ], - "p40": [ - "Political Economy" - ], - "anatomy": [ - "Anatomy" - ], - "international law": [ - "International Law" - ], - "entrepreneurial and small business operations": [ - "Entrepreneurial and Small Business Operations" - ], - "j00": [ - "Labor Economics" - ], - "psychiatry and psychology": [ - "Psychiatry and Psychology" - ], - "endocrinology, diabetes, and metabolism": [ - "Endocrinology, Diabetes, and Metabolism" - ], - "h26": [ - "Public Economics" - ], - "animal behavior": [ - "Animal Sciences", - "Behavior and Ethology" - ], - "j01": [ - "Labor Economics" - ], - "e40": [ - "Macroeconomics" - ], - "theatre and performance studies": [ - "Theatre and Performance Studies" - ], - "nucl-th": [ - "Nuclear" - ], - "university extension": 
[ - "University Extension" - ], - "physics.acc-ph": [ - "Physics" - ], - "higher education administration": [ - "Higher Education Administration" - ], - "business and corporate communications": [ - "Business and Corporate Communications" - ], - "n96": [ - "Economic History" - ], - "religion": [ - "Religion" - ], - "internal medicine": [ - "Internal Medicine" - ], - "robotics": [ - "Robotics", - "Artificial Intelligence and Robotics" - ], - "c81": [ - "Economics" - ], - "h62": [ - "Public Economics" - ], - "l85": [ - "Industrial Organization" - ], - "interactive arts": [ - "Interactive Arts" - ], - "spanish literature": [ - "Spanish Literature" - ], - "i39": [ - "Income Distribution" - ], - "immunology of infectious disease": [ - "Immunology of Infectious Disease" - ], - "q14": [ - "Agricultural and Resource Economics" - ], - "h59": [ - "Public Economics" - ], - "oil, gas, and mineral law": [ - "Oil, Gas, and Mineral Law" - ], - "b41": [ - "Economics" - ], - "polymer and organic materials": [ - "Polymer and Organic Materials" - ], - "m48": [ - "Accounting" - ], - "cond-mat.str-el": [ - "Condensed Matter Physics" - ], - "controls and control theory": [ - "Controls and Control Theory" - ], - "physics.bio-ph": [ - "Biological and Chemical Physics" - ], - "j65": [ - "Labor Economics" - ], - "a2": [ - "Education" - ], - "drugs and devices": [ - "Analytical, Diagnostic and Therapeutic Techniques and Equipment" - ], - "organisms": [ - "Organisms" - ], - "b53": [ - "Economics" - ], - "biochemical and biomolecular engineering": [ - "Biochemical and Biomolecular Engineering" - ], - "behavioral analysis": [ - "Applied Behavior Analysis" - ], - "respiratory tract diseases": [ - "Respiratory Tract Diseases" - ], - "jurisdiction": [ - "Jurisdiction" - ], - "physics.hist-ph": [ - "History" - ], - "l25": [ - "Industrial Organization" - ], - "c79": [ - "Economic Theory" - ], - "geriatric nursing": [ - "Geriatric Nursing" - ], - "regional economics": [ - "Regional Economics" - ], - "f44": [ - "Macroeconomics", - "International Economics" - ], - "cosmology, relativity, and gravity": [ - "Cosmology, Relativity, and Gravity" - ], - "h70": [ - "Public Economics" - ], - "probability": [ - "Probability" - ], - "a19": [ - "Economics" - ], - "counseling psychology": [ - "Counseling Psychology" - ], - "international public health": [ - "International Public Health" - ], - "law and psychology": [ - "Law and Psychology" - ], - "dutch studies": [ - "Dutch Studies" - ], - "b3": [ - "Economic History" - ], - "musicology": [ - "Musicology" - ], - "theoretical biology": [ - "Biology" - ], - "c59": [ - "Econometrics" - ], - "space vehicles": [ - "Space Vehicles" - ], - "h31": [ - "Public Economics" - ], - "k40": [ - "Law and Economics" - ], - "q50": [ - "Agricultural and Resource Economics" - ], - "painting": [ - "Painting" - ], - "integumentary system": [ - "Integumentary System" - ], - "other teacher education and professional development": [ - "Other Teacher Education and Professional Development" - ], - "film production": [ - "Film Production" - ], - "q36": [ - "Agricultural and Resource Economics" - ], - "a10": [ - "Economics" - ], - "d85": [ - "Economic Theory" - ], - "materials science and engineering": [ - "Materials Science and Engineering" - ], - "english language and literature": [ - "English Language and Literature" - ], - "mineral physics": [ - "Mineral Physics" - ], - "semiconductor and optical materials": [ - "Semiconductor and Optical Materials" - ], - "transnational law": [ - "Transnational Law" - ], - 
"l91": [ - "Industrial Organization" - ], - "clinical psychology": [ - "Clinical Psychology" - ], - "o18": [ - "Growth and Development" - ], - "g14": [ - "Finance" - ], - "physical therapy": [ - "Physical Therapy" - ], - "educational methods": [ - "Educational Methods" - ], - "other physical sciences and mathematics": [ - "Other Physical Sciences and Mathematics" - ], - "election law": [ - "Election Law" - ], - "gastroenterology": [ - "Gastroenterology" - ], - "dental hygiene": [ - "Dental Hygiene" - ], - "j62": [ - "Labor Economics" - ], - "infectious diseases": [ - "Infectious Disease" - ], - "m3": [ - "Marketing" - ], - "e17": [ - "Macroeconomics" - ], - "veterinary toxicology and pharmacology": [ - "Veterinary Toxicology and Pharmacology" - ], - "fresh water studies": [ - "Fresh Water Studies" - ], - "histology": [ - "Cell Anatomy" - ], - "vital and health statistics": [ - "Vital and Health Statistics" - ], - "j32": [ - "Labor Economics" - ], - "food microbiology": [ - "Food Microbiology" - ], - "r28": [ - "Regional Economics" - ], - "h73": [ - "Public Economics" - ], - "perceptual cognitive psychology": [ - "Cognition and Perception" - ], - "g39": [ - "Finance" - ], - "f43": [ - "Macroeconomics", - "International Economics" - ], - "ornithology": [ - "Ornithology" - ], - "f38": [ - "International Economics" - ], - "j31": [ - "Labor Economics" - ], - "biological psychology": [ - "Biological Psychology" - ], - "q51": [ - "Agricultural and Resource Economics" - ], - "biological and chemical physics": [ - "Biological and Chemical Physics" - ], - "soviet and post-soviet studies": [ - "Soviet and Post-Soviet Studies" - ], - "organic chemicals": [ - "Organic Chemicals" - ], - "astrodynamics": [ - "Astrodynamics" - ], - "music": [ - "Music" - ], - "critical care": [ - "Critical Care" - ], - "g1": [ - "Finance" - ], - "modern art and architecture": [ - "Modern Art and Architecture" - ], - "n17": [ - "Economic History" - ], - "african american studies": [ - "African American Studies" - ], - "history of art, architecture, and archaeology": [ - "History of Art, Architecture, and Archaeology" - ], - "psychoanalysis and psychotherapy": [ - "Psychoanalysis and Psychotherapy" - ], - "translation studies": [ - "Translation Studies" - ], - "mass communication": [ - "Mass Communication" - ], - "museum studies": [ - "Museum Studies" - ], - "c00": [ - "Economics" - ], - "complex mixtures": [ - "Complex Mixtures" - ], - "j44": [ - "Labor Economics" - ], - "aerodynamics and fluid mechanics": [ - "Aerodynamics and Fluid Mechanics" - ], - "j43": [ - "Labor Economics" - ], - "water law": [ - "Water Law" - ], - "l30": [ - "Industrial Organization" - ], - "m15": [ - "Business Administration, Management, and Operations" - ], - "q52": [ - "Agricultural and Resource Economics" - ], - "digital circuits": [ - "Digital Circuits" - ], - "set theory": [ - "Set Theory" - ], - "f52": [ - "Political Economy", - "International Economics" - ], - "o3": [ - "Growth and Development" - ], - "epidemiology": [ - "Epidemiology" - ], - "b10": [ - "Economic History" - ], - "scholarship of teaching and learning": [ - "Scholarship of Teaching and Learning" - ], - "classical literature and philology": [ - "Classical Literature and Philology" - ], - "cognition and perception": [ - "Cognition and Perception" - ], - "a11": [ - "Economics" - ], - "cardiovascular system": [ - "Cardiovascular System" - ], - "control theory": [ - "Control Theory" - ], - "united states history": [ - "United States History" - ], - "other political science": [ - 
"Other Political Science" - ], - "international business": [ - "International Business" - ], - "hep-ph": [ - "Physics", - "Elementary Particles and Fields and String Theory" - ], - "curriculum and social inquiry": [ - "Curriculum and Social Inquiry" - ], - "global health": [ - "Public Health" - ], - "c30": [ - "Economic Theory" - ], - "junior high, intermediate, middle school education and teaching": [ - "Junior High, Intermediate, Middle School Education and Teaching" - ], - "l10": [ - "Industrial Organization" - ], - "other public affairs, public policy and public administration": [ - "Other Public Affairs, Public Policy and Public Administration" - ], - "other english language and literature": [ - "Other English Language and Literature" - ], - "dietetics and clinical nutrition": [ - "Dietetics and Clinical Nutrition" - ], - "physics.class-ph": [ - "Physics" - ], - "history": [ - "History" - ], - "other applied mathematics": [ - "Other Applied Mathematics" - ], - "m14": [ - "Business Administration, Management, and Operations" - ], - "terrestrial and aquatic ecology": [ - "Terrestrial and Aquatic Ecology" - ], - "c12": [ - "Econometrics" - ], - "civil law": [ - "Civil Law" - ], - "c72": [ - "Economic Theory" - ], - "biophysics": [ - "Biophysics" - ], - "o35": [ - "Growth and Development" - ], - "urogenital system": [ - "Urogenital System" - ], - "l51": [ - "Industrial Organization" - ], - "n84": [ - "Economic History" - ], - "sexuality and the law": [ - "Sexuality and the Law" - ], - "orthopedics": [ - "Orthopedics" - ], - "pathology": [ - "Pathology" - ], - "partial differential equations": [ - "Partial Differential Equations" - ], - "genomics": [ - "Genomics" - ], - "finance and financial management": [ - "Finance and Financial Management" - ], - "oil, gas, and energy": [ - "Oil, Gas, and Energy" - ], - "q28": [ - "Agricultural and Resource Economics" - ], - "psychology of movement": [ - "Psychology of Movement" - ], - "oral biology and oral pathology": [ - "Oral Biology and Oral Pathology" - ], - "l53": [ - "Industrial Organization" - ], - "personality and social contexts": [ - "Personality and Social Contexts" - ], - "q33": [ - "Agricultural and Resource Economics" - ], - "f39": [ - "International Economics" - ], - "community psychology": [ - "Community Psychology" - ], - "n77": [ - "Economic History" - ], - "l82": [ - "Industrial Organization" - ], - "f01": [ - "International Economics" - ], - "l72": [ - "Industrial Organization" - ], - "nlin": [ - "Physics" - ], - "primary care": [ - "Primary Care" - ], - "reading and language": [ - "Reading and Language" - ], - "veterinary physiology": [ - "Veterinary Physiology" - ], - "e59": [ - "Macroeconomics" - ], - "women's health": [ - "Women's Health" - ], - "j53": [ - "Labor Economics" - ], - "veterinary infectious diseases": [ - "Veterinary Infectious Diseases" - ], - "somatic bodywork and related therapeutic practices": [ - "Somatic Bodywork and Related Therapeutic Practices" - ], - "f29": [ - "International Economics" - ], - "civil rights and discrimination": [ - "Civil Rights and Discrimination" - ], - "f14": [ - "International Economics" - ], - "systems architecture": [ - "Systems Architecture" - ], - "american film studies": [ - "American Film Studies" - ], - "life sciences": [ - "Life Sciences" - ], - "c65": [ - "Economic Theory" - ], - "k24": [ - "Law and Economics" - ], - "oceanography": [ - "Oceanography" - ], - "p41": [ - "Political Economy" - ], - "evidence": [ - "Evidence" - ], - "r3": [ - "Regional Economics" - ], - "french 
linguistics": [ - "French Linguistics" - ], - "other film and media studies": [ - "Other Film and Media Studies" - ], - "f51": [ - "Political Economy", - "International Economics" - ], - "l19": [ - "Industrial Organization" - ], - "microbiology": [ - "Microbiology" - ], - "theatre history": [ - "Theatre History" - ], - "botany": [ - "Botany" - ], - "k2": [ - "Law and Economics" - ], - "j12": [ - "Labor Economics" - ], - "communication technology and new media": [ - "Communication Technology and New Media" - ], - "sculpture": [ - "Sculpture" - ], - "synthetic biology": [ - "Biology" - ], - "speech": [ - "Computer Sciences" - ], - "k10": [ - "Law and Economics" - ], - "r13": [ - "Regional Economics" - ], - "k42": [ - "Law and Economics" - ], - "y70": [ - "Economics" - ], - "d92": [ - "Economics" - ], - "other pharmacology, toxicology and environmental health": [ - "Other Pharmacology, Toxicology and Environmental Health" - ], - "nanomedicine": [ - "Nanomedicine" - ], - "mesomycetozoea": [ - "Mesomycetozoea" - ], - "industrial and organizational psychology": [ - "Industrial and Organizational Psychology" - ], - "geriatrics": [ - "Geriatrics" - ], - "spanish linguistics": [ - "Spanish Linguistics" - ], - "other aerospace engineering": [ - "Other Aerospace Engineering" - ], - "orthodontics and orthodontology": [ - "Orthodontics and Orthodontology" - ], - "therapeutics": [ - "Therapeutics" - ], - "z32": [ - "Economics" - ], - "african studies": [ - "African Studies" - ], - "d46": [ - "Political Economy" - ], - "h5": [ - "Public Economics" - ], - "science and mathematics education": [ - "Science and Mathematics Education" - ], - "food and drug law": [ - "Food and Drug Law" - ], - "o51": [ - "Growth and Development" - ], - "privacy law": [ - "Privacy Law" - ], - "p51": [ - "Political Economy" - ], - "f35": [ - "International Economics" - ], - "discourse and text linguistics": [ - "Discourse and Text Linguistics" - ], - "forensic science and technology": [ - "Forensic Science and Technology" - ], - "higher education": [ - "Higher Education" - ], - "c5": [ - "Econometrics" - ], - "philosophy": [ - "Philosophy" - ], - "nutritional and metabolic diseases": [ - "Nutritional and Metabolic Diseases" - ], - "p17": [ - "Political Economy" - ], - "plasma and beam physics": [ - "Plasma and Beam Physics" - ], - "accounting": [ - "Accounting" - ], - "h76": [ - "Public Economics" - ], - "h80": [ - "Public Economics" - ], - "water resource management": [ - "Water Resource Management" - ], - "p25": [ - "Political Economy" - ], - "male urogenital diseases": [ - "Male Urogenital Diseases" - ], - "occupational therapy": [ - "Occupational Therapy" - ], - "cond-mat.stat-mech": [ - "Condensed Matter Physics" - ], - "c34": [ - "Economic Theory" - ], - "biomechanics and biotransport": [ - "Biomechanics and Biotransport" - ], - "chem-ph": [ - "Biological and Chemical Physics" - ], - "environmental engineering": [ - "Environmental Engineering" - ], - "systems and communications": [ - "Systems and Communications" - ], - "veterinary anatomy": [ - "Veterinary Anatomy" - ], - "i23": [ - "Education Economics" - ], - "maternal, child health and neonatal nursing": [ - "Maternal, Child Health and Neonatal Nursing" - ], - "n46": [ - "Economic History" - ], - "physics.data-an": [ - "Physics", - "Statistics and Probability" - ], - "fine arts": [ - "Fine Arts" - ], - "biomechanics": [ - "Biomechanics" - ], - "c40": [ - "Econometrics" - ], - "a14": [ - "Economics" - ], - "business administration, management, and operations": [ - 
"Business Administration, Management, and Operations" - ], - "educational assessment, evaluation, and research": [ - "Educational Assessment, Evaluation, and Research" - ], - "art practice": [ - "Art Practice" - ], - "public economics": [ - "Public Economics" - ], - "sedimentology": [ - "Sedimentology" - ], - "food and beverage management": [ - "Food and Beverage Management" - ], - "accounting law": [ - "Accounting Law" - ], - "electromagnetics and photonics": [ - "Electromagnetics and Photonics" - ], - "g18": [ - "Finance" - ], - "l61": [ - "Industrial Organization" - ], - "agriculture": [ - "Agriculture" - ], - "nephrology": [ - "Nephrology" - ], - "l92": [ - "Industrial Organization" - ], - "d81": [ - "Political Economy" - ], - "internet law": [ - "Internet Law" - ], - "n31": [ - "Economic History" - ], - "f16": [ - "International Economics" - ], - "l60": [ - "Industrial Organization" - ], - "j58": [ - "Labor Economics" - ], - "y1": [ - "Economics" - ], - "conservation biology": [ - "Natural Resources and Conservation" - ], - "biomechanical engineering": [ - "Biomechanical Engineering" - ], - "e50": [ - "Macroeconomics" - ], - "i14": [ - "Health Economics" - ], - "banking and finance law": [ - "Banking and Finance Law" - ], - "nutrition": [ - "Nutrition", - "Medical Nutrition" - ], - "comparative politics": [ - "Comparative Politics" - ], - "neurosciences": [ - "Neurosciences" - ], - "other plant sciences": [ - "Other Plant Sciences" - ], - "physics.soc-ph": [ - "Nature and Society Relations" - ], - "a21": [ - "Secondary Education and Teaching" - ], - "j69": [ - "Labor Economics" - ], - "medical jurisprudence": [ - "Medical Jurisprudence" - ], - "animal sciences": [ - "Animal Sciences" - ], - "analytical, diagnostic and therapeutic techniques and equipment": [ - "Analytical, Diagnostic and Therapeutic Techniques and Equipment" - ], - "other mental and social health": [ - "Other Mental and Social Health" - ], - "fashion business": [ - "Fashion Business" - ], - "j5": [ - "Labor Economics" - ], - "women's history": [ - "Women's History" - ], - "chinese studies": [ - "Chinese Studies" - ], - "p18": [ - "Political Economy" - ], - "common law": [ - "Common Law" - ], - "n3": [ - "Economic History" - ], - "patt-sol": [ - "Physics" - ], - "parasitic diseases": [ - "Parasitic Diseases" - ], - "physics.comp-ph": [ - "Physics" - ], - "medicinal-pharmaceutical chemistry": [ - "Medicinal-Pharmaceutical Chemistry" - ], - "q20": [ - "Agricultural and Resource Economics" - ], - "semantics": [ - "Semantics and Pragmatics" - ], - "maternal and child health": [ - "Maternal and Child Health" - ], - "c57": [ - "Econometrics" - ], - "n01": [ - "Economic History" - ], - "plant pathology": [ - "Plant Pathology" - ], - "q55": [ - "Agricultural and Resource Economics" - ], - "n34": [ - "Economic History" - ], - "i12": [ - "Health Economics" - ], - "statistics": [ - "Statistics and Probability" - ], - "e27": [ - "Macroeconomics" - ], - "other chemicals and drugs": [ - "Other Chemicals and Drugs" - ], - "propulsion and power": [ - "Propulsion and Power" - ], - "hepatology": [ - "Hepatology" - ], - "h79": [ - "Public Economics" - ], - "d23": [ - "Industrial Organization" - ], - "y40": [ - "Economics" - ], - "educational administration and supervision": [ - "Educational Administration and Supervision" - ], - "p15": [ - "Political Economy" - ], - "medical toxicology": [ - "Medical Toxicology" - ], - "family practice nursing": [ - "Family Practice Nursing" - ], - "integrative medicine": [ - "Integrative Medicine" - ], 
- "political science": [ - "Political Science" - ], - "nanoscience and nanotechnology": [ - "Nanoscience and Nanotechnology" - ], - "diabetes and endocrinology": [ - "Endocrinology, Diabetes, and Metabolism" - ], - "p37": [ - "Political Economy" - ], - "civil and environmental engineering": [ - "Civil and Environmental Engineering" - ], - "r58": [ - "Regional Economics" - ], - "acoustics, dynamics, and controls": [ - "Acoustics, Dynamics, and Controls" - ], - "c1": [ - "Econometrics" - ], - "andrology": [ - "Medical Specialties" - ], - "dynamics and dynamical systems": [ - "Dynamics and Dynamical Systems" - ], - "c2": [ - "Economic Theory" - ], - "ao-sci": [ - "Physics" - ], - "f54": [ - "Political Economy", - "International Economics" - ], - "l8": [ - "Industrial Organization" - ], - "d11": [ - "Economic Theory" - ], - "medical specialties": [ - "Medical Specialties" - ], - "i25": [ - "Education Economics" - ], - "chemicals and drugs": [ - "Chemicals and Drugs" - ], - "other sociology": [ - "Other Sociology" - ], - "cond-mat.other": [ - "Condensed Matter Physics" - ], - "o55": [ - "Growth and Development" - ], - "biodiversity": [ - "Biodiversity" - ], - "civil engineering": [ - "Civil Engineering" - ], - "y60": [ - "Economics" - ], - "endocrinology": [ - "Endocrinology" - ], - "h27": [ - "Public Economics" - ], - "oral and maxillofacial surgery": [ - "Oral and Maxillofacial Surgery" - ], - "e21": [ - "Macroeconomics" - ], - "l16": [ - "Industrial Organization" - ], - "medical molecular biology": [ - "Medical Molecular Biology" - ], - "biological factors": [ - "Biological Factors" - ], - "d01": [ - "Macroeconomics", - "Behavioral Economics" - ], - "fiction": [ - "Fiction" - ], - "marketing law": [ - "Marketing Law" - ], - "retirement security law": [ - "Retirement Security Law" - ], - "other nutrition": [ - "Other Nutrition" - ], - "elementary and middle and secondary education administration": [ - "Elementary and Middle and Secondary Education Administration" - ], - "epistemology": [ - "Epistemology" - ], - "q21": [ - "Agricultural and Resource Economics" - ], - "k3": [ - "Law and Economics" - ], - "cond-mat": [ - "Condensed Matter Physics" - ], - "e63": [ - "Macroeconomics" - ], - "external galaxies": [ - "External Galaxies" - ], - "p43": [ - "Political Economy" - ], - "d6": [ - "Income Distribution" - ], - "f59": [ - "Political Economy", - "International Economics" - ], - "e02": [ - "Macroeconomics" - ], - "other computer sciences": [ - "Other Computer Sciences" - ], - "higher education and teaching": [ - "Higher Education and Teaching" - ], - "medicinal and pharmaceutical chemistry": [ - "Medicinal and Pharmaceutical Chemistry" - ], - "book and paper": [ - "Book and Paper" - ], - "c39": [ - "Economic Theory" - ], - "immunity": [ - "Immunity" - ], - "nanotechnology fabrication": [ - "Nanotechnology Fabrication" - ], - "international relations": [ - "International Relations" - ], - "mtrl-th": [ - "Condensed Matter Physics" - ], - "d22": [ - "Industrial Organization" - ], - "comparative linguistics": [ - "Comparative and Historical Linguistics" - ], - "surgery and surgical specialties": [ - "Surgery" - ], - "physical sciences and mathematics": [ - "Physical Sciences and Mathematics" - ], - "k32": [ - "Law and Economics" - ], - "civil procedure": [ - "Civil Procedure" - ], - "i11": [ - "Health Economics" - ], - "e23": [ - "Macroeconomics" - ], - "contemporary art": [ - "Contemporary Art" - ], - "j33": [ - "Labor Economics" - ], - "astrophysics and astronomy": [ - "Astrophysics and 
Astronomy" - ], - "f5": [ - "Political Economy", - "International Economics" - ], - "g33": [ - "Finance" - ], - "biometry": [ - "Biometry" - ], - "cond-mat.dis-nn": [ - "Condensed Matter Physics" - ], - "j83": [ - "Labor Economics" - ], - "digestive, oral, and skin physiology": [ - "Digestive, Oral, and Skin Physiology" - ], - "library and information science": [ - "Library and Information Science" - ], - "educational psychology": [ - "Educational Psychology" - ], - "q41": [ - "Agricultural and Resource Economics" - ], - "heat transfer, combustion": [ - "Heat Transfer, Combustion" - ], - "cell and developmental biology": [ - "Cell and Developmental Biology" - ], - "chemical engineering": [ - "Chemical Engineering" - ], - "n16": [ - "Economic History" - ], - "biogeography": [ - "Biology" - ], - "q43": [ - "Agricultural and Resource Economics" - ], - "translational medicine": [ - "Translational Medical Research" - ], - "statistical methodology": [ - "Statistical Methodology" - ], - "psychiatry": [ - "Psychiatry" - ], - "biostatistics": [ - "Biostatistics" - ], - "medicinal chemistry and pharmaceutics": [ - "Medicinal Chemistry and Pharmaceutics" - ], - "international trade law": [ - "International Trade Law" - ], - "history of religions of western origin": [ - "History of Religions of Western Origin" - ], - "l4": [ - "Industrial Organization" - ], - "cond-mat.quant-gas": [ - "Condensed Matter Physics" - ], - "history of religions of eastern origins": [ - "History of Religions of Eastern Origins" - ], - "dispute resolution and arbitration": [ - "Dispute Resolution and Arbitration" - ], - "y8": [ - "Economics" - ], - "asian art and architecture": [ - "Asian Art and Architecture" - ], - "mechanics of materials": [ - "Mechanics of Materials" - ], - "h72": [ - "Public Economics" - ], - "k22": [ - "Law and Economics" - ], - "adult and continuing education and teaching": [ - "Adult and Continuing Education and Teaching" - ], - "environmental studies": [ - "Environmental Studies" - ], - "e43": [ - "Macroeconomics" - ] -} diff --git a/share/models/validators.py b/share/models/validators.py deleted file mode 100644 index cd14d6f69..000000000 --- a/share/models/validators.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -import json -from collections import OrderedDict -from itertools import chain - -from jsonschema import exceptions -from jsonschema import Draft4Validator, draft4_format_checker - -from django.core.exceptions import ValidationError -from django.utils.deconstruct import deconstructible - -from share.schema import ShareV2Schema -from share.schema.exceptions import SchemaKeyError -from share.schema.shapes import AttributeDataType, AttributeDataFormat, RelationShape -from share.transform.chain.links import IRILink -from share.transform.chain.exceptions import InvalidIRI - - -def is_valid_jsonld(value): - raise Exception('Deprecated; use JSONLDValidator') - - -@deconstructible -class JSONLDValidator: - - __json_schema_cache = {} - __validator_cache = {} - - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'jsonld-schema.json')) as fobj: - jsonld_schema = Draft4Validator(json.load(fobj)) - - db_type_map = { - AttributeDataType.STRING: 'string', - AttributeDataType.BOOLEAN: 'boolean', - AttributeDataType.INTEGER: 'integer', - AttributeDataType.DATETIME: 'string', - AttributeDataType.OBJECT: 'object', - } - - def __init__(self, check_existence=True): - self.__check_existence = check_existence - - def __call__(self, value): - try: - JSONLDValidator.jsonld_schema.validate(value) - 
-    def __eq__(self, other):
-        return self.__check_existence == other.__check_existence
-
-    def validate_node(self, value, refs, nodes):
-        try:
-            schema_type = ShareV2Schema().get_type(value['@type'])
-        except SchemaKeyError:
-            raise ValidationError("'{}' is not a valid type".format(value['@type']))
-
-        self.validator_for(schema_type).validate(value)
-
-        for key, val in value.items():
-            if not isinstance(val, dict) or key == 'extra':
-                continue
-
-            if val['@id'].startswith('_:'):
-                refs['blank'].add((val['@id'], val['@type'].lower()))
-            else:
-                refs['concrete'].add((val['@id'], val['@type'].lower()))
-
-        if value['@id'].startswith('_:'):
-            nodes['blank'].add((value['@id'], value['@type'].lower()))
-        else:
-            nodes['concrete'].add((value['@id'], value['@type'].lower()))
-
-    def json_schema_for_field(self, share_field):
-        if share_field.is_relation:
-            if share_field.relation_shape == RelationShape.MANY_TO_MANY:
-                concrete_type = share_field.through_concrete_type
-            else:
-                concrete_type = share_field.related_concrete_type
-
-            rel = {
-                'type': 'object',
-                'description': getattr(share_field, 'description', ''),
-                'required': ['@id', '@type'],
-                'additionalProperties': False,
-                'properties': {
-                    '@id': {'type': ['string', 'integer']},
-                    # Sorted so the same error message is sent back every time
-                    '@type': {'enum': sorted(chain(*[
-                        # ideally would be case-insensitive, but jsonschema enums don't know how.
-                        # instead, allow 'FooBar', 'foobar', and 'FOOBAR' casings
-                        (type_name, type_name.lower(), type_name.upper())
-                        for type_name in ShareV2Schema().get_type_names(concrete_type)
-                    ]))}
-                }
-            }
-            if share_field.relation_shape in (RelationShape.MANY_TO_MANY, RelationShape.ONE_TO_MANY):
-                return {'type': 'array', 'items': rel}
-            return rel
-
-        schema = {
-            'type': JSONLDValidator.db_type_map[share_field.data_type],
-            'description': getattr(share_field, 'description', ''),
-        }
-        if share_field.data_format == AttributeDataFormat.URI:
-            schema['format'] = 'uri'
-
-        return schema
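JSON-Schema enum matching is case-sensitive and Draft 4 offers no case-insensitive variant, hence the three-casing padding in the `@type` enum above. The trick in isolation (assuming only the `jsonschema` package, with made-up type names):

    from itertools import chain
    from jsonschema import Draft4Validator

    type_names = ['CreativeWork', 'Publication']  # stand-ins for a concrete type's subtype names
    schema = {'properties': {'@type': {'enum': sorted(chain(*[
        (name, name.lower(), name.upper()) for name in type_names
    ]))}}}

    Draft4Validator(schema).validate({'@type': 'publication'})  # passes: lowercase is enumerated
    # Draft4Validator(schema).validate({'@type': 'PuBlication'})  # would raise: mixed casing isn't listed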
- # instead, allow 'FooBar', 'foobar', and 'FOOBAR' casings - (type_name, type_name.lower(), type_name.upper()) - for type_name in ShareV2Schema().get_type_names(concrete_type) - ]))} - } - } - if share_field.relation_shape in (RelationShape.MANY_TO_MANY, RelationShape.ONE_TO_MANY): - return {'type': 'array', 'items': rel} - return rel - - schema = { - 'type': JSONLDValidator.db_type_map[share_field.data_type], - 'description': getattr(share_field, 'description', ''), - } - if share_field.data_format == AttributeDataFormat.URI: - schema['format'] = 'uri' - - return schema - - def json_schema_for_type(self, share_schema_type): - if share_schema_type.name in JSONLDValidator.__json_schema_cache: - return JSONLDValidator.__json_schema_cache[share_schema_type.name] - - schema = { - 'type': 'object', - 'required': ['@id', '@type'], - 'additionalProperties': False, - 'properties': { - '@type': {'type': 'string'}, - '@id': {'type': ['integer', 'string']}, - } - } - - share_schema = ShareV2Schema() - - for field_name in share_schema_type.explicit_fields: - share_field = share_schema.get_field(share_schema_type.name, field_name) - if share_field.is_required: - schema['required'].append(share_field.name) - schema['properties'][share_field.name] = self.json_schema_for_field(share_field) - - return JSONLDValidator.__json_schema_cache.setdefault( - share_schema_type.name, - schema, - ) - - def validator_for(self, share_schema_type): - if share_schema_type.name in JSONLDValidator.__validator_cache: - return JSONLDValidator.__validator_cache[share_schema_type.name] - - schema = self.json_schema_for_type(share_schema_type) - - return JSONLDValidator.__validator_cache.setdefault( - share_schema_type.name, - Draft4Validator(schema, format_checker=draft4_format_checker), - ) - - -def is_valid_iri(iri): - # raises InvalidIRI if invalid - IRILink().execute(iri) - return True - - -draft4_format_checker.checks('uri', raises=InvalidIRI)(is_valid_iri) diff --git a/share/regulate/__init__.py b/share/regulate/__init__.py deleted file mode 100644 index 0eebfb0e8..000000000 --- a/share/regulate/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .regulator import Regulator - -__all__ = ('Regulator',) diff --git a/share/regulate/regulator.py b/share/regulate/regulator.py deleted file mode 100644 index 83041b575..000000000 --- a/share/regulate/regulator.py +++ /dev/null @@ -1,110 +0,0 @@ -import logging - -from django.conf import settings - -from share import exceptions -from share.util.extensions import Extensions - - -logger = logging.getLogger(__name__) - - -class RegulatorConfigError(exceptions.ShareException): - pass - - -class InfiniteRegulationError(exceptions.ShareException): - pass - - -class Regulator: - VERSION = 1 - - def __init__( - self, *, - source_config=None, - regulator_config=None, - validate=True, - ): - self._logs = [] - self._custom_steps = Steps( - self, - source_config.regulator_steps if source_config else None, - validate=validate, - ) - self._default_steps = Steps( - self, - regulator_config or settings.SHARE_REGULATOR_CONFIG, - validate=validate - ) - - def regulate(self, graph): - self._custom_steps.run(graph) - self._default_steps.run(graph) - - -class Steps: - MAX_RUNS = 31 - - node_steps = () - graph_steps = () - validate_steps = () - - def __init__(self, regulator, regulator_config, node=True, graph=True, validate=True): - self.regulator = regulator - self.regulator_config = regulator_config - if not regulator_config: - return - if node: - self.node_steps = 
self._load_steps(regulator_config.get('NODE_STEPS'), 'share.regulate.steps.node')
-        if graph:
-            self.graph_steps = self._load_steps(regulator_config.get('GRAPH_STEPS'), 'share.regulate.steps.graph')
-        if validate:
-            self.validate_steps = self._load_steps(regulator_config.get('VALIDATE_STEPS'), 'share.regulate.steps.validate')
-
-    def run(self, graph):
-        runs = 0
-        while True:
-            self._run_steps(graph, self.node_steps)
-
-            graph.changed = False
-            self._run_steps(graph, self.graph_steps)
-            if not graph.changed:
-                break
-
-            runs += 1
-            if runs >= self.MAX_RUNS:
-                raise InfiniteRegulationError('Regulator config: {}'.format(self.regulator_config))
-        self._run_steps(graph, self.validate_steps)
-
-    def _run_steps(self, graph, steps):
-        for step in steps:
-            try:
-                step.run(graph)
-            finally:
-                if step.logs:
-                    self.regulator._logs.extend(step.logs)
-
-    def _load_steps(self, step_configs, namespace):
-        try:
-            steps = []
-            for step in (step_configs or []):
-                if isinstance(step, str):
-                    steps.append(self._load_step(namespace, step))
-                elif isinstance(step, (list, tuple)) and len(step) == 2:
-                    steps.append(self._load_step(namespace, step[0], step[1]))
-                else:
-                    raise RegulatorConfigError('Each step must be a string or (name, settings) pair. Got: {}'.format(step))
-            return tuple(steps)
-        except Exception:
-            raise RegulatorConfigError('Error loading regulator step config for namespace {}'.format(namespace))
-
-    def _load_step(self, namespace, name, settings=None):
-        """Instantiate and return a regulator step for the given config.
-
-        Params:
-            namespace: Name of the step's entry point group in setup.py
-            name: Name of the step's entry point in setup.py
-            [settings]: Optional dictionary, passed as keyword arguments when initializing the step
-        """
-        return Extensions.get(namespace, name)(**(settings or {}))
diff --git a/share/regulate/steps/__init__.py b/share/regulate/steps/__init__.py
deleted file mode 100644
index 19e16f585..000000000
--- a/share/regulate/steps/__init__.py
+++ /dev/null
@@ -1,107 +0,0 @@
-import abc
-
-from share.exceptions import RegulateError
-
-
-class BaseStep(abc.ABC):
-    logs = None
-
-    def __init__(self):
-        self.logs = []
-
-    def info(self, description, node_id=None):
-        """Log information about a change made to the graph.
-        """
-        log = dict(description=description, rejected=False, node_id=node_id)
-        self.logs.append(log)
-
-    def error(self, description, node_id=None, exception=None):
-        """Indicate a severe problem with the data, halt regulation.
-        """
-        log = dict(description=description, rejected=True, node_id=node_id)
-        self.logs.append(log)
-        raise RegulateError('Regulation failed: {}'.format(description)) from exception
-
-    @abc.abstractmethod
-    def run(self, graph):
-        raise NotImplementedError
-
-
-class NodeStep(BaseStep):
-    node_types = None
-
-    def __init__(self, *args, node_types=None, **kwargs):
-        """Initialize a NodeStep.
-
-        Params:
-            [node_types]: List of node types this step will be run on. e.g. ['WorkIdentifier']
-        """
-        super().__init__(*args, **kwargs)
-
-        if node_types:
-            self.node_types = [t.lower() for t in node_types]
-
-    def valid_target(self, node):
-        """Return True if `node` is a valid target for this regulator step.
-
-        Override to filter the nodes this step will run on.
- """ - if self.node_types is None: - return True - return (node.type in self.node_types or node.concrete_type in self.node_types) - - @abc.abstractmethod - def regulate_node(self, node): - raise NotImplementedError - - def run(self, graph): - self.logs.clear() - for node in self._iter_nodes(graph): - if self.valid_target(node): - self.regulate_node(node) - - def _iter_nodes(self, graph): - """Iterate through the graph's nodes in no particular order, allowing nodes to be added/deleted while iterating - """ - visited = set() - nodes = list(graph) - while nodes: - for n in nodes: - if n in graph: - yield n - visited.add(n) - nodes = set(graph) - visited - - -class GraphStep(BaseStep): - def run(self, graph): - self.logs.clear() - self.regulate_graph(graph) - - @abc.abstractmethod - def regulate_graph(self, graph): - raise NotImplementedError - - -class ValidationStep(BaseStep): - def run(self, graph): - self.logs.clear() - # TODO: convert to read-only graph? - self.validate_graph(graph) - - @abc.abstractmethod - def validate_graph(self, graph): - """Validate the graph. - - Call `self.reject` or `self.fail` if the graph is invalid. - Must not modify the graph in any way. - """ - raise NotImplementedError - - def reject(self, description, node_id=None, exception=None): - """Indicate a regulated graph failed validation and will not be merged into the SHARE dataset. - """ - log = dict(description=description, rejected=True, node_id=node_id) - self.logs.append(log) - raise RegulateError('Graph failed validation: {}'.format(description)) from exception diff --git a/share/regulate/steps/block_extra_values.py b/share/regulate/steps/block_extra_values.py deleted file mode 100644 index 4175ae357..000000000 --- a/share/regulate/steps/block_extra_values.py +++ /dev/null @@ -1,45 +0,0 @@ -from share.regulate.steps import NodeStep - - -class BlockExtraValues(NodeStep): - """Discard nodes based on key/value pairs in their `extra` dict - - Settings: - blocked_values: Non-empty dict. If all its key/value pairs exist in a - node's `extra`, discard that node. - [node_types]: Optional list of node types (inherited from NodeStep). - If given, filter the list of nodes this step will consider. - - Example config (YAML): - Discard work identifiers with {'identifier_type': 'srbnumber'} - - ```yaml - - namespace: share.regulate.steps.node - name: block_extra_values - settings: - node_types: - - WorkIdentifer - blocked_values: - identifier_type: srbnumber - ``` - """ - def __init__(self, *args, blocked_values, **kwargs): - super().__init__(*args, **kwargs) - if not blocked_values or not isinstance(blocked_values, dict): - raise TypeError('blocked_values setting must be a non-empty dict') - self.blocked_values = blocked_values - - def regulate_node(self, node): - extra = node['extra'] - if not extra: - return - - if all(extra.get(k) == v for k, v in self.blocked_values.items()): - node.delete() - self.info( - '{}: Extra data match blocked values {}; deleting node.'.format( - self.__class__.__name__, - self.blocked_values - ), - node_id=node.id - ) diff --git a/share/regulate/steps/cited_as.py b/share/regulate/steps/cited_as.py deleted file mode 100644 index 4e390fe29..000000000 --- a/share/regulate/steps/cited_as.py +++ /dev/null @@ -1,17 +0,0 @@ -from share.regulate.steps import NodeStep - - -class CitedAs(NodeStep): - """Set contributor cited_as, if empty. 
- - Example config: - ```yaml - - namespace: share.regulate.steps.node - name: cited_as - ``` - """ - node_types = ['abstractagentworkrelation'] - - def regulate_node(self, node): - if not node['cited_as']: - node['cited_as'] = node['agent']['name'] diff --git a/share/regulate/steps/deduplicate.py b/share/regulate/steps/deduplicate.py deleted file mode 100644 index 80ccd7e65..000000000 --- a/share/regulate/steps/deduplicate.py +++ /dev/null @@ -1,70 +0,0 @@ -from share.regulate.steps import GraphStep - - -class Deduplicate(GraphStep): - """Look for duplicate nodes and merge/discard them - - Example config (YAML): - ```yaml - - namespace: share.regulate.steps.graph - name: deduplicate - ``` - """ - MAX_MERGES = 100 - - # map from concrete type to set of fields used to dedupe - DEDUPLICATION_CRITERIA = { - # works and agents may be merged if duplicate identifiers are merged - # 'abstractcreativework': {}, - # 'abstractagent': {}, - 'abstractagentworkrelation': {'creative_work', 'agent', 'type'}, - 'abstractagentrelation': {'subject', 'related', 'type'}, - 'abstractworkrelation': {'subject', 'related', 'type'}, - 'workidentifier': {'uri'}, - 'agentidentifier': {'uri'}, - 'subject': {'name', 'parent', 'central_synonym'}, - 'tag': {'name'}, - 'throughtags': {'tag', 'creative_work'}, - # 'award': {}, - 'throughawards': {'funder', 'award'}, - 'throughsubjects': {'subject', 'creative_work'}, - } - - def regulate_graph(self, graph): - # naive algorithm, O(n*m) (n: number of nodes, m: number of merges) - # but merges shouldn't be common, so probably not worth optimizing - count = 0 - while self._merge_first_dupe(graph): - count += 1 - if count > self.MAX_MERGES: - self.error('Way too many deduplications') - return - - def _merge_first_dupe(self, graph): - dupe_index = {} - for node in graph: - node_key = self._get_node_key(node) - if node_key: - other_node = dupe_index.get(node_key) - if other_node: - graph.merge_nodes(node, other_node) - return True - dupe_index[node_key] = node - return False - - def _get_node_key(self, node): - criteria = self.DEDUPLICATION_CRITERIA.get(node.concrete_type) - if not criteria: - return None - return ( - node.concrete_type, - tuple( - self._get_criterion_value(node, criterion) - for criterion in criteria - ) - ) - - def _get_criterion_value(self, node, criterion_name): - if criterion_name == 'type': - return node.type - return node[criterion_name] diff --git a/share/regulate/steps/normalize_agent_names.py b/share/regulate/steps/normalize_agent_names.py deleted file mode 100644 index 8785161b9..000000000 --- a/share/regulate/steps/normalize_agent_names.py +++ /dev/null @@ -1,83 +0,0 @@ -import re - -from share.regulate.steps import NodeStep -from share.transform.chain.links import GuessAgentTypeLink -from share.schema import ShareV2Schema -from share.util import strip_whitespace - - -class NormalizeAgentNames(NodeStep): - """Parse agent names and save in a normalized form. 
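The `Deduplicate` step above turns each node into a hashable key: its concrete type plus the values of that type's criterion fields; only nodes with equal keys get merged. A standalone sketch of the keying idea, with plain dicts standing in for graph nodes (the criteria here mirror two entries of `DEDUPLICATION_CRITERIA`; `sorted()` is added for deterministic ordering):

```python
DEDUPLICATION_CRITERIA = {
    'workidentifier': {'uri'},
    'tag': {'name'},
}


def node_key(node):
    criteria = DEDUPLICATION_CRITERIA.get(node['concrete_type'])
    if not criteria:
        return None  # this type is never deduplicated
    return (node['concrete_type'], tuple(node[c] for c in sorted(criteria)))


a = {'concrete_type': 'tag', 'name': 'oceanography'}
b = {'concrete_type': 'tag', 'name': 'oceanography'}
assert node_key(a) == node_key(b)  # equal keys -> the real step calls graph.merge_nodes(a, b)
```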
- - Example config: - ```yaml - - namespace: share.regulate.steps.node - name: normalize_agent_names - ``` - """ - NULL_RE = re.compile(r'^(?:\s*(none|null|empty)\s*)?$', re.I) - - def valid_target(self, node): - return node.concrete_type == 'abstractagent' - - def regulate_node(self, node): - if node.type == 'person': - self._normalize_person(node) - else: - self._normalize_non_person(node) - - def _normalize_person(self, node): - name = strip_whitespace(node['name'] or '') - - if not name: - # try building the name from parts - name = strip_whitespace(' '.join(( - node['given_name'] or '', - node['additional_name'] or '', - node['family_name'] or '', - node['suffix'] or '', - ))) - - if not name: - # try getting the name from "cited_as" - cited_as_names = [ - relation['cited_as'] - for relation in node['work_relations'] - if relation['cited_as'] - ] - if len(cited_as_names) == 1: - name = cited_as_names[0] - - if not name or self.NULL_RE.match(name): - self.info('Discarding unnamed person', node.id) - node.delete() - else: - node['name'] = name - - def _normalize_non_person(self, node): - name = node['name'] - if not name or self.NULL_RE.match(name): - self.info('Discarding unnamed agent', node.id) - node.delete() - return - - maybe_type_name = GuessAgentTypeLink(default=node.type).execute(name) - maybe_type = ShareV2Schema().get_type(maybe_type_name) - # If the new type is MORE specific, upgrade. Otherwise ignore - if maybe_type.distance_from_concrete_type > node.schema_type.distance_from_concrete_type: - node.type = maybe_type.name - - match = re.match(r'^(.*(?:Departa?ment|Institute).+?);(?: (.+?); )?([^;]+)$', name, re.I) - if match: - *parts, location = [strip_whitespace(x) for x in match.groups() if x and strip_whitespace(x)] - node['name'] = ' - '.join(reversed(parts)) - node['location'] = location - return - - match = re.match(r'^(.+?), ([^,]+), ([^,]+)$', name, re.I) - if match: - name, *location = [strip_whitespace(x) for x in match.groups() if x and strip_whitespace(x)] - node['name'] = name - node['location'] = ', '.join(location) - - node['name'] = name diff --git a/share/regulate/steps/normalize_iris.py b/share/regulate/steps/normalize_iris.py deleted file mode 100644 index 30de9732f..000000000 --- a/share/regulate/steps/normalize_iris.py +++ /dev/null @@ -1,79 +0,0 @@ -from share.regulate.steps import NodeStep -from share.transform.chain.links import IRILink -from share.transform.chain.exceptions import InvalidIRI - - -class NormalizeIRIs(NodeStep): - """Normalize identifiers into consistent IRI formats. - - Parse out the IRI's scheme and authority and set them on the identifier model. - - Settings: - [urn_fallback]: Boolean (default False). If True, unrecognized identifiers will - be normalized into a URN with authority "share", e.g. `urn://share/123`. - If False, nodes with unrecognized identifiers will be discarded. - [blocked_schemes]: Optional list of schemes. Identifier nodes with a blocked - scheme will be discarded. - [blocked_authorities]: Optional list of authorities. Identifier nodes with - a blocked authority will be discarded. - [iri_field]: Field in which the IRI is stored (default 'uri') - [scheme_field]: Field in which the IRI's scheme is stored (default 'scheme') - [authority_field]: Field in which the IRI's authority is stored (default 'host') - [node_types]: Optional list of node types (inherited from NodeStep). - If given, filter the list of nodes this step will consider. - - Example config: - Normalize work identifier IRIs. 
Discard work identifiers with scheme 'mailto', or
-        with authority 'issn' or 'orcid.org'.
-
-        ```json
-        {
-            "namespace": "share.regulate.steps.node",
-            "name": "normalize_iris",
-            "settings": {
-                "node_types": ["workidentifier"],
-                "blocked_schemes": ["mailto"],
-                "blocked_authorities": ["issn", "orcid.org"]
-            }
-        }
-        ```
-    """
-    def __init__(self, *args,
-                 blocked_schemes=None,
-                 blocked_authorities=None,
-                 urn_fallback=False,
-                 iri_field='uri',
-                 scheme_field='scheme',
-                 authority_field='host',
-                 **kwargs):
-        super().__init__(*args, **kwargs)
-
-        self.iri_field = iri_field
-        self.scheme_field = scheme_field
-        self.authority_field = authority_field
-        self.blocked_schemes = blocked_schemes
-        self.blocked_authorities = blocked_authorities
-        self.urn_fallback = urn_fallback
-
-    def regulate_node(self, node):
-        old_iri = node[self.iri_field]
-        try:
-            ret = IRILink(urn_fallback=self.urn_fallback).execute(old_iri)
-            node[self.authority_field] = ret['authority']
-            node[self.scheme_field] = ret['scheme']
-
-            new_iri = ret['IRI']
-            if old_iri != new_iri:
-                node[self.iri_field] = new_iri
-                self.info('Normalized IRI "{}" into "{}"'.format(old_iri, new_iri), node.id)
-
-            if self.blocked_schemes and ret['scheme'] in self.blocked_schemes:
-                self.info('Discarding identifier based on invalid scheme "{}"'.format(ret['scheme']), node.id)
-                node.delete()
-            elif self.blocked_authorities and ret['authority'] in self.blocked_authorities:
-                self.info('Discarding identifier based on invalid authority "{}"'.format(ret['authority']), node.id)
-                node.delete()
-
-        except InvalidIRI:
-            self.info('Discarding identifier based on unrecognized IRI "{}"'.format(old_iri), node.id)
-            node.delete()
diff --git a/share/regulate/steps/tokenize_tags.py b/share/regulate/steps/tokenize_tags.py
deleted file mode 100644
index 34102e3a2..000000000
--- a/share/regulate/steps/tokenize_tags.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import re
-
-from share.regulate.steps import NodeStep
-from share.util import strip_whitespace
-
-
-class TokenizeTags(NodeStep):
-    """Recognize lists of tags, split them into multiple nodes
-
-    Example config:
-    ```yaml
-    - namespace: share.regulate.steps.node
-      name: tokenize
-    ```
-    """
-    node_types = ('tag',)
-
-    def regulate_node(self, node):
-        tags = list(map(
-            lambda t: t.lower(),
-            filter(None, (
-                strip_whitespace(part)
-                for part in re.split(',|;', node['name'])
-            ))
-        ))
-
-        if not tags:
-            self.info('Discarding nameless tag', node.id)
-            node.delete()
-            return
-
-        if len(tags) == 1:
-            node['name'] = tags[0]
-            return
-
-        through_tags = node['work_relations']
-        for tag in sorted(tags):
-            new_tag = node.graph.add_node(None, 'tag', {'name': tag})
-            self.info('Added tokenized tag', new_tag.id)
-            for through_tag in through_tags:
-                node.graph.add_node(None, 'throughtags', {
-                    'tag': new_tag,
-                    'creative_work': through_tag['creative_work']
-                })
-
-        self.info('Discarded tag with multiple names', node.id)
-        node.delete()
diff --git a/share/regulate/steps/trim_cycles.py b/share/regulate/steps/trim_cycles.py
deleted file mode 100644
index ce3a7df27..000000000
--- a/share/regulate/steps/trim_cycles.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from share.regulate.steps import NodeStep
-
-
-class TrimCycles(NodeStep):
-    """Remove circular relations
-
-    Settings:
-        relation_fields: Non-empty list of field names. If length 1, delete the node if the field
-            points to itself. If length >1, delete the node if more than one of the given fields
-            point to the same node.
-        [delete_node]: Boolean (default True).
If false, remove the offending edge(s) instead of - deleting the node. - [node_types]: Optional list of node types (inherited from NodeStep). - If given, filter the list of nodes this step will consider. - - Example config: - ```yaml - - namespace: share.regulate.steps.node - name: trim_cycles - settings: - relation_fields: - - subject - - related - node_types: - - agentrelation - ``` - """ - def __init__(self, *args, - relation_fields, - delete_node=True, - **kwargs): - super().__init__(*args, **kwargs) - - self.relation_fields = relation_fields - self.delete_node = delete_node - - def regulate_node(self, node): - if len(self.relation_fields) == 1: - field_name = self.relation_fields[0] - related = node[field_name] - if related and related == node: - self._trim(node) - else: - related_nodes = set(filter(None, ( - node[f] for f in self.relation_fields - ))) - if len(related_nodes) != len(self.relation_fields): - self._trim(node) - - def _trim(self, node): - if self.delete_node: - self.info('Discarding node with circular relation', node.id) - node.delete() - else: - self.info('Discarding circular relations from node', node.id) - for field_name in self.relation_fields: - del node[field_name] diff --git a/share/regulate/steps/validate.py b/share/regulate/steps/validate.py deleted file mode 100644 index 6db3a0dad..000000000 --- a/share/regulate/steps/validate.py +++ /dev/null @@ -1,12 +0,0 @@ -from django.core.exceptions import ValidationError - -from share.models.validators import JSONLDValidator -from share.regulate.steps import ValidationStep - - -class JSONLDValidatorStep(ValidationStep): - def validate_graph(self, graph): - try: - JSONLDValidator()(graph.to_jsonld(in_edges=False)) - except ValidationError as e: - self.reject('Failed JSON-LD schema validation', exception=e) diff --git a/share/regulate/steps/whitespace.py b/share/regulate/steps/whitespace.py deleted file mode 100644 index f5f6907cd..000000000 --- a/share/regulate/steps/whitespace.py +++ /dev/null @@ -1,30 +0,0 @@ -import re - -from share.regulate.steps import NodeStep -from share.util import strip_whitespace - - -class StripWhitespace(NodeStep): - """Normalize whitespace in string values. - - Strip leading and trailing whitespace, and replace non-space whitespace and - multiple whitespace characters in a row with a single space. - - If a field value is an empty string or something like "none", discard it. 
- - Example config: - ```yaml - - namespace: share.regulate.steps.node - name: whitespace - ``` - """ - NULL_RE = re.compile(r'^(?:\s*(none|null|empty)\s*)?$', re.I) - - def regulate_node(self, node): - for k, v in node.attrs().items(): - if isinstance(v, str): - v = strip_whitespace(v) - if self.NULL_RE.match(v): - node[k] = '' - else: - node[k] = v diff --git a/share/schema/__init__.py b/share/schema/__init__.py deleted file mode 100644 index 632b894f4..000000000 --- a/share/schema/__init__.py +++ /dev/null @@ -1,75 +0,0 @@ -import yaml -from typing import Set, Union - -from share.schema.exceptions import SchemaKeyError -from share.schema.loader import SchemaLoader -from share.schema.shapes import ( - ShareV2SchemaType, - ShareV2SchemaAttribute, - ShareV2SchemaRelation, -) - - -class ShareV2Schema: - # will be loaded only once - _schema_types = None - _schema_type_names = None - _schema_fields = None - - @classmethod - def load_schema(cls): - with open('share/schema/schema-spec.yaml') as fobj: - type_spec_list = yaml.load(fobj, Loader=yaml.CLoader) - loader = SchemaLoader(type_spec_list) - cls._schema_types = loader.schema_types - cls._schema_fields = loader.schema_fields - - cls._schema_type_names = { - concrete_type.lower(): { - schema_type.name - for schema_type in loader.schema_types.values() - if schema_type.concrete_type == concrete_type - } - for concrete_type in loader.concrete_types - } - - @property - def schema_types(self): - if ShareV2Schema._schema_types is None: - ShareV2Schema.load_schema() - return ShareV2Schema._schema_types - - @property - def schema_fields(self): - if ShareV2Schema._schema_fields is None: - ShareV2Schema.load_schema() - return ShareV2Schema._schema_fields - - @property - def schema_type_names(self): - if ShareV2Schema._schema_type_names is None: - ShareV2Schema.load_schema() - return ShareV2Schema._schema_type_names - - def get_type(self, type_name) -> ShareV2SchemaType: - try: - return self.schema_types[type_name.lower()] - except KeyError: - raise SchemaKeyError(f'type "{type_name}" not found in SHARE schema') - - def get_field(self, type_name, field_name) -> Union[ShareV2SchemaAttribute, ShareV2SchemaRelation]: - if type_name.lower() in self.schema_type_names: - concrete_type = type_name - else: - concrete_type = self.get_type(type_name).concrete_type - key = (concrete_type.lower(), field_name.lower()) - try: - return self.schema_fields[key] - except KeyError: - raise SchemaKeyError(f'field "{type_name}.{field_name}" not found in SHARE schema') - - def get_type_names(self, concrete_type) -> Set[str]: - try: - return self.schema_type_names[concrete_type.lower()] - except KeyError: - raise SchemaKeyError(f'concrete type "{concrete_type}" not found in SHARE schema') diff --git a/share/schema/exceptions.py b/share/schema/exceptions.py deleted file mode 100644 index 563b1c96c..000000000 --- a/share/schema/exceptions.py +++ /dev/null @@ -1,9 +0,0 @@ -from share.exceptions import ShareException - - -class SchemaLoadError(ShareException): - pass - - -class SchemaKeyError(ShareException): - pass diff --git a/share/schema/loader.py b/share/schema/loader.py deleted file mode 100644 index e73666056..000000000 --- a/share/schema/loader.py +++ /dev/null @@ -1,176 +0,0 @@ -from share.schema.exceptions import SchemaLoadError -from share.schema.shapes import ( - RelationShape, - AttributeDataType, - AttributeDataFormat, - ShareV2SchemaAttribute, - ShareV2SchemaRelation, - ShareV2SchemaType, -) - - -class SchemaLoader: - def __init__(self, type_spec_list): - 
self.schema_types = {} - self.schema_fields = {} - self.explicit_field_names = {} - - try: - self.concrete_types = set( - type_spec['concrete_type'] - for type_spec in type_spec_list - ) - self._load_all_attrs_and_relations(type_spec_list) - self._load_types(type_spec_list) - except KeyError as error: - raise SchemaLoadError(error) - - def _load_all_attrs_and_relations(self, type_spec_list): - for type_spec in type_spec_list: - concrete_type = type_spec['concrete_type'] - self._load_attributes(concrete_type, type_spec.get('attributes', [])) - self._load_relations(concrete_type, type_spec.get('relations', [])) - - def _load_types(self, type_spec_list): - # assumes load_all_attrs_and_relations has already been called - for type_spec in type_spec_list: - concrete_type = type_spec['concrete_type'] - type_tree = type_spec.get('type_tree', None) - - if type_tree: - self._add_type_tree(concrete_type, type_tree) - else: - self._add_type(concrete_type, concrete_type) - - def _load_attributes(self, concrete_type, attr_list): - for attr_dict in attr_list: - attr = self._build_attribute(attr_dict) - self._add_field(concrete_type, attr) - - def _load_relations(self, concrete_type, relation_list): - for relation_dict in relation_list: - relation = self._build_relation(relation_dict) - self._add_relation(concrete_type, relation) - if relation.inverse_relation: - self._add_inverse_relation(concrete_type, relation) - - def _add_inverse_relation(self, concrete_type, relation): - inverse_relation_shape = { - RelationShape.MANY_TO_MANY: RelationShape.MANY_TO_MANY, - RelationShape.MANY_TO_ONE: RelationShape.ONE_TO_MANY, - RelationShape.ONE_TO_MANY: RelationShape.MANY_TO_ONE, - }[relation.relation_shape] - - inverse_relation = ShareV2SchemaRelation( - name=relation.inverse_relation, - relation_shape=inverse_relation_shape, - related_concrete_type=concrete_type, - - # same through type, but flip incoming/outgoing relations - through_concrete_type=relation.through_concrete_type, - incoming_through_relation=relation.outgoing_through_relation, - outgoing_through_relation=relation.incoming_through_relation, - - inverse_relation=relation.name, - is_implicit=True, - ) - self._add_relation(relation.related_concrete_type, inverse_relation) - - def _add_type(self, concrete_type, type_name, type_lineage=()): - self.schema_types[type_name.lower()] = ShareV2SchemaType( - name=type_name, - concrete_type=concrete_type, - explicit_fields=set(self.explicit_field_names.get(concrete_type, [])), - type_lineage=type_lineage, - ) - - def _add_type_tree(self, concrete_type, type_tree, parent_type_lineage=()): - for type_name, subtree in type_tree.items(): - type_lineage = (type_name, *parent_type_lineage) - self._add_type(concrete_type, type_name, type_lineage) - if subtree: - self._add_type_tree(concrete_type, subtree, type_lineage) - - def _add_relation(self, concrete_type, relation): - key = (concrete_type.lower(), relation.name.lower()) - existing_relation = self.schema_fields.get(key, None) - if existing_relation: - is_existing_relation_compatible = ( - existing_relation.is_relation - and relation.name == existing_relation.name - and relation.relation_shape == existing_relation.relation_shape - and relation.related_concrete_type == existing_relation.related_concrete_type - and relation.through_concrete_type == existing_relation.through_concrete_type - and relation.incoming_through_relation == existing_relation.incoming_through_relation - and relation.outgoing_through_relation == existing_relation.outgoing_through_relation - and 
relation.inverse_relation == existing_relation.inverse_relation - ) - if not is_existing_relation_compatible: - raise SchemaLoadError(f'relation defined two incompatible ways -- maybe a bad inverse? existing: {existing_relation} trying to add: {relation}') - - if existing_relation.is_implicit: - # let the new relation overwrite the implicit one - del self.schema_fields[key] - elif relation.is_implicit: - return # no need to do anything -- the same relation already exists - else: - raise SchemaLoadError(f'conflicting explicit relations (new: {relation} existing: {existing_relation})') - - self._add_field(concrete_type, relation) - - def _add_field(self, concrete_type, attr_or_relation): - if concrete_type not in self.concrete_types: - raise SchemaLoadError(f'invalid concrete_type ({concrete_type}) on field {attr_or_relation}') - key = (concrete_type.lower(), attr_or_relation.name.lower()) - if key in self.schema_fields: - raise SchemaLoadError(f'field defined twice: {key}') - self.schema_fields[key] = attr_or_relation - if not getattr(attr_or_relation, 'is_implicit', False): - self.explicit_field_names.setdefault(concrete_type, []).append(attr_or_relation.name) - - def _build_attribute(self, attr_dict): - return ShareV2SchemaAttribute( - # required - name=attr_dict['name'], - data_type=AttributeDataType[attr_dict['data_type'].upper()], - - # optional - is_required=attr_dict.get('is_required', False), - data_format=AttributeDataFormat[attr_dict['data_format'].upper()] if 'data_format' in attr_dict else None, - ) - - def _build_relation(self, relation_dict): - new_relation = ShareV2SchemaRelation( - # required - name=relation_dict['name'], - relation_shape=RelationShape[relation_dict['relation_shape'].upper()], - related_concrete_type=relation_dict['related_concrete_type'], - - # optional - through_concrete_type=relation_dict.get('through_concrete_type', None), - incoming_through_relation=relation_dict.get('incoming_through_relation', None), - outgoing_through_relation=relation_dict.get('outgoing_through_relation', None), - inverse_relation=relation_dict.get('inverse_relation', None), - is_required=relation_dict.get('is_required', False), - ) - if new_relation.related_concrete_type not in self.concrete_types: - raise SchemaLoadError(f'invalid related_concrete_type on relation {new_relation}') - if new_relation.through_concrete_type and new_relation.through_concrete_type not in self.concrete_types: - raise SchemaLoadError(f'invalid through_concrete_type on relation {new_relation}') - - required_m2m_attrs = { - 'through_concrete_type', - 'incoming_through_relation', - 'outgoing_through_relation', - } - present_m2m_attrs = { - attr_name - for attr_name in required_m2m_attrs - if getattr(new_relation, attr_name) - } - if present_m2m_attrs and new_relation.relation_shape != RelationShape.MANY_TO_MANY: - raise SchemaLoadError(f'{present_m2m_attrs} set on non-m2m relation {new_relation}') - if new_relation.relation_shape == RelationShape.MANY_TO_MANY and len(present_m2m_attrs) != len(required_m2m_attrs): - missing_m2m_attrs = required_m2m_attrs - present_m2m_attrs - raise SchemaLoadError(f'm2m relation {new_relation} missing required attrs {missing_m2m_attrs}') - return new_relation diff --git a/share/schema/schema-spec.yaml b/share/schema/schema-spec.yaml deleted file mode 100644 index 04b29380a..000000000 --- a/share/schema/schema-spec.yaml +++ /dev/null @@ -1,330 +0,0 @@ -- concrete_type: AbstractCreativeWork - type_tree: - CreativeWork: - DataSet: - Patent: - Poster: - Publication: - Article: - 
Book: - ConferencePaper: - Dissertation: - Preprint: - Project: - Registration: - Report: - Thesis: - WorkingPaper: - Presentation: - Repository: - Retraction: - Software: - attributes: - - name: title - data_type: string - - name: description - data_type: string - - name: is_deleted - data_type: boolean - - name: date_published - data_type: datetime - - name: date_updated - data_type: datetime - - name: free_to_read_type - data_type: string - data_format: uri - - name: free_to_read_date - data_type: datetime - - name: rights - data_type: string - - name: language - data_type: string - - name: registration_type - data_type: string - - name: withdrawn - data_type: boolean - - name: justification - data_type: string - - name: extra - data_type: object - relations: - - name: subjects - relation_shape: many_to_many - related_concrete_type: Subject - through_concrete_type: ThroughSubjects - incoming_through_relation: creative_work - outgoing_through_relation: subject - inverse_relation: creative_works - - name: tags - relation_shape: many_to_many - related_concrete_type: Tag - through_concrete_type: ThroughTags - incoming_through_relation: creative_work - outgoing_through_relation: tag - inverse_relation: creative_works - - name: related_agents - relation_shape: many_to_many - related_concrete_type: AbstractAgent - through_concrete_type: AbstractAgentWorkRelation - incoming_through_relation: creative_work - outgoing_through_relation: agent - inverse_relation: related_works - - name: related_works - relation_shape: many_to_many - related_concrete_type: AbstractCreativeWork - through_concrete_type: AbstractWorkRelation - incoming_through_relation: subject - outgoing_through_relation: related - -- concrete_type: AbstractAgent - type_tree: - Agent: - Organization: - Consortium: - Department: - Institution: - Person: - attributes: - - name: name - data_type: string - - name: location - data_type: string - - name: family_name - data_type: string - - name: given_name - data_type: string - - name: additional_name - data_type: string - - name: suffix - data_type: string - - name: extra - data_type: object - relations: - - name: related_agents - relation_shape: many_to_many - related_concrete_type: AbstractAgent - through_concrete_type: AbstractAgentRelation - incoming_through_relation: subject - outgoing_through_relation: related - - name: related_works - relation_shape: many_to_many - related_concrete_type: AbstractCreativeWork - through_concrete_type: AbstractAgentWorkRelation - incoming_through_relation: agent - outgoing_through_relation: creative_work - inverse_relation: related_agents - -- concrete_type: AbstractAgentWorkRelation - type_tree: - AgentWorkRelation: - Contributor: - Creator: - PrincipalInvestigator: - PrincipalInvestigatorContact: - Funder: - Host: - Publisher: - attributes: - - name: cited_as - data_type: string - - name: order_cited - data_type: integer - - name: extra - data_type: object - relations: - - name: creative_work - relation_shape: many_to_one - related_concrete_type: AbstractCreativeWork - inverse_relation: agent_relations - is_required: true - - name: agent - relation_shape: many_to_one - related_concrete_type: AbstractAgent - inverse_relation: work_relations - is_required: true - - name: awards - relation_shape: many_to_many - related_concrete_type: Award - through_concrete_type: ThroughAwards - incoming_through_relation: funder - outgoing_through_relation: award - -- concrete_type: AbstractAgentRelation - type_tree: - AgentRelation: - IsAffiliatedWith: - IsEmployedBy: 
- IsMemberOf: - attributes: - - name: extra - data_type: object - relations: - - name: subject - relation_shape: many_to_one - related_concrete_type: AbstractAgent - inverse_relation: outgoing_agent_relations - is_required: true - - name: related - relation_shape: many_to_one - related_concrete_type: AbstractAgent - inverse_relation: incoming_agent_relations - is_required: true - -- concrete_type: AbstractWorkRelation - type_tree: - WorkRelation: - Cites: - Compiles: - Corrects: - Discusses: - Disputes: - Documents: - Extends: - IsDerivedFrom: - IsPartOf: - IsSupplementTo: - References: - RepliesTo: - Retracts: - Reviews: - UsesDataFrom: - attributes: - - name: extra - data_type: object - relations: - - name: subject - relation_shape: many_to_one - related_concrete_type: AbstractCreativeWork - inverse_relation: outgoing_creative_work_relations - is_required: true - - name: related - relation_shape: many_to_one - related_concrete_type: AbstractCreativeWork - inverse_relation: incoming_creative_work_relations - is_required: true - -- concrete_type: WorkIdentifier - attributes: - - name: uri - data_type: string - data_format: uri - is_required: true - - name: host - data_type: string - - name: scheme - data_type: string - - name: extra - data_type: object - relations: - - name: creative_work - relation_shape: many_to_one - related_concrete_type: AbstractCreativeWork - inverse_relation: identifiers - is_required: true - -- concrete_type: AgentIdentifier - attributes: - - name: uri - data_type: string - data_format: uri - is_required: true - - name: host - data_type: string - - name: scheme - data_type: string - - name: extra - data_type: object - relations: - - name: agent - relation_shape: many_to_one - related_concrete_type: AbstractAgent - inverse_relation: identifiers - is_required: true - -- concrete_type: Subject - attributes: - - name: name - data_type: string - is_required: true - - name: is_deleted - data_type: boolean - - name: uri - data_type: string - data_format: uri - - name: extra - data_type: object - relations: - - name: parent - relation_shape: many_to_one - related_concrete_type: Subject - inverse_relation: children - - name: central_synonym - relation_shape: many_to_one - related_concrete_type: Subject - inverse_relation: custom_synonyms - -- concrete_type: ThroughSubjects - attributes: - - name: is_deleted - data_type: boolean - relations: - - name: subject - relation_shape: many_to_one - related_concrete_type: Subject - inverse_relation: work_relations - is_required: true - - name: creative_work - relation_shape: many_to_one - related_concrete_type: AbstractCreativeWork - inverse_relation: subject_relations - is_required: true - -- concrete_type: Tag - attributes: - - name: name - data_type: string - is_required: true - - name: extra - data_type: object - -- concrete_type: ThroughTags - relations: - - name: tag - relation_shape: many_to_one - related_concrete_type: Tag - inverse_relation: work_relations - is_required: true - - name: creative_work - relation_shape: many_to_one - related_concrete_type: AbstractCreativeWork - inverse_relation: tag_relations - is_required: true - -- concrete_type: Award - attributes: - - name: name - data_type: string - - name: description - data_type: string - - name: date - data_type: datetime - - name: award_amount - data_type: integer - - name: uri - data_type: string - data_format: uri - - name: extra - data_type: object - -- concrete_type: ThroughAwards - relations: - - name: funder - relation_shape: many_to_one - related_concrete_type: 
AbstractAgentWorkRelation
-    inverse_relation: null
-    is_required: true
-  - name: award
-    relation_shape: many_to_one
-    related_concrete_type: Award
-    inverse_relation: null
-    is_required: true
diff --git a/share/schema/shapes.py b/share/schema/shapes.py
deleted file mode 100644
index ee720320f..000000000
--- a/share/schema/shapes.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from enum import Enum
-from typing import Set, NamedTuple, Optional, Tuple
-
-
-RelationShape = Enum('RelationShape', ['MANY_TO_MANY', 'MANY_TO_ONE', 'ONE_TO_MANY'])
-AttributeDataType = Enum('AttributeDataType', ['BOOLEAN', 'STRING', 'INTEGER', 'DATETIME', 'OBJECT'])
-AttributeDataFormat = Enum('AttributeDataFormat', ['URI'])
-
-
-class ShareV2SchemaType(NamedTuple):
-    name: str
-    concrete_type: str
-    explicit_fields: Set[str]
-    type_lineage: Tuple[str, ...] = ()
-
-    @property
-    def distance_from_concrete_type(self):
-        return len(self.type_lineage)
-
-
-class ShareV2SchemaAttribute(NamedTuple):
-    name: str
-    data_type: AttributeDataType
-    data_format: Optional[AttributeDataFormat]
-    is_required: bool = False
-    is_relation: bool = False
-
-
-class ShareV2SchemaRelation(NamedTuple):
-    name: str
-    relation_shape: RelationShape
-    related_concrete_type: str
-    inverse_relation: str
-    through_concrete_type: Optional[str] = None
-    incoming_through_relation: Optional[str] = None
-    outgoing_through_relation: Optional[str] = None
-    is_required: bool = False
-    is_implicit: bool = False
-    is_relation: bool = True
diff --git a/share/search/index_messenger.py b/share/search/index_messenger.py
index 67a7b154b..d7ee2655e 100644
--- a/share/search/index_messenger.py
+++ b/share/search/index_messenger.py
@@ -45,15 +45,14 @@ def notify_indexcard_update(self, indexcards, *, urgent=False):
             ),
             urgent=urgent,
         )
-        if FeatureFlag.objects.flag_is_up(FeatureFlag.IGNORE_SHAREV2_INGEST):
-            # for back-compat:
-            self.notify_suid_update(
-                [
-                    _indexcard.source_record_suid_id
-                    for _indexcard in indexcards
-                ],
-                urgent=urgent,
-            )
+        # send "suid"-based messages for indexes that use `MessageType.INDEX_SUID`
+        self.notify_suid_update(
+            [
+                _indexcard.source_record_suid_id
+                for _indexcard in indexcards
+            ],
+            urgent=urgent,
+        )
 
     def notify_suid_update(self, suid_ids, *, urgent=False):
         self.send_messages_chunk(
diff --git a/share/search/index_strategy/sharev2_elastic8.py b/share/search/index_strategy/sharev2_elastic8.py
index 6de96a668..868b72981 100644
--- a/share/search/index_strategy/sharev2_elastic8.py
+++ b/share/search/index_strategy/sharev2_elastic8.py
@@ -5,11 +5,7 @@
 from django.db.models import F
 import elasticsearch8
 
-from share.models import (
-    FeatureFlag,
-    FormattedMetadataRecord,
-    SourceUniqueIdentifier,
-)
+from share.models import SourceUniqueIdentifier
 from share.search import exceptions
 from share.search import messages
 from share.search.index_strategy.elastic8 import Elastic8IndexStrategy
@@ -172,22 +168,14 @@
         return IDObfuscator.encode_id(suid_id, SourceUniqueIdentifier)
 
     def _load_docs(self, suid_ids) -> typing.Iterable[tuple[int, str]]:
-        if FeatureFlag.objects.flag_is_up(FeatureFlag.IGNORE_SHAREV2_INGEST):
-            _card_qs = (
-                DerivedIndexcard.objects
-                .filter(upriver_indexcard__source_record_suid_id__in=suid_ids)
-                .filter(deriver_identifier__in=ResourceIdentifier.objects.queryset_for_iri(SHAREv2.sharev2_elastic))
-                .annotate(suid_id=F('upriver_indexcard__source_record_suid_id'))
-            )
-            for _card in _card_qs:
-                yield (_card.suid_id, _card.derived_text)
-        else:  # legacy path: pull from FormattedMetadataRecord
- _record_qs = FormattedMetadataRecord.objects.filter( - suid_id__in=suid_ids, - record_format='sharev2_elastic', - ) - for _record in _record_qs: - yield (_record.suid_id, _record.formatted_metadata) + _card_qs = ( + DerivedIndexcard.objects + .filter(upriver_indexcard__source_record_suid_id__in=suid_ids) + .filter(deriver_identifier__in=ResourceIdentifier.objects.queryset_for_iri(SHAREv2.sharev2_elastic)) + .annotate(suid_id=F('upriver_indexcard__source_record_suid_id')) + ) + for _card in _card_qs: + yield (_card.suid_id, _card.derived_text) # optional method from IndexStrategy def pls_handle_search__passthru(self, request_body=None, request_queryparams=None) -> dict: diff --git a/share/sources/au.uow/icon.ico b/share/sources/au.uow/icon.ico deleted file mode 100644 index 771b093c3bde250e8d0e206d60fce3f023710e0e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1799 zcmah~drT8|9IqfjK}2Pfx;ed6h|93OyS}edp!6Z#R&AgR44by}j#B80>(v%ylDRQo zXq~u#n$5XrU?w^gH&B_yp&%2NZG;6MSc?cz<e~WJc-UPlF#a&^lDpsczW4cj-{0I0 z<=Qn7VQ+;|D3l0!iX@dh7x?cvLF9AqSGipBSVBlMi8ON#Vb|aoMTDBOF+gtA=&)2w zgBBLFV(}D8uv@RnBr+8r3bbY;L*vIV97YStrcmM&9Ttr?A0vQlOs6*qX+!5-G(e9E zX&D>^q_BvwTzyIrj-?f?RcVXzwFpW}OaS5?0@8pHBQ$`+XfRm?4k0aIS3t^sHAn*j z5F%eldl^)wLJ5e?I0kSS5M2v#VSvkKz-$O%BdY)=1VbRq1R)+Bh6Eg*0D^(Zi$;3G z(HudlL^kP*d=t`g3Bn=(LA%|~u(KFuTnEAkf`AYcWHRX_f^ID|5gG^GWL+_3L4sMe zxZXnO%_hKaQIl=95keXn>B|(17KLKku*o`ED6(XrLt_D91_T<7{=5QcE0K!*7se~m zR#l+|15+`p*@kP$e&no}B9payy`w20s-4DJY`7tiC#nT817^fbgp~{m4n#yoE?9@_ z$p&f+60_Fk|0<UVX*Qc46>ua>h>tMY2rF48NrqtrW{db71d^~|4#XGp{m%RbW)eva z10}f<j);jMJcuRZi(yy_BN8!;uw*<0f}~ugG<k|EH(3ddNsIZN`nmdPE?4$iu0V`q z8p4dL%x1&XsK^Zlo0Y)*YCxRE@%L{sLJZe$!cZA*HUfbL2=uS;D?@lP80N7d9uwm7 zVVE!CN|F&7Tf}0qWg<vQo8+Q13N@AQ|8hYx7*s1D^n?MMoJ^5{n3N29|8xip8k3GJ z10BV3G&Tc4OHkr^6A=&kht8lUu>t8N4V+;tpq??MQ%G|3{o?e_kuTF*i<!t(#mSv4 zRzx@{l%NZ8iAd$Rkv;D7`TC=m+fOD~$C^i?*CiA?#-8@oxqUCjb*Ng6=JedjiCW-U zRK8f*zL$vPEd2V8PNP=yYwmSFMAjUjQV}Zk=&`~MB1U@2TUmKvgV0A|cVC=%UN`7U zbo4A+y)r*{=Q}A)E|;&9N|ky$+a9V0;%(_?ju!sIr&dxEOT1h9Mr*}4!EfR%V_kh4 zldz=AKtze}MU+ST+{q08piY+^bS@;!TUuXsBE9Uy{){$$eL-t$Nr_L)sX2Y>{i|~= zkF3kOJ{@qoU2fOJ=vb(M8qNxhj*DWkoIu5ytw3C8oa*z+7}tra!Gp0a%i||5U2Ck? 
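On the `sharev2_elastic8.py` hunk above: after this change, `_load_docs` always yields `(suid_id, serialized_doc)` pairs from derived index cards. A hedged sketch of how such pairs could become Elasticsearch bulk actions; `build_bulk_actions` and the index name are hypothetical, only `_load_docs` and `_get_doc_id` come from the diff:

```python
import json


def build_bulk_actions(index_strategy, suid_ids, index_name='sharev2'):
    # each derived document is already-serialized JSON text, keyed by suid
    for suid_id, doc_json in index_strategy._load_docs(suid_ids):
        yield {
            '_index': index_name,
            '_id': index_strategy._get_doc_id(suid_id),
            '_source': json.loads(doc_json),
        }
```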
z0<rI2x_0HtaBoLne#OROt$J^HZ-nzy&zn74^Sn>{oyW?*JW`66G&R>&9L_@WsG0B8 z)<Co9v+><TOxS~o!jSskm!A*r81^MLRjf<g&yLx3DEOl`q(RqSI?^)qLg)2)u0@-{ zf$gf*>tp{6t=ljjyt!;4HFBH%V^37~g@Qp($4F!>T6*^E^$ZXRd7`%LpWEc}MGiJF zPo<ZqEgXA%|NQPl+$d#v4d1;x+-WzSMb*cOhW_5wIJ>`-IMk`?A7+nLMY@Z=sJvbO z#M^wiCHebe<;uA`9{p1M?Dh@$-L9+7;}7;l?{RmXd$#nCobQZ{)xw{A2lpNK)>aLb zN(w*QDdMK2gj+`N>i*VISyoVcvaWqGM<G{ya{qQOed(=-NA&Z%AOD&ZvWOX^Or-qK z=X5g;7YvV74Ta9{s^wP14b1cOpyuDQHhFu*y3N?K4STN4+bmzNsg6jZ^we*wywxx| z9@*r(8GYE8wI%QFUs3iS6MQYhzR`QS!HdlcI_A6$-0=)#w6Fg8TfxW|QPk?PdG00D zwz8wO#~bdYU-slF*-4GP*h=22)wAB1zy0*^F7BH0kU0=VAp&UEQ+pZyUz1$AR&p{~ Gz4c#sXTwea diff --git a/share/sources/au.uow/source.yaml b/share/sources/au.uow/source.yaml deleted file mode 100644 index a2216d882..000000000 --- a/share/sources/au.uow/source.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Digital Commons/Bepress -configs: -- base_url: http://ro.uow.edu.au/do/oai/ - disabled: false - earliest_date: 2000-01-19T00:00:00Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: au.uow - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://ro.uow.edu.au -long_title: Research Online @ University of Wollongong -name: au.uow -user: providers.au.uow diff --git a/share/sources/be.ghent/icon.ico b/share/sources/be.ghent/icon.ico deleted file mode 100644 index 518e3fd5338f606d12554733e803c2752d86cee8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 159 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61SBU+%rFB|%*9TgAsieWw;%dH0CK}UT^vI+ zCZ3(-&38b7gY{tdcH5sjGJK_eMu_>=aEtw569{$C%DUM2>gn^`_LE%h94?{vZ!oJ| z;|zYV_;=Xm;AgTeiW7?^8w=Ln^q9@~e}D7m=|xlDUtrY#;ho}p-7y(x1%s!npUXO@ GgeCxy1~?%A diff --git a/share/sources/be.ghent/source.yaml b/share/sources/be.ghent/source.yaml deleted file mode 100644 index de1c8dcbb..000000000 --- a/share/sources/be.ghent/source.yaml +++ /dev/null @@ -1,33 +0,0 @@ -configs: -- base_url: https://biblio.ugent.be/oai - disabled: false - earliest_date: 2016-12-14T15:38:10Z - harvester: oai - harvester_kwargs: {metadata_prefix: mods} - label: be.ghent.mods - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: mods - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -- base_url: https://biblio.ugent.be/oai - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: be.ghent - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: https://biblio.ugent.be/ -long_title: Ghent University Academic Bibliography -name: be.ghent -user: providers.be.ghent diff --git a/share/sources/br.pcurio/icon.ico b/share/sources/br.pcurio/icon.ico deleted file mode 100644 index dc3b1978c40378369beae78fbcd5807a905dceef..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 894 zcmb7D-BS`#6yK(Qz^A6i9&7qH8lP&W$*J+dhiV$fryiQ7QUeVW6(mDhM1cWYz;%Hg zbb;NK5BYFIKuHn#5S<0WOa#(tG(e!PGDD4T{q8+y&OP^c=brnUb0O#o7%eRj@YkWP zs}OVzf}oqgKnl76>^y$~uy+~1FHo=7tF_u*p|G;?#Xu~m^_UvNOiLaQ<H;Q4YPDK} z20UN_)avzAhJ}4GrI|!Y%DcAeWR`K7md#tFfn+EitDaN-;>U53FB#W4+{MG(UOMeF z;mdQ{)OO;yn0JLEoN;(J%{CAGEav1o4U8F`x*2p*rojla*=DoakvX$guA--5jUFA8 zN)IwS;DX3jbY3feBbHEJ#xgo2(U=I@>vZ{Cu7KO?R}k(6(J;d>qmDHc{j|<zNs^=~ 
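Each `source.yaml` deleted in this stretch points a generic OAI harvester at an endpoint via `base_url`, `metadata_prefix`, and rate-limit settings. A rough sketch of the request such a config drives, using the `au.uow` endpoint above; the helper is illustrative, and paging via `resumptionToken` is omitted:

```python
import requests


def list_records(base_url, metadata_prefix='oai_dc', from_date=None):
    # fetch one page of an OAI-PMH ListRecords response (XML text)
    params = {'verb': 'ListRecords', 'metadataPrefix': metadata_prefix}
    if from_date:
        params['from'] = from_date
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    return response.text  # a real harvester would also follow resumptionToken


print(list_records('http://ro.uow.edu.au/do/oai/')[:200])
```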
ziX`R@I<0YdKs-m|W>PMfTg{Yu22s;~qex(}QiN~S$b^W%=F!m+MNwALs@G|;{@!r| zWg)$3e<<S*c__D-@L+r%qBmJXQF}Pfem?{yRVtNgwYvW`rF`{Vtkl4^z#?W~W_6P` zzZk(4oY!0a6`*#xQfUYT9$qR}zNL01p0<tkb4U5Z%0Zrv*E=E{`_RXo=<3*DT;+1P zK|VS8#YB^8R}eQE-nZT!d2~nAabNQIf%Him2n6k|{MK6v(GZHF4u>P3&!1J#OlvWi zV#JvFWj805NOpF~+8=)Gd?vehSG2wv5ekHY5usEnRjE{^Qt7N(b%nR1nSCK2F&XT( z&;9|#*as`$$u&F##}}7Ye4*|aQ}KAh;jo{co;Ioio~G$wFh~a0IVxk%6w;$W-_Dp2 z(#f(cC<bWKWHy3qHk(SNBD-v#VwNjabEmbvQ(9Lbl*{E3iFlLTY`jKrP$&d;GAi5* mqqsM^yI&|?G-Le|t+@o+f|*RE{o!cspPp<IF8_!BUH<`vrc?O< diff --git a/share/sources/br.pcurio/source.yaml b/share/sources/br.pcurio/source.yaml deleted file mode 100644 index e4e0d8a22..000000000 --- a/share/sources/br.pcurio/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: https://www.maxwell.vrac.puc-rio.br/DC_Todos.php - disabled: false - earliest_date: null # earliestDatestamp is earliest published - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false} - label: br.pcurio - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://www.maxwell.vrac.puc-rio.br -long_title: Pontifical Catholic University of Rio de Janeiro -name: br.pcurio -user: providers.br.pcurio diff --git a/share/sources/ca.lwbin/icon.ico b/share/sources/ca.lwbin/icon.ico deleted file mode 100644 index b5dfa56ba85a4ab45372ef6588be32f8afd0a2e6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 538 zcmV+#0_FXQP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00GiTL_t(2k!4a5lp{e9{mE-67-(2%2s8u+0s{vPfx*GR!NI}6LEzxv;BYap z5I6_~f&&MEgMovEgMoeb1{N9y8WtJ?1q%iJ_AMm0yUBj5dcW)U|5c;(l$Orn4Rl5T z_)&0<P1gY!Uv!D!KIm^>tt&JsyvMtqzz^&_A1iApL^*$uE+@o?6N9odb{vA4S`{Bc zQJCe@0NDHt@+eRQ2s4nds7$pm0M<A0RY3#*e9!$o|M;HUDgFPOY_@@}Y&jP|&OF?< zyl*3b%{Epgm6d7##I|KgvTyn@kO(Lv0AoEdBDn2XL;x^z*9(A`h$M-f5Fdmx4PZ<} zP~@C8#+ZrKnO-g}fU+`qP5=t$ikYU2jtUBZu~uiHbf660({39@#7=cSGeoFAOZQi7 zQ5*%waR|0>-b+~-H`m==ByHnC?7VIoUw^RxdM~zcaR>m^s;;4+GsK{crBe_INT*oB zAYtGzY+9n#@BJ76DdI=$ns8+FT_;I)s}%rY?As`Bc0C&Y-SL5&^;(#TV6Ff~uiK3x z30mv=`WlMtELTT2;l2;v#}Fcm(E>%1gbx6bHP+7Z7t^EkbaqVLuV#0%E=v=Flpkxi c_;hif&sTlpC<F6O2mk;807*qoM6N<$f{=3JNdN!< diff --git a/share/sources/ca.lwbin/source.yaml b/share/sources/ca.lwbin/source.yaml deleted file mode 100644 index 7f6e22316..000000000 --- a/share/sources/ca.lwbin/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: http://130.179.67.140/api/3/action/current_package_list_with_resources - disabled: false - earliest_date: null - harvester: ca.lwbin - harvester_kwargs: {} - label: ca.lwbin - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: ca.lwbin - transformer_kwargs: {} -home_page: http://130.179.67.140 -long_title: Lake Winnipeg Basin Information Network -name: ca.lwbin -user: providers.ca.lwbin diff --git a/share/sources/ca.umontreal/icon.ico b/share/sources/ca.umontreal/icon.ico deleted file mode 100644 index a1b16a2fa37bc65cd11a92b2a568f32a56250583..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 574 zcmV-E0>S->P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0005?Nkl<ZIE{^yziU%b6vsdJy_aZGq%A5z3)O;@R!~&BIH)K(C<x-x$v+^V zgHk9sc9fzHMT9P$bW^*zh+4tYY85p8tdW93YD&`B<h}I$xc4|D7{uo3na(};o)6!9 z4qqiv^_7>OJE)BzE|db1)>(`po!NtO7iZ-A`{!&OJc{UO!{&54$x0=nyE94T`xqlA zC&`bQ+pzZOsT6aMBrZm7JQMmRgi}*e^IP@LcP@$5kfYm5&lhTpPrhc?zAbc{fGgvV 
[base85-encoded icon data omitted]
diff --git a/share/sources/ca.umontreal/source.yaml b/share/sources/ca.umontreal/source.yaml
deleted file mode 100644
index 747838657..000000000
--- a/share/sources/ca.umontreal/source.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# DSpace
-configs:
-- base_url: http://papyrus.bib.umontreal.ca/oai/request
-  disabled: false
-  earliest_date: 2005-05-18T18:27:23Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: ca.umontreal.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://papyrus.bib.umontreal.ca/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: ca.umontreal
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://papyrus.bib.umontreal.ca
-long_title: "PAPYRUS - D\xE9p\xF4t institutionnel de l'Universit\xE9 de Montr\xE9\
-  al"
-name: ca.umontreal
-user: providers.ca.umontreal
diff --git a/share/sources/ca.uwo/icon.ico b/share/sources/ca.uwo/icon.ico
deleted file mode 100644
index 115998d7075613be8ea91c32446ea25ae829114d..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/ca.uwo/source.yaml b/share/sources/ca.uwo/source.yaml
deleted file mode 100644
index 7b6d367c4..000000000
--- a/share/sources/ca.uwo/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://ir.lib.uwo.ca/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: ca.uwo
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://ir.lib.uwo.ca
-long_title: Western University
-name: ca.uwo
-user: providers.ca.uwo
diff --git a/share/sources/ch.cern/icon.ico b/share/sources/ch.cern/icon.ico
deleted file mode 100644
index d8f02533f6da4584c019f61cc29957911c6e3472..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/ch.cern/source.yaml b/share/sources/ch.cern/source.yaml
deleted file mode 100644
index 12bd54f4e..000000000
--- a/share/sources/ch.cern/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://cds.cern.ch/oai2d
-  disabled: false
-  earliest_date: 2003-06-02T08:06:23Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: ch.cern
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://cds.cern.ch
-long_title: CERN Document Server
-name: ch.cern
-user: providers.ch.cern
diff --git a/share/sources/com.arizona.openrepository/icon.ico b/share/sources/com.arizona.openrepository/icon.ico
deleted file mode 100644
index e07b05e34e34b09d7726545403d392c51a679fa1..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.arizona.openrepository/source.yaml b/share/sources/com.arizona.openrepository/source.yaml
deleted file mode 100644
index f9603cdf2..000000000
--- a/share/sources/com.arizona.openrepository/source.yaml
+++ /dev/null
@@ -1,60 +0,0 @@
-configs:
-- base_url: http://arizona.openrepository.com/arizona/oai/request
-  disabled: false
-  earliest_date: 2010-06-10T16:58:45Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: com.arizona.openrepository.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [
-      col_10150_595893,
-      col_10150_139113,
-      col_10150_618937,
-      col_10150_128489,
-      com_10150_599293,
-      com_10150_127305,
-      com_10150_577011,
-      col_10150_129652,
-      col_10150_129651,
-      com_10150_139229,
-      col_10150_610433,
-      com_10150_145770,
-      col_10150_322404,
-      col_10150_621598,
-      col_10150_622540,
-      col_10150_296926,
-      col_10150_624348,
-      col_10150_135390,
-      col_10150_135403
-    ]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map:
-      "Book Section": Book
-      "Electronic Thesis": Thesis
-      "Thesis-Reproduction (electronic)": Thesis
-      "Electronic Report": Thesis
-      "Report-Reproduction (electronic)": Thesis
-      "Electronic Dissertation": Dissertation
-      "Dissertation-Reproduction (electronic)": Dissertation
-- base_url: http://arizona.openrepository.com/arizona/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: com.arizona.openrepository
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://arizona.openrepository.com/arizona/
-long_title: The UA Campus Repository
-name: com.arizona.openrepository
-user: providers.com.arizona.openrepository
diff --git a/share/sources/com.biomedcentral/icon.ico b/share/sources/com.biomedcentral/icon.ico
deleted file mode 100644
index 28b9afab6fde7bd5c7cfcdcbc3713ae675236dde..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.biomedcentral/source.yaml b/share/sources/com.biomedcentral/source.yaml
deleted file mode 100644
index 9f9157263..000000000
--- a/share/sources/com.biomedcentral/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: http://www.springer.com/us/
-  disabled: false
-  earliest_date: null
-  harvester: com.biomedcentral
-  harvester_kwargs: {}
-  label: com.biomedcentral
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: com.biomedcentral
-  transformer_kwargs: {}
-home_page: http://www.springer.com/us/
-long_title: BioMed Central
-name: com.biomedcentral
-user: providers.com.biomedcentral
diff --git a/share/sources/com.dailyssrn/icon.ico b/share/sources/com.dailyssrn/icon.ico
deleted file mode 100644
index bcd1340a1af4199147659e4f12e0e7c972915fcc..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.dailyssrn/source.yaml b/share/sources/com.dailyssrn/source.yaml
deleted file mode 100644
index 68edf1300..000000000
--- a/share/sources/com.dailyssrn/source.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-configs:
-- base_url: http://papers.ssrn.com/
-  disabled: true
-  earliest_date: null
-  harvester: null
-  label: com.dailyssrn
-  transformer: com.dailyssrn
-  transformer_kwargs: {}
-home_page: http://papers.ssrn.com/
-long_title: Social Science Research Network
-name: com.dailyssrn
-user: providers.com.dailyssrn
diff --git a/share/sources/com.figshare/icon.ico b/share/sources/com.figshare/icon.ico
deleted file mode 100644
index ca993fba24eb30f666f78008c02df3ca6aa5268e..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.figshare/source.yaml b/share/sources/com.figshare/source.yaml
deleted file mode 100644
index 462721f0a..000000000
--- a/share/sources/com.figshare/source.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-configs:
-- base_url: https://api.figshare.com/v1/articles/search
-  disabled: true
-  earliest_date: null
-  harvester: com.figshare
-  harvester_kwargs: {}
-  label: com.figshare
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: com.figshare
-  transformer_kwargs: {}
-- base_url: https://api.figshare.com/v2/articles
-  disabled: false
-  earliest_date: null
-  harvester: com.figshare.v2
-  harvester_kwargs: {}
-  label: com.figshare.v2
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: com.figshare.v2
-  transformer_kwargs: {}
-home_page: https://figshare.com/
-long_title: figshare
-name: com.figshare
-user: providers.com.figshare
diff --git a/share/sources/com.mendeley.data/icon.ico b/share/sources/com.mendeley.data/icon.ico
deleted file mode 100644
index 2f061036dbf6c3c95e0d3b13587a699e79eca677..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.mendeley.data/source.yaml b/share/sources/com.mendeley.data/source.yaml
deleted file mode 100644
index 5de23efda..000000000
--- a/share/sources/com.mendeley.data/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://api.mendeley.com/datasets
-  disabled: false
-  earliest_date: null
-  harvester: com.mendeley.data
-  harvester_kwargs: {}
-  label: com.mendeley.data
-  rate_limit_allowance: 1
-  rate_limit_period: 1
-  transformer: com.mendeley.data
-  transformer_kwargs: {}
-home_page: https://data.mendeley.com/
-long_title: Mendeley Data
-name: com.mendeley.data
-user: providers.com.mendeley.data
diff --git a/share/sources/com.nature/icon.ico b/share/sources/com.nature/icon.ico
deleted file mode 100644
index 624867aeb4294ac27b9c650f71c0810535d43bff..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.nature/source.yaml b/share/sources/com.nature/source.yaml
deleted file mode 100644
index 4dee23197..000000000
--- a/share/sources/com.nature/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://www.nature.com/oai/request
-  disabled: false
-  earliest_date: null # earliestDatestamp is earliest published
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: com.nature
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: article
-    property_list: []
-    type_map: {}
-home_page: http://www.nature.com/
-long_title: Nature Publishing Group
-name: com.nature
-user: providers.com.nature
diff --git a/share/sources/com.peerj/icon.ico b/share/sources/com.peerj/icon.ico
deleted file mode 100644
index b108774d238c7fb86f66b29b91a58adac6b14569..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.peerj/source.yaml b/share/sources/com.peerj/source.yaml
deleted file mode 100644
index 1a8f5457c..000000000
--- a/share/sources/com.peerj/source.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-configs:
-- base_url: https://peerj.com/articles/index.json
-  disabled: false
-  harvester: com.peerj
-  harvester_kwargs: {}
-  label: com.peerj
-  transformer: com.peerj
-  transformer_kwargs: {}
-- base_url: https://peerj.com/articles/index.json
-  disabled: false
-  harvester: com.peerj
-  harvester_kwargs:
-    fetch_xml: true
-  label: com.peerj.xml
-  transformer: com.peerj.xml
-  transformer_kwargs: {}
-- base_url: https://peerj.com/preprints/index.json
-  disabled: false
-  harvester: com.peerj
-  harvester_kwargs:
-    fetch_xml: true
-    identifier_prefix: preprints-
-  label: com.peerj.preprints
-  transformer: com.peerj.xml
-  transformer_kwargs:
-    emitted_type: preprint
-- base_url: https://peerj.com/articles/index.json?journal=cs
-  disabled: false
-  harvester: com.peerj
-  harvester_kwargs: {}
-  label: com.peerj.cs
-  transformer: com.peerj
-  transformer_kwargs: {}
-- base_url: https://peerj.com/articles/index.json?journal=cs
-  disabled: false
-  harvester: com.peerj
-  harvester_kwargs:
-    fetch_xml: true
-  label: com.peerj.cs.xml
-  transformer: com.peerj.xml
-  transformer_kwargs: {}
-- base_url: https://peerj.com/preprints/index.json?journal=cs
-  disabled: false
-  harvester: com.peerj
-  harvester_kwargs:
-    fetch_xml: true
-    identifier_prefix: preprints-
-  label: com.peerj.cs.preprints
-  transformer: com.peerj.xml
-  transformer_kwargs:
-    emitted_type: preprint
-home_page: https://peerj.com/
-long_title: PeerJ
-name: com.peerj
-user: providers.com.peerj
diff --git a/share/sources/com.researchregistry/icon.ico b/share/sources/com.researchregistry/icon.ico
deleted file mode 100644
index 84fc99ba9f84783066824f432fbdb68f1fda4e86..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.researchregistry/source.yaml b/share/sources/com.researchregistry/source.yaml
deleted file mode 100644
index 5f84cb6ba..000000000
--- a/share/sources/com.researchregistry/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://us-api.knack.com/v1/scenes/scene_3/views/view_4/records
-  disabled: false
-  earliest_date: null
-  harvester: com.researchregistry
-  harvester_kwargs: {}
-  label: com.researchregistry
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: com.researchregistry
-  transformer_kwargs: {}
-home_page: http://www.researchregistry.com/
-long_title: Research Registry
-name: com.researchregistry
-user: providers.com.researchregistry
diff --git a/share/sources/com.springer/icon.ico b/share/sources/com.springer/icon.ico
deleted file mode 100644
index 28b9afab6fde7bd5c7cfcdcbc3713ae675236dde..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/com.springer/source.yaml b/share/sources/com.springer/source.yaml
deleted file mode 100644
index d0cb13f4e..000000000
--- a/share/sources/com.springer/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://api.springer.com/meta/v1/json
-  disabled: false
-  earliest_date: null
-  harvester: com.springer
-  harvester_kwargs: {}
-  label: com.springer
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: com.springer
-  transformer_kwargs: {}
-home_page: http://www.springer.com/us/
-long_title: Springer
-name: com.springer
-user: providers.com.springer
diff --git a/share/sources/edu.ageconsearch/icon.ico b/share/sources/edu.ageconsearch/icon.ico
deleted file mode 100644
index 5ff402af76f1515a31cbff5fc8db0f5a6670854f..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.ageconsearch/source.yaml b/share/sources/edu.ageconsearch/source.yaml
deleted file mode 100644
index 0e391bede..000000000
--- a/share/sources/edu.ageconsearch/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: http://ageconsearch.umn.edu/browse-date
-  disabled: true
-  earliest_date: null
-  harvester: edu.ageconsearch
-  harvester_kwargs: {}
-  label: edu.ageconsearch
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: edu.ageconsearch
-  transformer_kwargs: {}
-home_page: http://ageconsearch.umn.edu/
-long_title: AgEcon Search
-name: edu.ageconsearch
-user: providers.edu.ageconsearch
diff --git a/share/sources/edu.asu/icon.ico b/share/sources/edu.asu/icon.ico
deleted file mode 100644
index 148299e78ad5c3b6b3d4fc81158825805cf55351..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.asu/source.yaml b/share/sources/edu.asu/source.yaml
deleted file mode 100644
index 999095e53..000000000
--- a/share/sources/edu.asu/source.yaml
+++ /dev/null
@@ -1,46 +0,0 @@
-# Link in <location><url>
-configs:
-- base_url: https://repository.asu.edu/oai-pmh
-  disabled: false
-  earliest_date: 2011-05-06T20:05:41Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.asu.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [research]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map:
-      'Masters Thesis': Thesis
-      'Doctoral Dissertation': Dissertation
-      Text: Publication
-      Image: Presentation
-      'Mixed Material': Presentation
-      Sound: Presentation
-      'Moving Image': Presentation
-      'Book Review': Publication
-      Dataset: DataSet
-      'Exhibition Review': Publication
-      Software: Presentation
-      'Slide Deck': Presentation
-- base_url: http://repository.asu.edu/oai-pmh
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.asu
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [research]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://www.asu.edu/
-long_title: Arizona State University Digital Repository
-name: edu.asu
-user: providers.edu.asu
diff --git a/share/sources/edu.boise_state/icon.ico b/share/sources/edu.boise_state/icon.ico
deleted file mode 100644
index fe4f9c7c34af210521827414e8d503812fbdcbcf..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.boise_state/source.yaml b/share/sources/edu.boise_state/source.yaml
deleted file mode 100644
index cae00d39a..000000000
--- a/share/sources/edu.boise_state/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholarworks.boisestate.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.boise_state
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://scholarworks.boisestate.edu
-long_title: Boise State University ScholarWorks
-name: edu.boise_state
-user: providers.edu.boise_state
diff --git a/share/sources/edu.bu.open/icon.ico b/share/sources/edu.bu.open/icon.ico
deleted file mode 100644
index 213423e15b685d1e2bf9d97b4f59edc045ec5939..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.bu.open/source.yaml b/share/sources/edu.bu.open/source.yaml
deleted file mode 100644
index 4c763d3f4..000000000
--- a/share/sources/edu.bu.open/source.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-# DSpace
-configs:
-- base_url: http://open.bu.edu/oai/request
-  disabled: false
-  earliest_date: 2005-08-12T20:32:45Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.bu.open
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs: {}
-home_page: https://open.bu.edu/
-long_title: OpenBU
-name: edu.bu.open
-user: providers.edu.bu.open
diff --git a/share/sources/edu.calhoun/icon.ico b/share/sources/edu.calhoun/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.calhoun/source.yaml b/share/sources/edu.calhoun/source.yaml
deleted file mode 100644
index 983653539..000000000
--- a/share/sources/edu.calhoun/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://calhoun.nps.edu/oai/request
-  disabled: false
-  earliest_date: 2012-03-14T16:52:21Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.calhoun.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_10945_7075, com_10945_6, col_10945_17]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://calhoun.nps.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.calhoun
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_10945_7075, com_10945_6, col_10945_17]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://calhoun.nps.edu
-long_title: 'Calhoun: Institutional Archive of the Naval Postgraduate School'
-name: edu.calhoun
-user: providers.edu.calhoun
diff --git a/share/sources/edu.calpoly/icon.ico b/share/sources/edu.calpoly/icon.ico
deleted file mode 100644
index 5b3a02e4cbcd9e47c4e61b1691fd457908b5af01..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.calpoly/source.yaml b/share/sources/edu.calpoly/source.yaml
deleted file mode 100644
index 1f1c3d0a3..000000000
--- a/share/sources/edu.calpoly/source.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.calpoly.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.calpoly
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [csusymp2009, acct_fac, aerosp, aero_fac, agbsp, agb_fac, agedsp,
-      aged_fac, ascisp, asci_fac, aen_fac, arcesp, arch_fac, art_fac, artsp, bts,
-      bio_fac, biosp, bmed_fac, bmedsp, bae_fac, braesp, ccapc, ari, csq, chem_fac,
-      chemsp, crp_fac, crpsp, cenv_fac, cadrc, comssp, comm_fac, cpesp, cscsp, csse_fac,
-      cmgt_fac, cesp, fpe_rpt, dscisp, dsci_fac, erscsp, econ_fac, eesp, eeng_fac,
-      engl_fac, englsp, ethicsandanimals, eth_fac, essp, fin_fac, focus, fsn_fac,
-      fsnsp, aged_rpt, gse_fac, grcsp, grc_fac, hist_fac, histsp, honors, hcssp, hcs_fac,
-      imesp, ime_fac, it_fac, itsp, ir2008, joursp, jour_fac, kine_fac, kinesp, land_fac,
-      laessp, ls_fac, lib_fac, mgmtsp, mgmt_sp, mkt_fac, theses, matesp, mate_fac,
-      math_fac, mathsp, mesp, meng_fac, mll_fac, mllsp, mus_fac, musp, nrmsp, nrm_fac,
-      pres_schol, phil_fac, philsp, phy_fac, physsp, poli_fac, polssp, bakerforum,
-      psycd_fac, psycdsp, rpta_fac, rptasp, coe_dean, socssp, ssci_fac, statsp, stat_fac,
-      star, susconf, symposium, forum, thdanc_fac, wvi_fac, wvisp]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.calpoly.edu/
-long_title: Digital Commons @ CalPoly
-name: edu.calpoly
-user: providers.edu.calpoly
diff --git a/share/sources/edu.caltech/icon.ico b/share/sources/edu.caltech/icon.ico
deleted file mode 100644
index e81601721d8af6bbdccdcba2fb81fde5dc195e3e..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.caltech/source.yaml b/share/sources/edu.caltech/source.yaml
deleted file mode 100644
index a9e5cd003..000000000
--- a/share/sources/edu.caltech/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# links in <relation>
-configs:
-- base_url: http://authors.library.caltech.edu/cgi/oai2
-  disabled: false
-  earliest_date: 2011-01-12T00:11:25Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.caltech
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://authors.library.caltech.edu/
-long_title: CaltechAUTHORS
-name: edu.caltech
-user: providers.edu.caltech
diff --git a/share/sources/edu.chapman/icon.ico b/share/sources/edu.chapman/icon.ico
deleted file mode 100644
index dfd5787212ab1abfddc6683f65b08e38b509b6f5..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.chapman/source.yaml b/share/sources/edu.chapman/source.yaml
deleted file mode 100644
index caa65b4e8..000000000
--- a/share/sources/edu.chapman/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.chapman.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.chapman
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.chapman.edu
-long_title: Chapman University Digital Commons
-name: edu.chapman
-user: providers.edu.chapman
diff --git a/share/sources/edu.citeseerx/icon.ico b/share/sources/edu.citeseerx/icon.ico
deleted file mode 100644
index 084be7cc41ba21221ea6fdc00ee0ff5cdd4478da..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.citeseerx/source.yaml b/share/sources/edu.citeseerx/source.yaml
deleted file mode 100644
index 6b44cb6a0..000000000
--- a/share/sources/edu.citeseerx/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://citeseerx.ist.psu.edu/oai2
-  disabled: false
-  earliest_date: null # earliestDatestamp is earliest published
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: edu.citeseerx
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://citeseerx.ist.psu.edu
-long_title: CiteSeerX Scientific Literature Digital Library and Search Engine
-name: edu.citeseerx
-user: providers.edu.citeseerx
diff --git a/share/sources/edu.cmu/icon.ico b/share/sources/edu.cmu/icon.ico
deleted file mode 100644
index cce040a26514d56ddd232c0b7d2d3304f09af550..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.cmu/source.yaml b/share/sources/edu.cmu/source.yaml
deleted file mode 100644
index a6a197288..000000000
--- a/share/sources/edu.cmu/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://repository.cmu.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.cmu
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://repository.cmu.edu/
-long_title: Carnegie Mellon University Research Showcase
-name: edu.cmu
-user: providers.edu.cmu
diff --git a/share/sources/edu.colostate/icon.ico b/share/sources/edu.colostate/icon.ico
deleted file mode 100644
index e4a4748c9443096996e5dd7b7fafa3820e37c52b..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.colostate/source.yaml b/share/sources/edu.colostate/source.yaml
deleted file mode 100644
index 2cae7f23d..000000000
--- a/share/sources/edu.colostate/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://dspace.library.colostate.edu/oai/request
-  disabled: false
-  earliest_date: 2007-01-01T06:33:28Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.colostate.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://dspace.library.colostate.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.colostate
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://dspace.library.colostate.edu
-long_title: Digital Collections of Colorado
-name: edu.colostate
-user: providers.edu.colostate
diff --git a/share/sources/edu.columbia/icon.ico b/share/sources/edu.columbia/icon.ico
deleted file mode 100644
index 22428e9ec3a763fbe0aa0921b5115ee9d7fdda99..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.columbia/source.yaml b/share/sources/edu.columbia/source.yaml
deleted file mode 100644
index 12809e198..000000000
--- a/share/sources/edu.columbia/source.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-configs:
-- base_url: http://academiccommons.columbia.edu/oai
-  disabled: false
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.columbia
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map:
-      Abstracts (summaries): publication
-      Articles: article
-      Chapters (layout features): publication
-      Conference posters: poster
-      Data (information): dataset
-      Essays: publication
-      Exhibition catalogs: publication
-      Monographs: book
-      Performances (creative events): presentation
-      Presentations (Communicative Events): presentation
-      Reports: report
-      Reviews: publication
-      Theses: thesis
-home_page: http://academiccommons.columbia.edu/
-long_title: Columbia Academic Commons
-name: edu.columbia
-user: providers.edu.columbia
diff --git a/share/sources/edu.cornell/icon.ico b/share/sources/edu.cornell/icon.ico
deleted file mode 100644
index ea4484b5567fd379361c0cc2bca8443c9b52d591..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.cornell/source.yaml b/share/sources/edu.cornell/source.yaml
deleted file mode 100644
index 92ac0539b..000000000
--- a/share/sources/edu.cornell/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://ecommons.cornell.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2002-11-12T17:55:14Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.cornell.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_1813_35]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://ecommons.cornell.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.cornell
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_1813_35]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://ecommons.cornell.edu
-long_title: Cornell University
-name: edu.cornell
-user: providers.edu.cornell
diff --git a/share/sources/edu.csuohio/icon.ico b/share/sources/edu.csuohio/icon.ico
deleted file mode 100644
index aab5ec389a53091252d0c860bc806488aaf82d4c..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.csuohio/source.yaml b/share/sources/edu.csuohio/source.yaml
deleted file mode 100644
index 652f1b69e..000000000
--- a/share/sources/edu.csuohio/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://engagedscholarship.csuohio.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.csuohio
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://engagedscholarship.csuohio.edu
-long_title: Cleveland State University's EngagedScholarship@CSU
-name: edu.csuohio
-user: providers.edu.csuohio
diff --git a/share/sources/edu.cuny/icon.ico b/share/sources/edu.cuny/icon.ico
deleted file mode 100644
index 6f722f7b6e65d58d9243aa23c60e1ec0b38d4848..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.cuny/source.yaml b/share/sources/edu.cuny/source.yaml
deleted file mode 100644
index 6b9722538..000000000
--- a/share/sources/edu.cuny/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://academicworks.cuny.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.cuny
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://academicworks.cuny.edu
-long_title: City University of New York
-name: edu.cuny
-user: providers.edu.cuny
diff --git a/share/sources/edu.cuscholar/icon.ico b/share/sources/edu.cuscholar/icon.ico
deleted file mode 100644
index eff6f2bffde2a7eb6eafcff7a6acf126b4fc360e..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.cuscholar/source.yaml b/share/sources/edu.cuscholar/source.yaml
deleted file mode 100644
index 491cf4129..000000000
--- a/share/sources/edu.cuscholar/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholar.colorado.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.cuscholar
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://scholar.colorado.edu
-long_title: CU Scholar University of Colorado Boulder
-name: edu.cuscholar
-user: providers.edu.cuscholar
diff --git a/share/sources/edu.dash/icon.ico b/share/sources/edu.dash/icon.ico
deleted file mode 100644
index 7513f79bfd1c175b2b8203d0d37b385498fb2dc1..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
diff --git a/share/sources/edu.dash/source.yaml b/share/sources/edu.dash/source.yaml
deleted file mode 100644
index 1148c979f..000000000
--- a/share/sources/edu.dash/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: https://dash.harvard.edu/oai/request
-  disabled: false
-  earliest_date: 2001-01-01T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.dash
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://dash.harvard.edu
-long_title: Digital Access to Scholarship at Harvard
-name: edu.dash
-user: providers.edu.dash
diff --git a/share/sources/edu.digitalhoward/icon.ico b/share/sources/edu.digitalhoward/icon.ico
deleted file mode 100644
index 7599bf8f5bbc66d7be8c1619a240b0d5e1b1c015..0000000000000000000000000000000000000000
GIT binary patch
[base85-encoded icon data omitted]
zqCW8*Ee}ty+u;R-9i@$26pcUzw%f8*EE~6J0*JyPCA2%Z7D+;$$F6YedM)5{tcX%T z3bzo!fi14T{C41Xuh$C|`4Q$_5XtRk^X%E5{{jv|9g?4N%x3@q002ovPDHLkV1l>r BOOpTq diff --git a/share/sources/edu.digitalhoward/source.yaml b/share/sources/edu.digitalhoward/source.yaml deleted file mode 100644 index 49e6b4728..000000000 --- a/share/sources/edu.digitalhoward/source.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Digital Commons/Bepress -configs: -- base_url: http://dh.howard.edu/do/oai/ - disabled: false - earliest_date: 2000-01-19T00:00:00Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: edu.digitalhoward - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://dh.howard.edu -long_title: Digital Howard @ Howard University -name: edu.digitalhoward -user: providers.edu.digitalhoward diff --git a/share/sources/edu.duke/icon.ico b/share/sources/edu.duke/icon.ico deleted file mode 100644 index bc122e9c6c069fbca75a8e9b6654679cd675b7f2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 815 zcmV+~1JL}5P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0008!Nkl<ZIE_t{OK6-`7>1v7&i7wtm}X|uCb13Ek%1UXYl|34gVYLwE(8%5 zB8X5Ix>slyx+!!a+AdwWC~gF$EeMMBvM`|0qS4gas7YF<jfrjAOnOOX{<-|;92c36 z`S$O_`@-|7+p9OcwJnS>tgY3#x_FE2gGKraIRHF>(kSkq!aGa8(9iOX>!-xp7F)ON zq}5J2eeOFx{_H#l_mA@Gi-#x=Z^b!__Z|S}9Nv3G1g#Bs@2(&k4ZzfI^Bg&Gnxx)f z=JFD`$n*Nk2Wch^+U+K(O|fZ8vstH57+|>c5NMFpby#n-`1A60TCFw%eHqr0CKqS_ z#iI$L80SH$4q9ofP0>brgTy<S{CSR#KAmKHK7)0R)OtFp1r&jVxWJ>7-T(wCTALm~ z7xGzB>zJ6n+6_TI7qMfgNG6j-8^IVsBtVN`On_`|aQ@dSUro$XDrUi3taVgMgN%=j z5{41ZT8uF$rO0GL;z&?B{2y@q{co6>S)?x(l+rjXD|gmeSX?1#G?7413R<le^VM6_ z>vi_-t^g1R8k0o3k7WamQVONOC{69oItxpyJo0dfO8EgqLKc>4ymj&{XTQ8a6h%B( z8f0~Kd0L{-fOVi;Pn;+a*6Im!)nyKi?*SY?{4&qEALq#`&ztX@VRGtkc9la!0S@0) zuIrittu$J1`1ku?|M20NN%Dav8wvHK&4rn3*iL`fqD?`dYnx$cTOR=5|1`_Q*Oze4 z0g8@SjO{6L`1yU9V(Gv!?|m<PA7I2#9v)!V$S`pzNLG_H+uW?(V`!kCH(q<1k)7FT zy*boXo-hy^>k03D@C9$b`w7KDj^jsP;@PJjrB?5-e_w@Xo_q}FI*5`8F~0XO8!*_? 
diff --git a/share/sources/edu.duke/source.yaml b/share/sources/edu.duke/source.yaml
deleted file mode 100644
index 418753874..000000000
--- a/share/sources/edu.duke/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# DSpace
-configs:
-- base_url: https://dukespace.lib.duke.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2001-01-01T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.duke
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://dukespace.lib.duke.edu
-long_title: Duke University Libraries
-name: edu.duke
-user: providers.edu.duke
diff --git a/share/sources/edu.fit/icon.ico b/share/sources/edu.fit/icon.ico
deleted file mode 100644
index 0e466d9b4a3eaa1fc79fa44cf7d14d7e768d5a3d..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.fit/icon.ico and /dev/null differ
diff --git a/share/sources/edu.fit/source.yaml b/share/sources/edu.fit/source.yaml
deleted file mode 100644
index 213e60aac..000000000
--- a/share/sources/edu.fit/source.yaml
+++ /dev/null
@@ -1,62 +0,0 @@
-# DSpace
-configs:
-- base_url: http://repository.lib.fit.edu/oai/request
-  disabled: false
-  earliest_date: 2012-11-29T19:21:49Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.fit.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_11141_1, com_11141_2, com_11141_3, com_11141_8, com_11141_12,
-      com_11141_13, com_11141_14, com_11141_15, com_11141_16, com_11141_17, com_11141_18,
-      com_11141_19, com_11141_20, com_11141_21, com_11141_22, com_11141_31, com_11141_32,
-      com_11141_33, com_11141_38, com_11141_39, com_11141_40, com_11141_41, com_11141_42,
-      com_11141_50, com_11141_245, com_11141_248, col_11141_4, col_11141_11, col_11141_23,
-      col_11141_24, col_11141_25, col_11141_26, col_11141_27, col_11141_28, col_11141_29,
-      col_11141_30, col_11141_34, col_11141_35, col_11141_36, col_11141_43, col_11141_44,
-      col_11141_45, col_11141_46, col_11141_47, col_11141_48, col_11141_51, col_11141_52,
-      col_11141_74, col_11141_211, col_11141_212, col_11141_213, col_11141_214, col_11141_215,
-      col_11141_216, col_11141_217, col_11141_218, col_11141_219, col_11141_220, col_11141_221,
-      col_11141_222, col_11141_223, col_11141_224, col_11141_225, col_11141_226, col_11141_227,
-      col_11141_228, col_11141_229, col_11141_246, col_11141_247, col_11141_249, col_11141_257,
-      col_11141_561, col_11141_572, col_11141_573, col_11141_574, col_11141_575, col_11141_576,
-      col_11141_577, col_11141_578, col_11141_579, col_11141_580, col_11141_693, col_11141_696,
-      col_11141_697, col_11141_698, col_11141_699]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://repository.lib.fit.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.fit
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_11141_1, com_11141_2, com_11141_3, com_11141_8, com_11141_12,
-      com_11141_13, com_11141_14, com_11141_15, com_11141_16, com_11141_17, com_11141_18,
-      com_11141_19, com_11141_20, com_11141_21, com_11141_22, com_11141_31, com_11141_32,
-      com_11141_33, com_11141_38, com_11141_39, com_11141_40, com_11141_41, com_11141_42,
-      com_11141_50, com_11141_245, com_11141_248, col_11141_4, col_11141_11, col_11141_23,
-      col_11141_24, col_11141_25, col_11141_26, col_11141_27, col_11141_28, col_11141_29,
-      col_11141_30, col_11141_34, col_11141_35, col_11141_36, col_11141_43, col_11141_44,
-      col_11141_45, col_11141_46, col_11141_47, col_11141_48, col_11141_51, col_11141_52,
-      col_11141_74, col_11141_211, col_11141_212, col_11141_213, col_11141_214, col_11141_215,
-      col_11141_216, col_11141_217, col_11141_218, col_11141_219, col_11141_220, col_11141_221,
-      col_11141_222, col_11141_223, col_11141_224, col_11141_225, col_11141_226, col_11141_227,
-      col_11141_228, col_11141_229, col_11141_246, col_11141_247, col_11141_249, col_11141_257,
-      col_11141_561, col_11141_572, col_11141_573, col_11141_574, col_11141_575, col_11141_576,
-      col_11141_577, col_11141_578, col_11141_579, col_11141_580, col_11141_693, col_11141_696,
-      col_11141_697, col_11141_698, col_11141_699]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://repository.lib.fit.edu
-long_title: Florida Institute of Technology
-name: edu.fit
-user: providers.edu.fit
diff --git a/share/sources/edu.gwu/icon.ico b/share/sources/edu.gwu/icon.ico
deleted file mode 100644
index 43349191b9989cc61c975af1cdf7c1f4655af623..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.gwu/icon.ico and /dev/null differ
diff --git a/share/sources/edu.gwu/source.yaml b/share/sources/edu.gwu/source.yaml
deleted file mode 100644
index f3d9788c7..000000000
--- a/share/sources/edu.gwu/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://scholarspace.library.gwu.edu
-  disabled: false
-  earliest_date: null
-  harvester: edu.gwu
-  harvester_kwargs: {}
-  label: edu.gwu
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: edu.gwu
-  transformer_kwargs: {}
-home_page: https://scholarspace.library.gwu.edu
-long_title: ScholarSpace @ George Washington University
-name: edu.gwu
-user: providers.edu.gwu
diff --git a/share/sources/edu.harvarddataverse/icon.ico b/share/sources/edu.harvarddataverse/icon.ico
deleted file mode 100644
index 82115971947656a0700368eb6b4ebb3def401683..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.harvarddataverse/icon.ico and /dev/null differ
diff --git a/share/sources/edu.harvarddataverse/source.yaml b/share/sources/edu.harvarddataverse/source.yaml
deleted file mode 100644
index 11806a23e..000000000
--- a/share/sources/edu.harvarddataverse/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://dataverse.harvard.edu/api/search/
-  disabled: false
-  earliest_date: null
-  harvester: edu.harvarddataverse
-  harvester_kwargs: {}
-  label: edu.harvarddataverse
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: edu.harvarddataverse
-  transformer_kwargs: {}
-home_page: https://dataverse.harvard.edu
-long_title: Harvard Dataverse
-name: edu.harvarddataverse
-user: providers.edu.harvarddataverse
diff --git a/share/sources/edu.huskiecommons/icon.ico b/share/sources/edu.huskiecommons/icon.ico
deleted file mode 100644
index 6a8a6cc732e63f4c2781be25e33590f61d8ed89c..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.huskiecommons/icon.ico and /dev/null differ
diff --git a/share/sources/edu.huskiecommons/source.yaml b/share/sources/edu.huskiecommons/source.yaml
deleted file mode 100644
index 6f1e8550d..000000000
--- a/share/sources/edu.huskiecommons/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://commons.lib.niu.edu/oai/request
-  disabled: false
-  earliest_date: 2011-06-15T22:42:54Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.huskiecommons.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://commons.lib.niu.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.huskiecommons
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://commons.lib.niu.edu
-long_title: Huskie Commons @ Northern Illinois University
-name: edu.huskiecommons
-user: providers.edu.huskiecommons
diff --git a/share/sources/edu.iastate/icon.ico b/share/sources/edu.iastate/icon.ico
deleted file mode 100644
index 366ce9ad436b1ebb533d732aa215bc329f952088..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.iastate/icon.ico and /dev/null differ
diff --git a/share/sources/edu.iastate/source.yaml b/share/sources/edu.iastate/source.yaml
deleted file mode 100644
index ada3197ee..000000000
--- a/share/sources/edu.iastate/source.yaml
+++ /dev/null
@@ -1,162 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://lib.dr.iastate.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.iastate
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [museums_rediscoveringshelves, museums_rediscoveringshelves_installation,
-      museums_rediscoveringshelves_videos, extension_4h, extension_4h_pubs, abe_carousel,
-      acct, acct_pubs, accounting_pubs, admin, abe_eng_advancedmachinery, abe_ag_advancedmachinery,
-      abe_ag_advancedmachinery_conf, abe_eng_advancedmachinery_conf, abe_ag_advancedmachinery_pubs,
-      abe_eng_advancedmachinery_pubs, cbe_advancedmaterials, cbe_advancedmaterials_conf,
-      cbe_advancedmaterials_pubs, aere, aere_conf, aere_patents, aere_pubs, aere_etd,
-      afam, afam_pubs, agdm, farms_centraliowa_reports, ageds, ageds_etd, safefarm_ag,
-      safefarm, safefarm_ag_pubs, safefarm_pubs, agpolicyreview, ag_researchbulletins,
-      abe_ag, abe_eng, abe_eng_books, abe_ag_books, abe_eng_conf, abe_ag_extensionpubs,
-      abe_eng_extensionpubs, abe_eng_patents, abe_ag_patents, abe_ag_conf, abe_ag_reports,
abe_eng_pubs, abe_ag_pubs, abe_eng_researchareas, abe_eng_reports, abe_ag_etd, - abe_eng_etd, extension_ag_pubs, extension_ag, agron, agron_conf, agron_pubs, - agron_reports, agron_etd, airforce, airforce_pubs, a2ru, amin, amin_pubs, ameslab, - ameslab_manuscripts, ameslab_conf, ameslab_iscreports, ameslab_patents, ameslab_pubs, - ameslab_software, ameslab_isreports, aecl, aecl_pubs, aecl_etd, ans_air, abe_eng_animalproduction, - abe_ag_animalproduction, abe_eng_animalproduction_conf, abe_ag_animalproduction_conf, - abe_eng_animalproduction_pubs, abe_ag_animalproduction_pubs, ans, ans_conf, - ans_pubs, ans_reports, ans_etd, ans_whitepapers, anthr, anthr_pubs, anthr_etd, - aeshm, aeshm_conf, aeshm_pubs, aeshm_etd, arch, arch_books, arch_conf, arch_announcements, - arch_pubs, arch_etd, farms_armstrong_reports, ad, ad_conf, ad_etd, avc, avc_creativeworks, - avc_pubs, asam, asam_pubs, baltic_reports, balticbasin_reports, beefreports_1996, - beefreports_1997, beefreports_1998, beefreports_1999, beefreports_2000, beefreports_2001, - beefreports_2002, beefreports_2003, bce_proceedings, bbmb_ag, bbmb_las, bbmb_ag_pubs, - bbmb_las_etd, bbmb_ag_etd, bbmb_ag_conf, bei, bei_reports, bcb_etd, abe_ag_biologicalprocess, - abe_eng_biologicalprocess, abe_ag_biologicalprocess_conf, abe_eng_biologicalprocess_conf, - abe_ag_biologicalprocess_pubs, abe_eng_biologicalprocess_pubs, bms, bms_pubs, - bms_reports, bms_etd, brt_etd, cbe_biorenewables, cbe_biorenewables_conf, cbe_biorenewables_pubs, - bot, botany_etd, bot_pubs, bot_etd, experimentstation_bulletin, business_etd, - card_books, card_briefingpapers, card_pubs, card_policybriefs, card_pres, card_reports, - card_publications, card_staffreports, card_technicalreports, card_workingpapers, - carver, carver_narratives, cbe_catalysis, cbe_catalysis_conf, cbe_catalysis_pubs, - card, cbirc_annualreports, ccur, ccur_conf, ccur_pubs, cfsph, cfsph_pubs, cnde, - cnde_conf, cnde_reports, cnde_pubs, cnde_etd, edesign, edesign_newsletters, - edesign_conf, edesign_pubs, edesign_etd, cbe, cbe_conf, cbe_pubs, cbe_researchareas, - cbe_etd, chem, chem_conf, chem_pubs, chem_etd, libaccess, libaccess_conf, libaccess_workshops, - libaccess_pubs, ccee, ccee_books, ccee_conf, ccee_pubs, ccee_researchareas, - ccee_etd, libcat, libcat_conf, libcat_pubs, ag, business, design, colledu, engineering, - hs, chsmatters, las, vetmed, communitymatters, communityplanning, communityplanning_pubs, - communityplanning_etd, cbe_fluiddynamics, cbe_fluiddynamics_conf, cbe_fluiddynamics_pubs, - cs_techreports_applications, cs, cs_pubs, cs_techreports, cs_etd, cs_techreports_compsystems, - cs_techreports_methodologies, cs_techreports_milleux, pres_portfolio, ccee_construction, - ccee_construction_conf, ccee_construction_pubs, cornbeltcowcalf, ci, ci_etd, - ci_pubs, dae-card_sectoranalysis, ebooks, cs_techreports_data, entnewsletter, - housing, housing_books, digirep, digirep_conf, digirep_outreach, digirep_pubs, - dimensions, dgtc_symposium, diversityreports, driftlessconference, eeb_etd, - eeob_las, eeob_ag, eeob_conf, eeob_ag_pubs, eeob_las_pubs, eeob_ag_reports, - eeob_las_etd, eeob_ag_etd, econ_las_staffpapers, econ_ag_staffpapers, econ_ag, - econ_las, econ_ag_conf, econ_las_conf, econ_las_pubs, econ_ag_etd, econ_las_etd, - edu_pubs, elps, elps_pubs, elps_etd, ece, ece_books, ece_conf, ece_pubs, ece_reports, - ece_etd, engl, engl_books, engl_conf, engl_pubs, engl_etd, ent, ent_conf, ent_pubs, - ent_reports, ent_etd, ensci_etd, ensci_studentprojects, ccee_environmental, - 
ccee_environmental_conf, ccee_environmental_pubs, ethos, extension_communities, - extension_communities_pubs, extension, hs_extension_conf, extension_conf, extension_pubs, - extension_research, hs_extension, hs_extension_pubs, fapri_staffreports, ir_factbooks, - finance, finance_pubs, fshn_ag, fshn_hs, fshn_ag_extensionpubs, fshn_hs_extensionpubs, - fshn_ag_patents, fshn_hs_patents, fshn_hs_conf, fshn_ag_conf, fshn_ag_pubs, - fshn_hs_pubs, fshn_ag_etd, fshn_hs_etd, for, for_pubs, for_reports, for_etd, - gatt_papers, cs_techreports_general, genetics_etd, gdcb_conf, gdcb_ag, gdcb_las, - gdcb_las_pubs, gdcb_ag_etd, gdcb_las_etd, gentle_doctor, ge_at, ge_at_etd, ge_at_pubs, - ccee_geotechnical, ccee_geotechnical_conf, ccee_geotechnical_pubs, gerontology_etd, - grad, grad_reports, etd, graphicdesign, graphicdesign_etd, jlmc, cs_techreports_hardware, - cbe_biomedical, cbe_biomedical_conf, cbe_biomedical_pubs, ag_hist, design_hist, - las_hist, hs_hist, history, history_books, history_conf, history_pubs, history_etd, - honors_posters, hort, hort_conf, hort_pubs, farms_horticulture_reports, hort_etd, - hci_etd, hdfs, hdfs_extensionpubs, hdfs_conf, hdfs_pubs, hdfs_reports, hdfs_etd, - extension_families, extension_families_pubs, imsenews, econ_las_economicreports, - econ_ag_economicreports, immunobiology_etd, intrans_reports, industrialdesign, - industrialdesign_etd, iet_pubs, iet, imse, imse_conf, imse_pubs, imse_reports, - imse_etd, infas_etd, cs_techreports_infosystems, is_etd, inspire, iprt, intrans, - ir, cropnews, interdisciplinaryprograms_graduate, cnde_yellowjackets, grad_etd, - id, id_conf, id_etd, safepork, iowaagreview, iaes_circulars, iahees, ibc, cfwru, - cfwru_reports, ipic, ipic_factsheets, ipic_handbooks, iowastatedaily, iowastatedaily_2010, - iowastatedaily_2011, iowastatedaily_2012, iowastatedaily_2013, iowastatedaily_2014, - iowastatedaily_2015, iowastatedaily_2011-04, iowastatedaily_2012-04, iowastatedaily_2013-04, - iowastatedaily_2014-04, iowastatedaily_2015-04, iowastatedaily_2010-08, iowastatedaily_2011-08, - iowastatedaily_2012-08, iowastatedaily_2013-08, iowastatedaily_2014-08, iowastatedaily_2015-08, - iowastatedaily_2010-12, iowastatedaily_2011-12, iowastatedaily_2012-12, iowastatedaily_2013-12, - iowastatedaily_2014-12, iowastatedaily_2015-12, iowastatedaily_2011-02, iowastatedaily_2012-02, - iowastatedaily_2013-02, iowastatedaily_2014-02, iowastatedaily_2015-02, iowastatedaily_2011-01, - iowastatedaily_2012-01, iowastatedaily_2013-01, iowastatedaily_2014-01, iowastatedaily_2015-01, - iowastatedaily_2010-07, iowastatedaily_2011-07, iowastatedaily_2012-07, iowastatedaily_2013-07, - iowastatedaily_2014-07, iowastatedaily_2015-07, iowastatedaily_2010-06, iowastatedaily_2011-06, - iowastatedaily_2012-06, iowastatedaily_2013-06, iowastatedaily_2014-06, iowastatedaily_2015-06, - iowastatedaily_2011-03, iowastatedaily_2012-03, iowastatedaily_2013-03, iowastatedaily_2014-03, - iowastatedaily_2015-03, iowastatedaily_2010-05, iowastatedaily_2011-05, iowastatedaily_2012-05, - iowastatedaily_2013-05, iowastatedaily_2014-05, iowastatedaily_2015-05, iowastatedaily_2010-11, - iowastatedaily_2011-11, iowastatedaily_2012-11, iowastatedaily_2013-11, iowastatedaily_2014-11, - iowastatedaily_2015-11, iowastatedaily_2010-10, iowastatedaily_2011-10, iowastatedaily_2012-10, - iowastatedaily_2013-10, iowastatedaily_2014-10, iowastatedaily_2015-10, iowastatedaily_2010-09, - iowastatedaily_2011-09, iowastatedaily_2012-09, iowastatedaily_2013-09, iowastatedaily_2014-09, - iowastatedaily_2015-09, 
farms_reports, farms_reportsbyfarm, catalog, patents, - isurf, farms, iowastate_veterinarian, weedbiology, icip, jctp, jlmc_etd, jlmc_pubs, - kin, kin_pubs, kin_etd, abe_eng_landwaterresources, abe_ag_landwaterresources, - abe_ag_landwaterresources_conf, abe_eng_landwaterresources_conf, abe_eng_landwaterresources_pubs, - abe_ag_landwaterresources_pubs, landscapearchitecture, landscapearchitecture_conf, - landscapearchitecture_pubs, landscapearchitecture_etd, lau_slideshow, leadership, - leadership_conf, leadership_pubs, leopold_annualreports, leopold_grantreports, - leopold_extension, leopold_proceedings, leopold_pubspapers, leopold, leopold_letter, - libadmin, libadmin_conf, libadmin_pubs, library_books, libit_pubs, libreports, - livestock, matric_briefingpapers, matric_researchpapers, matric_workingpapers, - management, management_pubs, management_reports, marketing_pubs, mse, mse_conf, - mse_pubs, mse_etd, math, math_conf, math_etd, cs_techreports_mathematics, math_pubs, - farms_mcnay_reports, meatscience, meatscience_air, meatscience_pubs, me, me_conf, - me_pubs, me_etd, me_whitepapers, im_etd, micro, micro_pubs, micro_etd, armyrotc, - armyrotc_pubs, mcdb_etd, farms_muscatine_reports, music_pubs, music_recordings, - music, ncrac_annualreports, ncrac_cultureguides, ncrac_factsheets, ncrac_techbulletins, - ncrac_whitepapers, ncrpis_conf, ncrpis_pubs, cbirc, swinefeedefficiency, nrem, - nrem_conf, nrem_extensionpubs, nrem_pubs, nrem_studentprojects, nrem_etd, nrem_reports, - navy, navy_pubs, resilientneighborhoods_plans, neuroscience_etd, ncrac, ncrac_pubs, - ncrac_etd, ncrac_conferences, ncrpis, farms_northeast_reports, farms_northern_reports, - farms_northwest_reports, igpns_etd, abe_eng_occupationalsafety, abe_ag_occupationalsafety, - abe_ag_occupationalsafety_conf, abe_eng_occupationalsafety_conf, abe_ag_occupationalsafety_pubs, - abe_eng_occupationalsafety_pubs, provost_reports, registrar, provost, philrs, - philrs_pubs, physastro, physastro_conf, physastro_pubs, physastro_etd, ipb_etd, - plantpath_conf, plantpath, plantpath_pubs, plantpath_etd, pols, pols_pubs, pols_etd, - icip_poverty, pres, pres_conf, pres_workshops, pres_pubs, cnde_yellowjackets_1976, - cnde_yellowjackets_1978, cnde_yellowjackets_1977, cnde_yellowjackets_1975, cnde_yellowjackets_1979, - cnde_yellowjackets_1981, cnde_yellowjackets_1974, psychology, psychology_conf, - psychology_pubs, psychology_etd, refinst, refinst_conf, refinst_pubs, cbe_renewableenergy, - cbe_renewableenergy_conf, cbe_renewableenergy_pubs, ccee_reports, abe_ag_researchareas, - resilientneighborhoods, resilientneighborhoods_memos, resilientneighborhoods_reports, - rtd, qnde, revival, stories, safefarm_ag_extension, safefarm_extension, safefarmminute_ag, - safefarmminute, safefarm_minutes, safepork_covers, edu, s2erc, s2erc_reports, - stb_etd, sheepreports_1997, sketch, soc_las, soc_ag, soc_las_extensionpubs, - soc_las_pubs, soc_las_reports, soc_las_etd, soc_ag_etd, cs_techreports_software, - farms_southeast_reports, soybeanaphid_podcasts, speccoll_conf, speccoll_exhibits, - speccoll_outreach, speccoll_pubs, speccoll, stat_las, stat_ag, stat_ag_conf, - stat_las_conf, stat_las_preprints, stat_ag_preprints, stat_las_pubs, stat_las_etd, - stat_ag_etd, stories_covers, ccee_structural, ccee_structural_conf, ccee_structural_pubs, - scm_conf, scm, scm_pubs, susag_conf, susag, susag_pubs, gpsa_etd, swinefeedefficiency_air, - swinefeedefficiency_conf, swinefeedefficiency_factsheets, swinefeedefficiency_pubs, - swinefeedefficiency_etd, swinereports_1996, 
swinereports_1997, swinereports_1998,
-      swinereports_1999, swinereports_2000, swinereports_2001, swinereports_2002,
-      undergradresearch_symposium, systemseng_etd, intrans_techtransfer, cs_techreports_subjects,
-      tcmuseum, tcmuseum_exhibits, tcmuseum_installation, cs_techreports_theory, toxicology_etd,
-      trans_etd, ccee_transportation, ccee_transportation_conf, ccee_transportation_pubs,
-      trend, usls, usls_pubs, uhuru, undergradresearch, honors, library, bookmarks,
-      museums, museums_exhibitguides, museums_exhibits, museums_installation, museums_videos,
-      vcs, vcs_conf, vcs_pubs, vcs_reports, vcs_etd, vdpam, vdpam_conf, vdpam_pubs,
-      vdpam_reports, vdpam_etd, vmpm, vmpm_pubs, vmpm_reports, vmpm_etd, vpath, vpath_conf,
-      vpath_pubs, vpath_reports, vpath_etd, vrac, vrac_conf, vrac_pubs, weedscience_reports,
-      farms_western_reports, withhonors, tcmuseum_exhibits_womenforwomen, language,
-      language_conf, language_pubs, zool, zool_pubs, zool_etd, a2ru_photos]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://lib.dr.iastate.edu
-long_title: Digital Repository @ Iowa State University
-name: edu.iastate
-user: providers.edu.iastate
diff --git a/share/sources/edu.icpsr/icon.ico b/share/sources/edu.icpsr/icon.ico
deleted file mode 100644
index 229dec5d29e26eede148d5097d537fffb7af52c3..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.icpsr/icon.ico and /dev/null differ
diff --git a/share/sources/edu.icpsr/source.yaml b/share/sources/edu.icpsr/source.yaml
deleted file mode 100644
index e4cdf0afe..000000000
--- a/share/sources/edu.icpsr/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://www.icpsr.umich.edu/icpsrweb/ICPSR/oai/studies
-  disabled: false
-  earliest_date: null # earliestDatestamp is earliest published
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: edu.icpsr
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://www.icpsr.umich.edu/
-long_title: Inter-University Consortium for Political and Social Research
-name: edu.icpsr
-user: providers.edu.icpsr
diff --git a/share/sources/edu.iowaresearch/icon.ico b/share/sources/edu.iowaresearch/icon.ico
deleted file mode 100644
index f407037974443cf84560e255e5174a87ca75b0d6..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.iowaresearch/icon.ico and /dev/null differ
diff --git a/share/sources/edu.iowaresearch/source.yaml b/share/sources/edu.iowaresearch/source.yaml
deleted file mode 100644
index c7b2d384a..000000000
--- a/share/sources/edu.iowaresearch/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://ir.uiowa.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.iowaresearch
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://ir.uiowa.edu
-long_title: Iowa Research Online
-name: edu.iowaresearch
-user: providers.edu.iowaresearch
diff --git a/share/sources/edu.iu/icon.ico b/share/sources/edu.iu/icon.ico
deleted file mode 100644
index 6f2f9bb928ca2221e76cbc6ec24ef52808357785..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.iu/icon.ico and /dev/null differ
diff --git a/share/sources/edu.iu/source.yaml b/share/sources/edu.iu/source.yaml
deleted file mode 100644
index 25143c6dd..000000000
--- a/share/sources/edu.iu/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://scholarworks.iu.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2005-06-21T14:14:29Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.iu.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://scholarworks.iu.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.iu
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://scholarworks.iu.edu
-long_title: Indiana University Libraries' IUScholarWorks
-name: edu.iu
-user: providers.edu.iu
diff --git a/share/sources/edu.iwu_commons/icon.ico b/share/sources/edu.iwu_commons/icon.ico
deleted file mode 100644
index 60de03a0855f43edf3a96a83a3f83271837a7365..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.iwu_commons/icon.ico and /dev/null differ
diff --git a/share/sources/edu.iwu_commons/source.yaml b/share/sources/edu.iwu_commons/source.yaml
deleted file mode 100644
index 0b6751f06..000000000
--- a/share/sources/edu.iwu_commons/source.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.iwu.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.iwu_commons
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [oral_hist, ames_award, arthonors_book_gallery, arthonors, bio,
-      music_compositions, cs, constructing, economics, education, ed_studies_posters,
-      eng, envstu, fac_biennial_exhibit_all, fac_biennial_exhibit2011, fac_biennial_exhibit2013,
-      fac_biennial_exhibit, firstyear_summer, founders_day_docs, german, theatre_hist,
-      history, teaching_excellence, honors_docs, honors_programs_docs, physics_honproj,
-      bio_honproj, intstu_honproj, envstu_honproj, russian_honproj, history_honproj,
-      theatre_honproj, religion_honproj, wostu_honproj, nursing_honproj, education_honproj,
-      eng_honproj, french_honproj, math_honproj, socanth_honproj, econ_honproj, art_honproj,
-      cs_honproj, amstudies_honproj, grs_honproj, hispstu_honproj, polisci_honproj,
-      chem_honproj, phil_honproj, acct_fin_honproj, busadmin_honproj, german_honproj,
-      psych_honproj, bookshelf, wglt_interviews, oralhist_2009, oralhist_ucd, oralhist_wesn,
-      italian, japanese, jwprc, math, music, nursing, oralhistory, oralhistory_gallery,
-      anth_ethno, gateway, envstu_seminar, music_outstanding_works, writing_student,
-      polsci, psych, religion, respublica, russian, grs_scholarship, math_scholarship,
-      nursing_scholarship, bio_scholarship, religion_scholarship, mcll_scholarship,
-      envstu_scholarship, physics_scholarship, socanth_scholarship, history_scholarship,
-      intstu_scholarship, cs_scholarship, chem_scholarship, eng_scholarship, hispstu_scholarship,
-      psych_scholarship, socanth, student_prof, sea, parkplace, uer, germanresearch,
-      uauje, univcom, wglt]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.iwu.edu
-long_title: Digital Commons @ Illinois Wesleyan University
-name: edu.iwu_commons
-user: providers.edu.iwu_commons
diff --git a/share/sources/edu.jmu/icon.ico b/share/sources/edu.jmu/icon.ico
deleted file mode 100644
index d5fac22bd03c0faeeb446d35ac8e56f71f8ecc13..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.jmu/icon.ico and /dev/null differ
diff --git a/share/sources/edu.jmu/source.yaml b/share/sources/edu.jmu/source.yaml
deleted file mode 100644
index d324b0b3e..000000000
--- a/share/sources/edu.jmu/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://commons.lib.jmu.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.jmu
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://commons.lib.jmu.edu/
-long_title: Scholarly Commons @ JMU
-name: edu.jmu
-user: providers.edu.jmu
diff --git a/share/sources/edu.kent/icon.ico b/share/sources/edu.kent/icon.ico
deleted file mode 100644
index e02efcbb8d479adcd9070790a3aed05efee5ae75..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.kent/icon.ico and /dev/null differ
diff --git a/share/sources/edu.kent/source.yaml b/share/sources/edu.kent/source.yaml
deleted file mode 100644
index f5f52367e..000000000
--- a/share/sources/edu.kent/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.kent.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.kent
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.kent.edu
-long_title: Digital Commons @ Kent State University Libraries
-name: edu.kent
-user: providers.edu.kent
diff --git a/share/sources/edu.krex/icon.ico b/share/sources/edu.krex/icon.ico
deleted file mode 100644
index 835a617648b23c6d04a17ae0d5f4260a348923c0..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.krex/icon.ico and /dev/null differ
diff --git a/share/sources/edu.krex/source.yaml b/share/sources/edu.krex/source.yaml
deleted file mode 100644
index 8b7e09074..000000000
--- a/share/sources/edu.krex/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://krex.k-state.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2004-11-29T18:08:48Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.krex.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://krex.k-state.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.krex
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://krex.k-state.edu
-long_title: K-State Research Exchange
-name: edu.krex
-user: providers.edu.krex
diff --git a/share/sources/edu.mason/icon.ico b/share/sources/edu.mason/icon.ico
deleted file mode 100644
index bb07bbb23d32ce63877ed3c4bf0f6e7b658c7db4..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.mason/icon.ico and /dev/null differ
diff --git a/share/sources/edu.mason/source.yaml b/share/sources/edu.mason/source.yaml
deleted file mode 100644
index 20b68e581..000000000
--- a/share/sources/edu.mason/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://mars.gmu.edu/oai/request
-  disabled: false
-  earliest_date: 2004-12-08T19:31:55Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.mason.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://mars.gmu.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.mason
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://mars.gmu.edu/
-long_title: Mason Archival Repository Service
-name: edu.mason
-user: providers.edu.mason
diff --git a/share/sources/edu.mit/icon.ico b/share/sources/edu.mit/icon.ico
deleted file mode 100644
index d7c324f9652f92e95b09b55dfe7254d4e75bdad4..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.mit/icon.ico and /dev/null differ
diff --git a/share/sources/edu.mit/source.yaml b/share/sources/edu.mit/source.yaml
deleted file mode 100644
index b2bb80274..000000000
--- a/share/sources/edu.mit/source.yaml
+++ /dev/null
@@ -1,192 +0,0 @@
-# DSpace
-configs:
-- base_url: http://dspace.mit.edu/oai/request
-  disabled: false
-  earliest_date: 2001-01-01T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.mit
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [hdl_1721.1_18193, hdl_1721.1_18194, hdl_1721.1_18195, hdl_1721.1_89012,
-      hdl_1721.1_3650, hdl_1721.1_67473, hdl_1721.1_7630, hdl_1721.1_7760, hdl_1721.1_7744,
-      hdl_1721.1_7768, hdl_1721.1_7767, hdl_1721.1_7631, hdl_1721.1_7766, hdl_1721.1_7632,
-      hdl_1721.1_50867, hdl_1721.1_37333, hdl_1721.1_37334, hdl_1721.1_5460, hdl_1721.1_5461,
-      hdl_1721.1_39813,
hdl_1721.1_7771, hdl_1721.1_7634, hdl_1721.1_7635, hdl_1721.1_7772, - hdl_1721.1_7633, hdl_1721.1_7765, hdl_1721.1_79695, hdl_1721.1_18236, hdl_1721.1_18237, - hdl_1721.1_18238, hdl_1721.1_7626, hdl_1721.1_7755, hdl_1721.1_7770, hdl_1721.1_7627, - hdl_1721.1_7756, hdl_1721.1_7628, hdl_1721.1_7629, hdl_1721.1_7769, hdl_1721.1_7784, - hdl_1721.1_7636, hdl_1721.1_7591, hdl_1721.1_7785, hdl_1721.1_7592, hdl_1721.1_7783, - hdl_1721.1_7352, hdl_1721.1_7780, hdl_1721.1_7637, hdl_1721.1_7781, hdl_1721.1_7638, - hdl_1721.1_7639, hdl_1721.1_7779, hdl_1721.1_88072, hdl_1721.1_81425, hdl_1721.1_5462, - hdl_1721.1_88531, hdl_1721.1_35731, hdl_1721.1_7641, hdl_1721.1_7789, hdl_1721.1_7791, - hdl_1721.1_7643, hdl_1721.1_7642, hdl_1721.1_7790, hdl_1721.1_7640, hdl_1721.1_7788, - hdl_1721.1_7644, hdl_1721.1_7795, hdl_1721.1_7645, hdl_1721.1_7796, hdl_1721.1_7646, - hdl_1721.1_7794, hdl_1721.1_88516, hdl_1721.1_7934, hdl_1721.1_7648, hdl_1721.1_7800, - hdl_1721.1_7801, hdl_1721.1_7649, hdl_1721.1_7652, hdl_1721.1_7802, hdl_1721.1_7653, - hdl_1721.1_7799, hdl_1721.1_18186, hdl_1721.1_39096, hdl_1721.1_39099, hdl_1721.1_39100, - hdl_1721.1_39097, hdl_1721.1_72924, hdl_1721.1_72923, hdl_1721.1_54823, hdl_1721.1_54828, - hdl_1721.1_35730, hdl_1721.1_35732, hdl_1721.1_39117, hdl_1721.1_39115, hdl_1721.1_3651, - hdl_1721.1_85979, hdl_1721.1_88006, hdl_1721.1_1779, hdl_1721.1_29807, hdl_1721.1_29808, - hdl_1721.1_1778, hdl_1721.1_18187, hdl_1721.1_55346, hdl_1721.1_55382, hdl_1721.1_71743, - hdl_1721.1_55383, hdl_1721.1_88179, hdl_1721.1_1780, hdl_1721.1_3765, hdl_1721.1_30597, - hdl_1721.1_41529, hdl_1721.1_7806, hdl_1721.1_7655, hdl_1721.1_7807, hdl_1721.1_7656, - hdl_1721.1_7654, hdl_1721.1_7805, hdl_1721.1_7657, hdl_1721.1_7811, hdl_1721.1_7658, - hdl_1721.1_7812, hdl_1721.1_7810, hdl_1721.1_7659, hdl_1721.1_83579, hdl_1721.1_3766, - hdl_1721.1_7816, hdl_1721.1_7661, hdl_1721.1_7818, hdl_1721.1_7662, hdl_1721.1_7663, - hdl_1721.1_7817, hdl_1721.1_7660, hdl_1721.1_7815, hdl_1721.1_7886, hdl_1721.1_7709, - hdl_1721.1_7711, hdl_1721.1_7887, hdl_1721.1_7710, hdl_1721.1_7888, hdl_1721.1_7885, - hdl_1721.1_7708, hdl_1721.1_67705, hdl_1721.1_83560, hdl_1721.1_67903, hdl_1721.1_1781, - hdl_1721.1_68162, hdl_1721.1_34284, hdl_1721.1_34283, hdl_1721.1_46718, hdl_1721.1_79432, - hdl_1721.1_7714, hdl_1721.1_7892, hdl_1721.1_7893, hdl_1721.1_7715, hdl_1721.1_7891, - hdl_1721.1_7712, hdl_1721.1_61396, hdl_1721.1_3652, hdl_1721.1_5551, hdl_1721.1_6750, - hdl_1721.1_6751, hdl_1721.1_7624, hdl_1721.1_33950, hdl_1721.1_7666, hdl_1721.1_7822, - hdl_1721.1_7667, hdl_1721.1_7823, hdl_1721.1_7821, hdl_1721.1_7665, hdl_1721.1_34286, - hdl_1721.1_34285, hdl_1721.1_3767, hdl_1721.1_3768, hdl_1721.1_3769, hdl_1721.1_3653, - hdl_1721.1_7671, hdl_1721.1_7827, hdl_1721.1_7828, hdl_1721.1_7672, hdl_1721.1_7826, - hdl_1721.1_7670, hdl_1721.1_1782, hdl_1721.1_3550, hdl_1721.1_7353, hdl_1721.1_39119, - hdl_1721.1_1784, hdl_1721.1_29811, hdl_1721.1_29812, hdl_1721.1_55347, hdl_1721.1_1785, - hdl_1721.1_1786, hdl_1721.1_88005, hdl_1721.1_60280, hdl_1721.1_1783, hdl_1721.1_7833, - hdl_1721.1_7674, hdl_1721.1_7834, hdl_1721.1_7675, hdl_1721.1_7832, hdl_1721.1_7673, - hdl_1721.1_29814, hdl_1721.1_7729, hdl_1721.1_7918, hdl_1721.1_7730, hdl_1721.1_7919, - hdl_1721.1_7917, hdl_1721.1_7728, hdl_1721.1_35733, hdl_1721.1_29796, hdl_1721.1_7509, - hdl_1721.1_7678, hdl_1721.1_7838, hdl_1721.1_7839, hdl_1721.1_7679, hdl_1721.1_7840, - hdl_1721.1_7677, hdl_1721.1_7676, hdl_1721.1_7837, hdl_1721.1_1788, hdl_1721.1_7844, - hdl_1721.1_7681, hdl_1721.1_7682, 
hdl_1721.1_7845, hdl_1721.1_7680, hdl_1721.1_7843, - hdl_1721.1_37287, hdl_1721.1_7849, hdl_1721.1_7686, hdl_1721.1_7685, hdl_1721.1_7851, - hdl_1721.1_7684, hdl_1721.1_7850, hdl_1721.1_7848, hdl_1721.1_7683, hdl_1721.1_7717, - hdl_1721.1_7899, hdl_1721.1_7900, hdl_1721.1_7718, hdl_1721.1_7716, hdl_1721.1_7898, - hdl_1721.1_81436, hdl_1721.1_63243, hdl_1721.1_7532, hdl_1721.1_18116, hdl_1721.1_49433, - hdl_1721.1_1787, hdl_1721.1_5539, hdl_1721.1_7295, hdl_1721.1_7296, hdl_1721.1_3654, - hdl_1721.1_18239, hdl_1721.1_88182, hdl_1721.1_88019, hdl_1721.1_83516, hdl_1721.1_76749, - hdl_1721.1_67476, hdl_1721.1_7855, hdl_1721.1_7688, hdl_1721.1_7857, hdl_1721.1_7690, - hdl_1721.1_7689, hdl_1721.1_7856, hdl_1721.1_7687, hdl_1721.1_7854, hdl_1721.1_67474, - hdl_1721.1_67477, hdl_1721.1_67475, hdl_1721.1_7692, hdl_1721.1_7861, hdl_1721.1_1789, - hdl_1721.1_7863, hdl_1721.1_7694, hdl_1721.1_7693, hdl_1721.1_7862, hdl_1721.1_7691, - hdl_1721.1_7860, hdl_1721.1_7904, hdl_1721.1_7721, hdl_1721.1_5067, hdl_1721.1_7720, - hdl_1721.1_7905, hdl_1721.1_7719, hdl_1721.1_7903, hdl_1721.1_3774, hdl_1721.1_7356, - hdl_1721.1_7357, hdl_1721.1_85978, hdl_1721.1_16560, hdl_1721.1_16547, hdl_1721.1_16556, - hdl_1721.1_16552, hdl_1721.1_16564, hdl_1721.1_7696, hdl_1721.1_7867, hdl_1721.1_7868, - hdl_1721.1_7697, hdl_1721.1_7866, hdl_1721.1_7695, hdl_1721.1_3770, hdl_1721.1_32548, - hdl_1721.1_7871, hdl_1721.1_7700, hdl_1721.1_7699, hdl_1721.1_7872, hdl_1721.1_7870, - hdl_1721.1_7698, hdl_1721.1_89476, hdl_1721.1_88073, hdl_1721.1_80741, hdl_1721.1_16553, - hdl_1721.1_16548, hdl_1721.1_16557, hdl_1721.1_16565, hdl_1721.1_16561, hdl_1721.1_80814, - hdl_1721.1_88074, hdl_1721.1_54831, hdl_1721.1_54825, hdl_1721.1_39103, hdl_1721.1_39111, - hdl_1721.1_39112, hdl_1721.1_39104, hdl_1721.1_1790, hdl_1721.1_1791, hdl_1721.1_5435, - hdl_1721.1_55385, hdl_1721.1_18157, hdl_1721.1_86180, hdl_1721.1_26678, hdl_1721.1_18165, - hdl_1721.1_30600, hdl_1721.1_18163, hdl_1721.1_18228, hdl_1721.1_26584, hdl_1721.1_26672, - hdl_1721.1_18161, hdl_1721.1_18217, hdl_1721.1_26399, hdl_1721.1_18213, hdl_1721.1_26401, - hdl_1721.1_18155, hdl_1721.1_18159, hdl_1721.1_18897, hdl_1721.1_45562, hdl_1721.1_5436, - hdl_1721.1_5529, hdl_1721.1_5530, hdl_1721.1_32529, hdl_1721.1_18111, hdl_1721.1_5531, - hdl_1721.1_18107, hdl_1721.1_18168, hdl_1721.1_5444, hdl_1721.1_5443, hdl_1721.1_18105, - hdl_1721.1_18109, hdl_1721.1_34016, hdl_1721.1_62236, hdl_1721.1_5437, hdl_1721.1_18078, - hdl_1721.1_18076, hdl_1721.1_18169, hdl_1721.1_18080, hdl_1721.1_18106, hdl_1721.1_18108, - hdl_1721.1_18110, hdl_1721.1_18112, hdl_1721.1_32530, hdl_1721.1_18079, hdl_1721.1_18077, - hdl_1721.1_70033, hdl_1721.1_67478, hdl_1721.1_16558, hdl_1721.1_16554, hdl_1721.1_16546, - hdl_1721.1_16562, hdl_1721.1_16550, hdl_1721.1_83528, hdl_1721.1_88075, hdl_1721.1_43714, - hdl_1721.1_88076, hdl_1721.1_83613, hdl_1721.1_41896, hdl_1721.1_88000, hdl_1721.1_18216, - hdl_1721.1_18212, hdl_1721.1_18227, hdl_1721.1_26677, hdl_1721.1_26583, hdl_1721.1_26673, - hdl_1721.1_18892, hdl_1721.1_87999, hdl_1721.1_49817, hdl_1721.1_49830, hdl_1721.1_50644, - hdl_1721.1_50655, hdl_1721.1_50666, hdl_1721.1_50676, hdl_1721.1_50686, hdl_1721.1_50716, - hdl_1721.1_50726, hdl_1721.1_50740, hdl_1721.1_50761, hdl_1721.1_50763, hdl_1721.1_50793, - hdl_1721.1_50794, hdl_1721.1_50823, hdl_1721.1_50838, hdl_1721.1_50852, hdl_1721.1_50869, - hdl_1721.1_50883, hdl_1721.1_50898, hdl_1721.1_50913, hdl_1721.1_50914, hdl_1721.1_50915, - hdl_1721.1_50916, hdl_1721.1_50917, hdl_1721.1_51010, hdl_1721.1_51011, 
hdl_1721.1_51073, - hdl_1721.1_51074, hdl_1721.1_51075, hdl_1721.1_51122, hdl_1721.1_51138, hdl_1721.1_51139, - hdl_1721.1_51189, hdl_1721.1_51190, hdl_1721.1_51236, hdl_1721.1_51237, hdl_1721.1_51238, - hdl_1721.1_51239, hdl_1721.1_51900, hdl_1721.1_51928, hdl_1721.1_51952, hdl_1721.1_51976, - hdl_1721.1_52013, hdl_1721.1_52041, hdl_1721.1_52068, hdl_1721.1_52095, hdl_1721.1_52117, - hdl_1721.1_52145, hdl_1721.1_52168, hdl_1721.1_52189, hdl_1721.1_52212, hdl_1721.1_52245, - hdl_1721.1_52274, hdl_1721.1_53335, hdl_1721.1_53355, hdl_1721.1_53386, hdl_1721.1_53418, - hdl_1721.1_53448, hdl_1721.1_53474, hdl_1721.1_53510, hdl_1721.1_53587, hdl_1721.1_53612, - hdl_1721.1_53635, hdl_1721.1_53674, hdl_1721.1_53751, hdl_1721.1_53779, hdl_1721.1_53806, - hdl_1721.1_53845, hdl_1721.1_53869, hdl_1721.1_53899, hdl_1721.1_53923, hdl_1721.1_53955, - hdl_1721.1_53988, hdl_1721.1_54016, hdl_1721.1_54038, hdl_1721.1_55589, hdl_1721.1_54105, - hdl_1721.1_54130, hdl_1721.1_54154, hdl_1721.1_55434, hdl_1721.1_55460, hdl_1721.1_55490, - hdl_1721.1_55512, hdl_1721.1_55553, hdl_1721.1_55617, hdl_1721.1_55651, hdl_1721.1_55672, - hdl_1721.1_55714, hdl_1721.1_55747, hdl_1721.1_55775, hdl_1721.1_55817, hdl_1721.1_55852, - hdl_1721.1_55870, hdl_1721.1_56010, hdl_1721.1_56030, hdl_1721.1_56066, hdl_1721.1_56089, - hdl_1721.1_56109, hdl_1721.1_56128, hdl_1721.1_56161, hdl_1721.1_56182, hdl_1721.1_56201, - hdl_1721.1_56220, hdl_1721.1_56265, hdl_1721.1_56266, hdl_1721.1_56267, hdl_1721.1_56345, - hdl_1721.1_56375, hdl_1721.1_56393, hdl_1721.1_56412, hdl_1721.1_56431, hdl_1721.1_56458, - hdl_1721.1_56479, hdl_1721.1_56505, hdl_1721.1_56533, hdl_1721.1_56582, hdl_1721.1_56610, - hdl_1721.1_56635, hdl_1721.1_56676, hdl_1721.1_56715, hdl_1721.1_56751, hdl_1721.1_56746, - hdl_1721.1_56732, hdl_1721.1_56717, hdl_1721.1_56716, hdl_1721.1_56714, hdl_1721.1_56708, - hdl_1721.1_56692, hdl_1721.1_56679, hdl_1721.1_56678, hdl_1721.1_56675, hdl_1721.1_56674, - hdl_1721.1_56673, hdl_1721.1_56672, hdl_1721.1_56661, hdl_1721.1_56647, hdl_1721.1_56644, - hdl_1721.1_56638, hdl_1721.1_56637, hdl_1721.1_4059, hdl_1721.1_5552, hdl_1721.1_7727, - hdl_1721.1_7726, hdl_1721.1_7725, hdl_1721.1_7914, hdl_1721.1_7915, hdl_1721.1_7913, - hdl_1721.1_46333, hdl_1721.1_76637, hdl_1721.1_46704, hdl_1721.1_85574, hdl_1721.1_62800, - hdl_1721.1_65423, hdl_1721.1_40283, hdl_1721.1_16551, hdl_1721.1_16545, hdl_1721.1_16555, - hdl_1721.1_16559, hdl_1721.1_16563, hdl_1721.1_70040, hdl_1721.1_37288, hdl_1721.1_1792, - hdl_1721.1_18188, hdl_1721.1_53733, hdl_1721.1_76638, hdl_1721.1_88003, hdl_1721.1_7923, - hdl_1721.1_7740, hdl_1721.1_7739, hdl_1721.1_7924, hdl_1721.1_7922, hdl_1721.1_7738, - hdl_1721.1_87998, hdl_1721.1_65424, hdl_1721.1_76639, hdl_1721.1_29797, hdl_1721.1_76745, - hdl_1721.1_1793, hdl_1721.1_7743, hdl_1721.1_7928, hdl_1721.1_7742, hdl_1721.1_7929, - hdl_1721.1_7927, hdl_1721.1_7741, hdl_1721.1_55584, hdl_1721.1_55916, hdl_1721.1_42001, - hdl_1721.1_26585, hdl_1721.1_29798, hdl_1721.1_18230, hdl_1721.1_18899, hdl_1721.1_26674, - hdl_1721.1_18219, hdl_1721.1_18215, hdl_1721.1_26680, hdl_1721.1_80739, hdl_1721.1_80746, - hdl_1721.1_88077, hdl_1721.1_18235, hdl_1721.1_18225, hdl_1721.1_18224, hdl_1721.1_55586, - hdl_1721.1_55719, hdl_1721.1_55588, hdl_1721.1_78860, hdl_1721.1_18226, hdl_1721.1_7703, - hdl_1721.1_7876, hdl_1721.1_7877, hdl_1721.1_7704, hdl_1721.1_7702, hdl_1721.1_7875, - hdl_1721.1_16459, hdl_1721.1_18240, hdl_1721.1_7358, hdl_1721.1_3771, hdl_1721.1_33228, - hdl_1721.1_18229, hdl_1721.1_26679, hdl_1721.1_80740, 
hdl_1721.1_18898, hdl_1721.1_26676, - hdl_1721.1_18218, hdl_1721.1_26586, hdl_1721.1_18222, hdl_1721.1_88078, hdl_1721.1_34010, - hdl_1721.1_18214, hdl_1721.1_81470, hdl_1721.1_88079, hdl_1721.1_83431, hdl_1721.1_52116, - hdl_1721.1_39118, hdl_1721.1_89011, hdl_1721.1_50866, hdl_1721.1_37332, hdl_1721.1_34280, - hdl_1721.1_5459, hdl_1721.1_16538, hdl_1721.1_55583, hdl_1721.1_87997, hdl_1721.1_88181, - hdl_1721.1_16539, hdl_1721.1_7752, hdl_1721.1_40282, hdl_1721.1_67472, hdl_1721.1_88529, - hdl_1721.1_34009, hdl_1721.1_3549, hdl_1721.1_55585, hdl_1721.1_3764, hdl_1721.1_7530, - hdl_1721.1_18211, hdl_1721.1_16165, hdl_1721.1_18154, hdl_1721.1_26400, hdl_1721.1_18203, - hdl_1721.1_39094, hdl_1721.1_54826, hdl_1721.1_33227, hdl_1721.1_5458, hdl_1721.1_16540, - hdl_1721.1_88071, hdl_1721.1_16535, hdl_1721.1_29806, hdl_1721.1_18118, hdl_1721.1_55384, - hdl_1721.1_18160, hdl_1721.1_85573, hdl_1721.1_7758, hdl_1721.1_7749, hdl_1721.1_7761, - hdl_1721.1_7776, hdl_1721.1_7786, hdl_1721.1_7792, hdl_1721.1_7797, hdl_1721.1_7803, - hdl_1721.1_7808, hdl_1721.1_7813, hdl_1721.1_7819, hdl_1721.1_7824, hdl_1721.1_7829, - hdl_1721.1_7835, hdl_1721.1_7841, hdl_1721.1_7846, hdl_1721.1_7852, hdl_1721.1_1774, - hdl_1721.1_7864, hdl_1721.1_5427, hdl_1721.1_7873, hdl_1721.1_67704, hdl_1721.1_7883, - hdl_1721.1_18204, hdl_1721.1_30596, hdl_1721.1_46702, hdl_1721.1_7507, hdl_1721.1_18202, - hdl_1721.1_67902, hdl_1721.1_68161, hdl_1721.1_55718, hdl_1721.1_55915, hdl_1721.1_34282, - hdl_1721.1_76636, hdl_1721.1_7889, hdl_1721.1_6749, hdl_1721.1_79431, hdl_1721.1_61395, - hdl_1721.1_7932, hdl_1721.1_18205, hdl_1721.1_46717, hdl_1721.1_55587, hdl_1721.1_34281, - hdl_1721.1_18206, hdl_1721.1_26398, hdl_1721.1_18192, hdl_1721.1_1775, hdl_1721.1_70032, - hdl_1721.1_29810, hdl_1721.1_80738, hdl_1721.1_88180, hdl_1721.1_29795, hdl_1721.1_18162, - hdl_1721.1_68160, hdl_1721.1_7896, hdl_1721.1_18156, hdl_1721.1_41894, hdl_1721.1_7531, - hdl_1721.1_7581, hdl_1721.1_33971, hdl_1721.1_49432, hdl_1721.1_33970, hdl_1721.1_1776, - hdl_1721.1_1773, hdl_1721.1_7582, hdl_1721.1_7294, hdl_1721.1_18158, hdl_1721.1_30599, - hdl_1721.1_88018, hdl_1721.1_7351, hdl_1721.1_70038, hdl_1721.1_5066, hdl_1721.1_16537, - hdl_1721.1_7933, hdl_1721.1_18167, hdl_1721.1_53732, hdl_1721.1_41528, hdl_1721.1_88004, - hdl_1721.1_54829, hdl_1721.1_39101, hdl_1721.1_32547, hdl_1721.1_80743, hdl_1721.1_29813, - hdl_1721.1_80745, hdl_1721.1_88530, hdl_1721.1_18119, hdl_1721.1_16536, hdl_1721.1_62799, - hdl_1721.1_18170, hdl_1721.1_4058, hdl_1721.1_88001, hdl_1721.1_49816, hdl_1721.1_62234, - hdl_1721.1_7508, hdl_1721.1_33226, hdl_1721.1_7911, hdl_1721.1_46703, hdl_1721.1_18164, - hdl_1721.1_3649, hdl_1721.1_1777, hdl_1721.1_16541, hdl_1721.1_18117, hdl_1721.1_18210, - hdl_1721.1_45561, hdl_1721.1_18201, hdl_1721.1_18185, hdl_1721.1_7920, hdl_1721.1_7925, - hdl_1721.1_16542, hdl_1721.1_55345, hdl_1721.1_7759, hdl_1721.1_7622, hdl_1721.1_7754, - hdl_1721.1_7782, hdl_1721.1_7778, hdl_1721.1_7787, hdl_1721.1_7793, hdl_1721.1_7798, - hdl_1721.1_39098, hdl_1721.1_39095, hdl_1721.1_54827, hdl_1721.1_54822, hdl_1721.1_39116, - hdl_1721.1_39113, hdl_1721.1_7750, hdl_1721.1_7621, hdl_1721.1_7583, hdl_1721.1_7589, - hdl_1721.1_7593, hdl_1721.1_7594, hdl_1721.1_7595, hdl_1721.1_7596, hdl_1721.1_7597, - hdl_1721.1_7598, hdl_1721.1_7599, hdl_1721.1_7600, hdl_1721.1_7601, hdl_1721.1_7602, - hdl_1721.1_7603, hdl_1721.1_7604, hdl_1721.1_7605, hdl_1721.1_7606, hdl_1721.1_7607, - hdl_1721.1_7608, hdl_1721.1_7609, hdl_1721.1_7610, hdl_1721.1_7804, hdl_1721.1_7809, - 
hdl_1721.1_7814, hdl_1721.1_7884, hdl_1721.1_7612, hdl_1721.1_7613, hdl_1721.1_7890,
- hdl_1721.1_7820, hdl_1721.1_7825, hdl_1721.1_7830, hdl_1721.1_7916, hdl_1721.1_7836,
- hdl_1721.1_7842, hdl_1721.1_7847, hdl_1721.1_7614, hdl_1721.1_7897, hdl_1721.1_7853,
- hdl_1721.1_7859, hdl_1721.1_7902, hdl_1721.1_7615, hdl_1721.1_7865, hdl_1721.1_7869,
- hdl_1721.1_54830, hdl_1721.1_54824, hdl_1721.1_39110, hdl_1721.1_39102, hdl_1721.1_7617,
- hdl_1721.1_7912, hdl_1721.1_7618, hdl_1721.1_7737, hdl_1721.1_7921, hdl_1721.1_7926,
- hdl_1721.1_7620, hdl_1721.1_7874, hdl_1721.1_7959, hdl_1721.1_65422, hdl_1721.1_78859,
- hdl_1721.1_18131, hdl_1721.1_89474, hdl_1721.1_18209, hdl_1721.1_76748, hdl_1721.1_46332,
- hdl_1721.1_5432, hdl_1721.1_5549, hdl_1721.1_18081, hdl_1721.1_18082, hdl_1721.1_5523,
- hdl_1721.1_34015, hdl_1721.1_5524, hdl_1721.1_18083, hdl_1721.1_18084, hdl_1721.1_5442,
- hdl_1721.1_32528, hdl_1721.1_5528, hdl_1721.1_5431]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://dspace.mit.edu/
-long_title: DSpace@MIT
-name: edu.mit
-user: providers.edu.mit
diff --git a/share/sources/edu.mizzou/icon.ico b/share/sources/edu.mizzou/icon.ico
deleted file mode 100644
index d8ffe3db60b2f39e28767cc4a5863830baf313f2..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.mizzou/icon.ico and /dev/null differ
diff --git a/share/sources/edu.mizzou/source.yaml b/share/sources/edu.mizzou/source.yaml
deleted file mode 100644
index 73ade9709..000000000
--- a/share/sources/edu.mizzou/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://mospace.umsystem.edu/oai/request
- disabled: false
- earliest_date: 2008-07-14T15:27:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.mizzou.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: https://mospace.umsystem.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.mizzou
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: https://mospace.umsystem.edu
-long_title: MOspace Institutional Repository
-name: edu.mizzou
-user: providers.edu.mizzou
diff --git a/share/sources/edu.nau.openknowledge/icon.ico b/share/sources/edu.nau.openknowledge/icon.ico
deleted file mode 100644
index 9a5e0733b91e02aa77b158e0baed21d8a4dfc444..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.nau.openknowledge/icon.ico and /dev/null differ
diff --git a/share/sources/edu.nau.openknowledge/source.yaml b/share/sources/edu.nau.openknowledge/source.yaml
deleted file mode 100644
index b91663d40..000000000
--- a/share/sources/edu.nau.openknowledge/source.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-configs:
-- base_url: http://openknowledge.nau.edu/cgi/oai2
- disabled: false
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.nau.openknowledge
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map:
- Monograph: WorkingPaper
- "Thesis (Masters)": Thesis
- "Thesis (Doctoral)": Dissertation
- "Conference or Workshop Item": Presentation
-home_page: http://openknowledge.nau.edu
-long_title: OpenKnowledge@NAU
-name: edu.nau.openknowledge
-user: providers.edu.nau.openknowledge
diff --git a/share/sources/edu.nku/icon.ico b/share/sources/edu.nku/icon.ico
deleted file mode 100644
index c807f86cedf8e61ae121a6401e3963146510dec4..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.nku/icon.ico and /dev/null differ
diff --git a/share/sources/edu.nku/source.yaml b/share/sources/edu.nku/source.yaml
deleted file mode 100644
index 2f8b2b15b..000000000
--- a/share/sources/edu.nku/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://dspace.nku.edu/oai/request
- disabled: false
- earliest_date: 2014-12-15T16:30:01Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.nku.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: [com_11216_7, com_11216_20, col_11216_168]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: https://dspace.nku.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.nku
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [com_11216_7, com_11216_20, col_11216_168]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: https://dspace.nku.edu
-long_title: NKU Institutional Repository
-name: edu.nku
-user: providers.edu.nku
diff --git a/share/sources/edu.oaktrust/icon.ico b/share/sources/edu.oaktrust/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.oaktrust/icon.ico and /dev/null differ
diff --git a/share/sources/edu.oaktrust/source.yaml b/share/sources/edu.oaktrust/source.yaml
deleted file mode 100644
index 74bf049a9..000000000
--- a/share/sources/edu.oaktrust/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://oaktrust.library.tamu.edu/dspace-oai/request
- disabled: false
- earliest_date: 2004-11-10 20:56:05 # their earliestDatestamp is most recent
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.oaktrust.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: http://oaktrust.library.tamu.edu/dspace-oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.oaktrust
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://oaktrust.library.tamu.edu
-long_title: The OAKTrust Digital Repository at Texas A&M
-name: edu.oaktrust
-user: providers.edu.oaktrust
diff --git a/share/sources/edu.opensiuc/icon.ico b/share/sources/edu.opensiuc/icon.ico
deleted file mode 100644
index b354cdfbec57a9d6848bf3069bc9532de49e2dc1..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.opensiuc/icon.ico and /dev/null differ
diff --git a/share/sources/edu.opensiuc/source.yaml b/share/sources/edu.opensiuc/source.yaml
deleted file mode 100644
index fb1a7aeb8..000000000
--- a/share/sources/edu.opensiuc/source.yaml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://opensiuc.lib.siu.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.opensiuc
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [ad_pubs, agecon_articles, agecon_wp, anat_pubs, anthro_pubs, arch_videos,
- asfn_articles, auto_pres, ccj_articles, cee_pubs, chem_mdata, chem_pubs, cs_pubs,
- cs_sp, cwrl_fr, dh_articles, dh_pres, dh_works, dissertations, ebl, ece_articles,
- ece_books, ece_confs, ece_tr, econ_dp, econ_pres, epse_books, epse_confs, epse_pubs,
- esh_2014, fiaq_pubs, fiaq_reports, fin_pubs, fin_wp, for_articles, geol_comp,
- geol_pubs, gers_pubs, gmrc_gc, gmrc_nm, gs_rp, hist_pubs, histcw_pp, igert_cache,
- igert_reports, ijshs_2014, im_pubs, jcwre, kaleidoscope, math_aids, math_articles,
- math_books, math_diss, math_grp, math_misc, math_theses, meded_books, meded_confs,
- meded_pubs, meep_articles, micro_pres, micro_pubs, morris_articles, morris_confs,
- morris_surveys, music_gradworks, ojwed, pb_pubs, pb_reports, phe_pres, phe_pubs,
- phys_pubs, phys_vids, pn_wp, pnconfs_2010, pnconfs_2011, pnconfs_2012, ppi_papers,
- ppi_sipolls, ppi_statepolls, ps_confs, ps_dr, ps_pubs, ps_wp, psas_articles,
- psych_diss, psych_grp, psych_pubs, psych_theses, reach_posters, rehab_pubs,
- safmusiccharts_faculty, safmusiccharts_students, safmusicpapers_faculty, safmusicpapers_students,
- srs_2009, theses, ucowrconfs_2003, ucowrconfs_2004, ucowrconfs_2005, ucowrconfs_2006,
- ucowrconfs_2007, ucowrconfs_2008, ucowrconfs_2009, ugr_mcnair, wed_diss, wed_grp,
- wed_theses, wrd2011_keynote, wrd2011_pres, zool_data, zool_diss, zool_pubs]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://opensiuc.lib.siu.edu/
-long_title: OpenSIUC at the Southern Illinois University Carbondale
-name: edu.opensiuc
-user: providers.edu.opensiuc
diff --git a/share/sources/edu.pcom/icon.ico b/share/sources/edu.pcom/icon.ico
deleted file mode 100644
index a6bc859ecb07590bcd56f39b210cee3614473782..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.pcom/icon.ico and /dev/null differ
diff --git a/share/sources/edu.pcom/source.yaml b/share/sources/edu.pcom/source.yaml
deleted file mode 100644
index e4a618598..000000000
--- a/share/sources/edu.pcom/source.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.pcom.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.pcom
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [biomed, pa_systematic_reviews, psychology_dissertations, scholarly_papers,
- research_day, posters]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://digitalcommons.pcom.edu
-long_title: DigitalCommons@PCOM
-name: edu.pcom
-user: providers.edu.pcom
diff --git a/share/sources/edu.pdxscholar/icon.ico b/share/sources/edu.pdxscholar/icon.ico
deleted file mode 100644
index 11e87c23bf9e6aa517988edf8e30505805706a9c..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.pdxscholar/icon.ico and /dev/null differ
diff --git a/share/sources/edu.pdxscholar/source.yaml b/share/sources/edu.pdxscholar/source.yaml
deleted file mode 100644
index c2c7a2f02..000000000
--- a/share/sources/edu.pdxscholar/source.yaml
+++ /dev/null
@@ -1,30 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://pdxscholar.library.pdx.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.pdxscholar
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [actg, actg_fac, anth_fac, anthos, anthos_archives, ling_fac, bio_fac,
- eng_bookpubpaper, busadmin_fac, childfamily_pub, centerforlakes_pub, clee_pubs,
- publicservice_pub, realestate_pub, scienceeducation_fac, chem_fac, chla_fac,
- cengin_fac, cengin_gradprojects, cengin_honorstheses, comm_fac, commhealth_fac,
- compsci_fac, coun_fac, ccj_fac, ccj_capstone, ci_fac, etds, open_access_etds,
- econ_fac, edu_fac, elp_fac, ece_fac, etm_fac, esm_fac, geog_fac, geog_masterpapers,
- geology_fac, hist_fac, naturalresources_pub, iss_pub, metropolitianstudies,
- aging_pub, is_fac, lse_comp, ulib_fac, lltr, mem_gradprojects, usp_murp, mth_fac,
- mengin_fac, metroscape, pdxopen, mcnair, phl_fac, phy_fac, prc, psy_fac, pubadmin_fac,
- realestate_workshop, usp_planning, rri_facpubs, socwork_fac, soc_fac, sped_fac,
- sphr_fac, sysc_fac, honorstheses, usp_fac, wgss_fac, wll_fac, younghistorians]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://pdxscholar.library.pdx.edu
-long_title: PDXScholar Portland State University
-name: edu.pdxscholar
-user: providers.edu.pdxscholar
diff --git a/share/sources/edu.purdue.epubs/icon.ico b/share/sources/edu.purdue.epubs/icon.ico
deleted file mode 100644
index fe50cab47941f3b2eb092e8364f5367d87a30e6a..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.purdue.epubs/icon.ico and /dev/null differ
diff --git a/share/sources/edu.purdue.epubs/source.yaml b/share/sources/edu.purdue.epubs/source.yaml
deleted file mode 100644
index 420e2f91b..000000000
--- a/share/sources/edu.purdue.epubs/source.yaml
+++ /dev/null
@@ -1,66 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://docs.lib.purdue.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.purdue.epubs
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [ddad2011, gully, gully_images, advancegsr, a3ir-core, alsnscort,
- atg, agroenviron, airties, aps, aqrr, techdepts, alsreports, artlas, atspmw,
- atpubs, atgrads, atugres, bio, biopre, biopub, bioinformaticspubs, nanoetd,
- nanofacility, nano, nanoposter, botrsrchctr, brc, comm, commpubs, cfshonors,
- cfstheses, clcweblibrary, clcweb, coolingpubs, cffamilies, cffpub, cie, cwc,
- charleston, claritas, cgg, ag, education, engr, hhs, hhshonors, hhstheses, hhsrci,
- cla, pharm, sci, tech, techdirproj, techdissertations, techmasters, techetds,
- vet, vetbooks, cgtpubs, cooling, cc, ccpubs, cctech, dlcpre, dlcpub, datacurationprofiles,
- dcp, dcpsymposium, dcptoolkit, dilcs, dilsymposium, dawnordoom, agrypubs, cpbpubs,
- agcomm, agecon, agedocs, agry, ansc, anscpubs, anscgrads, anth, anthpubs, ad,
- adpubs, avtech, atdp, attheses, bms, bmspubs, biochem, biochempubs, biosci,
- bioscipubs, btny, btnypubs, bcm, bcmdp, bcmtheses, bcm_pubs, chem, chempubs,
- cpb, cgt, cgttheses, cgtdp, cit, comp_sci, cspubs, cstech, citdp, cittheses,
- csr, csrpubs, edci, edcipubs, eas, easpubs, econ, econpubs, edst, edstpubs,
- ece, ecepubs, ecetheses, ecetr, ecet, engl, englpubs, entm, entmpubs, foodsci,
- foodscipubs, fnr, fnrpubs, hk, hkpubs, hist, histpubs, histwp, hlapubs, hla,
- htm, htmpubs, hdfspubs, hdfs, ipph, ipphpubs, mgmt, mgmtpubs, math, mathpubs,
- metdp, mettheses, mcmp, mcmppubs, mcmpgrads, fn, fnpubs, phpr, phprpubs, phil,
- philpubs, physics, physastrpubs, physics_articles, polisci, pspubs, psych, psychpubs,
- soc, socpubs, slhs, slhspubs, stat, statpubs, statgrads, tli, tlidp, tlitheses,
- theatre, theatrepubs, vcs, vcspubs, ydae, ydaepubs, dtrs, dlc, dp, cem, cempubs,
- dance, dancepubs, eee, eeepubs, music, musicpubs, drinet, ectfs, ectfs_archive,
- eandc, ect, soeslsummit, ebull, cit_articles, fosr, gisday, geopubs, ggrs, gpridocs,
- gtaptp, gtapwp, giftedchildren, gbl, gpri, gpripb, agext, anrhist, hivelab,
- hivepubs, hivepres, hivetechs, hees, honors, hon_stupubs, impactcms, impactpres,
- imp, impactpubs, impactreps, impsymposium, waterpubs, watertech, agroenviron_images,
- inltap, inltapdirectory, inltappubs, inltaptechs, imr, iwrrc, itap, impact,
- idc, idcpres, idcpubs, ijpbl, alsinternal, iatul2010, icec, thermal, icdcs,
- ihpbc, i3r2, iracc, jtrpaffdocs, jtrpdata, jtrpdocs, jtrppres, jtrpsuppm, jtrp,
- jtrprogram, jafe, jate, jca, jhpee, jpeer, jsaaea, jto, kran, lars_symp, larstech,
- lita2009, lars, lib_fscm, lib_fspres, lib_fsdocs, lib_fssup, librariespublishing,
- libreports, lib_research, lsg, civl, civeng, civlgradreports, cetheses, modvis,
- ngica_artcl, ngica_note, ovpr, provost, provost_pubs, ocs, ocspub, open_access_dissertations,
- open_access_theses, nanodocs, p12eders, pibergiim, pibergpres, pibergpubs, prism,
- vpa, paij, greenhouse, policiesforprogress, nasap, aseptic, iatul, herrick,
- alspub, ciberwp, fruitveg, fvtrials, purduegisday, piberg, pjsl, libraries,
- plas, pmag, pmcg, pmrg, p12nsummit, roadschool, thepress, purduepress_ebooks,
- pupoaj, pupsubj, presssupp, writinglab, writinglabsdrp, writinglabgradpres,
- writinglabgradpubs, writinglabpres, writinglabpubs, writinglabreps, writinglabetd,
- rche_pre, rche_pro, rche_rp, rche_rep, rche_wp, revisioning, rche, nasarp, sbrite,
- sbritereports, sps, spsoaj, sps_ebooks, sotl, aae, aaepubs, abe, abepubs, che,
- chepubs, ene, enepubs, enegs, enewp, et, etdp, ettheses, hsci, hscipubs, hscigradpubs,
- ie, iepubs, lc, lcpubs, lcwp, mse, msepubs, me, mepubs, ne, nepubs, nursing,
- nursingpubs, shofar, ses2014, sppp, demolition, agep, deansbible, passarowitz,
- reframit, swadin, nasatr, tlipubs, puhistorian, icpns, atsmw, jps, jpur, jpur_proposals,
- rueffgalleries, surf, etd, timber, transform, archives, gendes, ovprcores, demingconf,
- bme, bmepubs, bmegrads, swresources]
- emitted_type: CreativeWork
- property_list: [publisher, format, source, date, identifier, type]
- type_map: {}
-home_page: http://docs.lib.purdue.edu/
-long_title: Purdue e-Pubs
-name: edu.purdue.epubs
-user: providers.edu.purdue.epubs
diff --git a/share/sources/edu.purdue/icon.ico b/share/sources/edu.purdue/icon.ico
deleted file mode 100644
index ab2108524553262f2258bf27a153798901ab401f..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.purdue/icon.ico and /dev/null differ
diff --git a/share/sources/edu.purdue/source.yaml b/share/sources/edu.purdue/source.yaml
deleted file mode 100644
index b5c0bd315..000000000
--- a/share/sources/edu.purdue/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://purr.purdue.edu/oaipmh
- disabled: false
- earliest_date: 2012-02-22T11:51:19Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.purdue
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://purr.purdue.edu
-long_title: PURR - Purdue University Research Repository
-name: edu.purdue
-user: providers.edu.purdue
diff --git a/share/sources/edu.richmond/icon.ico b/share/sources/edu.richmond/icon.ico
deleted file mode 100644
index 489e75a78728f5391f9ac25b1bfc6e9785be2390..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.richmond/icon.ico and /dev/null differ
diff --git a/share/sources/edu.richmond/source.yaml b/share/sources/edu.richmond/source.yaml
deleted file mode 100644
index 87004ca36..000000000
--- a/share/sources/edu.richmond/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholarship.richmond.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.richmond
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://scholarship.richmond.edu
-long_title: University of Richmond
-name: edu.richmond
-user: providers.edu.richmond
diff --git a/share/sources/edu.scholarsarchiveosu/icon.ico b/share/sources/edu.scholarsarchiveosu/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.scholarsarchiveosu/icon.ico and /dev/null differ
diff --git a/share/sources/edu.scholarsarchiveosu/source.yaml b/share/sources/edu.scholarsarchiveosu/source.yaml
deleted file mode 100644
index dc0d51832..000000000
--- a/share/sources/edu.scholarsarchiveosu/source.yaml
+++ /dev/null
@@ -1,48 +0,0 @@
-# DSpace
-configs:
-- base_url: https://ir.library.oregonstate.edu/catalog/oai
- disabled: false
- earliest_date: 2017-07-03T17:46:34Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.scholarsarchiveosu.hyrax
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: http://ir.library.oregonstate.edu/oai/request
- disabled: true
- earliest_date: null # 0011-01-01T00:00:00Z is incorrect
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.scholarsarchiveosu.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: http://ir.library.oregonstate.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.scholarsarchiveosu
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://ir.library.oregonstate.edu/
-long_title: ScholarsArchive@OSU
-name: edu.scholarsarchiveosu
-user: providers.edu.scholarsarchiveosu
diff --git a/share/sources/edu.scholarsbank/icon.ico b/share/sources/edu.scholarsbank/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.scholarsbank/icon.ico and /dev/null differ
diff --git a/share/sources/edu.scholarsbank/source.yaml b/share/sources/edu.scholarsbank/source.yaml
deleted file mode 100644
index 2efaa4552..000000000
--- a/share/sources/edu.scholarsbank/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://scholarsbank.uoregon.edu/oai/request
- disabled: false
- earliest_date: 2003-05-08T13:18:19Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.scholarsbank.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: http://scholarsbank.uoregon.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.scholarsbank
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://scholarsbank.uoregon.edu
-long_title: Scholars Bank University of Oregon
-name: edu.scholarsbank
-user: providers.edu.scholarsbank
diff --git a/share/sources/edu.scholarscompass_vcu/icon.ico b/share/sources/edu.scholarscompass_vcu/icon.ico
deleted file mode 100644
index 369fa88b4806aea20e4c86bae6acc9f8a1d763c5..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.scholarscompass_vcu/icon.ico and /dev/null differ
diff --git a/share/sources/edu.scholarscompass_vcu/source.yaml b/share/sources/edu.scholarscompass_vcu/source.yaml
deleted file mode 100644
index 8b5f78b9d..000000000
--- a/share/sources/edu.scholarscompass_vcu/source.yaml
+++ /dev/null
@@ -1,47 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholarscompass.vcu.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.scholarscompass_vcu
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [lhs, anat_pubs, anesth_pubs, arte_pubs, arth_pubs, bioc_pubs,
- biol_present, biol_pubs, biomarker_pubs, egrb_pubs, bios_pubs, britva, capstone,
- vcoa_case, biomarker, medsim, csbc, clse_pubs, chem_pubs, humsci, cei, cer,
- cenrinstitute, cenrinstitute_images, cer_resources, cmsc_pubs, conferences,
- anat, anesth, arte, arth, bioc, biol, egrb, bios, clse, chem, cmsc, econ, edlp,
- egre, emsa, engl, fmph, frsc, genp, grty, hadm, hcpr, hist, hgen, info, intmed,
- hems, kine, math, egmn, medc, micr, musc, neurology, neurosur, obgyn, occt,
- ophth, oralhealth, orsg, orthop, otolar, path, patc, pediatrics, peri, pceu,
- phtx, phar, phil, pmr, phty, phys, phis, pros, psych, psyc, radonc, radiology,
- sbhd, ssor, surgery, tedu, vcoa_editorial, community, community_resources, econ_pubs,
- edlp_pubs, egre_pubs, emsa_pubs, engl_pubs, fmph_present, fmph_pubs, frsc_pubs,
- fwap_pubs, genp_pubs, grty_pubs, rcdayposters, gradposters, gradschool, hadm_pubs,
- hcpr_pubs, hist_data, hist_pubs, medsim_pubs, hgen_pubs, info_pubs, inova, inova_pubs,
- ica, ica_pubs, intmed_pubs, jstae, hems_pubs, kine_pubs, wilder, wilder_pubs,
- lifesci, mcvq, mktg_pubs, massey, massey_pubs, math_pubs, egmn_pubs, medc_pubs,
- micr_pubs, neurology_pubs, neurosur_pubs, obgyn_pubs, occt_pubs, research, pharmacy_dean,
- arch001, ophth_pubs, oralhealth_pubs, orsg_pubs, orthop_pubs, otolar_pubs, partnershipinstitute,
- partnershipinstitute_images, path_pubs, patc_pubs, pediatrics_pubs, peri_pubs,
- pceu_pubs, phtx_pubs, phar_pubs, philipsinst, philipsinst_pubs, phil_pubs, pmr_pubs,
- phty_pubs, phys_pubs, phis_pubs, pros_pubs, psych_pubs, psyc_pubs, pharmacy_dean_pubs,
- radonc_pubs, radiology_pubs, rice, rice_symp, masc, masc_present, rpec_race,
- sahp, business, dentistry, education, engineering, medicine, nursing, nursing_pubs,
- pharmacy, socialwork, wrld, arts, service_institute, service_institute_images,
- sixty, socialwork_pubs, socialwork_student, sbhd_pubs, sociology, sociology_pubs,
- ssor_data, ssor_pubs, csbc_pubs, surgery_present, surgery_pubs, tedu_pubs, etd,
- urop, uresposters, brandcenter, vcubulletins, libraries, libraries_data, libraries_present,
- libraries_pubs, vcuhealth, vcuhealth_pubs, archives, davinci, davinci_student,
- vcoa, vcoa_pubs, wrld_pubs]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://scholarscompass.vcu.edu
-long_title: VCU Scholars Compass
-name: edu.scholarscompass_vcu
-user: providers.edu.scholarscompass_vcu
diff --git a/share/sources/edu.scholarworks_montana/icon.ico b/share/sources/edu.scholarworks_montana/icon.ico
deleted file mode 100644
index 4869c96654894de1fd54836b5c59886fcfaf5d1e..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.scholarworks_montana/icon.ico and /dev/null differ
diff --git a/share/sources/edu.scholarworks_montana/source.yaml b/share/sources/edu.scholarworks_montana/source.yaml
deleted file mode 100644
index dd77ef19e..000000000
--- a/share/sources/edu.scholarworks_montana/source.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-# DSpace
-configs:
-- base_url: http://scholarworks.montana.edu/oai/request
- disabled: false
- earliest_date: 2012-10-24T17:28:01Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.scholarworks_montana.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: [col_1_3451, com_1_3, com_1_4, com_1_7, com_1_8, com_1_9, com_1_10,
- com_1_11, com_1_13, com_1_14, com_1_15, com_1_16, com_1_17, com_1_18, com_1_19,
- com_1_24, com_1_25, com_1_26, com_1_27, com_1_29, com_1_30, com_1_31, com_1_32,
- com_1_33, com_1_35, com_1_37, com_1_38, com_1_39, com_1_40, com_1_42, com_1_43,
- com_1_44, com_1_45, com_1_46, com_1_48, com_1_49, com_1_50, com_1_51, com_1_52,
- com_1_53, com_1_54, com_1_55, com_1_56, com_1_57, com_1_60, com_1_65, com_1_66,
- com_1_68, com_1_77, com_1_86, com_1_91, com_1_93, com_1_104, com_1_154, com_1_318,
- com_1_388, com_1_404, com_1_732, com_1_2942, com_1_2949, com_1_2950, com_1_2951,
- com_1_2952, com_1_2953, com_1_2954, com_1_9334, col_1_320, col_1_356, col_1_359,
- col_1_405, col_1_733, col_1_2919, col_1_2922, col_1_2958, col_1_2961, col_1_2974,
- col_1_2983, col_1_2994, col_1_3034, col_1_3036, col_1_3048, col_1_3420, col_1_3421,
- col_1_3451, col_1_3455, col_1_3458, col_1_3460, col_1_3466, col_1_3494, col_1_3495,
- col_1_8680, col_1_8684, col_1_8714, col_1_8716, col_1_8718, col_1_8719, col_1_8721,
- col_1_8747, col_1_8821, col_1_8827, col_1_8870, col_1_8878, col_1_8931, col_1_9109,
- col_1_9167, col_1_9191, col_1_9236, col_1_9306, col_1_9317, col_1_9335, col_1_9476,
- col_1_9749]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: http://scholarworks.montana.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.scholarworks_montana
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [col_1_3451, com_1_3, com_1_4, com_1_7, com_1_8, com_1_9, com_1_10,
- com_1_11, com_1_13, com_1_14, com_1_15, com_1_16, com_1_17, com_1_18, com_1_19,
- com_1_24, com_1_25, com_1_26, com_1_27, com_1_29, com_1_30, com_1_31, com_1_32,
- com_1_33, com_1_35, com_1_37, com_1_38, com_1_39, com_1_40, com_1_42, com_1_43,
- com_1_44, com_1_45, com_1_46, com_1_48, com_1_49, com_1_50, com_1_51, com_1_52,
- com_1_53, com_1_54, com_1_55, com_1_56, com_1_57, com_1_60, com_1_65, com_1_66,
- com_1_68, com_1_77, com_1_86, com_1_91, com_1_93, com_1_104, com_1_154, com_1_318,
- com_1_388, com_1_404, com_1_732, com_1_2942, com_1_2949, com_1_2950, com_1_2951,
- com_1_2952, com_1_2953, com_1_2954, com_1_9334, col_1_320, col_1_356, col_1_359,
- col_1_405, col_1_733, col_1_2919, col_1_2922, col_1_2958, col_1_2961, col_1_2974,
- col_1_2983, col_1_2994, col_1_3034, col_1_3036, col_1_3048, col_1_3420, col_1_3421,
- col_1_3451, col_1_3455, col_1_3458, col_1_3460, col_1_3466, col_1_3494, col_1_3495,
- col_1_8680, col_1_8684, col_1_8714, col_1_8716, col_1_8718, col_1_8719, col_1_8721,
- col_1_8747, col_1_8821, col_1_8827, col_1_8870, col_1_8878, col_1_8931, col_1_9109,
- col_1_9167, col_1_9191, col_1_9236, col_1_9306, col_1_9317, col_1_9335, col_1_9476,
- col_1_9749]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://scholarworks.montana.edu
-long_title: Montana State University
-name: edu.scholarworks_montana
-user: providers.edu.scholarworks_montana
diff --git a/share/sources/edu.scholarworks_umass/icon.ico b/share/sources/edu.scholarworks_umass/icon.ico
deleted file mode 100644
index abc0b2a7f340050a65f28f63a429ab2dc298dbb2..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.scholarworks_umass/icon.ico and /dev/null differ
diff --git a/share/sources/edu.scholarworks_umass/source.yaml b/share/sources/edu.scholarworks_umass/source.yaml
deleted file mode 100644
index a0d39a4fa..000000000
--- a/share/sources/edu.scholarworks_umass/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholarworks.umass.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.scholarworks_umass
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://scholarworks.umass.edu
-long_title: ScholarWorks@UMass Amherst
-name: edu.scholarworks_umass
-user: providers.edu.scholarworks_umass
diff --git a/share/sources/edu.smithsonian/icon.ico b/share/sources/edu.smithsonian/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.smithsonian/icon.ico and /dev/null differ
diff --git a/share/sources/edu.smithsonian/source.yaml b/share/sources/edu.smithsonian/source.yaml
deleted file mode 100644
index 8ef5bb1ae..000000000
--- a/share/sources/edu.smithsonian/source.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-# DSpace
-configs:
-- base_url: http://repository.si.edu/oai/request
- disabled: false
- earliest_date: 2006-01-30T20:00:38Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.smithsonian.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: null
- blocked_sets:
- - col_10088_539 # STRI News
- - com_10088_24078 # CONTENT STAGING
- - com_10088_9716 # Digitization Program Office
- - com_10088_9645 # Office of Human Resources
- - col_10088_17186 # Office of Policy & Analysis Research Reports
- - com_10088_2801 # Office of Public Affairs
- - com_10088_17318 # Smithsonian Administrative History
- - col_10088_26087 # STAGING - BAE BULLETIN
- - col_10088_30358 # STAGING – Bulletin of the USNM
- - col_10088_26933 # STAGING - Contributions from the United States National Herbarium
- - col_10088_30357 # STAGING – Proceedings of the USNM
- - col_10088_23045 # STAGING - SMITHSONIAN MISCELLANEOUS COLLECTIONS
- - col_10088_29562 # STAGING-USNM Staff Publications
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
- regulator_steps:
- NODE_STEPS:
- - - block_extra_values
- - node_types:
- - WorkIdentifer
- blocked_values:
- identifier_type: srbnumber
-- base_url: http://repository.si.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.smithsonian
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://repository.si.edu
-long_title: Smithsonian Digital Repository
-name: edu.smithsonian
-user: providers.edu.smithsonian
diff --git a/share/sources/edu.stcloud/icon.ico b/share/sources/edu.stcloud/icon.ico
deleted file mode 100644
index 2d3ab2ae40467832f5d129ee131cf8ec39b53ee8..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.stcloud/icon.ico and /dev/null differ
diff --git a/share/sources/edu.stcloud/source.yaml b/share/sources/edu.stcloud/source.yaml
deleted file mode 100644
index dd681601f..000000000
--- a/share/sources/edu.stcloud/source.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://repository.stcloudstate.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.stcloud
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [ews_facpubs, ews_wps, hist_facwp, comm_facpubs, anth_facpubs,
- soc_facpubs, soc_ug_research, chem_facpubs, phys_present, lrs_facpubs, cfs_facpubs,
- hurl_facpubs, ed_facpubs, cpcf_gradresearch, econ_facpubs, econ_wps, econ_seminars,
- stcloud_ling]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://stcloudstate.edu/
-long_title: The repository at St Cloud State
-name: edu.stcloud
-user: providers.edu.stcloud
diff --git a/share/sources/edu.texasstate/icon.ico b/share/sources/edu.texasstate/icon.ico
deleted file mode 100644
index 5a8de609bd6479e09ab43c85f57b51f53418dd35..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.texasstate/icon.ico and /dev/null differ
diff --git a/share/sources/edu.texasstate/source.yaml b/share/sources/edu.texasstate/source.yaml
deleted file mode 100644
index 962f07995..000000000
--- a/share/sources/edu.texasstate/source.yaml
+++ /dev/null
@@ -1,50 +0,0 @@
-# DSpace
-configs:
-- base_url: http://digital.library.txstate.edu/oai/request
- disabled: false
- earliest_date: 2009-04-09T05:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: mods}
- label: edu.texasstate.mods
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: mods
- transformer_kwargs:
- approved_sets: [com_10877_2, com_10877_5, com_10877_8, com_10877_10, com_10877_13,
- com_10877_16, com_10877_19, com_10877_21, com_10877_24, com_10877_27, com_10877_29,
- com_10877_33, com_10877_36, com_10877_39, com_10877_42, com_10877_44, com_10877_47,
- com_10877_50, com_10877_53, com_10877_56, com_10877_59, com_10877_62, com_10877_64,
- com_10877_69, com_10877_72, com_10877_74, com_10877_80, com_10877_83, com_10877_85,
- com_10877_89, com_10877_92, com_10877_94, com_10877_96, com_10877_98, com_10877_100,
- com_10877_103, com_10877_104, com_10877_109, com_10877_111, com_10877_114, com_10877_116,
- com_10877_118, com_10877_120, com_10877_123, com_10877_125, com_10877_134, com_10877_136,
- com_10877_4332, com_10877_4424, com_10877_4425, col_10877_78]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-- base_url: http://digital.library.txstate.edu/oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.texasstate
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [com_10877_2, com_10877_5, com_10877_8, com_10877_10, com_10877_13,
- com_10877_16, com_10877_19, com_10877_21, com_10877_24, com_10877_27, com_10877_29,
- com_10877_33, com_10877_36, com_10877_39, com_10877_42, com_10877_44, com_10877_47,
- com_10877_50, com_10877_53, com_10877_56, com_10877_59, com_10877_62, com_10877_64,
- com_10877_69, com_10877_72, com_10877_74, com_10877_80, com_10877_83, com_10877_85,
- com_10877_89, com_10877_92, com_10877_94, com_10877_96, com_10877_98, com_10877_100,
- com_10877_103, com_10877_104, com_10877_109, com_10877_111, com_10877_114, com_10877_116,
- com_10877_118, com_10877_120, com_10877_123, com_10877_125, com_10877_134, com_10877_136,
- com_10877_4332, com_10877_4424, com_10877_4425, col_10877_78]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: https://digital.library.txstate.edu/
-long_title: DSpace at Texas State University
-name: edu.texasstate
-user: providers.edu.texasstate
diff --git a/share/sources/edu.triceratops/icon.ico b/share/sources/edu.triceratops/icon.ico
deleted file mode 100644
index f0e0b2a7516990f6ee451cca80637fb66e3310ed..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.triceratops/icon.ico and /dev/null differ
diff --git a/share/sources/edu.triceratops/source.yaml b/share/sources/edu.triceratops/source.yaml
deleted file mode 100644
index cdd57e197..000000000
--- a/share/sources/edu.triceratops/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://triceratops.brynmawr.edu/dspace-oai/request
- disabled: true
- earliest_date: null
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.triceratops
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: null
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://triceratops.brynmawr.edu
-long_title: 'Triceratops: Tri-College Digital Repository'
-name: edu.triceratops
-user: providers.edu.triceratops
diff --git a/share/sources/edu.trinity/icon.ico b/share/sources/edu.trinity/icon.ico
deleted file mode 100644
index dac4996d8ae87d00abbcb271f3940718048abcba..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.trinity/icon.ico and /dev/null differ
diff --git a/share/sources/edu.trinity/source.yaml b/share/sources/edu.trinity/source.yaml
deleted file mode 100644
index 04d3e307d..000000000
--- a/share/sources/edu.trinity/source.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.trinity.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.trinity
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [engine_faculty, env_studocs, geo_faculty, geo_honors, geo_studocs,
- global-awareness, hca_faculty, hct_honors, hist_faculty, hist_honors, infolit_qep,
- infolit_usra, lib_digitalcommons, lib_docs, lib_faculty, math_faculty, math_honors,
- mll_faculty, mll_honors, mono, music_honors, oaweek, phil_faculty, phil_honors,
- physics_faculty, physics_honors, polysci_faculty, polysci_studocs, psych_faculty,
- psych_honors, relig_faculty, socanthro_faculty, socanthro_honors, socanthro_studocs,
- speechdrama_honors, urban_studocs, written-communication]
- emitted_type: CreativeWork
- property_list: []
- type_map: {}
-home_page: http://digitalcommons.trinity.edu/
-long_title: Digital Commons @ Trinity University
-name: edu.trinity
-user: providers.edu.trinity
diff --git a/share/sources/edu.u_south_fl/icon.ico b/share/sources/edu.u_south_fl/icon.ico
deleted file mode 100644
index cfc48113044d187471d3b188ad6b6fcb83a10bbf..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.u_south_fl/icon.ico and /dev/null differ
diff --git a/share/sources/edu.u_south_fl/source.yaml b/share/sources/edu.u_south_fl/source.yaml
deleted file mode 100644
index 84779c3be..000000000
--- a/share/sources/edu.u_south_fl/source.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholarcommons.usf.edu/do/oai/
- disabled: false
- earliest_date: 2000-01-19T00:00:00Z
- harvester: oai
- harvester_kwargs: {metadata_prefix: oai_dc}
- label: edu.u_south_fl
- rate_limit_allowance: 1
- rate_limit_period: 2
- transformer: oai_dc
- transformer_kwargs:
- approved_sets: [race_place, ur_symposium, abo, tlar_pub, tlar, tlas_pub, acc_etd,
- afa, afa_etd, gey_etd, alambique, ana, ana_etd, ant, ant_facpub, ant_etd, aba,
- aba_facpub, aba_etd, arc_etd, art_etd, cutr_pub, camprec, sa_catalyst, bcm,
- bcm_facpub, bcm_etd, ocep_cbrr, clphp, ech, ech_etd, chm, chm_facpub, chm_etd,
- cfs, cfs_facpub, cfs_etd, egx, egx_facpub, egx_etd, cas, cbcs, business, business_pub,
- coedu, coedu_pub, coe, coe_pub, marine, marine_pub, med, nur, pharm, cph, arts,
- arts_pub, spe_facpub, csd, csd_etd, spe_etd, cfh, cfh_facpub, cfh_etd, clphp_cbdc,
- esb, esb_etd, conferences, couch_stone, cjp, cjp_facpub, cjp_etd, basgp_data,
- dean_cbcs, deepkarst_2016, spe, grad_dsli, ecn, ecn_etd, els, els_facpub, els_etd,
- edq, edq_facpub, edq_etd, esf, esf_facpub, esf_etd, ege, ege_etd, eng, eng_facpub,
- eng_etd, tles, tles_pub, tles_gallery, tles_oh, eoh, eoh_etd, epb, epb_etd,
- sg_exec_pubs, camprec_exlib, fmhi_el, fmhi_pub, fs, fs_pubs, fin, fin_etd, wusf_first,
- fl_drive_in, clphp_fphtc, ocep_fg, map_links, gsp, gep, gep_etd, gly, gly_facpub,
- gly_etd, glo, glo_etd, gia, gia_facpub, gia_etd, honors_gast, grad, grad_facpub,
- etd, hpm, hpm_etd, las_hhfc, usfhistinfo_oh, hty, hty_facpub, hty_etd, honors,
-      hty_etd, honors,
-      hcs, hcs_etd, into_facpub, into, iigw, basgp_images, egs, egs_etd, qmb, qmb_etd,
-      las, ibl, edk_etd, bin, bin_facpub, bin_etd, eie, eie_etd, interdisc_programs,
-      clumped_isotope, iciworkshop_gallery, ijs, jacaps, jea, jpr, jpt, jss, sg_jud_pubs,
-      las_facpub, las_etd, latcom_2013, ehe, ehe_facpub, ehe_etd, sg_leg_pubs, fmhi,
-      man, man_etd, msc_facpub, msc_etd, mkt, mkt_etd, com, com_etd, mth, mth_facpub,
-      mth_etd, egr, egr_etd, mhlp, mhlp_facpub, mhlp_etd, mhs_etd, mca, tow_gallery,
-      tow, mme, mme_etd, emu, emu_etd, mus_etd, nckms_2013, numeracy, nur_facpub,
-      nur_etd, ocep, ocep_pub, ocep_workshops, ur, onc, onc_etd, honors_et, pcb, pcb_etd,
-      pcmr, pth, pth_etd, pharm_facpub, phi, phi_etd, edj_etd, phy, phy_etd, pyb,
-      pyb_etd, ocep_pi, clphp_perlc, projects, psy, psy_facpub, psy_etd, phc, phc_etd,
-      race_place_gallery, mhs, mhs_facpub, rel, rel_etd, basgp_report, research_matters,
-      research_matters_gallery, surcosur, tlar_scpub, acc, gey, arch, art, geo, geo_facpub,
-      si, si_facpub, si_etd, mus, edj, the, tles_sealevel, sla, sla_etd, edi, edi_etd,
-      sinkhole_2013, sinkhole_2015, sbdc, sok, sok_etd, soc, soc_etd, tlsdc, ese,
-      ese_etd, lat_sponsor_gallery, lat_sponsors, siv, sa, sa_facpub, sa_pub, sg,
-      geologia, subsust, compaccountability-2013, basgp, tlib, edr, edr_etd, oa_textbooks,
-      cutr, tci, fhm_newsletter, iwic, the_facpub, alumni_pubs, usf_facpub, usf_gallery,
-      lib, usf_gallery_ri, tloa, human_trafficking, ujmm, wusf, wst, wst_etd, wle,
-      wle_etd, wle_facpub]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://scholarcommons.usf.edu
-long_title: University of South Florida - Scholar Commons
-name: edu.u_south_fl
-user: providers.edu.u_south_fl
diff --git a/share/sources/edu.ucf/icon.ico b/share/sources/edu.ucf/icon.ico
deleted file mode 100644
index 1dd2cea9bb538e4015ecf80332ee6659080c6a26..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.ucf/icon.ico and /dev/null differ
diff --git a/share/sources/edu.ucf/source.yaml b/share/sources/edu.ucf/source.yaml
deleted file mode 100644
index 976710ed6..000000000
--- a/share/sources/edu.ucf/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://stars.library.ucf.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.ucf
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://stars.library.ucf.edu/
-long_title: University of Central Florida
-name: edu.ucf
-user: providers.edu.ucf
diff --git a/share/sources/edu.udc/icon.ico b/share/sources/edu.udc/icon.ico
deleted file mode 100644
index 3d4dc263e34878e1d316dc24c0f5ca15de8826c1..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.udc/icon.ico and /dev/null differ
diff --git a/share/sources/edu.udc/source.yaml b/share/sources/edu.udc/source.yaml
deleted file mode 100644
index 94e6797fc..000000000
--- a/share/sources/edu.udc/source.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-configs:
-- base_url: http://conservancy.umn.edu/oai/request
-  disabled: false
-  earliest_date: 2007-02-19T20:04:59Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.udc.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_11299_45272, col_11299_169792, col_11299_166578]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://conservancy.umn.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.udc
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_11299_45272, col_11299_169792, col_11299_166578]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://conservancy.umn.edu/
-long_title: University of Minnesota, Digital Conservancy
-name: edu.udc
-user: providers.edu.udc
diff --git a/share/sources/edu.udel/icon.ico b/share/sources/edu.udel/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.udel/icon.ico and /dev/null differ
diff --git a/share/sources/edu.udel/source.yaml b/share/sources/edu.udel/source.yaml
deleted file mode 100644
index 2963f89c2..000000000
--- a/share/sources/edu.udel/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://udspace.udel.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2004-08-17T23:51:09Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.udel.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://udspace.udel.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.udel
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://udspace.udel.edu/dspace-oai/request
-long_title: University of Delaware Institutional Repository
-name: edu.udel
-user: providers.edu.udel
diff --git a/share/sources/edu.uhawaii/icon.ico b/share/sources/edu.uhawaii/icon.ico
deleted file mode 100644
index a9292584b9775de6c4798ef02e8b842a2e51bfd9..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.uhawaii/icon.ico and /dev/null differ
diff --git a/share/sources/edu.uhawaii/source.yaml b/share/sources/edu.uhawaii/source.yaml
deleted file mode 100644
index d9717d024..000000000
--- a/share/sources/edu.uhawaii/source.yaml
+++ /dev/null
@@ -1,70 +0,0 @@
-# DSpace
-configs:
-- base_url: https://scholarspace.manoa.hawaii.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2007-05-10T10:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.uhawaii.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_10125_562, com_10125_565, com_10125_569, com_10125_572, com_10125_574,
-      com_10125_576, com_10125_1913, com_10125_579, com_10125_1915, com_10125_770,
-      com_10125_1917, com_10125_860, com_10125_1919, com_10125_1921, com_10125_1929,
-      com_10125_863, com_10125_1931, com_10125_1933, com_10125_866, com_10125_1935,
-      com_10125_869, com_10125_1940, com_10125_1937, com_10125_1941, com_10125_1944,
-      com_10125_872, com_10125_875, com_10125_339, com_10125_1881, com_10125_1880,
-      com_10125_1878, com_10125_1883, com_10125_307, com_10125_1816, com_10125_1383,
-      com_10125_878, com_10125_1877, com_10125_1135, com_10125_323, com_10125_349,
-      com_10125_561, com_10125_1699, com_10125_1700, com_10125_1138, com_10125_1140,
-      com_10125_1832, com_10125_1677, com_10125_30, com_10125_31, com_10125_1819,
-      com_10125_321, com_10125_531, com_10125_19, com_10125_324, com_10125_530, com_10125_310,
-      com_10125_1221, com_10125_312, com_10125_1222, com_10125_1300, com_10125_598,
-      com_10125_317, com_10125_364, com_10125_370, com_10125_386, com_10125_823, com_10125_856,
-      com_10125_956, com_10125_1036, com_10125_1041, com_10125_1046, com_10125_1051,
-      com_10125_490, com_10125_365, com_10125_495, com_10125_634, com_10125_430, com_10125_961,
-      com_10125_968,
-      com_10125_971, com_10125_974, com_10125_979, com_10125_435, com_10125_624,
-      com_10125_1085, com_10125_1090, com_10125_1115, com_10125_1120, com_10125_1125,
-      com_10125_1130, com_10125_629, com_10125_450, com_10125_289, com_10125_25, com_10125_738,
-      com_10125_1820, com_10125_286, com_10125_1696, com_10125_1678, com_10125_1701]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://scholarspace.manoa.hawaii.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.uhawaii
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_10125_562, com_10125_565, com_10125_569, com_10125_572, com_10125_574,
-      com_10125_576, com_10125_1913, com_10125_579, com_10125_1915, com_10125_770,
-      com_10125_1917, com_10125_860, com_10125_1919, com_10125_1921, com_10125_1929,
-      com_10125_863, com_10125_1931, com_10125_1933, com_10125_866, com_10125_1935,
-      com_10125_869, com_10125_1940, com_10125_1937, com_10125_1941, com_10125_1944,
-      com_10125_872, com_10125_875, com_10125_339, com_10125_1881, com_10125_1880,
-      com_10125_1878, com_10125_1883, com_10125_307, com_10125_1816, com_10125_1383,
-      com_10125_878, com_10125_1877, com_10125_1135, com_10125_323, com_10125_349,
-      com_10125_561, com_10125_1699, com_10125_1700, com_10125_1138, com_10125_1140,
-      com_10125_1832, com_10125_1677, com_10125_30, com_10125_31, com_10125_1819,
-      com_10125_321, com_10125_531, com_10125_19, com_10125_324, com_10125_530, com_10125_310,
-      com_10125_1221, com_10125_312, com_10125_1222, com_10125_1300, com_10125_598,
-      com_10125_317, com_10125_364, com_10125_370, com_10125_386, com_10125_823, com_10125_856,
-      com_10125_956, com_10125_1036, com_10125_1041, com_10125_1046, com_10125_1051,
-      com_10125_490, com_10125_365, com_10125_495, com_10125_634, com_10125_430, com_10125_961,
-      com_10125_968, com_10125_971, com_10125_974, com_10125_979, com_10125_435, com_10125_624,
-      com_10125_1085, com_10125_1090, com_10125_1115, com_10125_1120, com_10125_1125,
-      com_10125_1130, com_10125_629, com_10125_450, com_10125_289, com_10125_25, com_10125_738,
-      com_10125_1820, com_10125_286, com_10125_1696, com_10125_1678, com_10125_1701]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://scholarspace.manoa.hawaii.edu
-long_title: ScholarSpace at University of Hawaii at Manoa
-name: edu.uhawaii
-user: providers.edu.uhawaii
diff --git a/share/sources/edu.uiucideals/icon.ico b/share/sources/edu.uiucideals/icon.ico
deleted file mode 100644
index 3a20a47452f442454cde9050a7cad1284a4338b0..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.uiucideals/icon.ico and /dev/null differ
diff --git a/share/sources/edu.uiucideals/source.yaml b/share/sources/edu.uiucideals/source.yaml
deleted file mode 100644
index 059036d49..000000000
--- a/share/sources/edu.uiucideals/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://www.ideals.illinois.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2005-09-21T17:43:50Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.uiucideals.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://ideals.uiuc.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.uiucideals
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://www.ideals.illinois.edu
-long_title: University of Illinois at Urbana-Champaign, IDEALS
-name: edu.uiucideals
-user: providers.edu.uiucideals
diff --git a/share/sources/edu.ukansas/icon.ico b/share/sources/edu.ukansas/icon.ico
deleted file mode 100644
index 6317d8d4a6cfee41da818f9558de44a1def824a6..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.ukansas/icon.ico and /dev/null differ
diff --git a/share/sources/edu.ukansas/source.yaml b/share/sources/edu.ukansas/source.yaml
deleted file mode 100644
index ee3ab1e6e..000000000
--- a/share/sources/edu.ukansas/source.yaml
+++ /dev/null
@@ -1,64 +0,0 @@
-# DSpace
-configs:
-- base_url: https://kuscholarworks.ku.edu/oai/request
-  disabled: false
-  earliest_date: 2003-09-19T22:01:33Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.ukansas.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_1808_978, com_1808_118, com_1808_84, com_1808_1666, com_1808_1029,
-      com_1808_7799, com_1808_8834, com_1808_377, com_1808_7165, com_1808_1069, com_1808_65,
-      com_1808_907, com_1808_7105, com_1808_3892, com_1808_5894, com_1808_62, com_1808_224,
-      com_1808_64, com_1808_97, com_1808_1776, com_1808_5634, com_1808_1260, com_1808_8219,
-      com_1808_8701, com_1808_9235, com_1808_4471, com_1808_291, com_1808_267, com_1808_231,
-      com_1808_592, com_1808_979, com_1808_8717, com_1808_1049, com_1808_1050, com_1808_1051,
-      com_1808_1052, com_1808_1483, com_1808_1815, com_1808_87, com_1808_408, com_1808_776,
-      com_1808_5725, com_1808_5724, com_1808_284, com_1808_6210, com_1808_54, com_1808_363,
-      com_1808_3604, com_1808_913, com_1808_9053, com_1808_7115, com_1808_276, com_1808_748,
-      com_1808_9219, com_1808_21, com_1808_9232, com_1808_238, com_1808_275, com_1808_263,
-      com_1808_100, com_1808_5423, com_1808_8837, com_1808_984, com_1808_56, com_1808_1060,
-      com_1808_2088, com_1808_80, com_1808_981, com_1808_96, com_1808_109, com_1808_234,
-      com_1808_89, com_1808_4073, com_1808_594, com_1808_8689, com_1808_3598, com_1808_9214,
-      com_1808_466, com_1808_6080, com_1808_60, com_1808_7240, com_1808_735, com_1808_789,
-      com_1808_4373, com_1808_287, com_1808_82, com_1808_2076, com_1808_774, com_1808_6803,
-      com_1808_1811, com_1808_197, com_1808_8721, com_1808_6204]
-    emitted_type: CreativeWork
-    property_list: [publisher, language, relation, date, identifier, type]
-    type_map: {}
-- base_url: https://kuscholarworks.ku.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.ukansas
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_1808_978, com_1808_118, com_1808_84, com_1808_1666, com_1808_1029,
-      com_1808_7799, com_1808_8834, com_1808_377, com_1808_7165, com_1808_1069, com_1808_65,
-      com_1808_907, com_1808_7105, com_1808_3892, com_1808_5894, com_1808_62, com_1808_224,
-      com_1808_64, com_1808_97, com_1808_1776, com_1808_5634, com_1808_1260, com_1808_8219,
-      com_1808_8701, com_1808_9235, com_1808_4471, com_1808_291, com_1808_267, com_1808_231,
-      com_1808_592, com_1808_979, com_1808_8717, com_1808_1049, com_1808_1050, com_1808_1051,
-      com_1808_1052, com_1808_1483, com_1808_1815, com_1808_87, com_1808_408, com_1808_776,
-      com_1808_5725, com_1808_5724, com_1808_284, com_1808_6210, com_1808_54, com_1808_363,
-      com_1808_3604, com_1808_913, com_1808_9053, com_1808_7115, com_1808_276, com_1808_748,
-      com_1808_9219, com_1808_21, com_1808_9232, com_1808_238, com_1808_275, com_1808_263,
-      com_1808_100, com_1808_5423, com_1808_8837, com_1808_984, com_1808_56, com_1808_1060,
-      com_1808_2088, com_1808_80, com_1808_981, com_1808_96, com_1808_109, com_1808_234,
-      com_1808_89, com_1808_4073, com_1808_594, com_1808_8689, com_1808_3598, com_1808_9214,
-      com_1808_466, com_1808_6080, com_1808_60, com_1808_7240, com_1808_735, com_1808_789,
-      com_1808_4373, com_1808_287, com_1808_82, com_1808_2076, com_1808_774, com_1808_6803,
-      com_1808_1811, com_1808_197, com_1808_8721, com_1808_6204]
-    emitted_type: CreativeWork
-    property_list: [publisher, language, relation, date, identifier, type]
-    type_map: {}
-home_page: https://kuscholarworks.ku.edu
-long_title: KU ScholarWorks
-name: edu.ukansas
-user: providers.edu.ukansas
diff --git a/share/sources/edu.uky/icon.ico b/share/sources/edu.uky/icon.ico
deleted file mode 100644
index 94a96815fcc4880ed9df762a6cbd95d7cdfbc1a0..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.uky/icon.ico and /dev/null differ
diff --git a/share/sources/edu.uky/source.yaml b/share/sources/edu.uky/source.yaml
deleted file mode 100644
index 8355f1fac..000000000
--- a/share/sources/edu.uky/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://uknowledge.uky.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.uky
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://uknowledge.uky.edu
-long_title: UKnowledge @ University of Kentucky
-name: edu.uky
-user: providers.edu.uky
diff --git a/share/sources/edu.umassmed/icon.ico b/share/sources/edu.umassmed/icon.ico
deleted file mode 100644
index 9ce02515f8cc48d423d0461028e89f194dcfaec3..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.umassmed/icon.ico and /dev/null differ
diff --git a/share/sources/edu.umassmed/source.yaml b/share/sources/edu.umassmed/source.yaml
deleted file mode 100644
index 80b546641..000000000
--- a/share/sources/edu.umassmed/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: https://escholarship.umassmed.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.umassmed
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://escholarship.umassmed.edu
-long_title: eScholarship@UMMS
-name: edu.umassmed
-user: providers.edu.umassmed
diff --git a/share/sources/edu.umd/icon.ico b/share/sources/edu.umd/icon.ico
deleted file mode 100644
index 2d28572654154aca9d1ed2635df60e662a6b8ebd..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.umd/icon.ico and /dev/null differ
diff --git a/share/sources/edu.umd/source.yaml b/share/sources/edu.umd/source.yaml
deleted file mode 100644
index 903bf9620..000000000
--- a/share/sources/edu.umd/source.yaml
+++ /dev/null
@@ -1,69 +0,0 @@
-# DSpace
-configs:
-- base_url: http://drum.lib.umd.edu/oai/request
-  disabled: false
-  earliest_date: 2004-05-28T20:22:48Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.umd.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_1903_1654, com_1903_2205, com_1903_2206, com_1903_2207, com_1903_2208,
-      com_1903_2209, com_1903_2210, com_1903_2211, com_1903_10990, com_1903_2212,
-      com_1903_2213, com_1903_2214, com_1903_2215, com_1903_2264, com_1903_2271, com_1903_11810,
-      com_1903_11811, com_1903_11609, com_1903_7565, com_1903_2219, com_1903_11812,
-      com_1903_2221,
-      com_1903_2222, com_1903_1598, com_1903_1611, com_1903_8, com_1903_12,
-      com_1903_1647, com_1903_1631, com_1903_2223, com_1903_2224, com_1903_2225, com_1903_7097,
-      com_1903_2227, com_1903_2230, com_1903_2231, com_1903_2232, com_1903_2234, com_1903_2235,
-      com_1903_11813, com_1903_2237, com_1903_2238, com_1903_2216, com_1903_7125,
-      com_1903_2239, com_1903_2240, com_1903_2241, com_1903_6626, com_1903_12164,
-      com_1903_9069, com_1903_2242, com_1903_2243, com_1903_2244, com_1903_7124, com_1903_2245,
-      com_1903_2246, com_1903_2247, com_1903_2262, com_1903_2249, com_1903_4375, com_1903_2250,
-      com_1903_2251, com_1903_2252, com_1903_2253, com_1903_2254, com_1903_11321,
-      com_1903_2278, com_1903_2255, com_1903_2256, com_1903_2257, com_1903_2258, com_1903_2259,
-      com_1903_10067, com_1903_2260, com_1903_2261, com_1903_10091, com_1903_2263,
-      com_1903_2265, com_1903_2267, com_1903_1629, com_1903_2268, com_1903_2269, com_1903_2266,
-      com_1903_10116, com_1903_2270, com_1903_2272, com_1903_8706, com_1903_1584,
-      com_1903_1607, com_1903_1633, com_1903_1618, com_1903_2273, com_1903_2233, com_1903_5,
-      com_1903_2229, com_1903_2276, com_1903_2277, com_1903_10, com_1903_2279, com_1903_2280]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://drum.lib.umd.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.umd
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_1903_1654, com_1903_2205, com_1903_2206, com_1903_2207, com_1903_2208,
-      com_1903_2209, com_1903_2210, com_1903_2211, com_1903_10990, com_1903_2212,
-      com_1903_2213, com_1903_2214, com_1903_2215, com_1903_2264, com_1903_2271, com_1903_11810,
-      com_1903_11811, com_1903_11609, com_1903_7565, com_1903_2219, com_1903_11812,
-      com_1903_2221, com_1903_2222, com_1903_1598, com_1903_1611, com_1903_8, com_1903_12,
-      com_1903_1647, com_1903_1631, com_1903_2223, com_1903_2224, com_1903_2225, com_1903_7097,
-      com_1903_2227, com_1903_2230, com_1903_2231, com_1903_2232, com_1903_2234, com_1903_2235,
-      com_1903_11813, com_1903_2237, com_1903_2238, com_1903_2216, com_1903_7125,
-      com_1903_2239, com_1903_2240, com_1903_2241, com_1903_6626, com_1903_12164,
-      com_1903_9069, com_1903_2242, com_1903_2243, com_1903_2244, com_1903_7124, com_1903_2245,
-      com_1903_2246, com_1903_2247, com_1903_2262, com_1903_2249, com_1903_4375, com_1903_2250,
-      com_1903_2251, com_1903_2252, com_1903_2253, com_1903_2254, com_1903_11321,
-      com_1903_2278, com_1903_2255, com_1903_2256, com_1903_2257, com_1903_2258, com_1903_2259,
-      com_1903_10067, com_1903_2260, com_1903_2261, com_1903_10091, com_1903_2263,
-      com_1903_2265, com_1903_2267, com_1903_1629, com_1903_2268, com_1903_2269, com_1903_2266,
-      com_1903_10116, com_1903_2270, com_1903_2272, com_1903_8706, com_1903_1584,
-      com_1903_1607, com_1903_1633, com_1903_1618, com_1903_2273, com_1903_2233, com_1903_5,
-      com_1903_2229, com_1903_2276, com_1903_2277, com_1903_10, com_1903_2279, com_1903_2280]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://drum.lib.umd.edu/
-long_title: The University of Maryland Libraries DRUM (Digital Repository at the University
-  of Maryland)
-name: edu.umd
-user: providers.edu.umd
diff --git a/share/sources/edu.umich/icon.ico b/share/sources/edu.umich/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.umich/icon.ico and /dev/null differ
diff --git a/share/sources/edu.umich/source.yaml b/share/sources/edu.umich/source.yaml
deleted file mode 100644
index 986789d3b..000000000
--- a/share/sources/edu.umich/source.yaml
+++ /dev/null
@@ -1,82 +0,0 @@
-# DSpace
-configs:
-- base_url: https://deepblue.lib.umich.edu/dspace-oai/request
-  disabled: false
-  earliest_date: 2005-08-29 23:41:11
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.umich.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [col_2027.42_57773, col_2027.42_62481, col_2027.42_60130, col_2027.42_51534,
-      col_2027.42_58597, com_2027.42_65133, col_2027.42_41237, col_2027.42_60475,
-      col_2027.42_78358, col_2027.42_57483, col_2027.42_49245, col_2027.42_78359,
-      col_2027.42_49252, col_2027.42_61182, col_2027.42_58060, col_2027.42_50691,
-      col_2027.42_64026, col_2027.42_35324, col_2027.42_50473, col_2027.42_61365,
-      col_2027.42_78361, col_2027.42_63588, col_2027.42_58625, col_2027.42_58741,
-      col_2027.42_57485, col_2027.42_63030, col_2027.42_51400, col_2027.42_39366,
-      col_2027.42_77949, col_2027.42_58366, col_2027.42_60161, col_2027.42_58605,
-      col_2027.42_60937, col_2027.42_39212, col_2027.42_13915, col_2027.42_61837,
-      col_2027.42_40242, com_2027.42_79040, col_2027.42_57190, col_2027.42_64867,
-      col_2027.42_55692, col_2027.42_60175, col_2027.42_62180, col_2027.42_55467,
-      col_2027.42_55461, col_2027.42_40241, col_2027.42_56218, col_2027.42_57418,
-      col_2027.42_57738, col_2027.42_21621, col_2027.42_21609, col_2027.42_78207,
-      col_2027.42_55736, col_2027.42_55473, col_2027.42_71387, col_2027.42_40243,
-      col_2027.42_57759, col_2027.42_58219, col_2027.42_61172, col_2027.42_49548,
-      col_2027.42_57420, col_2027.42_61002, col_2027.42_49477, col_2027.42_78360,
-      col_2027.42_77456, col_2027.42_58188, col_2027.42_64496, col_2027.42_35325,
-      col_2027.42_64452, col_2027.42_65119, col_2027.42_64002, col_2027.42_49538,
-      col_2027.42_60933, col_2027.42_64343, col_2027.42_61282, col_2027.42_55277,
-      col_2027.42_41251, col_2027.42_58662, col_2027.42_61259, col_2027.42_69349,
-      col_2027.42_51549, col_2027.42_58620, col_2027.42_56193, col_2027.42_55486,
-      col_2027.42_61401, com_2027.42_13913, col_2027.42_55256, col_2027.42_49331,
-      col_2027.42_57558, col_2027.42_64010, col_2027.42_57275, col_2027.42_50792,
-      col_2027.42_55208, col_2027.42_49482, col_2027.42_39193, col_2027.42_57498,
-      col_2027.42_13914, col_2027.42_50621, col_2027.42_39392, col_2027.42_49534]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://deepblue.lib.umich.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.umich
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [col_2027.42_57773, col_2027.42_62481, col_2027.42_60130, col_2027.42_51534,
-      col_2027.42_58597, com_2027.42_65133, col_2027.42_41237, col_2027.42_60475,
-      col_2027.42_78358, col_2027.42_57483, col_2027.42_49245, col_2027.42_78359,
-      col_2027.42_49252, col_2027.42_61182, col_2027.42_58060, col_2027.42_50691,
-      col_2027.42_64026, col_2027.42_35324, col_2027.42_50473, col_2027.42_61365,
-      col_2027.42_78361, col_2027.42_63588, col_2027.42_58625, col_2027.42_58741,
-      col_2027.42_57485, col_2027.42_63030, col_2027.42_51400, col_2027.42_39366,
-      col_2027.42_77949, col_2027.42_58366, col_2027.42_60161, col_2027.42_58605,
-      col_2027.42_60937, col_2027.42_39212, col_2027.42_13915, col_2027.42_61837,
-      col_2027.42_40242, com_2027.42_79040, col_2027.42_57190, col_2027.42_64867,
-      col_2027.42_55692, col_2027.42_60175, col_2027.42_62180, col_2027.42_55467,
-      col_2027.42_55461, col_2027.42_40241, col_2027.42_56218, col_2027.42_57418,
-      col_2027.42_57738, col_2027.42_21621, col_2027.42_21609, col_2027.42_78207,
-      col_2027.42_55736, col_2027.42_55473, col_2027.42_71387, col_2027.42_40243,
-      col_2027.42_57759, col_2027.42_58219, col_2027.42_61172, col_2027.42_49548,
-      col_2027.42_57420, col_2027.42_61002, col_2027.42_49477, col_2027.42_78360,
-      col_2027.42_77456, col_2027.42_58188, col_2027.42_64496, col_2027.42_35325,
-      col_2027.42_64452, col_2027.42_65119, col_2027.42_64002, col_2027.42_49538,
-      col_2027.42_60933, col_2027.42_64343, col_2027.42_61282, col_2027.42_55277,
-      col_2027.42_41251, col_2027.42_58662, col_2027.42_61259, col_2027.42_69349,
-      col_2027.42_51549, col_2027.42_58620, col_2027.42_56193, col_2027.42_55486,
-      col_2027.42_61401, com_2027.42_13913, col_2027.42_55256, col_2027.42_49331,
-      col_2027.42_57558, col_2027.42_64010, col_2027.42_57275, col_2027.42_50792,
-      col_2027.42_55208, col_2027.42_49482, col_2027.42_39193, col_2027.42_57498,
-      col_2027.42_13914, col_2027.42_50621, col_2027.42_39392, col_2027.42_49534]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://deepblue.lib.umich.edu
-long_title: Deep Blue @ University of Michigan
-name: edu.umich
-user: providers.edu.umich
diff --git a/share/sources/edu.uncg/icon.ico b/share/sources/edu.uncg/icon.ico
deleted file mode 100644
index 1f5c4484466383739c4f8f5b3483a4667c7f116f..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.uncg/icon.ico and /dev/null differ
diff --git a/share/sources/edu.uncg/source.yaml b/share/sources/edu.uncg/source.yaml
deleted file mode 100644
index 5a076c2d2..000000000
--- a/share/sources/edu.uncg/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://libres.uncg.edu/ir/oai/oai.aspx
-  disabled: false
-  earliest_date: 2013-04-18T09:58:43Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.uncg
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [UNCG]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://libres.uncg.edu/ir
-long_title: UNC-Greensboro
-name: edu.uncg
-user: providers.edu.uncg
diff --git a/share/sources/edu.unl_digitalcommons/icon.ico b/share/sources/edu.unl_digitalcommons/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.unl_digitalcommons/icon.ico and /dev/null differ
diff --git a/share/sources/edu.unl_digitalcommons/source.yaml b/share/sources/edu.unl_digitalcommons/source.yaml
deleted file mode 100644
index 2293cd173..000000000
--- a/share/sources/edu.unl_digitalcommons/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.unl.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.unl_digitalcommons
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.unl.edu
-long_title: DigitalCommons@University of Nebraska - Lincoln
-name: edu.unl_digitalcommons
-user: providers.edu.unl_digitalcommons
diff --git a/share/sources/edu.upennsylvania/icon.ico b/share/sources/edu.upennsylvania/icon.ico
deleted file mode 100644
index 431e0f726196452395d07826d557e61f882298cc..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.upennsylvania/icon.ico and /dev/null differ
diff --git a/share/sources/edu.upennsylvania/source.yaml b/share/sources/edu.upennsylvania/source.yaml
deleted file mode 100644
index 4293a3115..000000000
--- a/share/sources/edu.upennsylvania/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://repository.upenn.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.upennsylvania
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://repository.upenn.edu
-long_title: University of Pennsylvania Scholarly Commons
-name: edu.upennsylvania
-user: providers.edu.upennsylvania
diff --git a/share/sources/edu.ut_chattanooga/icon.ico b/share/sources/edu.ut_chattanooga/icon.ico
deleted file mode 100644
index 9ce02515f8cc48d423d0461028e89f194dcfaec3..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.ut_chattanooga/icon.ico and /dev/null differ
diff --git a/share/sources/edu.ut_chattanooga/source.yaml b/share/sources/edu.ut_chattanooga/source.yaml
deleted file mode 100644
index a2c13bd3d..000000000
--- a/share/sources/edu.ut_chattanooga/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholar.utc.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.ut_chattanooga
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [honors-theses, theses]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://scholar.utc.edu
-long_title: University of Tennessee at Chattanooga
-name: edu.ut_chattanooga
-user: providers.edu.ut_chattanooga
diff --git a/share/sources/edu.utah/icon.ico b/share/sources/edu.utah/icon.ico
deleted file mode 100644
index fdd792f1ec6a86281e1fd3a55d8454e034bf28a8..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.utah/icon.ico and /dev/null differ
diff --git a/share/sources/edu.utah/source.yaml b/share/sources/edu.utah/source.yaml
deleted file mode 100644
index 258eaa104..000000000
--- a/share/sources/edu.utah/source.yaml
+++ /dev/null
@@ -1,27 +0,0 @@
-configs:
-- base_url: https://collections.lib.utah.edu/oai
-  disabled: false
-  earliest_date: 2002-01-08
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: edu.utah
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets:
-    - ir_uspace
-    - ir_computersa
-    - ir_etd
-    - ir_mfafp
-    - ir_su
-    - ir_kcg
-    - ir_htoa
-    - ir_eua
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://lib.utah.edu/
-long_title: University of Utah
-name: edu.utah
-user: providers.edu.utah
diff --git a/share/sources/edu.utahstate/icon.ico b/share/sources/edu.utahstate/icon.ico
deleted file mode 100644
index bce95b42999ca073eb3b264b319f542e4f35ea53..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.utahstate/icon.ico and /dev/null differ
diff --git a/share/sources/edu.utahstate/source.yaml b/share/sources/edu.utahstate/source.yaml
deleted file mode 100644
index 62b237564..000000000
--- a/share/sources/edu.utahstate/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.usu.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.utahstate
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.usu.edu/
-long_title: DigitalCommons@USU
-name: edu.utahstate
-user: providers.edu.utahstate
diff --git a/share/sources/edu.utaustin/icon.ico b/share/sources/edu.utaustin/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.utaustin/icon.ico and /dev/null differ
diff --git a/share/sources/edu.utaustin/source.yaml b/share/sources/edu.utaustin/source.yaml
deleted file mode 100644
index 5c8cf8cbd..000000000
--- a/share/sources/edu.utaustin/source.yaml
+++ /dev/null
@@ -1,50 +0,0 @@
-# DSpace
-configs:
-- base_url: https://repositories.lib.utexas.edu/utexas-oai/request
-  disabled: false
-  earliest_date: 2008-08-14T00:07:28Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.utaustin.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_2152_1, col_2152_13541, col_2152_22957, col_2152_13341, col_2152_11183,
-      col_2152_15554, col_2152_21116, col_2152_11227, col_2152_26, col_2152_25673,
-      col_2152_21442, col_2152_11019, col_2152_10079, col_2152_23952, com_2152_19781,
-      com_2152_4, com_2152_5, com_2152_15265, com_2152_20099, com_2152_4027, col_2152_22392,
-      com_2152_24880, com_2152_24538, col_2152_20329, com_2152_14283, col_2152_14697,
-      col_2152_16482, com_2152_24831, com_2152_11681, com_2152_15722, col_2152_7103,
-      col_2152_20398, col_2152_7100, col_2152_7105, col_2152_7102, col_2152_7101,
-      col_2152_17706, col_2152_15040, col_2152_14309, col_2152_18015, com_2152_6854,
-      com_2152_6851, col_2152_1508]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://repositories.lib.utexas.edu/utexas-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.utaustin
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_2152_1, col_2152_13541, col_2152_22957, col_2152_13341, col_2152_11183,
-      col_2152_15554, col_2152_21116, col_2152_11227, col_2152_26, col_2152_25673,
-      col_2152_21442, col_2152_11019, col_2152_10079, col_2152_23952, com_2152_19781,
-      com_2152_4, com_2152_5, com_2152_15265, com_2152_20099, com_2152_4027, col_2152_22392,
-      com_2152_24880, com_2152_24538, col_2152_20329, com_2152_14283, col_2152_14697,
-      col_2152_16482, com_2152_24831, com_2152_11681, com_2152_15722, col_2152_7103,
-      col_2152_20398, col_2152_7100, col_2152_7105, col_2152_7102, col_2152_7101,
-      col_2152_17706, col_2152_15040, col_2152_14309, col_2152_18015, com_2152_6854,
-      com_2152_6851, col_2152_1508]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://repositories.lib.utexas.edu
-long_title: University of Texas at Austin Digital Repository
-name: edu.utaustin
-user: providers.edu.utaustin
diff --git a/share/sources/edu.utktrace/icon.ico b/share/sources/edu.utktrace/icon.ico
deleted file mode 100644
index 9ce02515f8cc48d423d0461028e89f194dcfaec3..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.utktrace/icon.ico and /dev/null differ
diff --git a/share/sources/edu.utktrace/source.yaml b/share/sources/edu.utktrace/source.yaml
deleted file mode 100644
index 941bb34bb..000000000
--- a/share/sources/edu.utktrace/source.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://trace.tennessee.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.utktrace
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [utk_accopubs, utk_agrieconpubs, utk_animpubs, utk_anthpubs, utk_architecpubs,
-      utk_audipubs, utk_aviapubs, utk_biocpubs, utk_compmedpubs, utk_biospubs, utk_botapubs,
-      utk_statpubs, utk_cssjpapers, catalyst, utk_chembiopubs, utk_chempubs, utk_chilpubs,
-      utk_civipubs, utk_claspubs, utk_lawpubl, utk-lawjournals, utk_compmatdata, utk_datasets,
-      utk_davisbacon, utk_rotcarmypubs, utk_biolpubs, utk_eartpubs, utk_ecolpubs,
-      utk_econpubs, utk_edleadpubs, utk_educpubs, utk_elecutsipubs, utk_englpubs,
-      utk_entopubs, utk_compmedpeer, utk_smalpeer, utk_largpeer, utk_famipubs, utk_foodpubs,
-      utk_forepubs, gamut, utk_geno, utk_indupubs, utk_induengipubs, utk_nuclearpubs,
-      utk_instpubs, utk_intepubs, ijns, utk_exerpubs, utk_largpubs, utk_latipubs,
-      utk_mtaspubs, utk_manapubs, utk_markpubs, utk_matepubs, utk_mathutsipubs, utk_mechutsipubs,
-      utk_micrpubs, utk_modepubs, utk_molecularsimulation, utk_nuclpubs, utk_libfac,
-      utk_artfac, utk_artpeer, utk_libpub, utk_physastrpubs, utk_psycpubs, utk_bakecentpubs,
-      pursuit, utk_relipubs, utk_infosciepubs, utk_jourpubs, utk_socipubs, utk_socopubs,
-      rgsj, jaepl, utk_theapubs, utk_theopubs, utgradmedpubs, vernacular]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://trace.tennessee.edu
-long_title: 'Trace: Tennessee Research and Creative Exchange'
-name: edu.utktrace
-user: providers.edu.utktrace
diff --git a/share/sources/edu.utuskegee/icon.ico b/share/sources/edu.utuskegee/icon.ico
deleted file mode 100644
index 10235edbed636cf13788bab4c3394d9b8f205e60..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.utuskegee/icon.ico and /dev/null differ
diff --git a/share/sources/edu.utuskegee/source.yaml b/share/sources/edu.utuskegee/source.yaml
deleted file mode 100644
index 5774ba422..000000000
--- a/share/sources/edu.utuskegee/source.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-configs:
-- base_url: http://192.203.127.197/archive/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.utuskegee.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://192.203.127.197/archive/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.utuskegee
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://192.203.127.197
-long_title: Tuskegee University
-name: edu.utuskegee
-user: providers.edu.utuskegee
diff --git a/share/sources/edu.uwashington/icon.ico b/share/sources/edu.uwashington/icon.ico
deleted file mode 100644
index 5896111f467e2c772c88f5a4defda43c1ae39a53..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.uwashington/icon.ico and /dev/null differ
diff --git a/share/sources/edu.uwashington/source.yaml b/share/sources/edu.uwashington/source.yaml
deleted file mode 100644
index f58ce5181..000000000
--- a/share/sources/edu.uwashington/source.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-# DSpace
-configs:
-- base_url: http://digital.lib.washington.edu/dspace-oai/request
-  disabled: false
-  # 2083-03-01T08:00:00Z is obviously not correct
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.uwashington.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://digital.lib.washington.edu/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.uwashington
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://digital.lib.washington.edu/
-long_title: ResearchWorks @ University of Washington
-name: edu.uwashington
-user: providers.edu.uwashington
diff --git a/share/sources/edu.valposcholar/icon.ico b/share/sources/edu.valposcholar/icon.ico
deleted file mode 100644
index 9ce02515f8cc48d423d0461028e89f194dcfaec3..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.valposcholar/icon.ico and /dev/null differ
diff --git a/share/sources/edu.valposcholar/source.yaml b/share/sources/edu.valposcholar/source.yaml
deleted file mode 100644
index 4d68dd5e6..000000000
--- a/share/sources/edu.valposcholar/source.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://scholar.valpo.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.valposcholar
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [cc_fac_pub, it_pubs, ccls_fac_pub, journaloftolkienresearch, art_fac_pub,
-      bio_fac_pub, chem_fac_pub, comm_fac_pubs, econ_fac_pub, ed_fac_pubs, eng_fac_pub,
-      vfr, german_fac_pub, spanish_fac_pub, geomet_fac_pub, history_fac_pub, kin_fac_pubs,
-      mcs_fac_pubs, phys_astro_fac_pub, poli_sci_fac_pubs, psych_fac_pub, sociology_fac_pub,
-      theatre_fac_pubs, theo_fac_pubs, cba_fac_pub, jvbl, engineering_fac_pub, ebpr,
-      msn_theses, nursing_fac_pubs, grad_student_pubs, vulr, law_fac_pubs, ils_papers]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://scholar.valpo.edu/
-long_title: Valparaiso University ValpoScholar
-name: edu.valposcholar
-user: providers.edu.valposcholar
diff --git a/share/sources/edu.vtech/icon.ico b/share/sources/edu.vtech/icon.ico
deleted file mode 100644
index b96fe76d6dfb7785b2c733fb99600fc130cc17c3..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.vtech/icon.ico and /dev/null differ
diff --git a/share/sources/edu.vtech/source.yaml b/share/sources/edu.vtech/source.yaml
deleted file mode 100644
index 453dd01b7..000000000
--- a/share/sources/edu.vtech/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://vtechworks.lib.vt.edu/oai/request
-  disabled: false
-  earliest_date: null # earliestDatestamp is earliest published
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.vtech.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://vtechworks.lib.vt.edu/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.vtech
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://vtechworks.lib.vt.edu
-long_title: Virginia Tech VTechWorks
-name: edu.vtech
-user: providers.edu.vtech
diff --git a/share/sources/edu.wash_state_u/icon.ico b/share/sources/edu.wash_state_u/icon.ico
deleted file mode 100644
index 29eb956475467919e062b6b61c58daa4ef9192f9..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.wash_state_u/icon.ico and /dev/null differ
diff --git a/share/sources/edu.wash_state_u/source.yaml b/share/sources/edu.wash_state_u/source.yaml
deleted file mode 100644
index 1838de1c9..000000000
--- a/share/sources/edu.wash_state_u/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://research.wsulibs.wsu.edu:8080/oai/request
-  disabled: false
-  earliest_date: 2003-01-01T08:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: edu.wash_state_u.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://research.wsulibs.wsu.edu:8080/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.wash_state_u
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://research.wsulibs.wsu.edu/xmlui/
-long_title: Washington State University Research Exchange
-name: edu.wash_state_u
-user: providers.edu.wash_state_u
diff --git a/share/sources/edu.waynestate/icon.ico b/share/sources/edu.waynestate/icon.ico
deleted file mode 100644
index fff3f6b142fad21e2fe08282ebc4bf00927edcd6..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.waynestate/icon.ico and /dev/null differ
diff --git a/share/sources/edu.waynestate/source.yaml b/share/sources/edu.waynestate/source.yaml
deleted file mode 100644
index 29f6bc450..000000000
--- a/share/sources/edu.waynestate/source.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://digitalcommons.wayne.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.waynestate
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [acb_frp, agtc, anthrofrp, bio_fuel, biomed_eng_frp, biomedcentral,
-      biosci_frp, business_frp, ce_eng_frp, chemfrp, cjfrp, cmmg, coe_aos, coe_khs,
-      coe_tbf, coe_ted, commfrp, commsci_frp, compscifrp, cpcs_pubs, csdt, ec_eng_frp,
-      englishfrp, geofrp, gerontology, humbiol_preprints, iehs, im_eng_frp, immunology_frp,
-      libsp, mathfrp, med_anesthesiology, med_biochem, med_cardio, med_cher, med_dermatology,
-      med_dho, med_did, med_dpacs, med_edm, med_em, med_intmed, med_neurology, med_neurosurgery,
-      med_obgyn, med_ohn_surgery, med_oncology, med_opthalmology, med_ortho_surgery,
-      med_path, med_pbn, med_pediatrics, med_pmr, med_radiology, med_ro, med_surgery,
-      med_urology, mott_pubs, musicfrp, nfsfrp, nursingfrp, pet, pharm_appsci, pharm_healthcare,
-      pharm_practice, pharm_science, pharma_frp, philofrp, phy_astro_frp, physio_frp,
-      prb, provost_pub, psychfrp, skillman, slisfrp, soc_work_pubs, socfrp, urbstud_frp,
-      antipodes, criticism, discourse, framework, humbiol, jewishfilm, jmasm, marvels,
-      mpq, narrative, storytelling]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digitalcommons.wayne.edu
-long_title: Digital Commons @ Wayne State
-name: edu.waynestate
-user: providers.edu.waynestate
diff --git a/share/sources/edu.wisconsin/icon.ico b/share/sources/edu.wisconsin/icon.ico
deleted file mode 100644
index 0b0ae75a8564217e4c5f2b280fcc643495f2a51d..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.wisconsin/icon.ico and /dev/null differ
diff --git a/share/sources/edu.wisconsin/source.yaml b/share/sources/edu.wisconsin/source.yaml
deleted file mode 100644
index 300c96bd2..000000000
--- a/share/sources/edu.wisconsin/source.yaml
+++ /dev/null
@@ -1,120 +0,0 @@
-# DSpace
-configs:
-- base_url: https://minds.wisconsin.edu/oai/request
-  disabled: false
-  earliest_date: 2001-01-01
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.wisconsin
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [hdl_1793_56216, hdl_1793_66493, hdl_1793_7217, hdl_1793_6780,
-      hdl_1793_48354, hdl_1793_6663, hdl_1793_23407, hdl_1793_6654, hdl_1793_43546,
-      hdl_1793_24569, hdl_1793_7278, hdl_1793_7951, hdl_1793_35950, hdl_1793_22944,
-      hdl_1793_28999, hdl_1793_34206, hdl_1793_39085, hdl_1793_6571, hdl_1793_23460,
-      hdl_1793_24474, hdl_1793_28953, hdl_1793_38773, hdl_1793_60, hdl_1793_48149,
-      hdl_1793_8536, hdl_1793_53085, hdl_1793_35120, hdl_1793_326, hdl_1793_6655,
-      hdl_1793_43458, hdl_1793_32315, hdl_1793_21563, hdl_1793_56968, hdl_1793_196,
-      hdl_1793_7567, hdl_1793_23263, hdl_1793_6511, hdl_1793_70268, hdl_1793_48526,
-      hdl_1793_64235, hdl_1793_35613, hdl_1793_56823, hdl_1793_6769, hdl_1793_52383,
-      hdl_1793_6821, hdl_1793_34304, hdl_1793_34351, hdl_1793_34830, hdl_1793_254,
-      hdl_1793_259, hdl_1793_38587, hdl_1793_35954, hdl_1793_46206, hdl_1793_182,
-      hdl_1793_6665, hdl_1793_34919, hdl_1793_21276, hdl_1793_34921, hdl_1793_35798,
-      hdl_1793_21355, hdl_1793_28, hdl_1793_7215, hdl_1793_55281, hdl_1793_46755,
-      hdl_1793_34277, hdl_1793_54255, hdl_1793_47199, hdl_1793_63289, hdl_1793_23682,
-      hdl_1793_44013, hdl_1793_6782, hdl_1793_56125, hdl_1793_70669, hdl_1793_37425,
-      hdl_1793_55782, hdl_1793_72276, hdl_1793_72275, hdl_1793_64713, hdl_1793_61710,
-      hdl_1793_23588, hdl_1793_49209, hdl_1793_25079, hdl_1793_35117, hdl_1793_36008,
-      hdl_1793_35958, hdl_1793_36507, hdl_1793_36091, hdl_1793_55946, hdl_1793_6666,
-      hdl_1793_22054, hdl_1793_21863, hdl_1793_21864, hdl_1793_21865, hdl_1793_21866,
-      hdl_1793_53, hdl_1793_125, hdl_1793_35273, hdl_1793_46207, hdl_1793_305, hdl_1793_46218,
-      hdl_1793_64239, hdl_1793_197, hdl_1793_34247, hdl_1793_8139, hdl_1793_48893,
-      hdl_1793_202, hdl_1793_46208, hdl_1793_30491, hdl_1793_6796, hdl_1793_44014,
-      hdl_1793_37054, hdl_1793_54159, hdl_1793_67166, hdl_1793_46952, hdl_1793_6487,
-      hdl_1793_45567, hdl_1793_8138, hdl_1793_39050, hdl_1793_24567, hdl_1793_52386,
-      hdl_1793_38136, hdl_1793_6650, hdl_1793_63357, hdl_1793_73455, hdl_1793_73379,
-      hdl_1793_70647, hdl_1793_46387, hdl_1793_45568, hdl_1793_61725, hdl_1793_61706,
-      hdl_1793_61716, hdl_1793_67164, hdl_1793_6486, hdl_1793_112, hdl_1793_28166,
-      hdl_1793_6656, hdl_1793_64899, hdl_1793_69649, hdl_1793_114, hdl_1793_69720,
-      hdl_1793_6485, hdl_1793_63355, hdl_1793_48047, hdl_1793_23405, hdl_1793_49010,
-      hdl_1793_39049, hdl_1793_211, hdl_1793_35275, hdl_1793_7205, hdl_1793_30477,
-      hdl_1793_48150, hdl_1793_72888, hdl_1793_72890, hdl_1793_38910, hdl_1793_38909,
-      hdl_1793_8166, hdl_1793_8165, hdl_1793_30445, hdl_1793_53555, hdl_1793_74491,
-      hdl_1793_23552, hdl_1793_39048, hdl_1793_7579, hdl_1793_11974, hdl_1793_65805,
-      hdl_1793_67165, hdl_1793_56828, hdl_1793_70083, hdl_1793_7198, hdl_1793_48898,
-      hdl_1793_48894, hdl_1793_48891, hdl_1793_48896, hdl_1793_48900, hdl_1793_48895,
-      hdl_1793_48899, hdl_1793_48901, hdl_1793_48897, hdl_1793_48902, hdl_1793_48903,
-      hdl_1793_48892, hdl_1793_48904, hdl_1793_48905, hdl_1793_48906, hdl_1793_35043,
-      hdl_1793_6749, hdl_1793_66926, hdl_1793_39047, hdl_1793_44020, hdl_1793_11485,
-      hdl_1793_12122, hdl_1793_29011, hdl_1793_301, hdl_1793_56824, hdl_1793_74730,
-      hdl_1793_45566, hdl_1793_63312, hdl_1793_6488, hdl_1793_38291, hdl_1793_6478,
-      hdl_1793_62120, hdl_1793_63356, hdl_1793_11504, hdl_1793_258, hdl_1793_34302,
-      hdl_1793_6748, hdl_1793_304, hdl_1793_54817, hdl_1793_35276, hdl_1793_8333,
-      hdl_1793_39059, hdl_1793_31576, hdl_1793_31577, hdl_1793_34102, hdl_1793_34103,
-      hdl_1793_34104, hdl_1793_34105, hdl_1793_34106, hdl_1793_34107, hdl_1793_34108,
-      hdl_1793_34109, hdl_1793_34110, hdl_1793_34111, hdl_1793_34112, hdl_1793_34114,
-      hdl_1793_34113, hdl_1793_34115, hdl_1793_34116, hdl_1793_34117, hdl_1793_39058,
-      hdl_1793_265, hdl_1793_266, hdl_1793_267, hdl_1793_35963, hdl_1793_47607, hdl_1793_43457,
-      hdl_1793_38659, hdl_1793_61981, hdl_1793_34774, hdl_1793_52916, hdl_1793_24593,
-      hdl_1793_62930, hdl_1793_28797, hdl_1793_37664, hdl_1793_22087, hdl_1793_46149,
-      hdl_1793_23458, hdl_1793_23538, hdl_1793_23535, hdl_1793_38327, hdl_1793_23540,
-      hdl_1793_6484, hdl_1793_23541, hdl_1793_23542, hdl_1793_31875, hdl_1793_23539,
-      hdl_1793_34920, hdl_1793_83, hdl_1793_23537, hdl_1793_23546, hdl_1793_73994,
-      hdl_1793_22069, hdl_1793_23536, hdl_1793_11689, hdl_1793_24417, hdl_1793_47032,
-      hdl_1793_11634, hdl_1793_32119, hdl_1793_38943, hdl_1793_38944, hdl_1793_28897,
-      hdl_1793_28930, hdl_1793_6846, hdl_1793_345, hdl_1793_45033, hdl_1793_34823,
-      hdl_1793_39171, hdl_1793_36593, hdl_1793_28998, hdl_1793_39064, hdl_1793_28997,
-      hdl_1793_62246, hdl_1793_34681, hdl_1793_34679, hdl_1793_30714, hdl_1793_31295,
-      hdl_1793_31294, hdl_1793_34432, hdl_1793_22903, hdl_1793_70234, hdl_1793_18648,
-      hdl_1793_18647, hdl_1793_38938, hdl_1793_23492, hdl_1793_47237, hdl_1793_35274,
-      hdl_1793_30706, hdl_1793_38250, hdl_1793_7971, hdl_1793_7972, hdl_1793_24610,
-      hdl_1793_48605, hdl_1793_56827, hdl_1793_73664, hdl_1793_48604, hdl_1793_39200,
-      hdl_1793_54299, hdl_1793_270, hdl_1793_48353, hdl_1793_256, hdl_1793_24726,
-      hdl_1793_22056, hdl_1793_45154, hdl_1793_6657, hdl_1793_38576, hdl_1793_11510,
-      hdl_1793_45569, hdl_1793_33929, hdl_1793_35947, hdl_1793_203, hdl_1793_204,
-      hdl_1793_255, hdl_1793_262, hdl_1793_263, hdl_1793_261, hdl_1793_260, hdl_1793_12125,
-      hdl_1793_37438, hdl_1793_35951, hdl_1793_143, hdl_1793_63311, hdl_1793_44012,
-      hdl_1793_23459, hdl_1793_72676, hdl_1793_122, hdl_1793_123, hdl_1793_299, hdl_1793_38772,
-      hdl_1793_59, hdl_1793_193, hdl_1793_191, hdl_1793_32314, hdl_1793_6662, hdl_1793_21798,
-      hdl_1793_283, hdl_1793_8331, hdl_1793_6570, hdl_1793_69719, hdl_1793_284, hdl_1793_35948,
-      hdl_1793_285, hdl_1793_63288, hdl_1793_286, hdl_1793_56967, hdl_1793_287, hdl_1793_324,
-      hdl_1793_288, hdl_1793_46217, hdl_1793_34209, hdl_1793_28796, hdl_1793_61705,
-      hdl_1793_22943, hdl_1793_21274, hdl_1793_289, hdl_1793_11484, hdl_1793_290,
-      hdl_1793_291, hdl_1793_8332, hdl_1793_21799, hdl_1793_64898, hdl_1793_35949,
-      hdl_1793_6509, hdl_1793_6652, hdl_1793_35612, hdl_1793_6653, hdl_1793_18778,
-      hdl_1793_297, hdl_1793_32118, hdl_1793_61587, hdl_1793_21275, hdl_1793_44011,
-      hdl_1793_56124, hdl_1793_30490, hdl_1793_300, hdl_1793_21862, hdl_1793_194,
-      hdl_1793_180, hdl_1793_8336, hdl_1793_8337, hdl_1793_8340, hdl_1793_8338, hdl_1793_8334,
-      hdl_1793_6794, hdl_1793_6647, hdl_1793_8341, hdl_1793_8342, hdl_1793_209, hdl_1793_7969,
-      hdl_1793_8343, hdl_1793_8344, hdl_1793_8335, hdl_1793_8339, hdl_1793_81, hdl_1793_23404,
-      hdl_1793_6649, hdl_1793_62245, hdl_1793_294, hdl_1793_70668, hdl_1793_35957,
-      hdl_1793_23406, hdl_1793_45152, hdl_1793_38249, hdl_1793_49009, hdl_1793_72889,
-      hdl_1793_38908, hdl_1793_12121, hdl_1793_26, hdl_1793_30437, hdl_1793_298, hdl_1793_11973,
-      hdl_1793_65804, hdl_1793_48677, hdl_1793_48887, hdl_1793_48889, hdl_1793_6747,
-      hdl_1793_31575, hdl_1793_206, hdl_1793_46148, hdl_1793_29010, hdl_1793_179,
-      hdl_1793_6817, hdl_1793_34822, hdl_1793_22055, hdl_1793_35946, hdl_1793_250,
-      hdl_1793_238, hdl_1793_35962, hdl_1793_23543, hdl_1793_23545, hdl_1793_6781,
-      hdl_1793_34345, hdl_1793_23544, hdl_1793_24568, hdl_1793_70082, hdl_1793_34303,
-      hdl_1793_252, hdl_1793_62929, hdl_1793_239, hdl_1793_240, hdl_1793_11633, hdl_1793_241,
-      hdl_1793_38135, hdl_1793_11376, hdl_1793_11377, hdl_1793_7203, hdl_1793_11378,
-      hdl_1793_33928, hdl_1793_7214, hdl_1793_23681, hdl_1793_7204, hdl_1793_30476,
-      hdl_1793_39170, hdl_1793_7216, hdl_1793_47198, hdl_1793_11379, hdl_1793_11380,
-      hdl_1793_242, hdl_1793_6795, hdl_1793_34350, hdl_1793_11381, hdl_1793_11382,
-      hdl_1793_11383, hdl_1793_11384, hdl_1793_48046, hdl_1793_11387, hdl_1793_11386,
-      hdl_1793_28996, hdl_1793_34678, hdl_1793_30713, hdl_1793_34680, hdl_1793_243,
-      hdl_1793_244, hdl_1793_34431, hdl_1793_22902, hdl_1793_70233, hdl_1793_11509,
-      hdl_1793_38937, hdl_1793_23491, hdl_1793_18645, hdl_1793_35083, hdl_1793_210,
-      hdl_1793_34246, hdl_1793_245, hdl_1793_7970, hdl_1793_246, hdl_1793_247, hdl_1793_248,
-      hdl_1793_264, hdl_1793_52, hdl_1793_7566, hdl_1793_226, hdl_1793_228, hdl_1793_237,
-      hdl_1793_234, hdl_1793_249, hdl_1793_35042, hdl_1793_24725, hdl_1793_111, hdl_1793_23262,
-      hdl_1793_45153, hdl_1793_72274, hdl_1793_61709, hdl_1793_325, hdl_1793_115,
-      hdl_1793_236, hdl_1793_257]
-    emitted_type: CreativeWork
-    property_list: [date, rights, identifier, relation, format]
-    type_map: {}
-home_page: https://minds.wisconsin.edu/
-long_title: University of Wisconsin-Madison Libraries
-name: edu.wisconsin
-user: providers.edu.wisconsin
diff --git a/share/sources/edu.wm/icon.ico b/share/sources/edu.wm/icon.ico
deleted file mode 100644
index 9ea019fbc9fe69809d9788125fc104d80821c67e..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.wm/icon.ico and /dev/null differ
diff --git a/share/sources/edu.wm/source.yaml b/share/sources/edu.wm/source.yaml
deleted file mode 100644
index 77b995fdb..000000000
--- a/share/sources/edu.wm/source.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://publish.wm.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.wm
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [appliedsciencepub, ckharrisdata, ccrmresearchreports, chsdreports,
-      ccrm, chemistrypub, chsd, compmathstatspub, computersciencepub, dr, geologypub,
-      internationalrelationspub, linguisticspub, mathematicspub, neurosciencepub,
-      physicspub, psychologypub, researchtechnicalreports, sociologypub, spms, samsroe,
-      ssr, tsdpub, ts, yorkriverdata]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://publish.wm.edu/
-long_title: College of William and Mary
-name: edu.wm
-user: providers.edu.wm
diff --git a/share/sources/edu.wustlopenscholarship/icon.ico b/share/sources/edu.wustlopenscholarship/icon.ico
deleted file mode 100644
index 0102021a83d9c44bdb450f5ddc3567ef14b58a0b..0000000000000000000000000000000000000000
Binary files a/share/sources/edu.wustlopenscholarship/icon.ico and /dev/null differ
diff --git a/share/sources/edu.wustlopenscholarship/source.yaml b/share/sources/edu.wustlopenscholarship/source.yaml
deleted file mode 100644
index fb9119245..000000000
--- a/share/sources/edu.wustlopenscholarship/source.yaml
+++ /dev/null
@@ -1,22 +0,0 @@
-# Digital Commons/Bepress
-configs:
-- base_url: http://openscholarship.wustl.edu/do/oai/
-  disabled: false
-  earliest_date: 2000-01-19T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: edu.wustlopenscholarship
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [cse_research, facpubs, art_sci_facpubs, lib_research, artarch_facpubs,
-      bio_facpubs, brown_facpubs, cfh_facpubs, engl_facpubs, hist_facpubs, math_facpubs,
-      psych_facpubs, lib_present, lib_papers, wgssprogram]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://openscholarship.wustl.edu
-long_title: Washington University Open Scholarship
-name: edu.wustlopenscholarship
-user: providers.edu.wustlopenscholarship
diff --git a/share/sources/es.csic/icon.ico b/share/sources/es.csic/icon.ico
deleted file mode 100644
index 1ee4f86cd01d25a2c111d5b2966f8d8bd6ead3b0..0000000000000000000000000000000000000000
Binary files a/share/sources/es.csic/icon.ico and /dev/null differ
diff --git a/share/sources/es.csic/source.yaml b/share/sources/es.csic/source.yaml
deleted file mode 100644
index 39ab509d4..000000000
--- a/share/sources/es.csic/source.yaml
+++ /dev/null
@@ -1,32 +0,0 @@
-# DSpace
-configs:
-- base_url: http://digital.csic.es/dspace-oai/request
-  disabled: false
-  earliest_date: 2007-05-07T22:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: es.csic.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://digital.csic.es/dspace-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: es.csic
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://digital.csic.es
-long_title: Digital.CSIC
-name: es.csic
-user: providers.es.csic
diff --git a/share/sources/et.edu.addis_ababa/icon.ico b/share/sources/et.edu.addis_ababa/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
Binary files a/share/sources/et.edu.addis_ababa/icon.ico and /dev/null differ
diff --git a/share/sources/et.edu.addis_ababa/source.yaml b/share/sources/et.edu.addis_ababa/source.yaml
deleted file mode 100644
index f73c92d6b..000000000
--- a/share/sources/et.edu.addis_ababa/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: http://etd.aau.edu.et/oai/request
-  disabled: false
-  earliest_date: 2007-08-29T19:14:01Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: et.edu.addis_ababa.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://etd.aau.edu.et/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: et.edu.addis_ababa
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://etd.aau.edu.et
-long_title: Addis Ababa University Institutional Repository
-name: et.edu.addis_ababa
-user: providers.et.edu.addis_ababa
diff --git a/share/sources/eu.econstor/icon.ico b/share/sources/eu.econstor/icon.ico
deleted file mode 100644
index 5c3c1b113bac13fb3af4b5e6740334a028b51231..0000000000000000000000000000000000000000
Binary files a/share/sources/eu.econstor/icon.ico and /dev/null differ
diff --git a/share/sources/eu.econstor/source.yaml b/share/sources/eu.econstor/source.yaml
deleted file mode 100644
index 9aff52325..000000000
--- a/share/sources/eu.econstor/source.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# DSpace
-configs:
-- base_url: http://www.econstor.eu/dspace-oai/request
-  disabled: false
-  earliest_date: 2008-12-09T00:00:00Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: eu.econstor
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://www.econstor.eu/dspace/
-long_title: EconStor
-name: eu.econstor
-user: providers.eu.econstor
diff --git a/share/sources/fr.archives-ouvertes.hal/icon.ico b/share/sources/fr.archives-ouvertes.hal/icon.ico
deleted file mode 100644
index 277a29922267225d453ccd4bb7cc9c42ecc68225..0000000000000000000000000000000000000000
Binary files a/share/sources/fr.archives-ouvertes.hal/icon.ico and /dev/null differ
diff --git a/share/sources/fr.archives-ouvertes.hal/source.yaml b/share/sources/fr.archives-ouvertes.hal/source.yaml
deleted file mode 100644
index 3d5501700..000000000
--- a/share/sources/fr.archives-ouvertes.hal/source.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
-configs:
-- base_url: https://api.archives-ouvertes.fr/oai/hal/
-  disabled: false
-  earliest_date: 2002-09-23
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: fr.archives-ouvertes.hal
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: publication
-    property_list: []
-    type_map: {'info:eu-repo/semantics/ConferenceItem': conferencepaper, 'info:eu-repo/semantics/ConferencePaper': conferencepaper,
-      'info:eu-repo/semantics/ConferencePoster': poster, 'info:eu-repo/semantics/article': article,
-      'info:eu-repo/semantics/bachelorThesis': thesis, 'info:eu-repo/semantics/book': book,
-      'info:eu-repo/semantics/bookPart': book, 'info:eu-repo/semantics/conferenceContribution': conferencepaper,
-      'info:eu-repo/semantics/conferenceObject': conferencepaper, 'info:eu-repo/semantics/conferencePaper': conferencepaper,
-      'info:eu-repo/semantics/conferencePoster': poster, 'info:eu-repo/semantics/conferenceProceedings': conferencepaper,
-      'info:eu-repo/semantics/contributionToPeriodical': article, 'info:eu-repo/semantics/doctoralThesis': thesis,
-      'info:eu-repo/semantics/lecture': presentation, 'info:eu-repo/semantics/masterThesis': thesis,
-      'info:eu-repo/semantics/patent': patent, 'info:eu-repo/semantics/preprint': preprint,
-      'info:eu-repo/semantics/report': report, 'info:eu-repo/semantics/reportPart': report,
-      'info:eu-repo/semantics/studentThesis': thesis, 'info:eu-repo/semantics/workingPaper': workingpaper}
-home_page: https://hal.archives-ouvertes.fr/
-long_title: Hyper Articles en Ligne (HAL)
-name: fr.archives-ouvertes.hal
-user: providers.fr.archives-ouvertes.hal
diff --git a/share/sources/gov.clinicaltrials/icon.ico b/share/sources/gov.clinicaltrials/icon.ico
deleted file mode 100644
index 6d63bd8e629b6f5ac920c6b1305aa1b7c22d71e4..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.clinicaltrials/icon.ico and /dev/null differ
diff --git a/share/sources/gov.clinicaltrials/source.yaml b/share/sources/gov.clinicaltrials/source.yaml
deleted file mode 100644
index cc55e1bbe..000000000
--- a/share/sources/gov.clinicaltrials/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://clinicaltrials.gov/ct2/results
-  disabled: false
-  earliest_date: null
-  harvester: gov.clinicaltrials
-  harvester_kwargs: {}
-  label: gov.clinicaltrials
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: gov.clinicaltrials
-  transformer_kwargs: {}
-home_page: https://clinicaltrials.gov/
-long_title: ClinicalTrials.gov
-name: gov.clinicaltrials
-user: providers.gov.clinicaltrials
diff --git a/share/sources/gov.doepages/icon.ico b/share/sources/gov.doepages/icon.ico
deleted file mode 100644
index 58a8292cd3c0919de81ba9f2737a4e7a42d0903a..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.doepages/icon.ico and /dev/null differ
diff --git a/share/sources/gov.doepages/source.yaml b/share/sources/gov.doepages/source.yaml
deleted file mode 100644
index 4d969708d..000000000
--- a/share/sources/gov.doepages/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://www.osti.gov/pages/pagesxml
-  disabled: false
-  earliest_date: null
-  harvester: gov.doepages
-  harvester_kwargs: {}
-  label: gov.doepages
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: gov.scitech
-  transformer_kwargs:
-    namespaces:
-      'http://www.w3.org/1999/02/22-rdf-syntax-ns#': rdf
-      'http://purl.org/dc/elements/1.1/': dc
-      'http://purl.org/dc/terms/': dcq
-home_page: http://www.osti.gov/pages/
-long_title: Department of Energy Pages
-name: gov.doepages
-user: providers.gov.doepages
diff --git a/share/sources/gov.nih/icon.ico b/share/sources/gov.nih/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.nih/icon.ico and /dev/null differ
diff --git a/share/sources/gov.nih/source.yaml b/share/sources/gov.nih/source.yaml
deleted file mode 100644
index 5a3121207..000000000
--- a/share/sources/gov.nih/source.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-configs:
-- base_url: https://exporter.nih.gov/
-  disabled: false
-  earliest_date: null
-  harvester: gov.nih
-  harvester_kwargs:
-    table_url: https://exporter.nih.gov/ExPORTER_Catalog.aspx/
-  label: gov.nih
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: gov.nih
-  transformer_kwargs: {}
-home_page: http://exporter.nih.gov/
-long_title: NIH Research Portal Online Reporting Tools
-name: gov.nih
-user: providers.gov.nih
diff --git a/share/sources/gov.nist/icon.ico b/share/sources/gov.nist/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.nist/icon.ico and /dev/null differ
diff --git a/share/sources/gov.nist/source.yaml b/share/sources/gov.nist/source.yaml
deleted file mode 100644
index 3d1b3f6a7..000000000
--- a/share/sources/gov.nist/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://materialsdata.nist.gov/dspace/oai/request
-  disabled: false
-  earliest_date: 2013-01-31T19:44:11Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: gov.nist.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://materialsdata.nist.gov/dspace/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: gov.nist
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://materialsdata.nist.gov
-long_title: NIST MaterialsData
-name: gov.nist
-user: providers.gov.nist
diff --git a/share/sources/gov.nodc/icon.ico b/share/sources/gov.nodc/icon.ico
deleted file mode 100644
index e5b3304418135aacd5dcf2a2d7fc8de75c73ccd2..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.nodc/icon.ico and /dev/null differ
diff --git a/share/sources/gov.nodc/source.yaml b/share/sources/gov.nodc/source.yaml
deleted file mode 100644
index a0eb62df5..000000000
--- a/share/sources/gov.nodc/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: https://data.nodc.noaa.gov/cgi-bin/oai-pmh
-  disabled: false
-  earliest_date: 2013-01-30T02:45:05Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: gov.nodc
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://www.nodc.noaa.gov/
-long_title: National Oceanographic Data Center
-name: gov.nodc
-user: providers.gov.nodc
diff --git a/share/sources/gov.nsfawards/icon.ico b/share/sources/gov.nsfawards/icon.ico
deleted file mode 100644
index 12ea5f583aa82e1e0c4da6e87681d8cc419dcf78..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.nsfawards/icon.ico and /dev/null differ
diff --git a/share/sources/gov.nsfawards/source.yaml b/share/sources/gov.nsfawards/source.yaml
deleted file mode 100644
index f0c5188b0..000000000
--- a/share/sources/gov.nsfawards/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: http://api.nsf.gov/services/v1/awards.json
-  disabled: false
-  earliest_date: null
-  harvester: gov.nsfawards
-  harvester_kwargs: {}
-  label: gov.nsfawards
-  rate_limit_allowance: 1
-  rate_limit_period: 3
-  transformer: gov.nsfawards
-  transformer_kwargs: {}
-home_page: http://www.nsf.gov/
-long_title: NSF Awards
-name: gov.nsfawards
-user: providers.gov.nsfawards
diff --git a/share/sources/gov.pubmedcentral/icon.ico b/share/sources/gov.pubmedcentral/icon.ico
deleted file mode 100644
index 4ce5e728e3bf230b1119efd13acce47b475db8c7..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.pubmedcentral/icon.ico and /dev/null differ
diff --git a/share/sources/gov.pubmedcentral/source.yaml b/share/sources/gov.pubmedcentral/source.yaml
deleted file mode 100644
index 47955c088..000000000
--- a/share/sources/gov.pubmedcentral/source.yaml
+++ /dev/null
@@ -1,37 +0,0 @@
-configs:
-- base_url: https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: gov.pubmedcentral
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi
-  disabled: false
-  earliest_date: 1999-01-01
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: pmc_fm, time_granularity: false}
-  label: gov.pubmedcentral.pmc
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: gov.pubmedcentral.pmc
-  transformer_kwargs:
-    namespaces:
-      'http://www.openarchives.org/OAI/2.0/': null
-      'http://jats.nlm.nih.gov/ns/archiving/1.0/': null
-      'https://jats.nlm.nih.gov/ns/archiving/1.0/': null
-      'http://www.w3.org/2001/XMLSchema-instance': xsi
-      'http://www.niso.org/schemas/ali/1.0': ali
-      'http://www.w3.org/1999/xlink': xlink
-      'http://www.w3.org/1998/Math/MathML': mml
-home_page: http://www.ncbi.nlm.nih.gov/pmc/
-long_title: PubMed Central
-name: gov.pubmedcentral
-user: providers.gov.pubmedcentral
diff --git a/share/sources/gov.scitech/icon.ico b/share/sources/gov.scitech/icon.ico
deleted file mode 100644
index 58a8292cd3c0919de81ba9f2737a4e7a42d0903a..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.scitech/icon.ico and /dev/null differ
diff --git a/share/sources/gov.scitech/source.yaml b/share/sources/gov.scitech/source.yaml
deleted file mode 100644
index 6d0ce9bdc..000000000
--- a/share/sources/gov.scitech/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: https://www.osti.gov/scitech/scitechxml
-  disabled: false
-  earliest_date: null
-  harvester: gov.scitech
-  harvester_kwargs: {}
-  label: gov.scitech
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: gov.scitech
-  transformer_kwargs:
-    namespaces:
-      'http://www.w3.org/1999/02/22-rdf-syntax-ns#': rdf
-      'http://purl.org/dc/elements/1.1/': dc
-      'http://purl.org/dc/terms/': dcq
-home_page: http://www.osti.gov/scitech
-long_title: DoE's SciTech Connect Database
-name: gov.scitech
-user: providers.gov.scitech
diff --git a/share/sources/gov.usgs/icon.ico b/share/sources/gov.usgs/icon.ico
deleted file mode 100644
index b4bbeeeeae8ab40e7216d3f023c2d23b00c2600b..0000000000000000000000000000000000000000
Binary files a/share/sources/gov.usgs/icon.ico and /dev/null differ
diff --git a/share/sources/gov.usgs/source.yaml b/share/sources/gov.usgs/source.yaml
deleted file mode 100644
index e82892222..000000000
--- a/share/sources/gov.usgs/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://pubs.er.usgs.gov/pubs-services/publication
-  disabled: false
-  earliest_date: null
-  harvester: gov.usgs
-  harvester_kwargs: {}
-  label: gov.usgs
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: gov.usgs
-  transformer_kwargs: {}
-home_page: https://pubs.er.usgs.gov/
-long_title: United States Geological Survey
-name: gov.usgs
-user: providers.gov.usgs
diff --git a/share/sources/info.spdataverse/icon.ico b/share/sources/info.spdataverse/icon.ico
deleted file mode 100644
index a6de661cc1c2db8df7fcbca6665a0d4ca1f1117b..0000000000000000000000000000000000000000
Binary files a/share/sources/info.spdataverse/icon.ico and /dev/null differ
zR)9B|JrT#&&zSWgtzov!VCq<Y@n)bwITIUh$c<puBUo^-BBKxKGNzwyy)mDfS^BgY zaHCl|6%8-<xM^^sNK3#ELBE;p6q*~%`>7zq%}-p8c=wSz1Ds43{CK+HpV-pn??$>} z*gz>8A8VR(c04{($|g0NO^gP;7c+_JQYQJesezoGMpQeu`_F@x%r)*}?0tJKzArd} ztP4~<e2-e7V$J&nP+un;Y#_bIY}efQ{#YO&q#;S`o@;z>Vn80LQg@xVbr$d6g>vpd z6xnq{502)){P{25MpMj22f%x6;+vUn$y7mZ5ZR>Ae5B?L9enz++7tj#9*gzMb2l1r m+_13FZ-YnOHScQBF~DE?39;we1O&_g0000<MNUMnLSTY;HSkXW diff --git a/share/sources/info.spdataverse/source.yaml b/share/sources/info.spdataverse/source.yaml deleted file mode 100644 index 33937e9e1..000000000 --- a/share/sources/info.spdataverse/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://dataverse.scholarsportal.info/ - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: info.spdataverse - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://dataverse.scholarsportal.info/ -long_title: Scholars Portal dataverse -name: info.spdataverse -user: providers.info.spdataverse diff --git a/share/sources/info.ssoar/icon.ico b/share/sources/info.ssoar/icon.ico deleted file mode 100644 index 1c1387ec541d0e7ae5c0b1d4125324b68ee88c37..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 534 zcmV+x0_pvUP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00GWPL_t(2k%f^xYZOrwK+k>iW_I1hC2KU052L{ZK`m@Vw$Q>Nh#!rW*riBe zuZ@2|L4SdyvM^v}p&;69Au1vUO@yS_eBvT1Np^SG*_nCojYT3x!m_76m&>_VX6H}D zcFR*NE;2VqJ3@qRlnQ0tF~6KNS5|rP3K2Z7s|{Fdl!#V}^>wW6VFRA0)$Xnm&h@Z& z&^9AY7@wfFOl=tfx;F$Gnfr0y*vRuRR#sNqTaAY7t5)JG2_Vb)1^M2px-11m@HPAG zy$HUxF4`FYjO`Ji#8<sD|3_<8{JX&!*UmEQ>s&6#198$xO^kS%pGO2|#X7yb`7CuI zNHG{XTDlzQ!rITJHy>vOnLK}GWMIO?p<b-&*6(EK6r&ZH`Ftogn}EwWJ2-VLcW?UP zrCNOh7#|s(xl<kf_Jprb5fAM%`N+F*`#_dON>GZ>EWCdE>D`xwQjvT>eY?>gt(|Mk zBMKeh3}O9Ww1CTOlr;Aq)}$S9yRQ_<Y^q{o5=&x<QU(earY9dC>Fe7L*$x>hm#<!b zz;K0VlemTH$eEk+VdJiGVFm|-v6KB%dH-;&KL7IltcYBz+!`A^P10uRIn51*D;&T0 Y2YwO6(&C9ORsaA107*qoM6N<$f(~NwmjD0& diff --git a/share/sources/info.ssoar/source.yaml b/share/sources/info.ssoar/source.yaml deleted file mode 100644 index d8d20b5f4..000000000 --- a/share/sources/info.ssoar/source.yaml +++ /dev/null @@ -1,24 +0,0 @@ -#DSpace -configs: -- base_url: http://www.ssoar.info/OAIHandler/request - disabled: false - earliest_date: 2012-08-29T21:40:31Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: info.ssoar - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {abridged report: report, annual report: report, company report: report, - conference paper: conferencepaper, expert report: report, festschrift: publication, - final report: report, interim report: report, journal article: article, literature report: report, - m.a. 
-      report from institution/organization: report, research report: report, working paper: workingpaper}
-home_page: http://www.ssoar.info/en/home.html
-long_title: Social Science Open Access Repository
-name: info.ssoar
-user: providers.info.ssoar
diff --git a/share/sources/io.osf/icon.ico b/share/sources/io.osf/icon.ico
deleted file mode 100644
index 2388e8876bf618a790c8d003b8694fdeb03f1a91..0000000000000000000000000000000000000000
Binary files a/share/sources/io.osf/icon.ico and /dev/null differ
diff --git a/share/sources/io.osf/source.yaml b/share/sources/io.osf/source.yaml
deleted file mode 100644
index 88b74340e..000000000
--- a/share/sources/io.osf/source.yaml
+++ /dev/null
@@ -1,53 +0,0 @@
-configs:
-- base_url: http://osf.io/api/v1/search/
-  disabled: false
-  earliest_date: null
-  harvester: io.osf
-  harvester_kwargs:
-    path: v2/nodes/
-    embed_attrs:
-      children: relationships.children.links.related.href
-      contributors: relationships.contributors.links.related.href
-      institutions: relationships.affiliated_institutions.links.related.href
-      identifiers: relationships.identifiers.links.related.href
-  label: io.osf
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: io.osf
-  transformer_kwargs: {}
-- base_url: http://osf.io/api/v2/preprints/
-  disabled: true
-  earliest_date: null
-  harvester: io.osf
-  harvester_kwargs:
-    path: v2/preprint_providers/osf/preprints/
-    embed_attrs:
-      contributors: relationships.contributors.links.related.href
-  label: io.osf.preprints
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: io.osf.preprints
-  transformer_kwargs: {}
-- base_url: https://api.osf.io/v2/registrations/
-  disabled: false
-  earliest_date: null
-  harvester: io.osf
-  harvester_kwargs:
-    path: v2/registrations/
-    query_params:
-      embed: identifiers
-    embed_attrs:
-      children: relationships.children.links.related.href
-      contributors: relationships.contributors.links.related.href
-      institutions: relationships.affiliated_institutions.links.related.href
-      identifiers: relationships.identifiers.links.related.href
-  label: io.osf.registrations
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: io.osf.registrations
-  transformer_kwargs: {}
-home_page: http://osf.io/
-long_title: OSF
-name: io.osf
-user: providers.io.osf
-canonical: true
diff --git a/share/sources/org.arxiv/icon.ico b/share/sources/org.arxiv/icon.ico
deleted file mode 100644
fab196091f22a1eacc59d2286bf2062ed5e34238..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 530 zcmV+t0`2{YP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00GKLL_t(2k$sXuNYh~$#(!<4`Cma3%s^cde`64#L8E(7q~sw%4^amV5&|Vy zB83N+2T2Gr2oyoZ#GyMFr07AjXl<Z76s|Jnp+IocVogw6{@L5X&o=*JA1{0le9!ZK z&-3zvfY2n({9qgJEnRfCG?4_s0?ZkcrO&N2v){Q&<=LUao9=U@vMfM+VsUb5&i`<f z1OQR+f)}DnP(OdEG`V)rpW@>m5a@&MV^2D3uUvL{o@IxFh#*2Zz@ry{Hr!rWR#U7T z{xe$SaZb-DYG#cOAK~~R<gi)L+N&5H9cJM6<fdn!u4Tdj^$cuAiZj8i&rZxF7)pDH z%;p>GwrwCuO#~z2U_>!3yXN=%sDDb+T`{Y+4(P7fdw&W;J7AFv(vGFCA%Zj7X(#cx z`s!B#$eS9>f7^MI70jU<9;dUGms4?#mU_F0CSt9<Ua}{&Xy1Y_`#m=ESQEKjRcCn3 zE`e8ksnvzXic^f*@7^hR*K@*HbZmt5vHQ-tTRIoB2Jh855@m#T1g}2#1)?9i8ZM|F zD=-2+SXEv72X4#4rw2Ex_-O-yE!tYD&leT$x2=5V%|IJZb22h)@t^fZ;EMu(0liEp U{9lAq#sB~S07*qoM6N<$f)&H;>;M1& diff --git a/share/sources/org.arxiv/source.yaml b/share/sources/org.arxiv/source.yaml deleted file mode 100644 index 95284fcb6..000000000 --- a/share/sources/org.arxiv/source.yaml +++ /dev/null @@ -1,33 +0,0 @@ -configs: -- base_url: https://export.arxiv.org/api/query - disabled: false - earliest_date: null - harvester: org.arxiv - harvester_kwargs: {} - label: org.arxiv - rate_limit_allowance: 1 - rate_limit_period: 3 - transformer: org.arxiv - transformer_kwargs: - namespaces: - 'http://www.w3.org/2005/Atom': null - 'http://arxiv.org/schemas/atom': arxiv -- base_url: http://export.arxiv.org/oai2 - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false} - label: org.arxiv.oai - rate_limit_allowance: 1 - rate_limit_period: 20 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: preprint - property_list: [] - type_map: {} -home_page: http://arxiv.org -long_title: arXiv -name: org.arxiv -user: providers.org.arxiv -canonical: true diff --git a/share/sources/org.bhl/icon.ico b/share/sources/org.bhl/icon.ico deleted file mode 100644 index 5780e83664ad02e5e00da8247978947e2d533d1a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 642 zcmV-|0)737P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0006xNkl<ZIE|f>%}dl#7=@qvF{6$%I%-31+>Ew~R$`Eb2ASBRjWE)}gqBfD zgZ_gkT9i@ICZVMe7D|R#l!=j>7<U;arh(&_Wq!=p@BZ#>Au0paV$SY893BpbSM26e zjuI3M2Jk#Hq=gVDr4T~koWmG{loBZ=#uxykln5cQ)}qv*51_ZVH%We)BbzHw9j)X{ zdnYU3msng}pzU-A)>_i(6f4V1G&UY%e*QD9ZS99urlv<7;9@acb93CipB{EEUky9J zIk)8s{)QWQ@zgamRJy6@u>;?;8!nSydm@eTsXq$owBy$8J9M;PBpQtns{_zzsi~-_ zpr$5<Qi`%*@fKQZMQRP+PK}aGCK<TikF}2V4VZj0j&+U?GqXghst^Lo%KriY=<mBs zJl??F!JBOF_y8n&5`;p6)S9EayOq8E#Q>Shc{IlB`LZy>)oWMiI(LC!Fu;*o_`T(^ zn%ZSz@)_y%G&l%{6rfQ;?=?U;zAmPCJ~GDa$2od?d&uPq{P+ow#j_rnOqR`GTL=NG zYd#aN$B9-~(sJ??vaYs@rlwjh_1)mx@)GwSJi=K+=h+ij9L_ny;c{A9juVN5h{tO< z-<9C;!$CkGa^AWt(?LLBvE&PRlu~FFAeYbX4~145&v@)-L6I%&3~6h9vW0BXFurf_ zeS;9iH*k;*1_B5PI%C5BfBJvOlR<oA(20S2B>|+;c%DJmRhQO4N{#XIT3hQ6l>)H7 cuR*-~17o5F0-=ol=l}o!07*qoM6N<$f>019i~s-t diff --git a/share/sources/org.bhl/source.yaml b/share/sources/org.bhl/source.yaml deleted file mode 100644 index 7f7816290..000000000 --- a/share/sources/org.bhl/source.yaml +++ /dev/null @@ -1,33 +0,0 @@ -configs: -- base_url: http://www.biodiversitylibrary.org/oai - disabled: false - earliest_date: 2006-01-01 - harvester: oai - harvester_kwargs: {metadata_prefix: mods, time_granularity: false} - label: org.bhl.mods - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: mods - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - 
type_map: {} -- base_url: http://www.biodiversitylibrary.org/oai - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false} - label: org.bhl - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://www.biodiversitylibrary.org/ -long_title: Biodiversity Heritage Library OAI Repository -name: org.bhl -user: providers.org.bhl diff --git a/share/sources/org.biorxiv/icon.ico b/share/sources/org.biorxiv/icon.ico deleted file mode 100644 index 5bc509c0c9c98c1c2391098a0e2d4d589760bbf8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 522 zcmV+l0`>igP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00F{DL_t(2kz@S-|33pMfQcm4<Y-{Ib?er)ty?WEEzK<~;G!SSU3f5S!T-Pi z{{8;Vr64C08O*D$2DO2Wjjg$%@%;Jo$j1Er`t8ZGwg3P85irt!wQK)L|CmqLZ$LHh z^YaS}3v+UE{{8#+%a<=uQFeAFHa3X>FXg-p!`Tyly?ph0*M6wfzkmN&Sy`VvejFJd zZfa<hm67r8<3~1DHU<U;hQI$97#P0YfApV$fk#6X>IVq*{{4sif`Yqu?v|I9{$*k5 zOG#&DXa9WT)~$~I=j*qqmgkGPIw3Ut`t?g!PuJSU#@fc_(W8g67c9tia${v@|M&OL z(*-N|%?#Bl3Q>ZUk&*GwpFa@3yn@1?KYxDy{Q3XqPeBtS?e4}`n|D2!wE&@kk&%&s zfq|W!9m3zbd5fx=nu4OjpFe;8efuuy?=71cd9AAX)xLvJCw=?&?fdud%a$yC^zhNK zqet%CzO!J(a?bzEKi<9n@$xkT1A|su-Pb$!&qk!^PVJHo^kptCD&pnmV`5}_^X842 zxp`Mlx0Z&+_qXqu*jd@dMFjM9nc3OJ+#DHMS-;)8&nY9tIQSAD0Abj@!<AyIjsO4v M07*qoM6N<$g8$?c%K!iX diff --git a/share/sources/org.biorxiv/source.yaml b/share/sources/org.biorxiv/source.yaml deleted file mode 100644 index 1c2b2f312..000000000 --- a/share/sources/org.biorxiv/source.yaml +++ /dev/null @@ -1,38 +0,0 @@ -configs: -- base_url: http://connect.biorxiv.org/biorxiv_xml.php - disabled: true - earliest_date: null - harvester: org.biorxiv.rss - harvester_kwargs: {} - label: org.biorxiv.rss - rate_limit_allowance: 1 - rate_limit_period: 3 - transformer: org.biorxiv.rss - transformer_kwargs: {} -- base_url: http://biorxiv.org/search/ - disabled: true - earliest_date: null - harvester: org.biorxiv - harvester_kwargs: {} - label: org.biorxiv - rate_limit_allowance: 1 - rate_limit_period: 20 - transformer: org.biorxiv - transformer_kwargs: - namespaces: - 'http://purl.org/rss/1.0/': null - 'http://purl.org/dc/elements/1.1/': dc -- base_url: http://biorxiv.org/search/ - disabled: false - earliest_date: 2013-11-01 - harvester: org.biorxiv.html - harvester_kwargs: {} - label: org.biorxiv.html - rate_limit_allowance: 1 - rate_limit_period: 20 - transformer: org.biorxiv.html -home_page: http://biorxiv.org/ -long_title: bioRxiv -name: org.biorxiv -user: providers.org.biorxiv -canonical: true diff --git a/share/sources/org.cogprints/icon.ico b/share/sources/org.cogprints/icon.ico deleted file mode 100644 index e81601721d8af6bbdccdcba2fb81fde5dc195e3e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 511 zcmV<b0RaAqP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0005DNkl<ZIE}56ziSjh6vsa^yW}p#`UB+-i?gw?vJ$~UEX2lEI~6TbTBQ^G zH*C&Qv=EZQ!YYM`iZ;RC9|*BAX(D(@Zg%FqXEA$b7gie`7-oi<_kExDod^Fo++s7n zwz|F@-F0RMotsl9k0P_(Z#yrir~fW>e&y-(`5XH#W?%QsKK?ZO63jSv)6Psk+34c! 
zt-i3b&9XO-{Fo_wA(6x=2Ln*SA2~sF>;44T+Ijz^L*m<qs1PF5$p~T`NbBM85!Ias zlhNgs4oTTQBsD@aqe3AD(b)a)3MAo*lH%-z$#}55zP<7AMpwW?G6-UXWF$3IBllMC z;vI-BVR$~P@h;iCmmsP!my90`QjA=>dWEPE#ZX^R``2u|-Rv(xG!xX4OQ_XIF;Z(5 zH>cD|+9B;<a74}7LzA)AT*`t#itN*2_w<wy1m>bd$&a%LL;<zNTtNB+l0isHEjeRs z(4*v4)RWZyAmA4nAv6TPMa;5*YY1B1(;+ZlqU92*9`%M6h6tz{B8IpE^<~?FLxJ(J zYWQsJ1&<!Dfy+LXfOolFefF8*aG9#AhWq@d@e9lVC8ff05M}@X002ovPDHLkV1iyM B>n{KR diff --git a/share/sources/org.cogprints/source.yaml b/share/sources/org.cogprints/source.yaml deleted file mode 100644 index b9cb79f9d..000000000 --- a/share/sources/org.cogprints/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://cogprints.org/cgi/oai2 - disabled: false - earliest_date: 2007-10-15T11:31:26Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.cogprints - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: preprint - property_list: [] - type_map: {} -home_page: http://www.cogprints.org/ -long_title: Cogprints -name: org.cogprints -user: providers.org.cogprints diff --git a/share/sources/org.crossref/icon.ico b/share/sources/org.crossref/icon.ico deleted file mode 100644 index 0a29026c3cad7609abe1dc507a002a3d0d5115b8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 629 zcmV-*0*d{KP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0006kNkl<ZIE{@{&r4KM7(L(fzIiia>O|8#2Y;X`#TZxv1EEffppc3-6$z~h zT4gO-M6~E12%=ilA{Wsn%86houv*O|8VV8#N+7mLo$=>5qm#aQbKloOXL5dI9XN{% z=bX#=?)|{b^dCf?GyQG6Hg#qDObY%E&T}@@ipg6qFi3H0`qBiFU(e)3qKs(^v!PZb zhwoweQMaWu>L3RojsR8ypbE2G0At~`4~Z{#vGlmxLZc^d+64s_VtVi7=BuF|EC&>2 zPYuYX6)6DlI>6FizJhB%R^Pg8cnNo{OT_7Xx{`b;1Ku?^G8+KLBG`Ll05$&mISVl} z<+5Y@e5|zA0j30Pnowg##(uUdrCtjdqAM7dqN%rn^4C#GK%5#_YtYTA(;X|DCy&v5 z`$1QRU{fSRVTLLQl6k!4P~Zt5o&lhO06c&CX2Rcg0S-G<W+))2LLjxiF+Vak?i`4Y z{+f%&^}$ac`vuSgfOgJ+aP|7d!_okZ^yLMocJo7DpYMHS#7Lyly3MXZ6{4!D(g^_B zvd=h_Mtg8xyWKo9cfy`nth0H8+1gW0M)lPAqVj#LT4pVcq|Em(d4=G+&)+(n+Mf|w zQY78=o>fmHl;h{jnxE5&mC>l90-Vn%n1MGm#LZgq=LAyH!iL58akO(Vsv~?r!-INt z%U`N4Hu*cF!#ct(8XkHI)l^a;E+w2M{nJ!{ohutz=Q~U0j@8{>+1>aXDZw*Bk$hOG P00000NkvXXu0mjfjv5?6 diff --git a/share/sources/org.crossref/source.yaml b/share/sources/org.crossref/source.yaml deleted file mode 100644 index 5933cc22b..000000000 --- a/share/sources/org.crossref/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: https://api.crossref.org/v1/works - disabled: false - earliest_date: null - harvester: org.crossref - harvester_kwargs: {} - label: org.crossref - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.crossref - transformer_kwargs: {} -home_page: http://www.crossref.org -long_title: CrossRef -name: org.crossref -user: providers.org.crossref diff --git a/share/sources/org.datacite/icon.ico b/share/sources/org.datacite/icon.ico deleted file mode 100644 index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 726 zcmV;{0xA88P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00009a7bBm000XT z000XT0n*)m`~Uy~h)G02R5%f(Q_W8kQ4pWn(r!z;P_|TC=|X`bL@<#QQWH54gCQL1 zSq>@(4yGRbW8k2Tc*C=3xZqXT^aB%>1XK{VTBTWNyDi=BLalE`YtY1tPV)B6zWI2+ zH}e})y<Ug^5eo*JOeTolT(FnQhUdzNIAR>vV)=p=E~-^0+IbgOcDLG7R;zhlO3F!> z)2iIRJEo0~^@0fz+?alB*Y%peFVsF;F6+_Wpx_m|+XR*ftW9PJiUO>xY#!{&`5f=E zExr4o#BDZn_UYn`MhNic!GUwLmCEt-;3Z!m*eO7>!wSPg9dJ}JpwrJox9AbxY-WYU zCuwJ>#{(Oi`|}K3<_toNFxYCj8lAa4O2K%Bw@d4LsFHXE2Q)D>cW#Xa&=+ISKnV<^ 
z7V8SQy^+h}4{!E8W(jb)TO4$Dq@6&asIh-Li09p2T|bGTff8WBCMNm?bUH29uJ!@n zOs|Ayy9F-vcwt9AgvrSP;V`d-(U+`6Rdb&8@Z^kd@9?$KY6PRBnO(K7lu!lqRap$` zga8!`M2QS{!KW_;R)gQskdb|`4k!)w5CZJ;j;!#$c9BqAXZ?MdmSKxr0XqP`b_a|{ zf}j+tu(h2dZlIYEw7DIr7p%`e{b?KO_d`x8(~{PAA@Mp(=bly$o;`nifH8PL2^hoX zJ~EQvt$&lx>{+#MK*+LR)f{KD%>7Eh=$S%>#TV8xjU-q=M9~n`rB^aMh_7Z^T-=hB zk_iFT0J)9y4E1{8;iJ{)^74jvYU-kZJjfR-z<2_wgmjR}7IG~-xAcBniDL{Vr&%~X zOp&k{){T>U*k`p=b>ggGk%cx#(#|!_ttIx-&Oe8#0rOw>?^q!lUgi3;(*OVf07*qo IM6N<$f|IpWF#rGn diff --git a/share/sources/org.datacite/source.yaml b/share/sources/org.datacite/source.yaml deleted file mode 100644 index b0ce6f847..000000000 --- a/share/sources/org.datacite/source.yaml +++ /dev/null @@ -1,39 +0,0 @@ -configs: -- base_url: http://oai.datacite.org/oai - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.datacite.oai - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -- base_url: http://oai.datacite.org/oai - disabled: false - earliest_date: 2011-01-01T00:00:00Z - harvester: oai - harvester_kwargs: - metadata_prefix: oai_datacite - label: org.datacite - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.datacite - transformer_kwargs: - namespaces: - 'http://purl.org/dc/elements/1.1/': dc - 'http://datacite.org/schema/kernel-3': null - 'http://datacite.org/schema/kernel-4': null - 'http://www.openarchives.org/OAI/2.0/': null - 'http://schema.datacite.org/oai/oai-1.0/': null - 'http://schema.datacite.org/oai/oai-1.1/': null - 'http://www.openarchives.org/OAI/2.0/oai_dc/': null - 'http://datacite.org/schema/kernel-2.2': null -home_page: http://oai.datacite.org/ -long_title: DataCite MDS -name: org.datacite -user: providers.org.datacite diff --git a/share/sources/org.dataone/icon.ico b/share/sources/org.dataone/icon.ico deleted file mode 100644 index 5970f5437e522bdd3fac2f40180dd0ad32a3843c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 791 zcmV+y1L*vTP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00PfRL_t(2kp;nBXw_vL2k`Ijd7l4&`=1@R<LR8Vw#>K2W=(`aE*);@MW!MI z8kJs1Lfr`7bs=~mbP+;C(8Vr<0%s&Fms;qg&C=#*&ZaD*wA6Vv+;p5bkNdwoFF!vY ziZx4UEtPVlB*6HySgbrVTDi7{+$&9rV67v`LK71#8679euS4iu@cG2wM+-N$2F;mN zLj&29(=)L8bniRwt;!n9EI^JZbk;{$f|nZ@{y3lMS>5qZ;r6zb%`+9bZ{17ROXW8Q zuN++IpV+$H7%QD5Z7c}hnG0XGwdYfE|LE^XFGp;lo?h+Q+?~$>YD53!ZENKhU0a+3 zF-OO4O^=lBjBMPc27g3-02{(8`~FS=V#g$CF^L=c1|~k(-I)&%YexTx+ofnBso~_A zPyd)2{{PW03-ip(3A1yWnb{q`d1wFG3(PF0%2lY$x=h^*Ua+?%+m>%?UDlKjYmbX% zCm2iu0L;6uuIMk!0TA<X<?Y56z{1hDy*<k!IyiY_eaq^_v=ovcAgqhwt*BSffwooU zm>?p+IhH=69qldMQTB(z?4F;;vBZ7d+40eu)<D{lA413&Y4Kux(@=E*@R+o8b<Eu8 zr6wO&vD9z$8V`+6?6@>?eZE$UC8|hE79H6o2rK_pTmmLbh8U9XMo+*-`=S}kf1R(6 z-M@Qv=E?T;IlziQm?N)sAD$cugs(m4X#pMeb!~dNF0G!{W`ht;AL#1bH}u<Q8`=RM z*4na-aeDBpscB}eab#W0%vWab;hVF~-w!#~y+2+4WcU&@8_P(XV~%H|MSOSu!05H= zGiJ8TdguAkzJCjI<tRIFaqqc{%xql9nu)D8P>OVk)seBk`tFatm22PPXCBbv!`j@~ zBRaL|^$*`zW0)Nhavp^`@q~4x$Oz?`DDlUu#kr^`U8*fz-<n<O!5G%cr4oS1JO@qU Vk5yjaw449{002ovPDHLkV1h>KhT{ML diff --git a/share/sources/org.dataone/source.yaml b/share/sources/org.dataone/source.yaml deleted file mode 100644 index b3a23c08d..000000000 --- a/share/sources/org.dataone/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: https://cn.dataone.org/cn/v2/query/solr/ - disabled: false - earliest_date: null - harvester: org.dataone - harvester_kwargs: {} - label: org.dataone - 
rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.dataone - transformer_kwargs: {} -home_page: https://www.dataone.org/ -long_title: 'DataONE: Data Observation Network for Earth' -name: org.dataone -user: providers.org.dataone diff --git a/share/sources/org.dryad/icon.ico b/share/sources/org.dryad/icon.ico deleted file mode 100644 index 82909b23ebe84e52639fbbb3ec550f9da7511a37..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 801 zcmV++1K#|JP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0008mNkl<ZIE{^xT};z=7=@o-`=?bJtW=Chi3XK!6$l~~wBi!I*)))<8;f7# zBPNb)cgq&GkVS5Y!5B5h3%yf}7v3yUmo1ybu*DEmU>ovfDNqpXFs1TQYg^jBUYIsv ziDvKR$vMxPoE&)1iAb}E)V$B5{woA!w&uun$9LtH4+`7+`pL*m>ks;tY!b)N-ZWa9 zri^Xx1bp0TlkifAlA<C!<Kt|}%cIbe!Tjq8#W{6c?Ea?Tirsd#?iXV+qXS4toX>6+ zJgML3<aG5ROr~_c{OCtargUuUa~Sjoh{hu9&axK{PxTqTy5^FQylhSZ{7l;Ylk>KF z?oo1Aouc#6V=7CI;aHQ&Z{BC@wjN}qF@dJ($g)@%nCf8#<~u1sb<3W?W6oU$EgB{v zaq>59qV~FzPWMwDbUoxlYYu%APZ$fnCNMV#z}}V(JpeipIS%lbKfp>YkC}yOPB^x4 zvZ9RE>OUwhDnKXe7@Hm8<;)9yI8;S>X=$oA4HyDgoL^+fH^PMzw>Wj{EN;&z6+5T6 zSoJGPDK7olh%qCB4)+LSle3sj>4-?HE=k&)X~tRR;C9aqR3eURM=9gp35I=t15ocQ z!@eeyPYSlO6boXv<^Y(rB%otph;*ZYK-P1jQ4<3M*s;M%wmFx}zc(`x^D^fT<Jwls zy49U@xjiVQ8gxpj-XtUxiSjCJ<lCw<d{NR!Fk~Uqw3dbFBf@?I3$FrP>HeOl)6bU= z$tUj0BQ5*xCdI?$4)WI?Wq<MK%U0Sy)y?3{W4e4lbFT7EtzKzu8bCJr6MCISJP`xn zr-$8C?9{WLw}A8aKc&;{;XqLV|9HJDhD_*{*0yZQ5n0$?*;>*QjYU!)JDQ&H^)(m6 zzFy*qx7x<6EwnXsq;jbejcQddpyuS?&8`-QN7JVPFfg5}i1wz@t7*#E!2dZBsSy#2 fh}cCW_g%g>oC+b@Xd^p@00000NkvXXu0mjfYJ7cp diff --git a/share/sources/org.dryad/source.yaml b/share/sources/org.dryad/source.yaml deleted file mode 100644 index 8ebb6ed73..000000000 --- a/share/sources/org.dryad/source.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# DSpace -configs: -- base_url: http://api.datadryad.org/oai/request - disabled: false - earliest_date: 2001-01-01T00:00:00Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.dryad - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://www.datadryad.org/ -long_title: Dryad Data Repository -name: org.dryad -user: providers.org.dryad diff --git a/share/sources/org.elife/icon.ico b/share/sources/org.elife/icon.ico deleted file mode 100644 index d14c59986ec39cf9f1d63c2ffda9030ebf9136b7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 824 zcmV-81IPS{P)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00QtyL_t(2kv+~^Xp?6g$MN6ue@{u?oEo#nri~g~(FRRBYQ?RLN<GXum3cr} zCo-5`lwAxjHc>VY*b5mrcEHVD2%ETRb<lxT*McC5Q^Z=qL#c;q--aGcx}<4yXx{gI zo?UGEz5ZN$+3_n!ZVX;tRKBpj?&po&gR(-_4DVQ7xh<?dUj2FL*-NGn08z5Fs<P6l zFXgc*Ly^?JksF__S0G^7VqL_wt=c{CW}9hDFkJxvfR9ftxb^L(_6wBBrx-{f&qV6( z&Kf^ty7+$N#pZ+NljLhH3YNch`E%X#=U-0=U;s2(|D*Txq2-~zt<{q!yRw~qETG|4 zJe3RXSk=?r8wzOT*1Q5jCgty;9>siCpGpqx`cbO#{BJF85C7wDUFKT*)}6bCQy~ib zqFh)2yxJSbdiG;pxwa%CEE|Bxlt@<N{U;{B`yyisV>ly8j2h;>J!2IBNK6`;9K77$ zS@Zi>d@Rco1=reGt?}#8C;9m1GsDU0riQAL$*D2g>~jXV-~e>RcjD?^b8*Q3NsH^f z1@q4xPGpMvzQ2tMXW!$O@mRQEa=Jb6aQo7>yqW#)*{g<5Uj2H`q21Nz4>xxuJU+*Y zrb-a&xH<ezQ!JIUXwy%7R@Qz{6I^U@6M!OV-H(1LRN(xmb?y1o*$3&i=DFbFQtzmD zmOH2`Qm%%|-Q~Mmc3rrCN+ykgNlbMzm*lED`yzX9+G=kKAM5M~v3Kjkb|y`5mVNp| z>ny$EqsBe?QWk(JIl3O5UK4eIi^N1x_d7uZneIJJ*2QL1$nArx42krH_=od@F`HYA zQ7Sj4iX3zm#QG;@;!z)j$mL5!j1~nWnA>399~^K0?c<gE@>UjrLQW$&up;09V5CdY zfCB;m%T5)^?{ovgn$C|LIKEKKsh!<ubBjnGO^?LADg<&fHUnUbNTu)ktsxRZ0AR!< 
z1jfJVXj!tw<V6By3mK1008~{n(nUa22lY!gjeh~a7Cep_-37S-0000<MNUMnLSTYj C0g*WX diff --git a/share/sources/org.elife/source.yaml b/share/sources/org.elife/source.yaml deleted file mode 100644 index f147e555d..000000000 --- a/share/sources/org.elife/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: http://elifesciences.org/ - disabled: false - earliest_date: null - harvester: org.elife - harvester_kwargs: {} - label: org.elife - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.elife - transformer_kwargs: {} -home_page: http://elifesciences.org/ -long_title: eLife Sciences -name: org.elife -user: providers.org.elife diff --git a/share/sources/org.elis/icon.ico b/share/sources/org.elis/icon.ico deleted file mode 100644 index 674a00b2c8c4f896639842cd723cafa3c4280fa8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1406 zcmeH{!BWC76h&`SsaS=!U@)&M#X*G`paU%78w9@s|4TnYKd9<b-8$5pGFj3+Grh^l z<h|TC$-G4Xk0Xx3wTW#5=mB^jOyQc?e3gbb0}T5EtnNlw-jA>%FSmWXlJ$6k4S5<* z@i^h0Z09pF$1{1KSr&M+d-9!4@snlvC7*eYua6wNVuzwAs-94v)g7oia5NoIPTu~> zE>xZ=r9>2mFs8wn6Y*8*`@S?7P@$ZKX?Ss-mLHQrBvF*MTGDRg%Fxo4ZjvO@i94*P mUrSHxOVL`F<tC7zoN~IqKX5t_A8xGumK=YyS*qkbuI3*LoHURC diff --git a/share/sources/org.elis/source.yaml b/share/sources/org.elis/source.yaml deleted file mode 100644 index 69c17dbbd..000000000 --- a/share/sources/org.elis/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://eprints.rclis.org/cgi/oai2 - disabled: false - earliest_date: 2014-10-02T11:56:21Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false} - label: org.elis - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: article - property_list: [] - type_map: {} -home_page: eprints.rclis.org -long_title: Eprints in Library and Information Science -name: org.elis -user: providers.org.elis diff --git a/share/sources/org.engrxiv/icon.ico b/share/sources/org.engrxiv/icon.ico deleted file mode 100644 index e073676b6912d750082c12231f591f1084473c16..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15086 zcmeI(1(a3Q8UWx?unWZwKn2AH16!~ayL*Y<-QC^YVt2=~#qL&AM8VdvW6Y=Sx4-|n z@4H+FCm6){mV2!;=iGC4{9SubUEOGP?dp2;sLS^*bysz(tLs%)SJ$P>@4q|L)eXt- zI^?_WTeq*PJ2H3Zp8Mn>b^Y??eYCFsx&8C^??Zu<Qs4ja>t7lEHo1QN_1AxCzy0>x zpUzkNy{X?#z?VO_zU8&kx8Hu7uD$l!w8IWNq|G+lEbX<|UTMI90qN(Te;#IHeM|rI z&p$ss`|Pvv-Mhhq2d91a-8Wr-{q-r+A^Ht7cqIL>_K7E+nC6&cj&$guho%cJyf7Vj z;DKp@1r|t`UV7;;s}}fw`st^%%{JSl_19lN{rKaL>AUZ~OY5z-UYdXY`P1EZ-`yhE z&8fM6_St9Ywbx!tLxv1VKm71R<^5M*eU(1?=%e(@FTcb)f4=<k%T!-qAM0Ct#~pX9 zeDkpP-g~Ddms~P^{q@)B(@#H*xeNThKk~>Uf%7xZJd+-M^wHG0bLVvX?YH|B_<Z=` zhZX)AhbNzW@=44Woqqr0J$j&3pFVwpZVN56P+Di5bz%+O&N$<YwAyN`r7yntqB8X8 zqmK^yH8eB?&HDE3n~po~xOBn^C!{_0*dxs{%PeWb4L6MS$kpz<?>-Fv=mT%>ciL&E zSi8)B@4fe?-o1OLS6_WK)&u|BZ@-<ET573~&8%V-I6T~P%Pnd8>8DSJAAWdx`Q?|> zlTSVwvT(%}S46*q4?Z}}Ip>@~$1-r2U3Ljvzxn2yG-%MEfS1gGd5bN!NXH&~Z19nM zti1Bd0c$n?%DS`8I%}G4y6Mtsr=3=&PB+|eLz;5RDJ%Nn!$lWelqQ>OvNYFRbA_&y z&fkCkeOi0%wbPnwt{JjW-WQ(v4SwUuL20}0x@%f+!39Hi-+c4Uw8Roir2hT;2k!5` z|9;4$Il#ZlDyszicwODldyz#JNfS>zaf~b9ZoKivfd9GYo{LvtTw{$jf-crM=bUp& z*O00C=9@2Vyz$2A{`>Dwc;~wBzWdVi&p#h>cgP`!3>SX#jRzZTv{9OS?zw{w%PzZY zy87y?Lte-U_~~jb{2zSqL7I2odDA45Op;DM`Q!>r1%B&m?9@y%&6MuB=bm)XK?emt z;iS<CeDB$_XWDk#ZNn~Xx7~K3d*C47;P26+N9bS-%6|&|eW!QbyLV3mvdx6c>8GC_ zaJ~EPyP;d)$KzW0x%lFXW89v5?iup`@y8#>^tJFGdE}8{o7k6!9(pLoJpJ_3(U*<h za?35#a?34O(NFrpo~*m>x?uy@#TjOpAq^Zjurg+Y4K_&dqx<KbcV6(PtpC6R55yX; 
zyz)wnr~Byl;)^d<<ciH)X{D7?zkdBfH_2kLx8#4Ssiq44vvXvOO(2hcvomN~8XHE2 z_#QgTe&fd-cia(O$iMm63-*O=^t<pJT=UE`PeMm)ueH`%p=<C)v(;B$J!A{Mn{K*k z=m|MsGw!<Uu2{owG{jHzpJIwB(hD!VknX(m&NR(5(}e64c)?C)*jO?NFED@$&9~lq z>yZCqr*6IV)^zT<=O%O|Ti`q8lv5JFpqb<0haV2vGoBy4?6S+!%{Sj1edwDxykjSh zIO2%l`6ZWJQn4BQFC5-^=beC?4<~=dp#!>uQ)5^6+i$=0`s=U9@AAiYFxTo?`R?vp ztsEOyF7jAAzN}m3H@>z{_1tV0Sp>`Q;3?}>!`7PTG7oviA9QMMD|q4ydFG?F^1ik6 z{@J)Px3xNT>J<J<45F29e>T4QTj@^+7hQDGh$~t%PxYKlJzJB^h{tkPlSUhDw9wU? zZn`OKQ&Z!sznjZNe&{{fDL7U4YwKtJg^ieCf(atdXy3klnsm}h!;V$^?B=Xna47f2 z^rSiSG}WifPZ!x{v}c#tk==INEloJ#gb5xiuDD`ipG$x6j4YU%g1LMzu=vfV@onRe zKYrML_FK$AhQ*-p>ej7W*nB?U*ij8Xnhza1G~i^j!6?S$i}+fx6Bzjvan~4Qj1l;W zPmHU^ztH5bxLa4_W5nX%nR(`!)41b~8}#Pe;6fMVWMUyPCV$1B((Q#8UO0SdvFo+I zWMtw|;1{2fZSc6nBVrA3k2Tg<Y3#Abj=PwXEd{GMfX~-oY%kXt5ns_-Jo!bjR=alX zBE~`QE?v5Wz4k0`JN494<DK8~p6jl=E^I8>5i5>2-gs%uF~^L)V!YA{eai2VaqpjV z6tpk#op=T;U>#?iane;+T@}m9X_sDl>A+Kdvd=#IM83>VpMCb(5xW^9@4x1nYr^Ii zy%<?|kYzc!b>+X>=bwL`#OLID^wCF;SXcf-uI2pVF!1A-`0|M-o(SD`mouY9hYlU0 zpZ;1~!e3}&UUGoOV)-}Tc*Cn`<eU5#`JQ^}snhYtA0M%&{6H>-&hTby#iRHjcfuF= z!F`!!mPwC2_E?+R*Fp<5DtQzb#K-Jo!L8WyA-V1%_T~%dv3#M(x4YPytdW0uKp(^& zat8UGx#^PHR$JMn0zcc%r*_VDnwMUBDd-OFf(M@^54-KQ+d{r)oN>m02i?&hALI+_ z68OUo;g@;k(D3#S?sBBIgdc3`BIvX8&O684h4uwMFv5?$kv9#<x$yGKFJIALdO*J6 zD-Ty=0xw+j!;_N7i{IL^iR(Ld>=?3NYC_fX<Hzi?&mJ-?7TSOR{Ufg>%Lg2AK)^+R zi+sbG?C^i+jV|OLj$ki5tzKh9p2Y)n);j78@@zcON`C8`Sd9$9b)tzTihS|$#~)8u zUU_BkMIZ8g=9y=P{)&gxOS^XM8gr{<U4HrHmAOX*<}wzo&_j$&f5kH5WHmqa8#*i3 z5J$jE9uH1(rVhZ@@@IHNf9bV&WAn{7Pj9{TR@i{2o_Z>9H#Zqnn|bfO_hMaZk4P(W zZVt4TON;ICTiigl)Zf^AwGgoLTNhk#LHGprmOatvFT1K<NcQE3@^iY17U~nmi$Bx{ zz}{By7d>U`*%);*YfU@tw6PZ7#y9uQbys(`#dk4XiJQ&AUb6Y{hl6?wx`_A49sb~* zch$5D0<_^Dd5X2s*P85;x<-M8UnN&!RSg~4P&GGk9(^`O?N>dA{V|uih`4oRw!#ZO zO)SbMptTyWyjT79m}8Df1F{X(z|O9yF_S%VE?-6iH81i)?vFa^sL<<?N%sIK|0y_t zP3?-zlL<DQ&9t{s;E<Dvm%xbbYLj$^&w`7(IGcn&TA4e3jO0HBM*7G0i^aewHe*-l zg}NU3z$4#QU3JyS%RG}Mc8gy9m2elbSd;xIG9rHF%fQNK@Co#TZf>&4CXoyK=2HJG z`*p^T3it~i)pxdDEu!QFVrzbl4)ACE@NBcq7C9NXwZ9AO1rBmUAKCb_m*JaB-B?}; z_5y!3{w;h~)Nhk3!xr<nyRtqG%>H>`mhJfMU4}QV>gN$$zhfTXrdNioqpNz}40pWq zzC62U+=l5_J*GL&S&#l{qmTlbN4aHVR&yVPApKu1T}$8c+c~W+`RbFe$~mokmh11< zGQ10p;;SLA->fU!sD>N^Hsn3thMeOxWdA<QxEzBw<oGJy>zi|(h8*_})2ETO@~h@G zIgPzobz6G@V&-fsD}%+Ya$U7|F}J)-uG~~pD-++^$5h)EGug)$?`rZnbs9OTni@Z8 zFIG)ltjqTsqsgzt$YOW(4ta^1VcAo)_gP(w@?Op<mQz1G?6AY49<kL{TLnz!l&8sU z<OJeb@u?hNPHf*Dp5@$tcu|a^9;lY41_RDg+cCcyN5M^<MlD|rL_91$ki(-h95s8y z;(2&G524nhR)Y3$vtC(W|LwQmK5`uVfS>(%=U?nO881f`duVE;;1QFc8@$E0>R{FD z<15}cAK+XK{0HPbM7`2Jy1JhRe)L#jg%u(PmNQwOOvq``T22f`aT~s(kGY&HG1mDE zF{wNszvPYbV|~1bhx0*d?eJGKR>PFj;R)F7k&{uiNU^RugEiH8@I(#Yc_1{z1GNEX zf1G!dZ=7}3S&_SXXIzeV;!KTv*1X>1i#;$hi9Te=JmeAW$shSdTkES4s6S|Qr_k4L zJV(Q<w|-V)cYGzorRj&prH%)F&-xgTrlolY4)jKc>UqkT(zm&H^n*V+Z%(r>v2i{F zRL<NqXUy>Wz<cq<7mwO_?_7_dd)4-*v2pM;4o?eh8|&AccXUS1K~L;+sn@7Mp|RH3 z{N-J_H)oyNK5(Q*_6c0%mQ9qG%KJ;bqIO(c{6;@H7W*ToWqX{*Qe$D?*bn=u>bq<> z85<E?Yhh+{OC3`FO1CGUeDXNcpjKu7jcv1E<h>egOZV1m*R1}{kEo40tBBv~M_^#9 z*fsLNAF@5<LS0hby6lZt&(+drbE>_vHRuXgd80i5wFN$dKLLZ<gZesMcV5+g41Zbb z!7W+4@b9FPP70pGrL>{BzRiEA&7cVw?Gcd)bp<tLHjj@~Ti_?u-dft=hQHRTmZ=9H zd@%e1+p12%cX>x2_y+pG2A6s_-=gN!(#GejN2`OB23PAj>qrmO=<u3e@C$4mztK6@ z?bNRpTWqoLUuAqt?tBA2_v+Ov&e4i*_!#f_KKmnTl42HhGyJf}OJDe8Fp^jGV*al1 zv^g;7>%w>VIa9}n@%!eZhv-V?@W>flbh0Pq0z3aeC(%KTj~!_#3}7WY>V|Y%oHHQD z0Q|l<!aMS&p^tv_6FtNl_C(O3?Da**R{Sxyc!G^Hr&^jggADMuF1mszbV3ZuW{5k) z4Q<ufTehBz7az>$i!0Q6@rpkpkK&u=`skLx$JbedY@sdRjt|ZrxT}-0kFC-9=E6aa z)Rx5n)^rY?&Bp^VV~O>fIp3h%uT@v(?VxP?2Ibfy#%CEHlx@|Z3|~v;n<B5@;w!IF z8RNP7%5YS^H8uCB&c_Rn{%SbNy6P0l5?%6DsZ-?tHLGp8f6vCQY#Vy!_-vSOnHN1> 
z)&2AD`rNnPJ-?4D`?&gSH|w*{uFo=5pYy-^94FW3Gn@6k=~w#8hs9dr4re;;lgL%r zWqTyfnmc#I_RvrI!C$e<{2v>^ZrS70#~kt(u?t(tU&;~W;B35nN?+sTnruJc#<uHg z4tbM2#ytF`u`Yd$_bit*hW)3Pe7T&)vzXi({D)_8x^L{Q9LoEQ|8Qd%IVB$?K9ZYO zH+IUL!)xZ|N5v`fH~X1<Np=1-ys_XDdyB=KVK)c6<DGc1<o3fGkC)<U=d8hQtT@gy zdW(7VX{@1#7}wbp{PyhsQsk&&5O~;s#<Rwn@p3pm63_SvzT8=QK9z5^{-2!{F^3#P zY$qmh?nZqB4(bEo;V+wN<Wg*44!&D#<!p}s7ipvo&hP$kWgZXC=c)(i`oQ2U(}Q#D g^M^jakL#50jb2%QJ7u0W`t84CYAr(Zf2%|P8@H=#H~;_u diff --git a/share/sources/org.engrxiv/source.yaml b/share/sources/org.engrxiv/source.yaml deleted file mode 100644 index 0545411e1..000000000 --- a/share/sources/org.engrxiv/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: http://engrxiv.org - disabled: true - earliest_date: null - harvester: null - label: org.engrxiv - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.engrxiv - transformer_kwargs: {} -home_page: http://engrxiv.org -long_title: engrXiv -name: org.engrxiv -user: providers.org.engrxiv -canonical: true diff --git a/share/sources/org.erudit/icon.ico b/share/sources/org.erudit/icon.ico deleted file mode 100644 index e5e6f1cd861d0a7be8cc31b43f34cd8c5c140bb8..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 506 zcmV<W0R{evP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00FW|L_t(2kz@S-|33q6@b&B06)RRSFfznM$FQ@rGcYhP{KpF#o0=FH7|boq zfBpIe5y7tE-@kt+PMpZkFJNF`U}a_9zyAPS3cH3sfBxv{=`k=c2n!3(ojVU%iV>F| z${QM3nV8}uqf}K@kj1fR_;};St0RXP{{548aAH$aLpKT8y6>-Foyf^r%EL8>fnhEK z!(uk(1L2{c@7=@hq#v(e9|-n;a`?EAf{cW-3nL5b>!U|rU%4(}YUsRW2Zxv#x(!$R zdgd@N>~wbc@#+;s<nPa)N8)4WFfg3V&4MdqfoS{p`}d=@YgkwqMJ+5}pE>>S=Pw3^ z|4b~bg8KUGoGTx#Tc=;aq1@bvGGAJ=QC{eJ)M@6SJf{=ou-fq{{Mfq{X6nT5gc z(p7#<Ed~Y#s0~bPY#hSEKVQF53=iX$k^T4gFGAD*|IF+h9HLM^Ks7LPa!R{-yuN&$ zT}0HVs|Qc&LU{V)^=m7{h37Lc?DlZIF?I6SCy%iv0+e`ovVGfn4Yk<}46_&*?yp#m wRRasMZPFf|BBrJfR<3w``V_ak0#+9R02qC6NIUz5S^xk507*qoM6N<$g2f~3PXGV_ diff --git a/share/sources/org.erudit/source.yaml b/share/sources/org.erudit/source.yaml deleted file mode 100644 index dde5e10c9..000000000 --- a/share/sources/org.erudit/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://oai.erudit.org/oai/request - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.erudit - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://erudit.org -long_title: "\xC9rudit" -name: org.erudit -user: providers.org.erudit diff --git a/share/sources/org.mblwhoilibrary/icon.ico b/share/sources/org.mblwhoilibrary/icon.ico deleted file mode 100644 index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 453 zcmV;$0XqJPP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00DhTL_t(2k(H9YN&`U@g+G@xx@Kl)3_{SL#zt)<BwIuRieRne1q8`U1QaVP z@eg0X2MB7T2p0ZCvCu*-1dFV3QPN02pTJ_)%^F4UR(H4`bI&<59r^rpS9dSa)6*v$ z320?Ij?hv7aVY>mDaSO{zi6dw%h2Ymm2H~l`v+eYrGc2a`>FO&y{x=78i{0bY$Ol7 z_;<i;u{1R~5z;`+D9PsBY|h<SEf4k&FD%TP+1>ra#%j5GT3A|M36@A<X=NMTjw6Va zQqI)m#C&P-=K5l1r($Tp0z#KRbr<JmfW69gII==o|CWhTcFrvTp7#nUEv>c$P0CT1 zt+iSPlH%X>L}?{Ov)KUB=^%P5q$AJNqP=><tJQLDAw84=WW3XA)7@Gk1+ccZl}IMn zH#ZFp#0>4%{hQlI;QVBFZ{JN1^n1@Q)zgF2K<4E5OlfIopyTl93h?WG^gC!N;!<QY vqsC}R2OEI#%xF|em`NKQq!e}eHw}COb!=POgYK3u00000NkvXXu0mjfdeX@p diff --git a/share/sources/org.mblwhoilibrary/source.yaml 
b/share/sources/org.mblwhoilibrary/source.yaml deleted file mode 100644 index 5e57bd721..000000000 --- a/share/sources/org.mblwhoilibrary/source.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# DSpace -configs: -- base_url: http://darchive.mblwhoilibrary.org/oai/request - disabled: false - earliest_date: 2005-07-15T16:10:02Z - harvester: oai - harvester_kwargs: {metadata_prefix: mods} - label: org.mblwhoilibrary.mods - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: mods - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -- base_url: http://darchive.mblwhoilibrary.org/oai/request - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.mblwhoilibrary - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://darchive.mblwhoilibrary.org -long_title: WHOAS at MBLWHOI Library -name: org.mblwhoilibrary -user: providers.org.mblwhoilibrary diff --git a/share/sources/org.mla/icon.ico b/share/sources/org.mla/icon.ico deleted file mode 100644 index 29a4b923b132b1a83c7946adbad662aa2d60e285..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 706 zcmV;z0zLhSP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00MeRL_t(2k(G?yPf~Flhd;mH;pISz2CPV0P)>Utmtk*C+-Om`Rx_meA5{N_ zt-EgB3agt|TV_s>v6gP(+FaeJ*^46*Y8FN*$HU8kbAI10db#h}bN6fyNUs*CLNy|d zR8b{S7eXhENc^#0t&t$T(Ii1aH#ka_cE~UTI)O0&j4_x1z|eI7R#|Q~8j{HbfN{=} zPH!l`q#4#?v8Y>`rj4zR?k@W_2R8%}aY+L3c6k{95y`S_0${Vq6Nz;E&OKdJ_Sjk? z!$X6EF^O{qkPwQZjE;^L3I!pA$Kx>pK)u$wy=^8n2>?LZ!lHvQ03-zCe0Fv=l}crE z9}jfz4+H{8I=$4`s1ZGUFbTl7W2d*<zw-VA2{M{G3A%pu+V4>;I@G9Z8~`LifSt?$ zx(*%UoF^Yo0N|4JxlqVtGRJ~F08~{qTPz#^I5D81Rsozo8zCkV6Hn^3nlOy{`T2#V z#c((bz-F@n-~b?Gi$;>Y381&9=V)hOwX~L)PI8kJzdIHRg--UK0?;&cL4pHl5b0J{ z;}QU}wG3Q}GRZnMIi1VqbJ=`<UwC(i4*-M&!8rgX3}JwA0^o4i2O<#*(c+th8#hN* zvhS`8#~kgg0NR~xOacdBHcL%i%dh1C%)EMGb;^Bb`pcW^@%ZiN<%@pbuDQ%}0N>ZY ztbSTS(&?oirPql${N?4d%-mGE)93HnyYGH{tn{t;FI_ly-UI;7w!<fmr^nNJ3o9GC z=g2|7$2Fqea=Bc=V9;u{JQ{!McDoftal73}I=yABU~DN=uafqYsM@G(kf9Q(Rpl#@ oN_v?{rRFA)N`}IcPA~q4U&Xmi#W-1A!vFvP07*qoM6N<$f~}@HaR2}S diff --git a/share/sources/org.mla/source.yaml b/share/sources/org.mla/source.yaml deleted file mode 100644 index 2b139ba31..000000000 --- a/share/sources/org.mla/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: https://mla.hcommons.org/deposits/oai/ - disabled: false - earliest_date: 2015-05-04T16:57:02Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.mla - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: https://commons.mla.org -long_title: MLA Commons -name: org.mla -user: providers.org.mla diff --git a/share/sources/org.mpra/icon.ico b/share/sources/org.mpra/icon.ico deleted file mode 100644 index 555f387b728b49c935795b8c8208caa7f46a2b4d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 894 zcmc(eAr6B;5Jjiy1yD_8Wj!arRdE6iP)@)JxB#vWSB0y?)uHNeb-1c^xb~3=lg(1m zG-<kIm*LN!|7ReGthk~`IJa_Ki)=(>&jykmd&1{+Aei>No<Ck~QH;)gZZdS;t*(7O z<#mz-B3)HASq2#<7CxWyQyd2(C>V`Y-!xWoPE%5-xnWT7(f25D>Lb>0G^P+KnD@55 zmZgFS0&1)$DVSPlom}!<H3_U4<ueEtFwF=YGE794<B-J>+#duGUzGjkRbNv<lYsa_ kUg8W0Xq}q~fxa#up$AOi2j2(|I&Yiz%l{kC|G~HS7Y0_TyZ`_I diff --git 
a/share/sources/org.mpra/source.yaml b/share/sources/org.mpra/source.yaml deleted file mode 100644 index a1cc692cf..000000000 --- a/share/sources/org.mpra/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: https://mpra.ub.uni-muenchen.de/cgi/oai2 - disabled: false - earliest_date: 2013-02-11T09:33:04Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.mpra - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://mpra.ub.uni-muenchen.de -long_title: Munich Personal RePEc Archive -name: org.mpra -user: providers.org.mpra diff --git a/share/sources/org.ncar/icon.ico b/share/sources/org.ncar/icon.ico deleted file mode 100644 index eaacae286166f1a3cd05bf7d6b2198592b9a82f6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 690 zcmV;j0!{siP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0007MNkl<ZIE}@UO-NL66vco4H{*<NMn}hKa0WG}OtZ;QTNTy3BocyFQ7x)R zDnihrb=zzwQLR-ldYd*;L8KvpPY72A;zy!b$xMykA2T{%`g|?27(;hKXM68G_ug|3 z{BMZ;3W{b1wi2<sR);GAbm^*y?A2$@8ps8z0T(a`c-F(ihjN_mn6tFTuW05Xa1K}m z#&uPscM$;o<lOWfBNdnP%IoR@mt?hPgp^|#ndB9VO%BDs4%J!nJTciDXa{Bhq%49} zxwv9M_8*O|hev8>UO#NJ*c~Y(X*=@n(f#E7mx|m2fqB^*2m|pwHBdD3aJ0Yct^a&$ zM`|ftk?Sdo02}Gm*(3HsUvzfh*$rQ)>8h@Zsa+Z<nt2SU0)j>+;hG)jZt;bhZ||JB zWiRvvX9v1Fj7%~gxUOjC7%-u$V&b<3msiF<oRw^jWNG8QPGA8rbX6?xEJ*l5%^iSu zZEEm@kyt%bRD0<?@P}a7ef)woIX8W9ap+CRBD-SlV8h3(@5IVzU!#%Q$a9w0eU>Xv ze*%WGMeGN<<l>4KsfBMrTS4&>5L7htrlOhGfRlDlc{IHou9Urjmq6E!)xQXIRb+Hk zECBw(U_-C7Bsdc3eeyIBoeXWpmrjLy9=ALERg><T#vY&)Sk+Zw09djd2yA>GZ?ZZH z5*v}xYFmLXi~!Tz7M2|Q+UELlkz|w8nb>^5mhT;(dG%oV?;x!>OM-nsS+1w74X9t6 z8f+1g)o|Bb?A$r|7Vra@+b;)Jz@n=nt!QTDRw~{qS!{-sV}GuzVnWeO8?ddbVvB!; YpR&>KfFhQ)fB*mh07*qoM6N<$f;(F~LjV8( diff --git a/share/sources/org.ncar/source.yaml b/share/sources/org.ncar/source.yaml deleted file mode 100644 index 50af147b6..000000000 --- a/share/sources/org.ncar/source.yaml +++ /dev/null @@ -1,18 +0,0 @@ -configs: -- base_url: https://www.earthsystemgrid.org/oai/repository - disabled: false - earliest_date: null - harvester: org.ncar - harvester_kwargs: {} - label: org.ncar - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.ncar - transformer_kwargs: - namespaces: - 'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/': null - 'http://www.openarchives.org/OAI/2.0/': null -home_page: http://www.earthsystemgrid.org/ -long_title: Earth System Grid at NCAR -name: org.ncar -user: providers.org.ncar diff --git a/share/sources/org.neurovault/icon.ico b/share/sources/org.neurovault/icon.ico deleted file mode 100644 index a88898353320475d4ae0bc0c16bf56794be20ede..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 352 zcmV-m0iXVfP)<h;3K|Lk000e1NJLTq000mG000mO1ONa4wfZ;e00002b3#c}2nbc| zMg#x=00Lr5M??Sss*NKu0003CNkl<ZD3PVm!AcuZ0EOXiW^Rm$21QUzAr*_DAg&4p z-@#|_!F&u~!i}45+Eq6~u_js_V@931Gvh)R(XIdToS(BenR!-pQO`%&TzPC_+ZQ9( z;zuEK<fq?q>!Pky@m|>QWJhWw6Ag{6xrg*k*z?b+)|mx=%zQP_eO(Af4y?Pg?@8j9 z?=Do9><S(-joh7%);G})YaVTxST%62N-%P)@>gHfC=5+?3>8k~84g`puxhO5SuHAX zEp;IeXf3-kv*}VT=;?#pfPnvI7Tqg#R9aJw#InZo>kpNIv7Q_gL7`N;G1V~+5{Vn7 yO6gY5rNp9A&?-m-eNieC5wu#FS`am6KFtgEMPE}4_~v2&0000<MNUMnLSTZcW{*7p diff --git a/share/sources/org.neurovault/source.yaml b/share/sources/org.neurovault/source.yaml deleted file mode 100644 index e3fec5bfb..000000000 --- a/share/sources/org.neurovault/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: 
http://www.neurovault.org/api/collections/?format=json - disabled: false - earliest_date: null - harvester: org.neurovault - harvester_kwargs: {} - label: org.neurovault - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.neurovault - transformer_kwargs: {} -home_page: http://www.neurovault.org/ -long_title: NeuroVault.org -name: org.neurovault -user: providers.org.neurovault diff --git a/share/sources/org.newprairiepress/icon.ico b/share/sources/org.newprairiepress/icon.ico deleted file mode 100644 index 9ce02515f8cc48d423d0461028e89f194dcfaec3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 386 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`EX7WqAsj$Z!;#X#z`$tf>Eak- z(VP6||NsB-Y+MY&s<YVJ8Ffy~@h*=yc;bJrA^2l!^mz%E3QI{D-l>v8HolT?zKBXn zc=8?rskn0f{>cpwFNn6e=m)nj&aeNMyyMRs<HHZD-%CmWf!yBz2S0qc%g@7X^*2Ov zi|P(Zp5R}SdUvXh9r*O~xqVyL+3gb3=GC2huvquh0`0Sh6IQEmTvt#(a#G>&`L>5$ z?C0+pYj|)O8ob&j|NHmH<MM5}l5RY=gzji?$?yGj;KPHT)fV;tf9~cHe`nKbUGFr{ zo>{s6=Dz*Ag7)yVfA%e4TePS}pm|S=05cFYFJ5TW+_*}FSzg@Q#VGsh*ZqG|fB$pS z-}C4Dsn7cLK>zy-rGbLse)4yFBOTk{a~^(}`<+L={!ikCsgI6Ia&K^GVVtyBP?o1A dXxR-02G-EE+~=OUp8*CcgQu&X%Q~loCICr8r%?a^ diff --git a/share/sources/org.newprairiepress/source.yaml b/share/sources/org.newprairiepress/source.yaml deleted file mode 100644 index 4f8c559a8..000000000 --- a/share/sources/org.newprairiepress/source.yaml +++ /dev/null @@ -1,20 +0,0 @@ -# Digital Commons/Bepress -configs: -- base_url: http://newprairiepress.org/do/oai/ - disabled: false - earliest_date: 2000-01-19T00:00:00Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.newprairiepress - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://newprairiepress.org -long_title: New Prairie Press at Kansas State University -name: org.newprairiepress -user: providers.org.newprairiepress diff --git a/share/sources/org.philpapers/icon.ico b/share/sources/org.philpapers/icon.ico deleted file mode 100644 index 3c4620748c6489333d56bfefe06a1e46dda5a032..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1406 zcmeH{-%FEG7{|Y>EXyo4w^?p`r#Ux!O*d03%{0@Qw&~tYOKbVtR4CUEe~olUq6BRi zI4MgLnl6!yvZjqhLFk8hQ5khn*+n3D*`HwV`>K&w(M4DF96rzY`97Qjhr`1G1nlzj z1Dm~3<pqcUs94w*<SbmD55bcBtXW^C^hLE=j6$^pR!snEV#KIP2!n;inG%SL$tno* z{4tU)gquSv#3+j=Lxj*=3KN+kOlFBenS;@*Q^Bs+Kwd1tltG3`LkM1)<e*DckW`C7 zmnosHjKp(uG@8tMw3v0U6)~7DiG`;k1qrq|80!pR8gme8m*AZx4S5YFys@NX_8f;6 zv*$Aq+a86-bp;6PmZGEn3>3GeQ1nRQwiZFrD}$m>hR)_%I9QZU3d(*8mbMynUv7lj z6@jS1aP+h`Lvud@4ITCP)LsdlON*r8cwBN=k@_eREjMf^9Vo<SM=fT$nou=Zjwd(U zF?6dFMvo50XF7bma}}m>J#u-n(dBaBo3j-i!**PmXu*Y7XJLD7#e%yJGo!;;7`qGm zbQ`{p_v6FlD6YT1hKDa6V18-@p0`io`r^dk+yI8Y-ox0O8}s?Wf1O_@;(z%Mz5w}6 z><%{ZJGQOY@tPf-!mDyqJg)}LlHcXCca^7?JrkqK*eOXP^k#Wfpm!o+_6bi{<nPeZ ze(Oq39a(#jiBk|#c3>;Lg(9RtyoS*v5K^AJk@is&axw@^Y#1R$N!w{(86mkMFXR4z z;?l$6d6E8t6NS<ZM~~~X%&F<e$f1ZmIiVq${2l%zEiM8a6tx<`@?RMg1gyY~2iIRY G!|4xkyb~M% diff --git a/share/sources/org.philpapers/source.yaml b/share/sources/org.philpapers/source.yaml deleted file mode 100644 index e71422b10..000000000 --- a/share/sources/org.philpapers/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: https://philpapers.org/oai.pl - disabled: false - earliest_date: null # earliestDatestamp is earliest published - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false} - label: org.philpapers - 
rate_limit_allowance: 1 - rate_limit_period: 5 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: publication - property_list: [] - type_map: {'info:eu-repo/semantics/article': article, 'info:eu-repo/semantics/book': book} -home_page: http://philpapers.org -long_title: PhilPapers -name: org.philpapers -user: providers.org.philpapers diff --git a/share/sources/org.plos/icon.ico b/share/sources/org.plos/icon.ico deleted file mode 100644 index f9bdb669768801755f837e77ac8f103863c2e657..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 379 zcmV->0fhdEP)<h;3K|Lk000e1NJLTq000mG000jN1^@s6?Q>5r00004b3#c}2nYxW zd<bNS0003pNkl<ZD3OiPy-QqC5C!m`%f`Y&ErN@v2(Bc5fuMybsI4HP1Qg4NWr&bQ zSPLzz)K;u)>};$o)H3SESl9?lgcK=+6c&P__)+W#&&Lz?z~#)`Irq-FbEyp*XwNF- zQu!PvGjz9jjCdt<nYhPg#C<4XAyF^lITpyLF6crd{^2P)pWqH<^kW@=UJuk|xcHB+ zm_ieFQIDZ$@F^+Ip*K3g0cNp-4;YSTeL)5LSju|)x8O?fBE}N+dF(}ek#+#9!4JVj zjAAqLk0@Xb4Y4<p<>YVhc51(jEga)0sZIaFDb7`={vB=TKr4E1h2_w8CJ%PwI&t$) z3`BmWI{6ZQr!8lFBm6>3#ChC=v+;NInfQ!pOhi7TI)^t%&)^k@7)&`Vpt;uW!}w8) Z@C5}sIM**lBk%wK002ovPDHLkV1kUfqQd|H diff --git a/share/sources/org.plos/source.yaml b/share/sources/org.plos/source.yaml deleted file mode 100644 index 111652933..000000000 --- a/share/sources/org.plos/source.yaml +++ /dev/null @@ -1,15 +0,0 @@ -configs: -- base_url: http://api.plos.org/search - disabled: false - earliest_date: null - harvester: org.plos - harvester_kwargs: {} - label: org.plos - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: org.plos - transformer_kwargs: {} -home_page: https://plos.org/ -long_title: Public Library of Science -name: org.plos -user: providers.org.plos diff --git a/share/sources/org.preprints/icon.ico b/share/sources/org.preprints/icon.ico deleted file mode 100644 index 4e18a07d05adb52d16381cd2120fa8de67ea4f76..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 200438 zcmeF42Y_8=nfISeO;6}mL=1v-2py3T6&4ZOZno&>%BP?CT-SUn7F<`|jCDm_RCX;< zF%nWIh1BUY_s*S?LMSQ$G=vb+d#3k!zu*5o_f7(v+%s@9GD!~l=H5By+*983{NJZP zkJ&K(x2nnb_k4S4Y_^?lW*0M7F}4f~w)6F{|2gL@8qpj?G!W51L<12GL^Kf5Ktux( z4Ma2$(Lh845e-B%5Ya$H0}%~GG!W51L<12GL^Kf5Ktux(4Ma2$(Lh845e-B%5Ya$H z0}%~GG;s83z>3@mKPJ0eD|9(#E{m&VJ1b%o<`B3uf{|}-ZEi3MatfX0taX;F!M^lk zn;U5x-Edpy@;FxK%kuBh+%T^5%0Zk^@)(_yYq}9^^4HNd%@rg^VHSU9xk5%1Tf{US zeY%LwnVi!i%X5=0*TpP{Y0hR03x=mvVI?;?a`*%@E_)K#v7&W(_E2#?f9sfm46sm` z<tABntiW<KrcD&SmW&BT*(h@lfr-hvlY;xuy=3$9M<)mSorZ9ud+oo^=w0##n2=~3 z{W&mq2Q>F_#`{N_`yro2Bm7z)W3AUHV%<-T)Hd)SKBF<gS$?|VHrjAM9G}S%<8x{- zM!pJL0YBkD{v#?C3>ZAb-0<Yc<5M^t1~Furi-8mVibgaCgG~bmWu2sa74V8AU(tG< z8m#Gso;D?KCFxZ(d)Q&ctoIu08Jm&SByas5KJdI7!?v-)7{3)?N`2})`uouHdA@bX zal%cO6&6PL7;L&Y(a+ychsZu)L~^~kOY&@E!Ej_9c*?mW&S$Nk0-s>2!~^o(TI8{5 z$<Z(8q~x7fewy2be6$0;v>v&rKp2zUl)_pH3;x<{A12w#+(hKAN$5Ra;&*f2WNyak z=I%!ho0#i=e<+sO5b=*_4hD|~4x!aU={EKKp#G7&mhY1ptogZ@7;=}!s$K6$JLYb` z&K}z|(Tet7Vzb-@_G@>BJp|vF<bN-HeJn#d23U}8A(^QkLwS4;hA=5T$=KH~UpX)v z(MB`}gGd80@FBe!K4KG~&#$;~R_IP=oNM24Z?<xGuT{Cvp^Fq+qUYCE-&k%9d)L{@ zox5$-_ExKQd(w`CYqc8J%xL5syE%3{W3yGFl~lM5R<~z~)$W^Zb<IDvs@~5di+#u* zb=TPa?riR7EV|D)o9d*e9BL;(|ByYcY*a)WgGd|E+GF-J*6tbZJS%gzTXo0(vv|vI zEz!2d>RNYOLtC#^b?vdLZbsj3XnHp^t+5B(?BO?itfFsw+7WmAEH$co_L_!2r_RfW zgP$1qiGv><7wg$&@s1r9Z{KS1)~!~%=Mk$v@FT15nrsd3HgNVP*^7B~vR}b#BKkOy z=fK;O+@w5<Vf&vv86QPqPC-ick?fI@#o6`{x&iWg-lQVSE<(pO`83?+C%xa<NXW?R zydDqB-1X4<WUJ`@kF;xl7;pGX>@Ra$xYwOvXurj~8J(M~rmxNt?svA*ecwvmS+>ZX zg3WETw*drtzW8A<4x=AQ$4L#%*?rDNOvcX==swsU))nPhvGk=#ejD=GlYb(xbNcrU zok(WS)u*uGF|Zq3(5~2MFd+R^^15Vr*?5GBeETjmG`Ht2E8hDJtJ+;;v3+YTzHh5l 
zfeov+yNK%{FCrsG_&^67bp8C8#+BqMurcpibPw<$jP%>K#7l0(XM?|HhHTYu%l3BY zk9fKY{5_%TZy0vY$IhyFsKD2Yto|QQoo`Qe{Q(;jbxM3bZhHGwn0%$aMe@<tbU({E zpCcE3L6Rm$^Bl)~{8iZehVGB>kruzIU6dTN_!L&eOT4W)`8nr0x-eg#Z;#2pNsXg9 z)I7Sc_-Z?CAoLaXe(=$$&em|nz4(|9`PQGqgBe#HMC`Gl^dI71SNx*tn$&iv9x3^Z z9gC9qP~6Kg?;i}C-_ZRLKGNbtd?Up>l6Vj{q_fXv%)-xDgg?^P$m8$F-jQpM;HOv2 z%L7#UJT-LwA;NUWuoK;|C|JuC2Q6MwPwYumQ!z48cz?k#{HR8?_>l5MN~onAYhBDW z@WD9lJ;FzFt%&sVgMP(V;6*Z&{5O)Tz=eF0^JLG`!1g-_|9cU>#i9ay`31qYdDl^K zig=kBSC6#srK9};?IRY-+!W>L%AY<W8H+l?R_y;z@Dc7m9|%5FtD&L2hrFrZgS(|k ze5lu9#6Keb@d`XZ|3~%j>$2#Ah5c8<+ke+Cqkf#rEwDoIf^hrRdfCH#Zf8*2!Tt9s z!WFc9mBn``)?IN`>d}kNuD;aBOF`dr`X^EgE({-~_|@x`drKeGs?M!ecc9qc7jZPY z&oB%}_N${yFGrjHhWh!g%(CC!I^6yjKKDFt(~^JWAU?36LXGLhL41gRz~`wsDf^P@ z^~85S;d>+~S6%s1srseW@?$Utf)DjM6h3^f1$-)X`zttqP7tdU;Uk#y$p0}|I0GN^ zWY>E4NV^UgAlGB}MW;~yi0C{mHiR4OU>vU)EgxfN?)-p{%qAbY`oP2e7zprjXw6d9 zFb(gorrwZ>d90!riE0Gb6U#f1Yx!8$2p<tX2A4l7UhI-QTYOQTedn|1+nIh{#vkr; z69<A1FP8=Rp~4f+3Dv;QvI+P^Dtjl>`;Oi>$W)S})Gy&+&jjU8h38KRSGCx8g%6#h zp;2+Y(s!hzNA}%F|2V4p#{b|$x%TMwdG_!H*|rqB@B2k}4CPpf{=(%e9}DSuN03jf zemw!ca*(mE!;el4U;1MQ{XB#Z)zFgOp`P#I{`ELKCIL3oFH!aB>KfNkPvxu#AFM)5 zf?tcG_>w`x$jx~+|7P+Q$#+=<ANda|fGeLw`0#Ogq4E_6YaI`;MBZEAcT|2)mi?T* zj%CE93nS9ag$?QJ`jl)X+ijSB9`un`jEUmk(e)cQ(i>qyfDht_MI&iOq@NG2{P925 z+oNnhdZW*e9eAPQf045uMGu+t!4c&2D&Ks7@;%h)^LeGWh-L$QWSVpm*}t-&>8GIW z4a6(b7pf0GpK_d%{9{1)=!V~5yHbrT`9b0bRM-9T0(|E|pR5QUTAQNy#KBnylmjL| ziF%e2r?mim{WG4<#X~kuCGR{v{RtmFewQ92qSpXdWyE)_&-XfXwR;@eUf+)o>DuBU zlCP9&9?CyD=ryfcrr5hG!AH$LdQee0gWg%eeWt&^O!)`b@VwFHQq!@vZ6~&Ba)#-V zU5jnn=XQno|3jY7`Fc^4u!S(+-Zm1;ndmGx#&rkseEdGo_f4W7ppGX#Vk&E{@>i9A zn##=^H2LF=Uf&1%`pUafUY4&X1|PXB2Y(2$BErSdi0_5|A0V$uwkPFIZ71$T@#=~< z<nO-l`1dmT&*^m^$Y(9&y3?oIeF{{FUxnJ&d>$|dhT{B)$AAyjEt{o!b>u{<7MF5@ zg%9;i4x#_B{lWTz&9EOIg<0Z_8r)KNe4KKP#lu<4))iS^3LnbfR=(+A)6emK{uDlx z2d`QY%0mPh{=KN~*6XLMfF{Akcjzg&2TUA*o>b?WT5{l|hS(GJ7!0E?@r`(QU>8<D z(TWyoKVTR8OXFh(J?Y}j0X}rkbztOZ;R7HDAHiJfGpMID5c!c=4r&1{o9z31qeuQb zG^@G?^CSL|`TF2<cFEfXA4L<9wI*cSeBPFSSH5=15!cm#{=@KtZ1r!{Slmp0EIqeX zj|5sz;Um@?Zr@d(UCCF{>1(OkSlJ9-IzH>``g@%x{e8eVLVfJ%pTTlu>$cw_Umc2% zTJB4}%P@S0?GOC1uUdyvvR1hL!C6dt9<o^=UxANr2p_5sLamRC`{Nw%{2}c5kgXPq zhe*D<F3*1V)@(Y>rtonz_(!R`-}m<@bvwKrOY!Qj1s|$;m7vy~bQa;GiZwjZHpL#o zzv8Ba(*DuFho`?;ikIQqaqJ87{U1Gi_}a(Pv2`6k#7~mCNP6sYH2R0XzeDgruJr=y z6wFX9oUr#F?N85Re0>P;A^xFSU+|B4{rFJrt4#Svnfn?%V~gh>sz2&=iKKir5dI<m zx$u$VA5|?oson57bb>tf><zm=E22MNM$dpAp!IpKyUgM(+b!nxDAmJL+<oT!V_Vay zU_%&5;sc)H`GIUw1K@o&E!Xom@G+IzZPXz67v(d6k4Gc^k)HMkvNojTs~gG3C8l@& z+q3anB<srgca{-tai)si?^|uBbPm<yrAB+#zJC3@sxRDrRVAIiXOHg*D*dCfbt~~$ zpO*en=xe@0O=<4&sNrKdHCk#~<*#Y-HBg0*GO!yaKZWhDS@R~_>&AjRZx82b(6@sa zpMl{csh@+72k)~HcT;y$`~&@?DALc5Dn1nRzTny%yC16dapOltKaaKl!d7;u_mt|h zQ#Z7$5j}+1MC7H4zVQB2m8yl(&03AFBH5;@eIxj|7dp!Fb)wQ>L%10bjio(Dl7Cba zZ<%Nlo&$U&`lL?<aWi2UNVmVN>@BtlKWaZdyguP$7r6KQblCU%Qu=vPzWUKAb}>2E zk0LKG1|PErvPMS792rL``HK0Rjeb7sy?OQzP!+aQ{-tJo==(`aTbx66tbK22@@*xv ztDdOVUE!mW`c~;^NBb(sIj`#8?&G-B*R8T+1NE{#8mzM^o6nJXNT&FJZdH4j8vZWF z>UKWOJ-2gD&5SMH))038Vf$COFL3U$-oJ{h#FOA}c}lsF;wP`t+Yj@c{rJ%Dz!txw z&gReIAM@@)9>Bl#2>#!B#7Qh57kjp_L#|M&#-Q>|PiPyk>SAeLn1@u}t*_xI9?Z6R z?;m52y@y(m)R0_s0W#}5PPYp@Hl|4b9f<tPI_~q7d>+Ud{+;^VjNiPO@54grKa75= zy!{G()1^4xeOBMtYqQ;@PPF8G1c#v+cBF8fDG!vM!|C!R&rz!`7Vr;`ZTi3?D~8>F z*#5e<AKJr?dP`Ib6u;1Q+>$ZKj>1Q%ex>6lQpb$n<|KP^<lJ_W{e$Wp-T@YF%(Er8 zk_#tX@Xi<Ny&r-PPYWk9Mv~Z=m&8L6f6q$cg*lsfH@02XNu{pb%!^0cH`Px;ws^bm zLDiBAw-2q=F_OnTK9u_r)Hik0M#3Y`$(Vksg^ylrUc><>pv%Pm{piYXbFym=z<<Uw zcwU&o%WM5i)}09Gw!V2L{6xBxeCJ1pf2`gy+b(f=Bz>_SJ$V}QNB^$#sV!VEP<#yQ z$A_<%XW8o~*lFP7C(0AmJJ!3`JJ-7xZoo&8uLT8OPGoE(pM8$zhvE515*xzHd=1qS 
zzL7fE)XJLwq0{Ut(H!`oAJ)1&Xd#rp+b)-^BZ8k<eIlnKkEq@|II-&eAIumZ-hS1G zevba3zB{qTXX*8J9{1_}Xkql8F+Py1Hd3R-NoTI^{3iDynNNOmbPwtThw+E7{dHTE zFPvOomcV~`F6#69Tu$HLj=enW5l70_r9Ncx2N6%LxSo4P+FkU_SO^Xtp+14HDSZRA zppv{pdD!9|`aGfKE1uuyr3)8YOHzMpZiEl5CDewWec5QFV0d~g$XoR}QUjqajv43k zbG4T1T@_KQ!N(L)(<HX<`x(=ZbYR7|i_VloSk|}{Td`{0h07isG>~UFBJJt-$S?S@ zsS%0%vy^K2)vBX{?_7B)iY*SK|FHduX4Uf=9^^MqfKOqwR<9SuzinV{`4TqtjQzTz z)+VhT^h7()vhKfpxGh)BS=A{Ze?fQ=4QS0=n52gj9Us2-Xo_zzH*+;Fj2Ys$tTFT7 zmTN!zr*Y^V^sM#oMe{vCxrF0JYrW!k!iVqQs5OwvC}S+vQkgORs8?>SuY<CizEt~d zS=$4?Uq+CJm5j+aG7V)K1273bc$4r!za;d8oABLl@v)YPzC&}pUcVot);&gS*9AO* zkMq>rF_*|z%;5odQaCo~>6xrU446H?FVrXJyU2#Wh58&nR=rvJ6D&#A@(><W!(+B` z8$=H$`bc8K^NS=l9%WAS_ri0sZ93oI<KK(#KY<)^aK1y=Xz-CETm}AKz6Y|^F#0~- zZE^ZSg{_}q`^)etCi=QK4}Ns&Qx{>|g@E|K`hSG#7MbejTq);V;iDLQOm$~lUFVCw zuCRDojJ~8{_aC;umYQB=?q7XQi)2#i*^auFz8|J!S3T>IhoAQ;@E!4+R6nGY%*FhC z<9xf}s*(2Jm%vA`!Tb)42oFWl*^_l?PiTBdhGIU%Pko;f@eR!n_2L%2W4QhH0`vA< z-(%FzG1y4;CY?r2>vZqN?}KL~WwIPww{CQ@ZUVNR7HY)NPcV!=!uG4LVNGwJ$46c7 zPHb1VbDudo95qr0q8|+aLusEeg%9!~=^Qc2R&_tmy(os5+C{{<huweJ{<!P0(k-=C z`vQG16%!y^@3d3hY4pNWU)wa;&_1uH_&7B29nO%#M#0Ziw@>mw{-o3G!<T~%<me^H zS9<?*g$-=K{=5D@fyX@Y4B<mVG8LZ-g^j-+Wna*I`k}eLOT7$ZtY3v$s$<QjQ2B-5 zW?S(TZwt1~>rO^L^tDFq@jd0>Lw(X;zwDvkmbn-3pF8YD`|*2hw_my|Icpw5JH1o~ zU_Uqt_^{G-C!3o-o@X0vs~Z0uJ>*4n^ma>hhvz?rJ%2H|G4;Llv~Di5`+P0@Y`gmp zCnfD>q5Vz5@WcC2yh`?EHoV|`%lfh8Y2SASnX3SLkpGLZNIhooC#rUHk;YZXUsvKc zIo5Hb+KH?Qsxdmt$20T1H;yBo>SSyaCljYO-WJ|T4B|b+uHQoz!>yJz_ibh~#%J5d zgY|{kgWz88pmt(^KSFp1>))=?_Sk_th~*P)XRg0i^ff|{&m&{?*vjT|?lkWrXJgQr z>31ED4`J!`KAZm?*9c;Ti)0hX!^Vu<NWF<t^z%el=JJ(nVS2Xs{&nF0L!xEr=;k)V zI|6+ollw0W@Zs0M_gU^$7v<VdFGcTQo&3#>#1s*ms@|FXeKAw?q*y%i9gcl`B<&LP zEL^NQiCP2b0XLr>R14IrO`r6Fn?ujUM{dftW%Qexc}I@Dm$@Hi>%#k6hP@l*3HsP- zu;I^HM4y~BdwxfrhVuO~m#@S(%J4O)4|JmY$IwsCRA<z%<9Y+Z2Ysf(o+0giBM;Kc zEZ<eV#vJ>Q8*i(-UZnQK7OU-1&R1sqvBq`72l4SVHreXVBHQTu4GsV7E_juCt8mdw zhWJncwf>5f(}T`G{WLrCvV8l}g~TwTlgxl_9up>#dTGkO>3jbnXB_kK@!0It%X1Ds zj7J$WSqtV-Z+9X6Ko>C<Dn?DcKCxdcy=J6+=b!0S%{*mII#)e#Gn;$;Uiquk7tP-W z&2qLZ<c>S-@qNTrk#An@V_h?&AL-@td(^kRjNe1+YZjNyxAn4x4#2<c>qTVnoWh{+ z(KsovDPxPZRc*70<<)(K*Y66$kMby$XI$IWZguo2t#cpK^AUkF+P102a0d6T_ai@+ zY$$v$yd+q|;PWG{8e<<kC*OXqI<v@8bKw=LLE!6~%eKXs_0Hpfe?b3_vIZ<<ELFU} zY*l(Ta>{4(d~?Z{S*)J7myNRj^&fW|_QIrn9R4vW8Hbq3JFti7V=Eny_5A(jM%v2W znb<&OZ&K~u%<0G1^l5Lm+5>UB-krj`*7b7aV}^3TN*Mj<dBcuLvmXrdY}p0gM>r{4 z_Fh}k_7SVyrJ50&Gv*&UM_C6k{5}>KUU^`y{p+Ta9JRiW3O-~H``BG$Q@PTC&Bfc0 zhh06~-h5G(eeJ?r`vtW*ijcb&p?9d~tnaT(jQlZg|D}i867-F^!ioCgz)u#j78I#( zC$$#lUYup$ecwpC77{_GMfav>r#|^xcJ^@nJB+r1^<468wmsbO0dlmrS~0SR;-&_Y zKO45bR)Mp+M)GU7|A=eIrVF1!Hk1w8`wG&~Hut{HEYwDt7C(9}cSq1eTl`VE8kf>b zu5QQUJZI+mc`3Dz%36sFphuSCRw@o`qIU7UJe6XqGLsE?eoNYZ^^E*S9GQ=gpkHLt zHpv`bVRqx2bL`(HQ0IyK^&;7T6|bt?hhtqor|<OK>&Mt~@euW?emfXBKgSkbm}TF2 zPmbOByj`TevmOV`cW%(5lbjmmPIzBydiqLBJ03g29X82xSleT*%dNbvnRWFgXot16 z^Jvs>NAKA8v@P24e(n_88ohG^I+dK|<%YEPn0)rM$N1+s{5(j%HTS@nz`yNl)g#w| z&#LyRJZI+mxoQ_Fc3-`%<DE^!1go!575<|S4CwjN+<WlMsdey>wNUb+YEBf~d$2wY zaVhtlYS&C0WnaY(JO^2P$<4&B9_#pc7yRR$V||~GIqKhj;VApT6{GF;X=kAih(Bn3 zr<SfwD)e$Fv<!`T8UE<-57~6;-CN08zn?rP>Wi=j#*r1uyLKEE{Z(~O=s9S&sFyQx zmGT_?GYRvOrw>opsqN?5O&RG~tm|A8e_et1N1n1-zC!6gv)yexXXg6(<K52lj_P*e zlUsUxuj?2-k+P;=a-EF%2lstYRzUBZV0Y#FdO(HXUPEi9x6^nWWfKcAV!`Eh@n4^5 zpSw}E&|};;@*euJ7tovi-DB*#pF7tIh!B!2kZtRPhmFJStf?b44^Yvt0_+&@)uc?t z=TYDvlVwYK&|PYYrj1sGpJFvJwlzJ}0Yzuf_b~Y>Y<~^;NOAWN_s{d(5%f(&=F#); zoCnuG{zzfs(9d8kIQ+gtk2~UbU5j&LFB6@is}lz}8JgPfuC}?)d=tA`H@tEKG=9L- zc3JxdTh{WV&v~pP=Oyg<Vya(699O)XbpW5d>Q|JuZ^dT!6go*Md`mLrY017Sxzyj& zx+mL&-f8IbAMv@4Fki1e{vS`WyRdB)(oac!`-)We;ay<n4s2fJDlSr<qH1k1=Fnqi 
zE;>sQ`!(j?j^2x0GhaQL81s@t-!)`6nv?vleW~q-?wc!nll&H`!Eydrde_WnERc?k zuB}*0-vfvHR6OCF%SPHG#1#G-dcTu#PU^W1(!ZBClUR8D^x@Y}dT$!}Ae=>Ox3|sv zHC5~AbMB+|So2);%9pXd&`T42p&0%V!<Sdr9d6$(hu^HE=2|7Wd1YPeu?e+v4&_k2 z;B%=;cdWK`?pCfV`UE57lLvjO6!Vqw^QGqPh@W1jliq`$$KykK4?LyKoog$${WbmG zc6s}=?9jE`dv(tat0V@chUbqB!2jd>V^M3hmbt5!t&Z5ovi6-;(zU^3WaYkl>@MZv zc%5;$_<_0IXP<9V2WX$*ZU=~k!ym_*fDJG~KfXz4*zMSWzJBR&n@;c7IgHt&aWJp| z+5R!*yUV|T{dAUmDmRRvUm5hB97TN2XP<I^=QH|omE5o6bc{bPwVzKN>j&qYkI!a- zu5&&3&_LIoBYpb{d^+6MBeK!nezHw^@KP$Ii(ZJ+Jlt~~ij%|bALuq)*LX5BjGSh} zV$g^57A9P}<7jG#j73)dwH5Dw%Hr)!te?uQYs7a(&bVq_b%gg<Q(Y|O-Bx$-8`e?f zn3uAqmJnA`%kL|j9sq;qa}PdN!dw}>6jW~z8JZlEfj-CUJ<jld_-A#i#>z&A{2{+f zmXGD%>fS_-a2Yv`($iac=KZV@tzNDaUnqCs^}Wh_b&Tv+L7x*J*G1lyFd{y(s^bZp z>8|tdKrsXE*|YebZH=WTQ)hATpI`5~_@R91`KpZx&1Fj$yY*zd<ZYwv@2?(f-?|b? zcsseg7vX~=cTs)C=V}dS%q4fRNcoG5x!6@xeg9-*os-OKoUhzQeV5&p;o~OxyGVb- z7v!V#{uua|c!clK%;!wS%3N$&U%cG1Kl*OVz3vgSQ$(kJpihwpgRL{*6W}42!5Z8D zch>8ri9>%^;MY%lWM0;nUrzMN`uSORrX^bLv+_ptO0cog^|Ai-czh_Puo@dytWC9W z()&NEsL5N_ON>?bPHX6MU`4zGUcx$D+w}r#@U^n3I?0A^68uBGMhXL;gnPvIsyg(3 zkNWq(j>LLETmXI^=0SB+17E0Ur_6oUYTKWM*Ex?5@sOI{R_{MlO^UGRFQta0uR$SR z(?fmXt~M`ciI4bNVZ<yy(NksLrnfwD)_PrEe2ii4io>prM{^Firt;*pMrdeVd=*^< z{AJM<cKXN8u*+{6Yj<8T+&*<-o_+0tJp2A7**1eQi@G0kH1wVEXmY#uJ$lH#nfiV) zc`JO*yEL16Q}kZO{`_-n&)>W{-~Q=cBkhBKH`cD3cD9{IMN9JWl6`9Y8m@Ib7`jhC z(7C{cuTPnpm*n5CdJcZcq`u|>Z5Hq@P9>&eTFM6~JL?Mf&(u<X7M-INTy(-a<Ognt zUKD$=i5i?+h`$JrFG+xh66BG1-!?B>Rl<MdQ;xHKR=J;Z%RDoAKe9`@9jb$ae5E{X zY)J0m%;S^7<|L3Qnd`OChVt<}HuBfgH;eB%_6u?(>)VQ0E4XK#CBgldlkZs4rx?a? z`LY6DD|`q?Rp=n`o;K$5fS0wzOO)qc)!b&!bU$h1-FV(0?^QPIfRDZA_sI`m%WqJ; z&{-Lik~$ys7xU{=utrJt;*<ej(I<V#{5W(Q{>XoMi=A=jX*S`OlkL(QPO>YmA8%J` zyz>-$yFRZSZ&%-Rl3lL-+BOOLFB1I+YeDM#IaA1u(TK5KeksZQ=+}8Z251`i)%SIb z0L$FlE1&n(Sb6pS*ZMctLGLJFty1iw*FW$?l7;(tCpu|os(-U;A|3!2yI410^0JZg zVO4`OZ2fEyzYrF{ljj@ISAD0mhSsOB5xs-QlRnDx5_cceiGp`rf{X)Z{zCc(YZTX$ zTq%si{Cg=3c!>B|uEjb&WAWBM@T|nVL+6Tzsemu6@_8%i`KRhbRwIYW=BK$*ZxqQ| z(nqATt9DDQm%4b=XifAipt?SBE?hOx&%x*`-~Unn+w*R33y?iPa;=89_wY$BaDkV( zgLlDNhFs(AKC+v6T`9;<k$mPyAzd`6TNb=ez0<=C&e^}++nI{^jpkmTMe^UF4{qjD z!}B!pH4T1u$RM@`JOuf$T2GU{pVS=jx0kn4f4`p3Aijz`gkT*V!TbBPH8lSbTW%+7 zTq`p6R`9UXmLm_9AXiDRR=h+dazUau-2N$i$WKxUO_dNATTaY)L+d_!qU}NCDy`RJ ziPdFZf_d=oL2Fd-j#yXtHkiZnYvEC}z}$O#@1gzYxVKsE&D{GBsG%*nQZ_qkpi*a8 zHGsm_hoFXUfD!SLfR|9;1)bg7wa~$;m{Zm1j<<9ohc2|+;aSd|bu{81!sc!Y_Fz4G zFb=5~n8ZPn9<#hZ1)s{nV}sumBxT`OZ!hHi$<D0({k{B>*U>)Z)XNr~`W@HN`WOt+ z{h>HG_}%Xhd#2QW@R5=2KR6FID#!cyp*!9F%YBI0JmO5-ot0ACP4yD={>!?sxnRc; z4@lKySIkt+0OSi{$oHo8G6wucy$sstq0guf3Am6=H>m5uT#09B{gz!q@9cl@&(yOY ztxwfSu<27}V-2t|3Csp-sCb9;0LeW0(8mX?tnZKL5bajqhflM;!%ADi>w5`L!ie~Y zWG(3<Wxd<^z6~r&$A1pnMH6=9HgE!erjAR4TL^!-fNKxBeV6A>(k1IReiHFd)pa!| zKA?5Jf4%m{iMBPElawxy;x|eDgB>oY8LYowJr3nhhweMV_9FNYya;|%P-uR5{^jKg z=p!j#B>9H7btb<<M_m1=4CHM29%|etthVvbR@|c69XpV%8*OpNX6TrnH$9D3i`~AC zI)|$NR*TNvz<c+-GQdyTHMA$_mlKCZR&v92{C=jJ!ZmYgR6EXGBe@sSIV9UD-dLEK zLJraTv3}c8c}C9kI?uCK@*nSpGvp&reG@t8S!nwJ-<?(XIA6t$Y=+KNOHOr!Sp#_H zD(0*X`cFGIlCy|gR1NzT{v12N40R}*shQXF7<Rocah37#*Ac!aqi`G8m{`*u9+R{^ zX8T&jt>ivBdZzlAU{5>3HuL%VpZ$3Ft+{h0uj%~>kFwos{nMxCg#>L#zSsAh`wUyf zSEvb~W7S7cIeKfT%UlKpRJA?N`-Fbc#Vf@#I*A`?dolF=%62Il@G;!bhI&WI4ys;K z8f7gzz|l7F<gB)})Bey>jLv(y_xZc;4ZeMf%VzSPv3KYAdL!cbi{0hmV+s9El+U%5 zXHC_Llzt^!1<$RyATPIqSOZ}s3?Fs9vH`+Z6tBZv)~ZKP8+{)3J#UrWi|qw6<Dq@o zFL;!Eqt5{kB%OKKm*jsp#T&5ZE1m>iz|*p>Th6^x<#$wES!U0Ftp6TG4j#z-6u<{% zhtSY>*|dWg@hqD<eYkB|1T6|D@T`s4r`8iQa-TcdmUsNviuc8=vT+}F%TC@Awt3!H zMK|#f1F%n}-oJXvh%Tgm3L`0;6!&ZabMP2!n`>AX=es+6T>`H=X`LI|?@#NoO*>0# zjO2-Y)`3qUf6`BmH9sN$c`r6wcx)v+qq0}euR2BO+{{IF?*Q;Yy~%2F1xmo1Vm=$F 
zVXw6$-io}%e3kk&>H+(`dy8y(dWSZ1%2@cCFyiZ?ByCLM7rbBHXUf)~KEOkJeh2^l z(Ax_Ok)i*8duJYF*Hs?y<0Vc)D1lHaXaNV2RuDnAR4gsRE^3h|Dn(G!vX!L}$|ed0 zRi#BUe-xyO7FtTFD5!xHMC%l?;>_Zi_h$2wI4f%ss0SR9KoZ;WjA!w9=Jxmd&b@Em z*z;z<lhHWN8|ln@?|%2(<(%(3-~JtS{;*{u&ub_B1bl_qBzZ>Ig<g2>x3b+(USh3| z7o|Pp-KjQqcN!Q+W;!+%xUFxai{||9ot<5F{gZD<8|v%kYiIh9k6@pHPTW_pHaC`r z#?bSpK7{NVx_8?1Ddi{T^z@#;bicI^WA6?5?K;-?C#0*34-sE;1X`IOr(=R+e8*NB zW5D}LBl3}uwOkvPXuD>{qxP5dp?z?WYe^&W5oN8e8DzI<<58}GhF($Dc0n(JHqwR6 zc@4kKcG~64dF)9ion%?O6Mk}<kMF8KAN+JHHiE2Mf%lQ`cga}<G-<!L$(P|VA5;%M z@1lfH{J)B~kgIod$jSOPcC!xZ8+Sh4i?+*}34HhXQgbWJ^YU8B9&2U2C}~Dm{yiCM z;jYQcU(daH7T3T{+V`N0RoA~)I{x6O>&|*c<Q(h<t~VRfE0FUCk8daTgmr~?*3Lc1 zJG)o4G$J2S*2--e`rtfcReD&H%vqF1<Ri*j+O$T1yI<XP!shleCx1sh_^0t4_=k)$ zKpxPpiL8(c>BU+BTa~wg5ev;=KdKw!6!wG|hrqp!)(}N%jr}t@jIMjp*=%@u7JA%o zFy5C_$Nylup4h1x`0u{T#ugbrq5rD)n06NHm}|3N-v5{7D0PnNT=i6PZ|A7Vj@&Rf zRixe1d(&kRL&18cz3gk~A8FmCj{y1ryVl40lWR)5-pK#VS^v#6^56)$9oW3lv8*d6 zjbuK8ti^a_-^bGQVa+f1Uex}bX+(RvyyQUW>;v3UyH?lSUD&;T#F(P<&s{}q=qo{d z>D4%`Y;&Uy)1}B=C4DFlp7#8CwP}|zD#rlYsA*?W1`R#UPjoioQJ-%d)$Dtbr|zWY zvhuQVM3apB1nbfrF&Cj<A}@>YHE2WoV;4BS9RKiIS>MPt2c-?|U-s|8XFl@4_rRMs zs24HQ^0G~-7<xiFvJaO%x+Diq>t@~2ZM&3FpTwiO|9ivUk@^q68k`<U*N%a^(1?6Q z+g|L`;2IG+_tNyCFTQIUT7pI*=5ZE#N8~PR112U1|Iq-lZ!dd?cF%pEYd%CC$z19= zx;t&3`B1u<eG?C0pWn*(I`5aIbM!x_&Kf!C@Q~Aed{_DTn72|!VeZhrT7(>O9f zt#}r@;k+OXPhd|!h)j!(UEi%bxH&r`u6IZd)_byMhwER+3Cc#SS<cV0^Z`Cp^x?U| zH=vDv<b}F%;Saqpy)pJr+E{c%Ecau1#trHI;a_BE9s%ncvZ?lr?V-m#PC1L`hF<P^ z67O+#x&@4sM&u(QYoU+SXxe*?HCS%^+yg=y(MPn4wPs}I2zHS%^cw8y*nS6yaZ`pJ ziZyMFm^b_<6YQtW8Z^k04Bl~ly79gbr@OI3X1i7?Ps8IHxWSXIG9I3;*U$SKhg`KG z{QbL_$N$6M`}M=WpFV%<JJ6lSW3HqLV`tT+dyf-8HHoip>|T7?6U+g7ezOOZG+|D; z@|N<_aQt3+{R=y8dA|CFGL`a~v@ueAhndkHd0y}~>zgW@Ak!DvBs&Um?U6or(9aEb ze<qp#WUf~aYlQZQ5p<J*BYC}WUth;0{DQIPbzXOrXP2X&<>15Q<ka0irMQB)pg$+~ z;F-azwPST-zt`7i42Lz$Y!T04t?S&B&9i@-wKd0DN&oUl^NfBjb(VZ7^qG)LT!Ys? 
zT=Nz=Oxiz`CFbQGd0}q(S$rnw-B=ayM_W6!y<^SFu~?ZeWM8Mg_j4jfi+C;iS$9Jp z-};D0zY_cD^~I-&CBK9C@s-zali!5QRo2^-ZW{VGv}S%&j#Jl1tX|LF&XW(OU5Bnn z?_9pV?87zAYjvI28)_@L0N*ISl=|_ln(xw098Kuf(1`0%)Pk2E!?WeD%CA*C)^hm2 z9Q<ohzm!@;6BFr%2glQmKiQXdKlJz9|B@Z>x4?~Tr_ug~%&nZ!jOW1*Jui4Uv?U&$ zO->wDuY>O^TS8a(kdfKc#k2`<4X?9e`8e=hoafr<QfPS_wCraUWAYE!59Qr~>)bJ( zGoINPPJMYHS3<`@BZYB$`g6lK8oJ^pbl)eZzUgh$Irt2De!Jkoqws9wrE=Wpa$~#{ zyrK<l`P!+%UKMg#w(FX6yJPYm=HpK0{1CBxMY>^bAiWzNR#=DKcRGWc{Lp<Lw4>j0 zczfsx*CAg@BkCdg5C@Tu@;Jhyp%>Q|X~eeF*#9)Jo!Elwv=Q1cx$XT&4yB$4??^r4 zU94AM0I!}7?p%<0O5jr9b}iQ9z@MzAN*Bh*?t|ZlO&guHpcYu{>(hZ<%vRHu=lWdY z;>zg*%)ky8F)m#99%PMgVXN+j?-{3PuAH=}t<z>qcXcjf9{!B&7laQta_Ty>?IhE` zbi*D4*!y<QelGPL-kbU#WFE$5$Wb#-`~dVpY!B=IN}BeV@N?nYkUsL<I=|Tl*%x&b z?+m+S|7?+Noc#tq`secO(l_hU8B80#n{z?kso?#uEq*@jIk<z`dG`k&QPy(b>cP+* zq!aW9#xwjy+V?`%TH*X)XR+3j@jEqmRMs0GV10z2V`m;1$Im~v1AKTNaXlCFyviD` zuks9S{_d~)L}>hRY2UubTyiNgFtxgYDbv{GJGO{x4qr4ls$7<@!+(IM+;|qR#RZU` z%vHm$4NV%e>U}ePDhm=Dc7S+Z?d_Fp1&`w#_o}P>w`Zk~@h)G*Pc*>VFmm*JY4;pH zsu_F?<lTtJj+4Hrtb;F~zqB0f1Wia6wB~ao{#1KEGF9-6IdapFeK&m`8?v;K$M_MT zf3IL}#f#f=Jpl0|9Rb(2kxPElga1ojq;ZMFn8+`J@8T;d`H8=kwbVzN$MjUb*YQ{N zWAoif3~BbeXlqB`FLlWs@F@3~*S03Uj2yfQo5_39*W9BEyRb4!)-@_xP`?k^DP$IP zMq9+7p`#er%Q`Mj2aRy@Dbt317szR$N3V<fxkrk2IPMd6xJ-}8bJ+&Ur_cKs`fp61 zgwI_!_42fL>@(@ssc)uz>H>_xmBdABE7w+HF0?+^U30{j&P<0t+!*y!8S~~o>Km0z zm5&?8t8A59-FqWb-2|pwrVS<P*y;z|l-_Dhk`<2M>E3^T*yNByil^jt5FIk1kK8`{ z<<Qx?*u%Gr9FJb&mX)=P`&8D_|ECSSd8|+6`|fFEt7F#8fJRuW+=E`fOKon7`xMA| zKMdPI#N-=aI(=^%B)4a{xEXudaO(TXi__kt+K7KSjS<6rUvVDqp=^TA+^8iZuLwD; zp?h#?dC5xWw{eY%PBI;MKifL<DUGGUH;b>L29H6$(+2hKy-!T9qKZ#{@kQ{yHw{nS z5&PHoOc0ZEY!rI9hvWG9$Yaa?JZw+QLDX29f~Oz*F}C=X@6WMNp2F^1(MHJ5)F?Q5 zDD;gF7taa4fj<&?keJ+vzh!;mEzjiU`em*@vO2H1p|}uwQ6G6Jy6sK(ej2&+X5@kI z1&zozl(p1H!iUP*b}BT{+-E1g6YYk|ZOYZ={%Sv~eD{O+AC13`JOkEG{lM}y>j?B9 z`}I#uWB)Q2vXAHbB)sM=#9=;PIpwqZlnZLt6y*LMVvU~1-F2VE))n!6E1X;HXfbY? 
zKC&*PKK>JQ>Z#aQNZZy~>2o|ofAL57MLXdES7DF1GYuX-!q^_dkNV@Z17Dl#!Vo^a zJ;V`?96QQdfSm|_G3>Y?Q|+9k?!oMVw1YaY*UyczcC4H}oQsM^nrXxO41=sq+Nu63 zbn!~2+Jw#S@#&4Xna6&{(+9Cw#MYMbys}otA@M45N13zUpb`0qvX=UYe5BGrnsNP9 zzIUT%YAbV3ZS@`Xk3R4vH|_PxSEW4}x>VM(Rrj*Kd~Wo|rqer)%s0ND_4r%q#+jSa z(4Fr~wf%pJKk~QI7Yphml)8DW=jsR?SwS-<rlw7*qz~lN$C6k2taR;<UYdGGUkBaR zQf+1r_y0D2)F0q;rhW`#sBgQE_zz=2)hBZNhOs#CYU68T?YR$LLjB4aVn12e9s6Nq z+?>~Bd_xuUaq2pt^Jr5ZsOZCZ?4S*7G{nm@K-tEuO+=Y0$2)8y5A$Kj-&)!DuVn5X zmcH_rz4yN@?RwxwVr|G3i+MfBy7vRtu{*(OaoD`}Ip(gw4s|@{QQ6CU%?gj#>eE~Y z{b7B|^!WJIcLs~wdDbDGwJTly;M>#H<8R_Uw(+bl=iHajUJMWVZS1lyOkd&itHtlc zv3l{l^m`fa_bTZ5^=Ze^xAOhpx#m~+?B}}Q<TLYB@B7pEr&oJioQIvUzG(l@FQ`sW zejIU&!_1lYuN^lY)%*n4rJ3**_Z<BQu_?>$pZBof*b3ICwZ4DYDwPj<kqt)1>)02k z!86xA*DrWHwf5xC`i*+wF*}ZbKX|{g*bw%>)t>+5u5X<)?PBV;v0th6zmV^HvBh*B zyB%NG40Jz@JP*#WU&_ESXlM$%H8y{J(AGWkd+0qg$Uiga$?O|p?KJh{uTShHhW|Fs z-H#rNU17$Y0Qm#*5cgj0`Bob)ZTKwmTysA9X7Q^V7vVVISMMXvA)kYsI?nnwabvoX zDvo=IU+HY;7c5VIVP{dUJ}dnPvi#m-UnGA7d564_A=aPXDSV04&Fh&a&IG)&=8yR@ z#`|<r17o%4f4S=`)0-b0^Z|{?LyX1TIZ4bhy0A7Oo4KUMXVm#r532IxlqItdh&Zy~ zlb#QK&_-B`jfd@nu5x`z_UytQoA;o<Kjg;co^Q3`L-2`yct{uiy&bFzJE7qrbiM)d z2fHTCDby!86ZF$J{nd2y#2=^PP}W70dmuf+<)2l5eq{8^f=0s6OC0rMz@xuOH?ZGH z|0H;XZrz3LX~%Kpio@ui_*ZGt$2BvztvP+@M=^F)`p9u$>QK~Lnai!0I*`(K?<78H z=w&DM{cW|0d!p5+?v6cy`zOg6o}li_B=$7Vbv>}|6SSv)K;MvbGXy<(%s7LG!~VTu z`hnktUI8Dmc8#>F{Uhul{2n<*O}MGM$ou;!H5{J7%&of|d-;9Z%pZpOzGpgpSyw9d zQma8D*IIuMeZ-iF|3(hK>7gC0p~phrGImQj#{GhJVuR|RW_@+7W)9Y}xkpgW_cAY1 z+L*^@ZDM`UP0o)HujLW)5#wN|qFE!%CXW$jJd*xQ`&eJk7>_P+&pZq51i9Xe@2o7d z^7w+4j@@<B-$;E^@H}K)*RDa{SD#X^en)-}P3}h5ddEvs)HPhs5p3RRW6m>p*UI+Z z;6i6Sec(^vEwKSdyo7d)h{eOt@C(MLG2Z`5+Y|Lr2Fcg7cI!2BtjEN8?mm8eO=&~i z(jJ}tHQBb`3*S&zS5H-zs_b9t)B2?Q;DOp3jEfCA=exgaK^OX%)w|Tat>v8U6y~~Q z8=QF&%9O4X*rl*JtrWjk+P{VmSN@^Be2`d$Ix=xLc9!nheZ1>e!WMp@VV^q3eTvGs zHEjOduwkD0IKHBHls1kK^FT}~BV`Q*;wOlW4?lFxx(XM7JFg^m?#t|P`@N_Oue@tr z#h)hsW~KALT>pXDN%$N1ZfrKRYug#VFKr5W&wBT*k4C)$YibToK~t01v1xv{DYFMm zIOn?ufGy`!sQ=3=q>bFN&0^jTz1(xI7F_+qS{%j~b{|J39{VAFm+SfcR`l<u<~cQP ztoQsprx8zP>{{fSt_XJ?1++z6A2ATd)>S@f{nFA2e)hvJcRxI14K~2Lp8@XuEpbBM z#6NtTb?PYgL2ZU>D_>dbEAs0@PuCay0~&TI=p^<K<N31B#XW#>UYzm#=0loSWb32l z7=LZ0`eXFb*pS~RsF{Ne%v_g2a$nTPDu0`L$XdVumF&;m5APc$ergCiTz~Oz(Dd`1 zLm;em7pjXJkFzOip_<EK>{!I(7(Wf9-A;K7E_EL?<D2vy>4(3i*a|-WBRIl(h0NQF zpSO>iakU9;JlI+zW~<DHM!u;>9_V4#Gv#<|2T-H3j4|nB@9ee4ueH8^4>b+l15$rs z&S`}%u{S6qY7bGKRp!&iRwE{^cLo~3UNQjv#=Zm0Oh04VJ4KGqLw{f#0Y@T6&6+4} z++SV#hbD}bb-dN<Hbm~{jl}9)PfqgH#YeCs66Y9ty8Dk1JAxd%1N(c=)My$pCu`35 z&>y1b-$z59(LPH4qO{@qyw>=+*7qL)_tl}St<#S!O1oekAN&A0o5r7LSL;4*+?w$q z*#<3N+-a`)(So?g&ok~HAV2d3%oVjX;T10<cCyN2XVTg<vU+2{;F9M+9g-aHys4-e zYZ3g~ILC9SHS=oZtt*K2Q06M28}qNo&t#qlCb0PsZ)lUpn71A}0r;SuRD0=K<L6r6 zU%OM*tJQIg@%aHYW{CNRHmpCh^C+}}9A|t=Z{!LTY0va5`cfBhF7Ha$9{43<!Ad(6 z>zMPi4f!d{%qz|hYtz;;v{mN)5YxHd8nYeRfpb0szNN4STi^cz_J;pMV(qr4UBoi< z;d^!uF6-0vkjHEdabuGD;T_tI^!0j7S!=EFbFJ@x4V6OOi%GsaQh@i2Y7Mzmd)akp z5Za>q^Z`;6b2o7S15?+eFHgTJ{nNzLQZY^~{m3mj2MO)mPldjLO(o}u8Dm%0epqIX zxk#H1PH_z66^tJPh^~kJVVvy2eZ;iA|J;}5946^S8EoBM)`QqieADjP|0IV&{X|<5 zzC`9X`%Z}aK=xA3HfByA_FCiTTHn7fYzf9RYNItb#uzI3s&#Y=8hdyV)4Ol_O831d zSlPV3b!C3m+4qsde=)Uqb8cgmQ^`EJ_osZHliO`I$Dqs^&2<LOjoA3kuytwkI?KJa zpotAzi;}NcH(9ysipj0%rh}h_E^bLZ&`K9R>mKAS<3EPMh45RzOV%1c*ZTf-_`)DD z?v42NDQvY<cQa0-$gST``wo3RT{Fc}6ndaKpz*}ktJy$J*Yg<PTn{4TU}Rx)7tAl# zW)*tlYT`!w+OaQ;KBdt2!VWB5*dk_&+&R`R*Q=WA+PP)@EqQ#LgD>hC$PdKM-g|i( z`O*Hg_aM0s_^-^By1KaaQBNP&K?CxT-NZfad5HY9AAdLX-uI<+?XkBTW9&FHmiR9^ z<kRL;GTtFWoeNXn6!g&<W6gNWFM=+ljk2|Ics2QFXHz}}W4;lrJ}1k$`;0}=H==Ci zyseA+3D)tT53sNYyAK@S&!)!vQ&{g_f?VB6&O<Hu$OygGdi`GO`&$Q5e|-1!UUHg0 
zm2NFwL*CUh8MAY>tMlnNJBGm<9NTEwp1A2^b@9q}lIJ4w8FQ|DrMI_fWHo7ICD)Ex zqON7R=8ilEzaw!g{olDHU2)y3kSoy9sdwfcyzb?uU2_OqXVeNjHjN&yPwzfH%_|}< zmzZJCGe*}vB8P~hGAGZykhFg)zq_|+_7CJZ5b>{vy|khhBz84@3fc#HShsW2|4a-l zFwgPa9J7H>k(@r`ta5INcDHf#=!vNPyob2hp$Y01V#ju$?RsgW%khF-Lmve)r2b1| zo_#KDxYlyZ`+Gf*uDt!tsq2B?!>@Fqb&D7u#%Sxt&zk=j^}O4-y%PL8-TUH)f2=j% z@}Ah_gDWp6Bacxh;v1}!oycY*_*433|02~7emT_--%8vE^)hF3j-hzlL)%HKLnq4V zK?C7ynL<_vPW^aoqC2VoD?XbC(Zha;?km=a86j@O+z;t5{0q>CIW6w};%CF>z&Z9A z0Eca{M~by^@Xfh5nfK^9My>8Ca$$(|t<Paof(IDas7__BpmZUPbz#f9in_#I)7;}I zdlntKC+&T(58K6GW1D&oV{vZe)tjqJ&Tgg+>7#Rf4gQdGd)xGJy5smto-eU)d%2Hg z#LqL^)hBh`i}kfb)W6vkb?22EHen}zI(YVK+Q(UwuS&y5@QWRRHpaluqxi+Z`5xBc z97}7RAbiLA@Ud_j7nWlOjYHIqob_I5D%Ngsm%i3f=|SG_K6WHNd#Ro3v7CpR&kY{L zxnGASbNsGxC&pmq+Ihw%N^4bal5xGUAHXE^1wR=&dT$yYzlZt<*F!V^0{y%WUE!kG z(^4Lh_eU`{)Yy;E)e35MN*i&E6Ra({lCf|1Wh5EL)4Aon0z(%mb@r%jD2-_6intYC zA@=Y~2dJ~XKjI|C5%m~qj>2QQis#W@gM8kRYGYTVp(D4YT@O9LKCM$}54nEsQ8K{( zM*8H9xplu*>*kpM>z?1nxY(4zY~~Q>HQ!@Zqh3P5^0_tSHv8uIU+4*crZpq<TlXJ_ zFOh3F>>kNu_$<(?`;UAx^^AQjT{rzv>h`=L{SP_2mlw|t`<^morU7UHS<~3h9FwrY z8bQ{5jy(;wqTdA!D&hR7t9qKx@9B(tkBDcBdY|qC0#2}2Ld<!641B3cGu8f%9{0Qr z<=%SXhun`X6W^rgW&ewLp<h8(|6K9x^v}hwB6od&HM|47{WP^st|Eu@2KG8)U&`?b z>P?Y<&0bou58qLAoH946j~d$M)Yg#A>d0pO*i`*p!TZ_QwD0Kt=r;_{8l>OA*d3`h zz9;pKU(3GYUx0Uf5}o2>*d(4C^%}ljY({Q;5-nn5wK4hJptrT*vhB&e8B0A({D=NF zS|3PXMy@kw?Kx}E6;YEe#-mLi3<o1|rnbQOtaF0<RsBuzy=?A9PMh~bYWosLtet3$ z6=R6~q*hspXITc32dU4>XEsKypAXXK6VTfi*-v46aS@-tlD-Syxvh9DIoE%d29LiD zynkovop>*~+3y4Y-yhBIeN*oVnCI**-V}YlUHl31)UP4yU5x(z3~0gndg=+@w`%2Y z_nr=V(4J?EY&<*fVEspF#&?2m&=Y$_jdSrCF$V6VX6{Wl@9Vy5#G>D|BcH4673bTh z-Ok7{$ntfTvCBCQdWbshu`k?N;tK06{@%MD$KF%a5yhstHT0Cob)(xx*VynA@88OK z9|w=br|$D=wUTByhx0b%I&E9I9{WM(y6f_#;7uXxycayRzO^;1x}hiUWld0z0rMQr zm-hTzA33N^@=(71C0yHmu-?qGxObdQJok4xtMqM`z6#<E*w3SU2IVUK608ZHf7#=s z&b76!_4io=yK3e7aYo_<1JHi2JO-&7JZAq@<%v8dtewc&*?zgT(nI`|Z<KzMZ5y^B zi+J8etkdtM(SOsvl2>qUg}>skw3qiN3mFbM&b(@Ix(TmKyo~jj`#?Y7WM0!<tNjjr zaixsEfx{8&SN--{^0;XEo^fyC^J~Dm<a=Dg^?26wGI-2({1c7*fY|>ak3;mS`5w(J z(<HFikFfjlNB#c3Z8{Ps+pRqYZ5(LhKpO|zIMBv{HV(9Lpp64<9BAV}8wc7r(8hr_ z4zzKgjRS2QXyd?Xivvu}B7ZoI7de^_wPA}7&FMQXohfaZf93_xIm)lp(Ev8XjtkOf zSpGG`Mg=upmG9MQa{)0J=j48eOIYKMP5tT>%*u)W&BLlo&OgmOEI(!R?TBto{TjDR zh79;O-)}U!#J}=M7loCtwK%M!^)=JxC1Auc@VEFi^TR9-%X6_fEDvc5*p}A!%MaS3 zUyND{SUhM8SUhM8Sd3^ZSjYUh%ogOkH~;w(WxuHtVVmb+hfjq05@o;9C1CXvVa`(7 zuk%EhFJHoVpHaTf|M~L#;{V*w1hYr(cl@M&&TfWHoebMj!4Cgyu+3G!(Ubc%VD*z> z3}}9qrC^;W_hUeFzolU5<bDix?zaStCE~<+aOsMEOTkz^TJ~EE#@TVrrC?Lud6d(d z|2lHNrC^7b==U?jw&eRQ+OLF-E_T0Dg>BCFTlAVg1+2c<vy}Z#3mE&oiz<sgOB>c0 z7Z<;Jh4&|nO9wDVG5!|9GJBp{4CWR6TMU-FHHdA&H9MkLK|$1$?suRB6~v+V*W3?~ z;zomlPk`0)`T0o~!pf)b$kNI}SaeGSw0GwI{4a4S?&iUimy8Ad8UoDbxcCBCXLRD< zhPYufEI&cOW7wKujY}fE)j6dZHX2>x-}!!T0bA$8w16dtyA|xkOTf<WY~AnddTZFa z(bllUgSL8>^r9tTPhSFd{>iY-1tqNf+t7L9{X*2M3tLw|5f)gO=W6r>Sc&wVcKpl5 zVR6S7EDno{K6P<epy1<Lz(%>>rWP<~eM1YFH(b}baI7oBjT_cmz`WrCSTL+ipT6On zTl5pne!T_EhiL_q9-iEypAULYD_A|Hjm@y8=XCOyf~`NP-|>!kLq_x@7}ttbjTc;Z zLcf?*d-CN(*eLI1y@OaM7xoiE2VtFw9kuyUu04T50=Fyy^J(fn)FS=7Xe$^uDMnlL z^M>)Fi(E5pUINC$wCsnNcmlmQ-p{vh3FAQ->_y+tk!lU=SOT`iNosM;rC^(v=y$5H z^Ov~h(lF+{#XB$&rwD63E~ol_3;Qu;Om+3Q6l}|aamiC%_1n@kx1*v#5MPEi&-;n6 zz?aRLRs!bN{FiZX9@bHbAAmFNmN3v3`ly%Pq-tJh0c^B_WqO#0K|76?&vdjUKMU97 zx2tCSbri*@ci~4#8Aanki$!4d=oK~{zINn(T#aWdSs#cJ&pZ#~ovIsd;cI7ezgUgp zn$@?se++XR3DWhFj{7`}ulZ1%1dPu(!T%isd8jSrXEQ90dE7sSh=<GlI%8n+asPAU zW|)uA_#2HE&i(4;a$5oU&AAPjFW>m%XahD{F2yC#*V|n6^QAi)y<+gTR4}KNAJt#^ zlY<=b$L0zWvo~)qaLS#f@~@tI<p18h=|TDBu+GC`w4<YE&ci%q{$YG-ufm7tcJkS{ zVZHpphcJJ7nfT*-APpb>9`$!eK;nhsjY?Pp8vxfmZwp^am@i*m)ThkX;KM6reBbQX 
diff --git a/share/sources/org.preprints/source.yaml b/share/sources/org.preprints/source.yaml
deleted file mode 100644
index f94fdc161..000000000
--- a/share/sources/org.preprints/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://www.preprints.org/oaipmh
-  disabled: false
-  earliest_date: 2016-05-01
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: org.preprints
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: preprint
-    property_list: []
-    type_map: {}
-home_page: http://www.preprints.org
-long_title: Preprints.org
-name: org.preprints
-user: providers.org.preprints
diff --git a/share/sources/org.psyarxiv/icon.ico b/share/sources/org.psyarxiv/icon.ico
deleted file mode 100644
index f213592358469a0369137a54772f8120830a88c6..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.psyarxiv/source.yaml b/share/sources/org.psyarxiv/source.yaml
deleted file mode 100644
index da329bd2b..000000000
--- a/share/sources/org.psyarxiv/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: http://psyarxiv.org
-  disabled: true
-  earliest_date: null
-  harvester: null
-  label: org.psyarxiv
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: org.psyarxiv
-  transformer_kwargs: {}
-home_page: http://psyarxiv.org
-long_title: PsyArXiv
-name: org.psyarxiv
-user: providers.org.psyarxiv
-canonical: true
diff --git a/share/sources/org.repec/icon.ico b/share/sources/org.repec/icon.ico
deleted file mode 100644
index cf1435c702e0b6d3702ff0cfcd39cd0cd7a48c3b..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.repec/source.yaml b/share/sources/org.repec/source.yaml
deleted file mode 100644
index 18e23ff9c..000000000
--- a/share/sources/org.repec/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://oai.repec.org
-  disabled: false
-  earliest_date: 2013-09-05
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: org.repec
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://repec.org/
-long_title: Research Papers in Economics
-name: org.repec
-user: providers.org.repec
diff --git a/share/sources/org.seafdec/icon.ico b/share/sources/org.seafdec/icon.ico
deleted file mode 100644
index 3b55577c52735e61fd52c52f6425a83cb42e0383..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.seafdec/source.yaml b/share/sources/org.seafdec/source.yaml
deleted file mode 100644
index 4363e1779..000000000
--- a/share/sources/org.seafdec/source.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-# DSpace
-configs:
-- base_url: https://repository.seafdec.org.ph/oai/request
-  disabled: false
-  earliest_date: 2011-05-12T09:05:01Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: org.seafdec.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [col_10862_26, col_10862_48, col_10862_49, col_10862_69, col_10862_81,
-      com_10862_84, col_10862_151, col_10862_527, col_10862_534, col_10862_561, col_10862_658,
-      col_10862_670, col_10862_683, col_10862_685, col_10862_702, col_10862_719, col_10862_741,
-      col_10862_847, col_10862_948, col_10862_1053, col_10862_1482, col_10862_1662,
-      col_10862_1666, col_10862_1667, col_10862_1699, col_10862_2143, col_10862_2271,
-      col_10862_2273, col_10862_2818]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://repository.seafdec.org.ph/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: org.seafdec
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [col_10862_26, col_10862_48, col_10862_49, col_10862_69, col_10862_81,
-      com_10862_84, col_10862_151, col_10862_527, col_10862_534, col_10862_561, col_10862_658,
-      col_10862_670, col_10862_683, col_10862_685, col_10862_702, col_10862_719, col_10862_741,
-      col_10862_847, col_10862_948, col_10862_1053, col_10862_1482, col_10862_1662,
-      col_10862_1666, col_10862_1667, col_10862_1699, col_10862_2143, col_10862_2271,
-      col_10862_2273, col_10862_2818]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://conservancy.umn.edu/
-long_title: SEAFDEC/AQD Institutional Repository
-name: org.seafdec
-user: providers.org.seafdec
diff --git a/share/sources/org.shareok/icon.ico b/share/sources/org.shareok/icon.ico
deleted file mode 100644
index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.shareok/source.yaml b/share/sources/org.shareok/source.yaml
deleted file mode 100644
index 8bae328be..000000000
--- a/share/sources/org.shareok/source.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# DSpace
-configs:
-- base_url: https://shareok.org/oai/request
-  disabled: false
-  earliest_date: 2013-08-16T12:17:44Z
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: org.shareok.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [com_11244_14447, com_11244_1, col_11244_14248, com_11244_6231,
-      col_11244_7929, col_11244_7920, col_11244_10476, com_11244_10465, com_11244_10460,
-      col_11244_10466, col_11244_10464, col_11244_10462, com_11244_15231, col_11244_15285,
-      col_11244_15479, col_11244_20910, col_11244_20927, col_11244_21724, col_11244_22702,
-      col_11244_23528]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: https://shareok.org/oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: org.shareok
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [com_11244_14447, com_11244_1, col_11244_14248, com_11244_6231,
-      col_11244_7929, col_11244_7920, col_11244_10476, com_11244_10465, com_11244_10460,
-      col_11244_10466, col_11244_10464, col_11244_10462, com_11244_15231, col_11244_15285,
-      col_11244_15479, col_11244_20910, col_11244_20927, col_11244_21724, col_11244_22702,
-      col_11244_23528]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: https://shareok.org
-long_title: SHAREOK Repository
-name: org.shareok
-user: providers.org.shareok
diff --git a/share/sources/org.sldr/icon.ico b/share/sources/org.sldr/icon.ico
deleted file mode 100644
index 79474f29e624a71cf135d68793b7a61432c09ae0..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.sldr/source.yaml b/share/sources/org.sldr/source.yaml
deleted file mode 100644
index 1360e7217..000000000
--- a/share/sources/org.sldr/source.yaml
+++ /dev/null
@@ -1,23 +0,0 @@
-# very slow
-configs:
-- base_url: http://sldr.org/oai-pmh.php
-  disabled: false
-  earliest_date: 2006-10-12
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: org.sldr
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [publisher, date, language, rights, license, format, isPartOf,
-      created, accessRights, temporal, source, bibliographicCitation, modified, spatial,
-      requires, identifier, type, tableOfContents, ortolang, 'archive:long-term']
-    emitted_type: CreativeWork
-    property_list: [modified, temporal, extent, spatial, abstract, created, license,
-      bibliographicCitation, isPartOf, tableOfContents, accessRights]
-    type_map: {}
-home_page: http://sldr.org
-long_title: Speech and Language Data Repository (SLDR/ORTOLANG)
-name: org.sldr
-user: providers.org.sldr
diff --git a/share/sources/org.socarxiv/icon.ico b/share/sources/org.socarxiv/icon.ico
deleted file mode 100644
index 069c36dc82620aba4eceb139c2a2c7788f8df25a..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.socarxiv/source.yaml b/share/sources/org.socarxiv/source.yaml
deleted file mode 100644
index ac4e873b3..000000000
--- a/share/sources/org.socarxiv/source.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-configs:
-- base_url: https://socopen.org/
-  disabled: true
-  earliest_date: null
-  harvester: null
-  label: org.socarxiv
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: org.socarxiv
-  transformer_kwargs: {}
-home_page: https://socopen.org/
-long_title: SocArXiv
-name: org.socarxiv
-user: providers.org.socarxiv
-canonical: true
diff --git a/share/sources/org.socialscienceregistry/icon.ico b/share/sources/org.socialscienceregistry/icon.ico
deleted file mode 100644
index 9e2e9fda67378cd7530b57d366b39f119082a7f3..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.socialscienceregistry/source.yaml b/share/sources/org.socialscienceregistry/source.yaml
deleted file mode 100644
index 5709f6c3b..000000000
--- a/share/sources/org.socialscienceregistry/source.yaml
+++ /dev/null
@@ -1,14 +0,0 @@
-configs:
-- base_url: https://www.socialscienceregistry.org
-  disabled: false
-  earliest_date: null
-  harvester: org.socialscienceregistry
-  label: org.socialscienceregistry
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: org.socialscienceregistry
-  transformer_kwargs: {}
-home_page: https://www.socialscienceregistry.org
-long_title: AEA RCT Registry
-name: org.socialscienceregistry
-user: providers.org.socialscienceregistry
diff --git a/share/sources/org.stepic/icon.ico b/share/sources/org.stepic/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.stepic/source.yaml b/share/sources/org.stepic/source.yaml
deleted file mode 100644
index 3e24e7739..000000000
--- a/share/sources/org.stepic/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: https://stepic.org/api/lessons
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: org.stepic
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://www.stepic.org
-long_title: Stepic.org Online Education Platform
-name: org.stepic
-user: providers.org.stepic
diff --git a/share/sources/org.swbiodiversity/icon.ico b/share/sources/org.swbiodiversity/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.swbiodiversity/source.yaml b/share/sources/org.swbiodiversity/source.yaml
deleted file mode 100644
index 857167e73..000000000
--- a/share/sources/org.swbiodiversity/source.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-configs:
-- base_url: http://swbiodiversity.org/seinet/
-  disabled: false
-  earliest_date: null
-  harvester: org.swbiodiversity
-  harvester_kwargs:
-    list_url: http://swbiodiversity.org/seinet/collections/misc/collprofiles.php
-  label: org.swbiodiversity
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: org.swbiodiversity
-  transformer_kwargs: {}
-home_page: http://swbiodiversity.org/seinet/
-long_title: SEINet - Arizona Chapter Collections
-name: org.swbiodiversity
-user: providers.org.swbiodiversity
diff --git a/share/sources/org.tdar/icon.ico b/share/sources/org.tdar/icon.ico
deleted file mode 100644
index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.tdar/source.yaml b/share/sources/org.tdar/source.yaml
deleted file mode 100644
index b1e11735f..000000000
--- a/share/sources/org.tdar/source.yaml
+++ /dev/null
@@ -1,33 +0,0 @@
-configs:
-- base_url: http://oai.tdar.org/oai-pmh/oai
-  disabled: false
-  earliest_date: 2008-01-01
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods}
-  label: org.tdar.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://core.tdar.org/oai-pmh/oai
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc}
-  label: org.tdar
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://www.tdar.org
-long_title: The Digital Archaeological Record
-name: org.tdar
-user: providers.org.tdar
diff --git a/share/sources/org.ttu/icon.ico b/share/sources/org.ttu/icon.ico
deleted file mode 100644
index d511a2c9c6f95360368121df70bd517ef5de0543..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.ttu/source.yaml b/share/sources/org.ttu/source.yaml
deleted file mode 100644
index 9c7ca99ce..000000000
--- a/share/sources/org.ttu/source.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-# DSpace
-configs:
-- base_url: https://ttu-ir.tdl.org/ttu-oai/request
-  disabled: false
-  earliest_date: null # earliestDatestamp is earliest published
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: mods, time_granularity: false}
-  label: org.ttu.mods
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: mods
-  transformer_kwargs:
-    approved_sets: [col_2346_521, col_2346_469]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-- base_url: http://ttu-ir.tdl.org/ttu-oai/request
-  disabled: true
-  earliest_date: null
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: org.ttu
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: [col_2346_521, col_2346_469]
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://ttu-ir.tdl.org/
-long_title: Texas Tech Univeristy Libraries
-name: org.ttu
-user: providers.org.ttu
diff --git a/share/sources/org.ucescholarship/icon.ico b/share/sources/org.ucescholarship/icon.ico
deleted file mode 100644
index 6251c2dd45322a156a679e75327d163db1eee4a7..0000000000000000000000000000000000000000
GIT binary patch
(binary data omitted)
diff --git a/share/sources/org.ucescholarship/source.yaml b/share/sources/org.ucescholarship/source.yaml
deleted file mode 100644
index ea6eeae5d..000000000
--- a/share/sources/org.ucescholarship/source.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-configs:
-- base_url: http://escholarship.org/uc/oai
-  disabled: false
-  earliest_date: 2002-02-06
-  harvester: oai
-  harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false}
-  label: org.ucescholarship
-  rate_limit_allowance: 1
-  rate_limit_period: 2
-  transformer: oai_dc
-  transformer_kwargs:
-    approved_sets: null
-    emitted_type: CreativeWork
-    property_list: []
-    type_map: {}
-home_page: http://www.escholarship.org/
-long_title: eScholarship @ University of California
-name: org.ucescholarship
-user: providers.org.ucescholarship
diff --git a/share/sources/org.zenodo/icon.ico b/share/sources/org.zenodo/icon.ico
deleted file mode 100644
index 2cbdedde12003a1f3560b3ca645e46289dcda44e..0000000000000000000000000000000000000000
GIT binary patch
ztlFksy-l}z`>~5RKfihm^vIO0=Qba|`v3FuA8(&b*><jZ&FNcr?}bi2_~^lXpT2#2 z&RqZh^?A|a<1=@jzkT=KqlXVR9lshf=|Jk7BY6vs8P)H2@Zf&hoTEC`+bx=Q{r~d3 zYr`4++8sL8+y4Lmuk=Ac3>dQPB|(0{3E%$#L86Do;mJTD#w2fdmo%L#H&uZg&H|6f zVj%4S#%?FG?SPCco-U3d5|@(`5>nimdUzb{ggcHNJb3cl!J|APjURSaR{Xqi^C#<! zo01MOdX9>Tf`NvGoSvF|6B?#$>CrLYJ!enTqDh-X0veoMLw&oNV`H6zy`$a5RTeDI zPvBV6)8p%N_KYLb8?k9&Z{D<BJ7%`6tjdA0n%!MpH(fm3yqw=(e}m*Vu6uU->i!*L z<ZE<n<9Q<!bWm|&;zPlSfk77y&%NQ1*;uIfky9ottWD-kr-Qjc!D|)OVn>@p2M;oe zcq9gTJZ-4%>zvzoapqLnPX2Fto4(&<2)-q~{`SL;1fc&^OI#yLQW8s2t&)pUffR$0 zfuX6cfuXLEQHY_Xm5GIwp{2Hgft7(ls0KqAiiX_$l+3hB+!|C$cf0~>VDNPHb6Mw< G&;$V6oNzz@ diff --git a/share/sources/org.zenodo/source.yaml b/share/sources/org.zenodo/source.yaml deleted file mode 100644 index 003bf3afe..000000000 --- a/share/sources/org.zenodo/source.yaml +++ /dev/null @@ -1,37 +0,0 @@ -configs: -- base_url: https://zenodo.org/oai2d - disabled: false - earliest_date: 2013-05-06T23:27:15Z - harvester: oai - harvester_kwargs: - metadata_prefix: oai_datacite3 - set_spec: openaire_data - label: org.zenodo.datacite - rate_limit_allowance: 1 - rate_limit_period: 3 - transformer: org.datacite - transformer_kwargs: - namespaces: - 'http://purl.org/dc/elements/1.1/': dc - 'http://datacite.org/schema/kernel-3': null - 'http://www.openarchives.org/OAI/2.0/': null - 'http://schema.datacite.org/oai/oai-1.0/': null - 'http://www.openarchives.org/OAI/2.0/oai_dc/': null -- base_url: https://zenodo.org/oai2d - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: org.zenodo - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: https://zenodo.org/oai2d -long_title: Zenodo -name: org.zenodo -user: providers.org.zenodo diff --git a/share/sources/pe.upc/icon.ico b/share/sources/pe.upc/icon.ico deleted file mode 100644 index fa672419797774e8cf5c7a153624e92734805633..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 318 zcmbu2F^U2~5Jg|Z;HCqDVAg?wV`jLSn+Rs2CT5x}wC2KQPB@0HGj?6XW4M>_7-Cs% zZnVGnkD{p02WOM#xiO>fpab;Hx!If3{k@dIKv~R{?M69NN?j|Li*o%_Za3wwkGFA= zIO|nn`hvDCWqSP(N##(TaO0k*D4EExp*77!d{{Z(m4?qFekm)8Gov7|jBXzqOU3Ls dscFf%8;gb@zd^pE6WOJrb%Lq>F}zy-fhU+SL3aQE diff --git a/share/sources/pe.upc/source.yaml b/share/sources/pe.upc/source.yaml deleted file mode 100644 index bef04a11e..000000000 --- a/share/sources/pe.upc/source.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# DSpace -configs: -- base_url: http://repositorioacademico.upc.edu.pe/upc/oai/request - disabled: false - earliest_date: 2013-03-01T15:51:34Z - harvester: oai - harvester_kwargs: {metadata_prefix: mods} - label: pe.upc.mods - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: mods - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -- base_url: http://repositorioacademico.upc.edu.pe/upc/oai/request - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: pe.upc - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://repositorioacademico.upc.edu.pe -long_title: Universidad Peruana de Ciencias Aplicadas (UPC) -name: pe.upc -user: providers.pe.upc diff --git a/share/sources/pt.rcaap/icon.ico b/share/sources/pt.rcaap/icon.ico deleted file mode 
100644 index e179f5af717bbec7efd21b87343ae998a87269d6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 721 zcmV;?0xtcDP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00N0gL_t(2k!_MoOcPNQhVPxZ({@_Rw6wM5(ee;tV?Yuyktl80C@}<i7#70X zNEAs}V4`uOh{la^!;WAACdxyNiUz}L<qHK#2w)Rjv`}avt*vD`(9YcB0<0Q1IjfWN z|2gN+hXDZXR{*dU2>ShN93eTm4ug<}F-8dCy#fG8%rAa@)Vt|hMuOnWwvWtvEEdzX z_SXEooc{y>AWck$Z{MY&$h%LGY`YQ)#Ye|jx9b5Va|@oq{vHH>N|B^sYn#8mA^!0b zRg~16CK4MrZZ37)D?54S+4I-TjyDy>^^47%le0bT;mb}S0C{y)UR?u9F~^xYZV6&6 zKR-7Z++YwwpeW%xo!SO#T2nJ7y7<`ma+%BPEcQB!)-PQK0L(D@rVG4AV=<c$0K{It ziVnALuS^2KuYtiOgJnr?+HF&Y5|0BwGMPMGdEU3|CkP=n2VgUy%Bql8{2MK8NTI11 zi1i>=sSrX_UuH^5ieefg0{~mzhuY#+gpl}l1SIL-(2UpHG0P^B$==@nqMAk>VHhRy zT@eGDY6n7yD=a`5@2;y<=KG!crbYnh>gsl%tDP0q4nala^=Xz*{}zl_IMVnEcjoX5 zadezDn-0{~aCSSTwD0i%aX7of<|nn87-Is;7L5m{e9=OqCQGm4UFE#1JayIknfVV| z=jWg7P+gu?$039WYji5&O099OY9P{5YOTx{1T~2zl-Ypz=0{}6ldLW<OjJ42d9q`H zDOIBH>Xp#kLUG@fDg8o7A`DJaFsMoOA>rzA+cBFKrykpZ4`fN4?%4A5v9@xrx-=}S z4I15%qh~exjJ*{9g%4y&jx54<l+jvQ;V@>{eX;)y*%3p>(r*HG00000NkvXXu0mjf D3Cu^P diff --git a/share/sources/pt.rcaap/source.yaml b/share/sources/pt.rcaap/source.yaml deleted file mode 100644 index d318cc53b..000000000 --- a/share/sources/pt.rcaap/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: https://www.rcaap.pt/oai - disabled: false - earliest_date: null # earliestDatestamp is earliest published - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc, time_granularity: false} - label: pt.rcaap - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: [portugal] - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://www.rcaap.pt -long_title: "RCAAP - Reposit\xF3rio Cient\xEDfico de Acesso Aberto de Portugal" -name: pt.rcaap -user: providers.pt.rcaap diff --git a/share/sources/ru.cyberleninka/icon.ico b/share/sources/ru.cyberleninka/icon.ico deleted file mode 100644 index 71d37e99f9a3544c46f397c1685283b5d478f24e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 664 zcmV;J0%!e+P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0006{Nkl<ZIE|f>T}aab6vcnDxztKCoqtoa{J_d0j5HDvKZq!b2m%o*p@({i z=pi%$Awo>Z9-@{;6n#9{OW4dZODjbuVh%-ZsM9%hpic24UDKR;d+^0*3U=P^z2_b- z=Ui9=GBt8trYE1R#aR>1iqd|=g-o%FYyHc=#c}>CiZp_VW90t21swICtrZ|;Aa&O+ zxrSZhmgnKFDGQE_$0sv(c^2Kq|BViFcZWK7tS`?US0#4KQ6{GM<K=%4*IcAQ|1L(e z+cPNP)ERJTa88Zkqt!Cf(@K9uyE*a1aVvfScITAM<f9F>sbqXu_aI>V)o5TG2+PSL zy--JEWf`$?@!0yTIGxi(hb!2I#xSR6;tjL{r(Zt#w0!cE{mw&>Vvu1=wRAUsk+H!a zQeUl?$)R>>H`Iu}_?FDhn&d8?q5}~_)e8VMfNHCfD~?f3-sYfeUnPX5Z>8*rO`y+M zN4K#N59ZMNda+Tng+W|A`u^dS>hQ6P0II*t8|W+?`DM#lToO7=y{I<@5~65nX{aS7 zEfFOw6wT<&Q`O$2%kEPM-*%(h*g|mB9*!K^hq<`{Yp;b6zYPTLjH0uv2Yl2ytH=0r zBK2O@O^i(jYTGO*ekyDo^-L}-P*Yn)QPNo!Y%^C@uBcZQv&NpZ6re2i9Mj4197vAC zM+u{~vI+0ERU`rG$e0*+|FG=A&|pb6NN$iKVrqLY&KWO}C1kLxJU=2r@<F7<P`<jr yK-WiW$@AB;IQP$NUgi-?m%h26pnX+=|G^)d91F9WI@Z<z0000<MNUMnLSTYg3Nw}f diff --git a/share/sources/ru.cyberleninka/source.yaml b/share/sources/ru.cyberleninka/source.yaml deleted file mode 100644 index 7cd5624bd..000000000 --- a/share/sources/ru.cyberleninka/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://cyberleninka.ru/oai - disabled: false - earliest_date: 2014-05-20T09:19:31Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: ru.cyberleninka - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - 
emitted_type: CreativeWork - property_list: [isPartOf, issue, issn, pages, bibliographicCitation, uri] - type_map: {} -home_page: http://cyberleninka.ru/ -long_title: CyberLeninka - Russian open access scientific library -name: ru.cyberleninka -user: providers.ru.cyberleninka diff --git a/share/sources/tr.edu.hacettepe/icon.ico b/share/sources/tr.edu.hacettepe/icon.ico deleted file mode 100644 index 1e660b740906b95894663a8785846d0c410402bc..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 726 zcmV;{0xA88P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00009a7bBm000XT z000XT0n*)m`~Uy~h)G02R5%f(Q_W8kQ4pWn(r!z;P_|TC=|X`bL@<#QQWH54gCQL1 zSq>@(4yGRbW8k2Tc*C=3xZqXT^aB%>1XK{VTBTWNyDi=BLalE`YtY1tPV)B6zWI2+ zH}e})y<Ug^5eo*JOeTolT(FnQhUdzNIAR>vV)=p=E~-^0+IbgOcDLG7R;zhlO3F!> z)2iIRJEo0~^@0fz+?alB*Y%peFVsF;F6+_Wpx_m|+XR*ftW9PJiUO>xY#!{&`5f=E zExr4o#BDZn_UYn`MhNic!GUwLmCEt-;3Z!m*eO7>!wSPg9dJ}JpwrJox9AbxY-WYU zCuwJ>#{(Oi`|}K3<_toNFxYCj8lAa4O2K%Bw@d4LsFHXE2Q)D>cW#Xa&=+ISKnV<^ z7V8SQy^+h}4{!E8W(jb)TO4$Dq@6&asIh-Li09p2T|bGTff8WBCMNm?bUH29uJ!@n zOs|Ayy9F-vcwt9AgvrSP;V`d-(U+`6Rdb&8@Z^kd@9?$KY6PRBnO(K7lu!lqRap$` zga8!`M2QS{!KW_;R)gQskdb|`4k!)w5CZJ;j;!#$c9BqAXZ?MdmSKxr0XqP`b_a|{ zf}j+tu(h2dZlIYEw7DIr7p%`e{b?KO_d`x8(~{PAA@Mp(=bly$o;`nifH8PL2^hoX zJ~EQvt$&lx>{+#MK*+LR)f{KD%>7Eh=$S%>#TV8xjU-q=M9~n`rB^aMh_7Z^T-=hB zk_iFT0J)9y4E1{8;iJ{)^74jvYU-kZJjfR-z<2_wgmjR}7IG~-xAcBniDL{Vr&%~X zOp&k{){T>U*k`p=b>ggGk%cx#(#|!_ttIx-&Oe8#0rOw>?^q!lUgi3;(*OVf07*qo IM6N<$f|IpWF#rGn diff --git a/share/sources/tr.edu.hacettepe/source.yaml b/share/sources/tr.edu.hacettepe/source.yaml deleted file mode 100644 index 57244f438..000000000 --- a/share/sources/tr.edu.hacettepe/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://bbytezarsivi.hacettepe.edu.tr/oai/request - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: tr.edu.hacettepe - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://bbytezarsivi.hacettepe.edu.tr -long_title: Hacettepe University DSpace on LibLiveCD -name: tr.edu.hacettepe -user: providers.tr.edu.hacettepe diff --git a/share/sources/uk.cambridge/icon.ico b/share/sources/uk.cambridge/icon.ico deleted file mode 100644 index c2f4d90f3d3dea455aedf676b2ef81fd94dba05c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 714 zcmV;*0yX`KP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS0007kNkl<ZIE`(RO-NKx7>1v7z4y-8XwInPXbzd8j!{%tDHH{PQC1MHgP=B9 zP(cK>sE8IVY!Mhl(a*|iX`5(O8U{vD6r@CCY5rmUj51oz{h6=D9cRjV7YELH-}9dD zd%vRq&bRE9Z2=uiDMSF<CTC+klaUxoK{zpv)7y>G4Nzc+Fo@3Sj>k#|*z8wq?;1h{ zO#o!hW;9k!%hAIC^xe2ZS<e_MXn6drO<`jg03AT-Ix1)qx;YHMOly?fK!7D^hKVXR z3AKL*6!DX*5z{l%_zTKgMd9}unbU%m`H))L$zL~g0I_YDvxpy$A|jBmn4F3eO^e1S zCaIjUm}!lo07)bvX=M%cnmWw&wJ4v@6Sywq2I;wfog7UgU>f*O?FLQrc*ntXHRycq zy>R=^Jz0*<wsRscN+i`?BE6T6{KI!VcqEw&T-Rke+?0z5D5cWLEaP1#+d|oB9}8Dc zqYMM3Da3Jz<rnd0>@qnb1j=UldF>3Sbmopr%+<06zM-#J&j%sj@FtP4Fk72gf8qiF z19#g<wm(DibrcE`HpzzK>=+BmS3#~$=++3KTf<Ob;`~wg!lDIXlforFPiMZkC~FX3 zQ%5{6{fz-!p`@@NV-PJ3gYA(bu%J@N+6~zjYIYr<|4TnBj-~MD`M_`)9Q#3W+TioI zVPa+uvu7)@rx*FKZyk{x`+<v|L9Dh$y?ysyJ_H|9(e;wZr+F$$3NsZ{mPe3@pG;K; z7--%}sQD0Ex74dl1pu4tH>s)V8F^j(l#!k%RQddb)7&rJ<m170B)9FMzG**YrIAcP w{~IwQqhs>sRWC_5t#B+F8XKvqs`^L&8>bHfkII|&-T(jq07*qoM6N<$g2m=PLjV8( diff --git a/share/sources/uk.cambridge/source.yaml b/share/sources/uk.cambridge/source.yaml 
deleted file mode 100644 index 96d292cfd..000000000 --- a/share/sources/uk.cambridge/source.yaml +++ /dev/null @@ -1,38 +0,0 @@ -# DSpace -configs: -- base_url: https://www.repository.cam.ac.uk/oai/request - disabled: false - earliest_date: 2003-12-17T12:10:39Z - harvester: oai - harvester_kwargs: {metadata_prefix: mods} - label: uk.cambridge.mods - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: mods - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: - Journal Article: article - Book chapter: book - Book or Book Chapter: book - Conference Object: conferencepaper -- base_url: https://www.repository.cam.ac.uk/oai/request - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: uk.cambridge - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: https://www.repository.cam.ac.uk -long_title: Apollo @ University of Cambridge -name: uk.cambridge -user: providers.uk.cambridge diff --git a/share/sources/uk.lshtm/icon.ico b/share/sources/uk.lshtm/icon.ico deleted file mode 100644 index 9d86640e14b2626e19b5834f2f2ef7f520c77803..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 926 zcmV;P17ZA$P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00004b3#c}2nYxW zd<bNS000A3Nkl<ZIE`&nK}aJ96n!(UO=?5Tq)kj^?HEb7%|M8&2NNSGDWqU<7txT$ zicr?Aifbi3?4@c!5eqJDx}HjLD@8plTMr|+o2CSFP{@!bsfC~-jfv9`&5pDrMhUY{ z53SN&{Fna@?>+v5_X%7v%gf6h<>loMa=9Euk|evgw`Zqm`u51k$nURm$*T+!iNtsL zeEtT*FdB-Y?)3EZ=m~;YyNdggL2qww7l5Khqj~%6><n95TOdgiEXzU^#p?i0l}e@i z{~F=~^!N9VEG#TM6h#pX!$7OmqO!6Q7K;UbzaQCb7JYqvkjZ3_%jKUB4-bEHc>&M! 
z=B1^jhoUH6M8Nv`I&?Z64h{}bDwUvCt5GVI(Ae0B)z#In!{M;`(x9WGV^OVE|JB^w z9BgW83IhO5)5z!ZK>$zF>Gae3`ubnT$H$S&<&aLNF*rE*2mnY)lAus140CgHcQ!UQ zK5;soPg+`9{z#=#5ClP$$z+~uG@92aih9rI^Wo&=1c^ifu~@7@k|bO+8jW{rYis3d zwfc?e>1o@<#KeQCsj0WDR%_MH&W?)bd2K8f`=GkII+e|4laWZoB$LVD@p#bI)^?ZN z-rm-!R4PQH(Hnlh|5KOCbt@W;a>vKVKd>zO*y(iMlO%~K6bjG0Ua#Qu`JhlJ(9qC; zP$;B>*=!!JsHl*XN@Wp%BuNtRFL-G?ozC(3`T1YY&(EdN(NU?ry<O7l_4lsD<MB0u zAOPTXo6Uw`F!-h0?LMMu`gUt;YpScOO99~Hva+(aKp;TUG##$0s``p$*++pu;2B8} zMABd|@BrZTdckqr{msqIo9T4=!%!$>ER{;PB9X{v9LK#|Q&WS(!^5URp>Tg@X67S< z!LUaHKxb#?7XT28#c+ChT0v3NbpRL|8hY+<I22=JW8d0rHdYV><Kp5Xc6WD;ZnyjA z%O1?m&i-h(+aGfr_hNE#vZ&Q+^8o&|+wJcZi$w^6a2AipZ|?8!|5jI5x6SkX#DB3- zc2VVIGWo<}v78?r9ksYzF1^WQ66JEaJd?@ftXAvaCX?ySwY9ZchGE85R#v{h>ahR- z0|NsLfE9<skue&LFS@(CMF2d2pFAGV`>*o<0hY09!v^PqxBvhE07*qoM6N<$f^)CS A5dZ)H diff --git a/share/sources/uk.lshtm/source.yaml b/share/sources/uk.lshtm/source.yaml deleted file mode 100644 index 48ca5c553..000000000 --- a/share/sources/uk.lshtm/source.yaml +++ /dev/null @@ -1,19 +0,0 @@ -configs: -- base_url: http://researchonline.lshtm.ac.uk/cgi/oai2 - disabled: true - earliest_date: 2015-02-24T12:35:10Z - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: uk.lshtm - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://researchonline.lshtm.ac.uk -long_title: London School of Hygiene and Tropical Medicine Research Online -name: uk.lshtm -user: providers.uk.lshtm diff --git a/share/sources/za.csir/icon.ico b/share/sources/za.csir/icon.ico deleted file mode 100644 index 0844c97c9152561b5bd3de6de52d943d4db4895f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 453 zcmV;$0XqJPP)<h;3K|Lk000e1NJLTq000mG000mO0ssI2kdbIM00003b3#c}2nYz< z;ZNWI00DhTL_t(2k(H9YN&`U@g+G@xx@Kl)3_{SL#zt)<BwIuRieRne1q8`U1QaVP z@eg0X2MB7T2p0ZCvCu*-1dFV3QPN02pTJ_)%^F4UR(H4`bI&<59r^rpS9dSa)6*v$ z320?Ij?hv7aVY>mDaSO{zi6dw%h2Ymm2H~l`v+eYrGc2a`>FO&y{x=78i{0bY$Ol7 z_;<i;u{1R~5z;`+D9PsBY|h<SEf4k&FD%TP+1>ra#%j5GT3A|M36@A<X=NMTjw6Va zQqI)m#C&P-=K5l1r($Tp0z#KRbr<JmfW69gII==o|CWhTcFrvTp7#nUEv>c$P0CT1 zt+iSPlH%X>L}?{Ov)KUB=^%P5q$AJNqP=><tJQLDAw84=WW3XA)7@Gk1+ccZl}IMn zH#ZFp#0>4%{hQlI;QVBFZ{JN1^n1@Q)zgF2K<4E5OlfIopyTl93h?WG^gC!N;!<QY vqsC}R2OEI#%xF|em`NKQq!e}eHw}COb!=POgYK3u00000NkvXXu0mjfdeX@p diff --git a/share/sources/za.csir/source.yaml b/share/sources/za.csir/source.yaml deleted file mode 100644 index 6862d800f..000000000 --- a/share/sources/za.csir/source.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# DSpace -configs: -- base_url: https://researchspace.csir.co.za/dspace/oai/request - disabled: false - earliest_date: 2006-10-12 09:49:41 - harvester: oai - harvester_kwargs: {metadata_prefix: mods} - label: za.csir.mods - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: mods - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -- base_url: http://researchspace.csir.co.za/oai/request - disabled: true - earliest_date: null - harvester: oai - harvester_kwargs: {metadata_prefix: oai_dc} - label: za.csir - rate_limit_allowance: 1 - rate_limit_period: 2 - transformer: oai_dc - transformer_kwargs: - approved_sets: null - emitted_type: CreativeWork - property_list: [] - type_map: {} -home_page: http://researchspace.csir.co.za/dspace -long_title: CSIR Researchspace -name: za.csir -user: providers.za.csir diff --git a/share/tasks/__init__.py b/share/tasks/__init__.py 
deleted file mode 100644 index 37da10801..000000000 --- a/share/tasks/__init__.py +++ /dev/null @@ -1,124 +0,0 @@ -import logging - -import celery - -from django.conf import settings -from django.db import models -from django.db import transaction - -from share.harvest.scheduler import HarvestScheduler -from share import models as db -from share.search.index_messenger import IndexMessenger -from share.search import index_strategy -from share.search.messages import MessageType -from share.tasks.jobs import HarvestJobConsumer -from share.util.source_stat import SourceStatus -from share.util.source_stat import OAISourceStatus -from trove import models as trove_db - - -logger = logging.getLogger(__name__) - - -@celery.shared_task(bind=True) -def schedule_harvests(self, *source_config_ids, cutoff=None): - """ - - Args: - *source_config_ids (int): PKs of the source configs to schedule harvests for. - If omitted, all non-disabled and non-deleted source configs will be scheduled - cutoff (optional, datetime): The time to schedule harvests up to. Defaults to today. - - """ - if source_config_ids: - qs = db.SourceConfig.objects.filter(id__in=source_config_ids) - else: - qs = db.SourceConfig.objects.exclude(disabled=True).exclude(source__is_deleted=True) - - with transaction.atomic(): - jobs = [] - - # TODO take harvest/sourceconfig version into account here - for source_config in qs.exclude(harvester_key__isnull=True).annotate(latest=models.Max('harvest_jobs__end_date')): - jobs.extend(HarvestScheduler(source_config).all(cutoff=cutoff, save=False)) - - db.HarvestJob.objects.bulk_get_or_create(jobs) - - -@celery.shared_task(bind=True, max_retries=5) -def harvest(self, **kwargs): - """Complete the harvest of the given HarvestJob or the next available HarvestJob. - - Keyword arguments from JobConsumer.consume, plus: - ingest (bool, optional): Whether or not to start the full ingest process for harvested data. Defaults to True. - limit (int, optional): Maximum number of data to harvest. Defaults to no limit. 
- """ - HarvestJobConsumer(task=self).consume(**kwargs) - - -@celery.shared_task(bind=True) -def schedule_index_backfill(self, index_backfill_pk): - _index_backfill = db.IndexBackfill.objects.get(pk=index_backfill_pk) - _index_backfill.pls_note_scheduling_has_begun() - try: - _index_strategy = index_strategy.get_strategy(_index_backfill.index_strategy_name) - _messenger = IndexMessenger(celery_app=self.app, index_strategys=[_index_strategy]) - _messagetype = _index_strategy.backfill_message_type - assert _messagetype in _index_strategy.supported_message_types - if _messagetype == MessageType.BACKFILL_INDEXCARD: - _targetid_queryset = ( - trove_db.Indexcard.objects - .exclude(source_record_suid__source_config__disabled=True) - .exclude(source_record_suid__source_config__source__is_deleted=True) - .values_list('id', flat=True) - ) - elif _messagetype == MessageType.BACKFILL_SUID: - _targetid_queryset = ( - db.SourceUniqueIdentifier.objects - .exclude(source_config__disabled=True) - .exclude(source_config__source__is_deleted=True) - .values_list('id', flat=True) - ) - else: - raise ValueError(f'unknown backfill messagetype {_messagetype}') - _chunk_size = settings.ELASTICSEARCH['CHUNK_SIZE'] - _messenger.stream_message_chunks( - _messagetype, - _targetid_queryset.iterator(chunk_size=_chunk_size), - chunk_size=_chunk_size, - urgent=False, - ) - except Exception as error: - _index_backfill.pls_mark_error(error) - raise error - else: - _index_backfill.pls_note_scheduling_has_finished() - - -@celery.shared_task(bind=True) -def source_stats(self): - oai_sourceconfigs = db.SourceConfig.objects.filter( - disabled=False, - base_url__isnull=False, - harvester_key='oai' - ) - for config in oai_sourceconfigs.values(): - get_source_stats.apply_async((config['id'],)) - - non_oai_sourceconfigs = db.SourceConfig.objects.filter( - disabled=False, - base_url__isnull=False - ).exclude( - harvester_key='oai' - ) - for config in non_oai_sourceconfigs.values(): - get_source_stats.apply_async((config['id'],)) - - -@celery.shared_task(bind=True) -def get_source_stats(self, config_id): - source_config = db.SourceConfig.objects.get(pk=config_id) - if source_config.harvester_key == 'oai': - OAISourceStatus(config_id).get_source_stats() - else: - SourceStatus(config_id).get_source_stats() diff --git a/share/tasks/jobs.py b/share/tasks/jobs.py deleted file mode 100644 index c2960b4b3..000000000 --- a/share/tasks/jobs.py +++ /dev/null @@ -1,219 +0,0 @@ -import logging -import random - -from django.conf import settings -from django.db import transaction, IntegrityError -from django.utils import timezone - -from share.harvest.exceptions import HarvesterConcurrencyError -from share.models import HarvestJob -from share.models.ingest import RawDatumJob -from share.util import chunked -from trove import digestive_tract - - -logger = logging.getLogger(__name__) - - -class JobConsumer: - Job = None - lock_field = None - - def __init__(self, task=None): - if self.Job is None or self.lock_field is None: - raise NotImplementedError - self.task = task - - def _consume_job(self, job, **kwargs): - raise NotImplementedError - - def _current_versions(self, job): - """Get up-to-date values for the job's `*_version` fields - - Dict from field name to version number - """ - raise NotImplementedError - - def consume(self, job_id=None, exhaust=True, superfluous=False, **kwargs): - """Consume the given job, or consume an available job if no job is specified. - - Parameters: - job_id (int, optional): Consume the given job. Defaults to None. 
- If the given job cannot be locked, the task will retry indefinitely. - If the given job belongs to a disabled or deleted Source or SourceConfig, the task will fail. - exhaust (bool, optional): If True and there are queued jobs, start another task. Defaults to True. - Used to prevent a backlog. If we have a valid job, spin off another task to eat through - the rest of the queue. - superfluous (bool, optional): Consuming a job should be idempotent, and subsequent runs may - skip doing work that has already been done. If superfluous=True, however, it will do all - work whether or not it's already been done. Default False. - Additional keyword arguments passed to _consume_job, along with superfluous - """ - with self._locked_job(job_id) as job: - if job is None: - if job_id is None: - logger.info('No %ss are currently available', self.Job.__name__) - return - else: - message = f'Could not find/lock {self.Job.__name__}(id={job_id})' - logger.error(message) - raise self.Job.DoesNotExist(message) - - assert self.task or not exhaust, 'Cannot pass exhaust=True unless running in an async context' - if exhaust and job_id is None: - logger.debug('Spawning another task to consume %s', self.Job.__name__) - res = self.task.apply_async(self.task.request.args, self.task.request.kwargs) - logger.info('Spawned %r', res) - - if self._prepare_job(job, superfluous=superfluous): - logger.info('Consuming %r', job) - with job.handle(): - self._consume_job(job, **kwargs, superfluous=superfluous) - - def _prepare_job(self, job, superfluous): - if job.status == self.Job.STATUS.skipped: - # Need some way to short-circuit a superfluous retry loop - logger.warning('%r has been marked skipped. Change its status to allow re-running it', job) - return False - - if self.task and self.task.request.id: - # Additional attributes for the celery backend - # Allows for better analytics of currently running tasks - self.task.update_state(meta={'job_id': job.id}) - - job.task_id = self.task.request.id - job.save(update_fields=('task_id',)) - - if job.completions > 0 and job.status == self.Job.STATUS.succeeded: - if not superfluous: - job.skip(job.SkipReasons.duplicated) - logger.warning('%r has already been consumed. Force a re-run with superfluous=True', job) - return False - logger.info('%r has already been consumed. Re-running superfluously', job) - - if not self._update_versions(job): - job.skip(job.SkipReasons.obsolete) - return False - - return True - - def _filter_ready(self, qs): - return qs.filter( - status__in=self.Job.READY_STATUSES, - ).exclude( - claimed=True - ) - - def _locked_job(self, job_id): - qs = self.Job.objects.all() - if job_id is not None: - logger.debug('Loading %s %d', self.Job.__name__, job_id) - qs = qs.filter(id=job_id) - else: - logger.debug('job_id was not specified, searching for an available job.') - qs = self._filter_ready(qs).unlocked(self.lock_field) - - return qs.lock_first(self.lock_field) - - def _update_versions(self, job): - """Update version fields to the values from self.current_versions - - Return True if successful, else False. 
- """ - current_versions = self._current_versions(job) - if all(getattr(job, f) == v for f, v in current_versions.items()): - # No updates required - return True - - if job.completions > 0: - logger.warning('%r is outdated but has previously completed, skipping...', job) - return False - - try: - with transaction.atomic(): - for f, v in current_versions.items(): - setattr(job, f, v) - job.save() - logger.warning('%r has been updated to the versions: %s', job, current_versions) - return True - except IntegrityError: - logger.warning('A newer version of %r already exists, skipping...', job) - return False - - def _maybe_skip_by_source_config(self, job, source_config) -> bool: - if source_config.disabled or source_config.source.is_deleted: - job.skip(job.SkipReasons.disabled) - return True - return False - - -class HarvestJobConsumer(JobConsumer): - Job = HarvestJob - lock_field = 'source_config' - - def _filter_ready(self, qs): - qs = super()._filter_ready(qs) - return qs.filter( - end_date__lte=timezone.now().date(), - source_config__harvest_after__lte=timezone.now().time(), - ) - - def _current_versions(self, job): - return { - 'share_version': settings.VERSION, - 'source_config_version': job.source_config.version, - 'harvester_version': getattr(job.source_config.get_harvester_class(), 'VERSION', 1), - } - - def _consume_job(self, job, superfluous, limit=None, ingest=True): - if self._maybe_skip_by_source_config(job, job.source_config): - return - try: - if ingest: - datum_gen = (datum for datum in self._harvest(job, limit) if datum.created or superfluous) - for chunk in chunked(datum_gen, 500): - self._bulk_schedule_ingest(job, chunk) - else: - for _ in self._harvest(job, limit): - pass - except HarvesterConcurrencyError as e: - if not self.task: - raise - # If job_id was specified there's a chance that the advisory lock was not, in fact, acquired. - # If so, retry indefinitely to preserve existing functionality. 
- # Use random to add jitter to help break up locking issues - # Kinda hacky, allow a stupidly large number of retries as there are no options for infinite - raise self.task.retry( - exc=e, - max_retries=99999, - countdown=(random.random() + 1) * min(settings.CELERY_RETRY_BACKOFF_BASE ** self.task.request.retries, 60 * 15) - ) - - def _harvest(self, job, limit): - error = None - datum_ids = [] - logger.info('Harvesting %r', job) - harvester = job.source_config.get_harvester() - - try: - for datum in harvester.harvest_date_range(job.start_date, job.end_date, limit=limit): - datum_ids.append(datum.id) - yield datum - except Exception as e: - error = e - raise error - finally: - try: - RawDatumJob.objects.bulk_create([ - RawDatumJob(job=job, datum_id=datum_id) - for datum_id in datum_ids - ]) - except Exception as e: - logger.exception('Failed to connect %r to raw data', job) - # Avoid shadowing the original error - if not error: - raise e - - def _bulk_schedule_ingest(self, job, datums): - for _raw_datum in datums: - digestive_tract.task__extract_and_derive.delay(raw_id=_raw_datum.id) diff --git a/share/transform/__init__.py b/share/transform/__init__.py deleted file mode 100644 index 37978a563..000000000 --- a/share/transform/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from share.transform.base import BaseTransformer # noqa diff --git a/share/transform/base.py b/share/transform/base.py deleted file mode 100644 index b8bf94888..000000000 --- a/share/transform/base.py +++ /dev/null @@ -1,63 +0,0 @@ -import abc -import uuid - -from share.util.graph import MutableGraph - - -class BaseTransformer(metaclass=abc.ABCMeta): - - def __init__(self, source_config): - self.config = source_config - - @abc.abstractmethod - def do_transform(self, datum, **kwargs): - raise NotImplementedError('Transformers must implement do_transform') - - def transform(self, datum, **kwargs): - """Transform a RawDatum - - Args: - datum: RawDatum to transform - **kwargs: Forwarded to do_transform. Overrides values in the source config's transformer_kwargs - - Returns a MutableGraph - """ - - source_id = None - if not isinstance(datum, (str, bytes)): - source_id = datum.suid.identifier - datum = datum.datum - if isinstance(datum, bytes): - datum = datum.decode() - jsonld, root_ref = self.do_transform(datum, **self._get_kwargs(**kwargs)) - - if not jsonld: - return None - - if source_id and jsonld and root_ref: - self.add_source_identifier(source_id, jsonld, root_ref) - - # TODO return a MutableGraph from do_transform, maybe build it directly in Parser? 
- return MutableGraph.from_jsonld(jsonld) - - def add_source_identifier(self, source_id, jsonld, root_ref): - from share.transform.chain.links import IRILink - uri = IRILink(urn_fallback=True).execute(str(source_id))['IRI'] - if any(n['@type'].lower() == 'workidentifier' and n['uri'] == uri for n in jsonld['@graph']): - return - - identifier_ref = { - '@id': '_:' + uuid.uuid4().hex, - '@type': 'workidentifier' - } - identifier = { - 'uri': uri, - 'creative_work': root_ref, - **identifier_ref - } - root_node = next(n for n in jsonld['@graph'] if n['@id'] == root_ref['@id'] and n['@type'] == root_ref['@type']) - root_node.setdefault('identifiers', []).append(identifier_ref) - jsonld['@graph'].append(identifier) - - def _get_kwargs(self, **kwargs): - return {**(self.config.transformer_kwargs or {}), **kwargs} diff --git a/share/transform/chain/__init__.py b/share/transform/chain/__init__.py deleted file mode 100644 index 770acce67..000000000 --- a/share/transform/chain/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from share.transform.chain.exceptions import * # noqa -from share.transform.chain.links import * # noqa -from share.transform.chain.parsers import * # noqa -from share.transform.chain.transformer import ChainTransformer # noqa -from share.transform.chain.links import Context - - -# Context singleton to be used for parser definitions -# Class SHOULD be thread safe -# Accessing subattributes will result in a new copy of the context -# to avoid leaking data between chains -ctx = Context() diff --git a/share/transform/chain/exceptions.py b/share/transform/chain/exceptions.py deleted file mode 100644 index 975ee8261..000000000 --- a/share/transform/chain/exceptions.py +++ /dev/null @@ -1,32 +0,0 @@ -from share.exceptions import TransformError - - -class ChainError(TransformError): - def __init__(self, *args, **kwargs): - self._chainStack = [] - super().__init__(self._chainStack, *args, **kwargs) - - def push(self, description): - self._chainStack.append(description) - - -class InvalidDate(ChainError): - pass - - -class NoneOf(ChainError): - """All of a OneOfLink's chains failed - """ - pass - - -class InvalidIRI(ChainError): - pass - - -class InvalidPath(ChainError): - pass - - -class InvalidText(ChainError): - pass diff --git a/share/transform/chain/links.py b/share/transform/chain/links.py deleted file mode 100644 index 040f8d4f1..000000000 --- a/share/transform/chain/links.py +++ /dev/null @@ -1,1102 +0,0 @@ -from collections import deque -from functools import reduce -import json -import logging -import re -import threading -import dateutil -import urllib -import types - -import xmltodict -import pendulum -from lxml import etree - -from django.conf import settings - -from share.util import iris, DictHashingDict -from share.util.nameparser import HumanName -from share.transform.chain.exceptions import ( - TransformError, - InvalidPath, - InvalidDate, - NoneOf, - InvalidIRI, - ChainError, -) -from share.transform.chain.utils import force_text - - -logger = logging.getLogger(__name__) - - -__all__ = ('ParseDate', 'ParseName', 'ParseLanguage', 'Trim', 'Concat', 'Map', 'Delegate', 'Maybe', 'XPath', 'Join', 'RunPython', 'Static', 'Try', 'Subjects', 'OneOf', 'Orcid', 'DOI', 'IRI', 'GuessAgentType', 'Filter', 'Unique', 'Int') - - -#### Public API #### - -def ParseDate(chain): - return chain + DateParserLink() - - -def ParseName(chain): - return chain + NameParserLink() - - -def ParseLanguage(chain): - return chain + LanguageParserLink() - - -def Trim(chain): - return chain + TrimLink() - - 
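(A hedged illustration, not part of the patch: helpers like ParseDate and Trim above, and Concat, Map, and Delegate just below, composed into declarative parser definitions. A minimal sketch, where the parser name and field paths are invented for illustration.)

    from share.transform.chain import ctx, ParseDate, Trim
    from share.transform.chain.parsers import Parser

    class CreativeWork(Parser):
        # each attribute is a chain of links, run against the unwrapped record
        title = Trim(ctx.record.metadata['dc:title'])
        date_updated = ParseDate(ctx.record.header.datestamp)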
-def Concat(*chains, deep=False): - return ConcatLink(*chains, deep=deep) - - -def XPath(chain, path): - return chain + XPathLink(path) - - -def Join(chain, joiner='\n'): - return chain + JoinLink(joiner=joiner) - - -def Maybe(chain, segment, default=None): - return chain + MaybeLink(segment, default=default) - - -def Try(chain, default=None, exceptions=None): - return TryLink(chain, default=default, exceptions=exceptions) - - -def Map(chain, *chains): - return Concat(*chains) + IteratorLink() + chain - - -def Delegate(parser, chain=None): - if chain: - return chain + DelegateLink(parser) - return DelegateLink(parser) - - -def RunPython(function_name, chain=None, *args, **kwargs): - if chain: - return chain + RunPythonLink(function_name, *args, **kwargs) - return RunPythonLink(function_name, *args, **kwargs) - - -def Static(value): - return StaticLink(value) - - -def Subjects(*chains): - return Concat(Map(MapSubjectLink(), *chains), deep=True) - - -def OneOf(*chains): - return OneOfLink(*chains) - - -def Orcid(chain=None): - if chain: - return (chain + OrcidLink()).IRI - return OrcidLink().IRI - - -def DOI(chain=None): - if chain: - return (chain + DOILink()).IRI - return DOILink().IRI - - -def IRI(chain=None, urn_fallback=False): - if chain: - return (chain + IRILink(urn_fallback=urn_fallback)).IRI - return IRILink(urn_fallback=urn_fallback).IRI - - -def GuessAgentType(chain=None, default=None): - if chain: - return chain + GuessAgentTypeLink(default=default) - return GuessAgentTypeLink(default=default) - - -def Filter(func, *chains): - return Concat(*chains) + FilterLink(func) - - -def Unique(chain=None): - if chain: - return AbstractLink.__add__(chain, UniqueLink()) - return UniqueLink() - - -def Int(chain): - return chain + IntLink() - - -### /Public API -# BaseClass for all links -# Links are a single step of the parsing process -# Links may not mutate the object passed into them -# A chain is any number of links added together -class AbstractLink: - - def __init__(self, _next=None, _prev=None): - # next and prev are generally set by the __add__ method - self._next = _next - self._prev = _prev - - # Every chain must start with an AnchorLink - if self._prev is None and not isinstance(self, AnchorLink): - AnchorLink() + self - - # Build the entire chain this link is a part of - # NOTE: This results in the entire chain rather than starting from the current link - def chain(self): - first = self - while first._prev: - first = first._prev - deq = deque([first]) - while deq[-1]._next: - deq.append(deq[-1]._next) - return tuple(deq) - - # Transformation logic goes here - def execute(self, obj): - raise NotImplementedError - - # Add a link into an existing chain - def __add__(self, step): - self._next = step - step._prev = self - return step - - def __radd__(self, other): - return self + PrependLink(other) - - # For handling paths that are not valid python - # or are already used. 
i.e. text, execute, oai:title - # ctx('oai:title') - def __getitem__(self, name): - if isinstance(name, int): - return self + IndexLink(name) - if isinstance(name, str): - return self + PathLink(name) - raise TypeError( - '__getitem__ only accepts integers and strings\n' - 'Found {}'.format(name) - ) - - # Reserved for special cases - # Any other use is an error - def __call__(self, name): - if name == '*': - return self + IteratorLink() - if name == 'index': - return self + GetIndexLink() - raise ValueError( - '"{}" is not an action that __call__ can resolve\n' - '__call__ is reserved for special actions\n' - 'If you are trying to access an element use dictionary notation'.format(name) - ) - - # The preferred way of building paths. - # Can express either json paths or xpaths - # ctx.root.nextelement[0].first_item_attribute - def __getattr__(self, name): - if name[0] == '_': - raise AttributeError( - '{} has no attribute {}\n' - 'NOTE: "_"s are reserved for accessing private attributes\n' - 'Use dictionary notation to access elements beginning with "_"s\n'.format(self, name) - ) - return self + PathLink(name) - - def __repr__(self): - return '<{}()>'.format(self.__class__.__name__) - - def run(self, obj): - Context().frames.append({'link': self, 'context': obj, 'parser': Context().parser}) - try: - return self.execute(obj) - except ChainError as e: - if self.__class__ not in (AbstractLink, AnchorLink): - e.push(repr(self)) - raise e - finally: - Context().frames.pop(-1) - - -# The beginning link for all chains -# Contains logic for executing a chain against an object -# Adding another link to an anchor will result in a copy of the -# original anchor -class AnchorLink(AbstractLink): - - def execute(self, obj): - return reduce(lambda acc, cur: cur.run(acc), self.chain()[1:], obj) - - -class Context(AnchorLink): - - __CONTEXT = threading.local() - - @property - def jsonld(self): - return { - '@graph': self.graph, - '@context': {} - } - - def __init__(self): - if not hasattr(Context.__CONTEXT, '_ctxdict'): - Context.__CONTEXT._ctxdict = {} - self.clear() - super().__init__() - - @property - def parser(self): - return self.parsers[-1] if self.parsers else None - - def clear(self): - self.graph = [] - self.frames = [] - self.parsers = [] - self._config = None - self.pool = DictHashingDict() - - def __add__(self, step): - return AnchorLink() + step - - def __radd__(self, other): - raise NotImplementedError - - def __setattr__(self, name, value): - if not hasattr(Context.__CONTEXT, '_ctxdict'): - self.__init__() - Context.__CONTEXT._ctxdict[name] = value - - def __getattr__(self, name): - if not hasattr(Context.__CONTEXT, '_ctxdict'): - self.__init__() - try: - return Context.__CONTEXT._ctxdict[name] - except KeyError: - return super().__getattr__(name) - - -class NameParserLink(AbstractLink): - def execute(self, obj): - return HumanName(obj) - - -class DateParserLink(AbstractLink): - LOWER_BOUND = pendulum.datetime(1200, 1, 1) - UPPER_BOUND = pendulum.today().add(years=100) - DEFAULT = pendulum.datetime(2016, 1, 1) - - def execute(self, obj): - if obj: - try: - date = dateutil.parser.parse(obj, default=self.DEFAULT) - except dateutil.parser.ParserError as e: - raise InvalidDate(str(e)) from e - - try: - date.utcoffset() # Forces tzoffset validation to run - except ValueError as e: - raise InvalidDate(*e.args) from e - - if date < self.LOWER_BOUND: - raise InvalidDate('{} is before the lower bound {}.'.format(obj, self.LOWER_BOUND.isoformat())) - if date > self.UPPER_BOUND: - raise InvalidDate('{} is 
after the upper bound {}.'.format(obj, self.UPPER_BOUND.isoformat())) - return date.in_tz('UTC').isoformat() - raise InvalidDate('{} is not a valid date.'.format(obj)) - - -class LanguageParserLink(AbstractLink): - def execute(self, maybe_code): - if isinstance(maybe_code, dict): - maybe_code = maybe_code['#text'] - return maybe_code - - -class ConcatLink(AbstractLink): - def __init__(self, *chains, deep=False): - self._chains = chains - self._deep = deep - super().__init__() - - def _concat(self, acc, val): - if val is None: - return acc - if not isinstance(val, list): - val = [val] - elif self._deep: - val = reduce(self._concat, val, []) - return acc + [v for v in val if v != '' and v is not None] - - def execute(self, obj): - return reduce(self._concat, [ - chain.chain()[0].run(obj) - for chain in self._chains - ], []) - - -class JoinLink(AbstractLink): - def __init__(self, joiner='\n'): - self._joiner = joiner - super().__init__() - - def execute(self, obj): - obj = obj or [] - if not isinstance(obj, (list, tuple)): - obj = (obj, ) - return self._joiner.join(x for x in obj if x) - - -class TrimLink(AbstractLink): - def execute(self, obj): - return obj.strip() - - -class IteratorLink(AbstractLink): - def __init__(self): - super().__init__() - self.__anchor = AnchorLink() - - def __add__(self, step): - # Attach all new links to the "subchain" - chain = list(step.chain()) - while isinstance(chain[0], AnchorLink): - chain.pop(0) - - self.__anchor.chain()[-1] + chain[0] - return self - - def execute(self, obj): - if not isinstance(obj, (list, tuple)): - obj = (obj, ) - return [self.__anchor.run(sub) for sub in obj] - - -class MaybeLink(AbstractLink): - def __init__(self, segment, default=None): - super().__init__() - self._segment = segment - self._default = default - self.__anchor = AnchorLink() - - def __add__(self, step): - # Attach all new links to the "subchain" - self.__anchor.chain()[-1] + step - return self - - def execute(self, obj): - if not obj: - return [] - val = obj.get(self._segment) - if val: - return self.__anchor.run(val) - if len(Context().frames) > 1 and isinstance(Context().frames[-2]['link'], (IndexLink, IteratorLink, ConcatLink, JoinLink)): - return [] - return self._default - - -class TryLink(AbstractLink): - def __init__(self, chain, default=None, exceptions=None): - super().__init__() - self._chain = chain - self._default = default - self.__anchor = AnchorLink() - self._exceptions = (InvalidPath, ) + (exceptions or ()) - - def __add__(self, step): - # Attach all new links to the "subchain" - self.__anchor.chain()[-1] + step - return self - - def execute(self, obj): - try: - val = self._chain.chain()[0].run(obj) - except self._exceptions: - return self._default - return self.__anchor.run(val) - - -class PathLink(AbstractLink): - def __init__(self, segment): - self._segment = segment - super().__init__() - - def execute(self, obj): - try: - return obj[self._segment] - except (KeyError, TypeError) as e: - raise InvalidPath from e - - def __repr__(self): - return '<{}({!r})>'.format(self.__class__.__name__, self._segment) - - -class IndexLink(AbstractLink): - def __init__(self, index): - self._index = index - super().__init__() - - def execute(self, obj): - if not isinstance(obj, list): - raise InvalidPath('Tried to find index "{}", got type {} instead of list'.format(self._index, type(obj))) - try: - return obj[self._index] - except IndexError as e: - raise InvalidPath from e - - def __repr__(self): - return '<{}([{}])>'.format(self.__class__.__name__, self._index) - 
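(A hedged illustration of the mechanics above: attribute and index access build a chain of PathLink/IndexLink steps behind an AnchorLink, and running the anchor reduces the input through each step. The sample data is invented.)

    from share.transform.chain import ctx

    # AnchorLink + PathLink('record') + PathLink('creators') + IndexLink(0)
    chain = ctx.record.creators[0]
    data = {'record': {'creators': ['Ada Lovelace', 'Charles Babbage']}}
    assert chain.chain()[0].run(data) == 'Ada Lovelace'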
- -class GetIndexLink(AbstractLink): - def execute(self, obj): - for frame in Context().frames[::-1]: - if isinstance(frame['link'], IteratorLink): - return frame['context'].index(obj) - return -1 - # return Context().parent.index(obj) - - -class PrependLink(AbstractLink): - def __init__(self, string): - self._string = string - super().__init__() - - def execute(self, obj): - return self._string + obj - - -class XPathLink(AbstractLink): - def __init__(self, xpath): - self._xpath = xpath - super().__init__() - - def execute(self, obj): - unparsed_obj = xmltodict.unparse(obj) - xml_obj = etree.XML(unparsed_obj.encode()) - elem = xml_obj.xpath(self._xpath) - elems = [xmltodict.parse(etree.tostring(x)) for x in elem] - if len(elems) == 1 and not isinstance(self._next, (IndexLink, IteratorLink)): - return elems[0] - return elems - - def __repr__(self): - return '<{}({!r})>'.format(self.__class__.__name__, self._xpath) - - -class DelegateLink(AbstractLink): - def __init__(self, parser): - self._parser = parser - super().__init__() - - def execute(self, obj): - # callable will return True for classes as well as functions - if isinstance(self._parser, types.FunctionType): - parser = self._parser(obj) - return parser(obj).parse() - return self._parser(obj).parse() - - -class RunPythonLink(AbstractLink): - def __init__(self, function_name, *args, **kwargs): - self._function_name = function_name - self._args = args - self._kwargs = kwargs - super().__init__() - - def execute(self, obj): - if callable(self._function_name): - return self._function_name(obj, *self._args, **self._kwargs) - return getattr(Context().parser, self._function_name)(obj, *self._args, **self._kwargs) - - -class StaticLink(AbstractLink): - def __init__(self, value): - self._value = value - super().__init__() - - def execute(self, obj): - return self._value - - -class MapSubjectLink(AbstractLink): - - with open(settings.SUBJECT_SYNONYMS_JSON) as fobj: - MAPPING = json.load(fobj) - - def execute(self, obj): - if not obj: - return None - - if isinstance(obj, list): - return [self.execute(x) for x in obj] - - assert isinstance(obj, str), 'Subjects must be strings. 
Got {}.'.format(type(obj)) - - mapped = self.MAPPING.get(obj.lower()) - - if not mapped: - logger.debug('No synonyms found for term "%s"', obj) - - return mapped - - -class OneOfLink(AbstractLink): - - def __init__(self, *chains): - self._chains = chains - super().__init__() - - def execute(self, obj): - errors = [] - for chain in self._chains: - try: - return chain.chain()[0].run(obj) - except TransformError as e: - errors.append(e) - - raise NoneOf('All chains failed {}'.format(errors)) - - -class AbstractIRILink(AbstractLink): - """Normalize IRIs - """ - SAFE_SEGMENT_CHARS = ":@-._~!$&'()*+,;=" # https://github.com/gruns/furl/blob/master/furl/furl.py#L385 - - @classmethod - def hint(cls, obj): - """A percentage expressed as a float of how likely it is that the given object can be parsed as this class - """ - raise NotImplementedError - - def execute(self, obj): - if not isinstance(obj, str): - raise InvalidIRI('\'{}\' is not of type str.'.format(obj)) - - parsed = self._parse(obj) - parsed = self._process(**parsed) - return iris.parse(iris.compose(**parsed)) - - def _parse(self, obj): - return iris.parse(obj) - - def _process(self, **attrs): - processed = {} - for key in sorted(attrs.keys()): - if hasattr(self, '_process_' + key): - processed[key] = getattr(self, '_process_' + key)(attrs[key]) - return processed - - def _process_scheme(self, scheme): - return scheme.lower() - - def _process_authority(self, authority): - return authority.lower() - - def _process_path(self, path): - return path - - def _process_query(self, query): - return query - - def _process_fragment(self, fragment): - return fragment or None - - -class ISSNLink(AbstractIRILink): - - ISSN_RE = re.compile(r'(?:^|\s+)(\d{4})-(\d{3}[\dxX])\s*$') - - @classmethod - def hint(cls, obj): - if re.search(cls.ISSN_RE, obj): - return 0.9 - return int('issn' in obj) * 0.35 - - @classmethod - def checksum(cls, digits): - total, checksum = 0, digits[-1] - for i, digit in enumerate(digits[:-1]): - total += (8 - i) * int(digit) - actual = (11 - (total % 11)) % 11 - if actual == 10: - actual = 'X' - if checksum != str(actual): - raise InvalidIRI('\'{}\' is not a valid ISSN; failed checksum.'.format(digits)) - - def _parse(self, obj): - match = re.search(self.ISSN_RE, obj.upper()) - if not match: - raise InvalidIRI('\'{}\' cannot be expressed as an ISSN.'.format(obj)) - self.checksum(''.join(match.groups())) - - return { - 'scheme': 'urn', - 'authority': 'ISSN', - 'path': '/{}-{}'.format(*match.groups()) - } - - -class URNLink(AbstractIRILink): - SCHEMES = {'urn', 'oai'} - URN_RE = re.compile(r'\b({schemes}):((?:\w|[.-])+):(\S+)'.format(schemes='|'.join(SCHEMES)), flags=re.I) - PARSED_URN_RE = re.compile(r'^({schemes})://([^/\s]+)/(\S+)$'.format(schemes='|'.join(SCHEMES)), flags=re.I) - - @classmethod - def hint(cls, obj): - if cls.URN_RE.search(obj) is not None: - return 0.9 - if cls.PARSED_URN_RE.search(obj) is not None: - return 0.9 - return 0.0 - - def _parse(self, obj): - match = self.URN_RE.search(obj.lower()) or self.PARSED_URN_RE.search(obj.lower()) - if not match: - raise InvalidIRI('\'{}\' is not a valid URN.'.format(obj)) - - return { - 'scheme': match.group(1), - 'authority': match.group(2), - 'path': '/{}'.format(match.group(3)) - } - - -class ISNILink(AbstractIRILink): - DOMAIN = 'isni.org' - SCHEME = 'http' - - FORMAT = 'ISNI' - FORMAT_STR = '/{}{}{}{}' - - BOUNDS = ( - # (lower, upper) - (None, 150000007), - (350000001, None), - ) - ISNI_RE = re.compile(r'^(?:https?://)?[^=/\d]*/?(\d{4})-?(\d{4})-?(\d{4})-?(\d{3}(?:\d|x))\b', 
re.I) - - @classmethod - def hint(cls, obj): - try: - cls().execute(obj) - except InvalidIRI: - return 0 - return 1.0 - - @classmethod - def checksum(cls, digits): - total, checksum = 0, digits[-1] - for digit in digits[:-1]: - total = (total + int(digit, 36)) * 2 - check = (12 - (total % 11)) % 11 - literal = (int(digits[:-1]) * 10) + check - if check == 10: - check = 'X' - if str(check) != checksum: - raise InvalidIRI('\'{}\' is not a valid {}; failed checksum.'.format(digits, cls.FORMAT)) - for lower, upper in cls.BOUNDS: - if (not lower or lower < literal) and (not upper or upper > literal): - return - raise InvalidIRI('\'{0}\' is outside reserved {1} range.'.format(digits, cls.FORMAT)) - - def _parse(self, obj): - match = re.search(self.ISNI_RE, obj.upper()) - if not match: - raise InvalidIRI('\'{}\' cannot be expressed as an {}.'.format(obj, self.FORMAT)) - self.checksum(''.join(match.groups())) - - return { - 'scheme': self.SCHEME, - 'authority': self.DOMAIN, - 'path': self.FORMAT_STR.format(*match.groups()) - } - - -class OrcidLink(ISNILink): - """Reformat Orcids to the canonical form - https://orcid.org/xxx-xxxx-xxxx-xxxx - - 0000000248692419 - 0000-0002-4869-2419 - https://orcid.org/0000-0002-4869-2419 - - Any of the above would be transformed into https://orcid.org/0000-0002-4869-2419 - - ORCID is a subset of the International Standard Name Identifier (ISNI) in the range 0000-0001-5000-0007 to 0000-0003-5000-0001. - """ - DOMAIN = 'orcid.org' - SCHEME = 'http' - - FORMAT = 'ORCID' - FORMAT_STR = '/{}-{}-{}-{}' - - BOUNDS = ( - (150000007, 350000001), - ) - - -class DOILink(AbstractIRILink): - """Reformat DOIs to the canonical form - - * All DOIs will be valid URIs - * All DOIs will use https - * All DOI paths will be uppercased - - Reference: - https://www.doi.org/doi_handbook/2_Numbering.html - https://stackoverflow.com/questions/27910/finding-a-doi-in-a-document-or-page - - While having characters like <>[] in URLs is technically valid, rfc3987 does not seem to like them. - For that reason we escape them here using furl. 
The regex ensures we won't pick up invalid URLs - """ - - DOI_SCHEME = 'http' - DOI_DOMAIN = 'dx.doi.org' - DOI_RE = re.compile(r'^(?:https?://)?[^=/]*/?(10\.\d{4,}(?:\.\d+)*(?:/|%2F)\S+(?:(?![\"&\'<>])))\b', re.I) - - @classmethod - def hint(cls, obj): - if cls.DOI_RE.search(obj) is not None: - return 0.9 - return 0 - - def _process_scheme(self, _): - return self.DOI_SCHEME - - def _process_authority(self, _): - return self.DOI_DOMAIN - - def _parse(self, obj): - match = self.DOI_RE.search(obj.upper()) - if not match: - raise InvalidIRI('\'{}\' is not a valid DOI.'.format(obj)) - return { - 'scheme': None, - 'authority': None, - 'path': '/' + '/'.join(urllib.parse.quote(x, safe=self.SAFE_SEGMENT_CHARS) for y in match.groups() for x in urllib.parse.unquote(y).split('/')) - } - - -class URLLink(AbstractIRILink): - SCHEMES = {'http', 'https', 'ftp', 'ftps'} - SCHEMELESS_STARTS = ('www.', 'www2.') - IMPLICIT_PORTS = {80, 443} - IP_RE = re.compile(r'\b({schemes})://(\d{{1,3}}.){{4}}(?:\d{{2,5}})\b([-a-z0-9@:%_\+.~#?&//=]*)'.format(schemes='|'.join(SCHEMES)), flags=re.I) - URL_RE = re.compile(r'\b({schemes})://[-a-z0-9@:%._\+~#=]{{2,256}}\.[a-z]{{2,6}}\b([-a-z0-9@:%_\+.~#?&//=]*)'.format(schemes='|'.join(SCHEMES)), flags=re.I) - LOCAL_URL_RE = re.compile(r'\b({schemes})://localhost:[0-9]{{2,5}}\b([-a-z0-9@:%_\+.~#?&//=]*)'.format(schemes='|'.join(SCHEMES)), flags=re.I) - - @classmethod - def hint(cls, obj): - # BePress double escapes in OAI feeds - obj = obj.replace('&amp;', '&') - - if settings.DEBUG and cls.LOCAL_URL_RE.search(obj) is not None: - return 0.25 - if cls.URL_RE.search(obj) is not None: - return 0.25 - if cls.IP_RE.search(obj) is not None: - return 0.25 - if obj.lower().startswith(cls.SCHEMELESS_STARTS): - return 0.1 - return 0 - - def _parse(self, obj): - # BePress double escapes in OAI feeds - obj = obj.replace('&amp;', '&') - match = None - if settings.DEBUG: - match = self.LOCAL_URL_RE.search(obj) - - if not match: - match = self.URL_RE.search(obj) or self.IP_RE.search(obj) - - if not match and obj.lower().startswith(self.SCHEMELESS_STARTS): - match = self.URL_RE.search('http://{}'.format(obj)) - - return super(URLLink, self)._parse(match.group(0)) - - def _process_scheme(self, scheme): - scheme = scheme.lower() - if scheme not in self.SCHEMES: - raise InvalidIRI('\'{}\' is not a valid scheme for URLs.'.format(scheme)) - return scheme.rstrip('s') # Standardize on non-secure - - def _process_query(self, query): - return query # TODO Order me - - def _process_authority(self, authority): - authority = super()._process_authority(authority) - if ':' in authority: - host, port = authority.split(':') - if port and int(port) in self.IMPLICIT_PORTS: - authority = host - return authority - - -class EmailLink(AbstractIRILink): - - EMAIL_RE = re.compile(r'(?P<scheme>mailto:)?(?P<mailbox>[éa-zA-Z0-9_.+-]+)@(?P<authority>[a-zA-Z0-9\u2010ü-]+\.[a-zA-Z0-9-.]+)') # http://emailregex.com/ - - @classmethod - def hint(self, obj): - if self.EMAIL_RE.search(obj) is not None: - return 1.0 - return 0 - - def execute(self, obj): - if not isinstance(obj, str): - raise InvalidIRI('\'{}\' is not of type str.'.format(obj)) - # Handle unicode hyphens - emails = self.EMAIL_RE.findall(obj.replace('\u2010', '-')) - if len(emails) < 1: - raise InvalidIRI('\'{}\' is not a valid email address.'.format(obj)) - if len(emails) > 1: - raise InvalidIRI('Found many email addresses in \'{}\'.'.format(obj)) - return { - 'scheme': 'mailto', - 'authority': emails[0][2], - 'IRI': 'mailto:{1}@{2}'.format(*emails[0]) - } - - -class 
ArXivLink(AbstractIRILink): - # https://arxiv.org/help/arxiv_identifier - - ARXIV_SCHEME = 'http' - ARXIV_DOMAIN = 'arxiv.org' - ARXIV_PATH = '/abs/{}' - ARXIV_RE = re.compile(r'\barXiv:(\d{4}.\d{5})(v\d)?', flags=re.I) - - @classmethod - def hint(cls, obj): - if cls.ARXIV_RE.search(obj) is not None: - return 1.0 - return 0 - - def _parse(self, obj): - match = self.ARXIV_RE.search(obj) - if not match: - raise InvalidIRI('\'{}\' is not a valid ArXiv Identifier.'.format(obj)) - return { - 'scheme': self.ARXIV_SCHEME, - 'authority': self.ARXIV_DOMAIN, - 'path': self.ARXIV_PATH.format(match.group(1)) - } - - -class ARKLink(AbstractIRILink): - # https://en.wikipedia.org/wiki/Archival_Resource_Key - # https://wiki.ucop.edu/download/attachments/16744455/arkspec.pdf - - ARK_SCHEME = 'ark' - ARK_RE = re.compile(r'\bark://?(\d+)(/\S+)', flags=re.I) - - @classmethod - def hint(cls, obj): - if cls.ARK_RE.search(obj) is not None: - return 0.9 - return 0 - - def _parse(self, obj): - match = self.ARK_RE.search(obj) - if not match: - raise InvalidIRI('\'{}\' is not a valid ARK Identifier.'.format(obj)) - return { - 'scheme': self.ARK_SCHEME, - 'authority': match.group(1), - 'path': match.group(2) - } - - -class InfoURILink(AbstractIRILink): - 'info:eu-repo/grantAgreement/EC/FP7/280632/' - # http://info-uri.info/registry/docs/misc/faq.html - # https://tools.ietf.org/html/rfc4452 - - SCHEME = 'info' - INFO_RE = re.compile(r'^\s*info:([\w-]+)(/\S+)\s*$') - - @classmethod - def hint(cls, obj): - if cls.INFO_RE.search(obj) is not None: - return 0.9 - return 0 - - def _parse(self, obj): - match = self.INFO_RE.search(obj) - if not match: - raise InvalidIRI('\'{}\' is not a valid Info URI.'.format(obj)) - return { - 'scheme': self.SCHEME, - 'authority': match.group(1), - 'path': match.group(2) - } - - -class ISBNLink(AbstractIRILink): - SCHEME = 'urn' - AUTHORITY = 'isbn' - ISBN10_RE = re.compile(r'^(?:urn:\/\/isbn\/|ISBN:? ?)?(\d\d?)-(\d{3,7})-(\d{1,6})-(\d|x)$', re.I) - ISBN13_RE = re.compile(r'^(?:urn://isbn/|ISBN:? 
?)?(978|979)-(\d\d?)-(\d{3,5})-(\d{2,5})-(\d)$', re.I) - - @classmethod - def hint(cls, obj): - if cls.ISBN13_RE.match(obj) or cls.ISBN10_RE.match(obj): - return 1.0 - return 0 - - def _parse(self, obj): - match = self.ISBN13_RE.match(obj.upper()) or self.ISBN10_RE.match(obj.upper()) - - if not match or len(''.join(match.groups())) not in (13, 10): - raise InvalidIRI('\'{}\' cannot be expressed as an ISBN.'.format(obj)) - - if match.re == self.ISBN13_RE: - digits = ''.join(match.groups()) - check = (10 - sum(int(x) * (i % 2 * 2 + 1) for i, x in enumerate(digits[:-1])) % 10) % 10 - - if str(check) != digits[-1]: - raise InvalidIRI('\'{}\' is not a valid ISBN; failed checksum.'.format(obj)) - - if match.re == self.ISBN10_RE: - digits = ''.join(match.groups()) - check = sum(10 if x == 'X' else int(x) * (10 - i) for i, x in enumerate(digits)) - - if check % 11 != 0: - raise InvalidIRI('\'{}\' is not a valid ISBN; failed checksum.'.format(obj)) - - # Add prefix and compute new checksum - digits = '978' + digits - digits = digits[:-1] + str((10 - sum(int(x) * (i % 2 * 2 + 1) for i, x in enumerate(digits[:-1])) % 10) % 10) - - return { - 'scheme': self.SCHEME, - 'authority': self.AUTHORITY, - # NOTE: the - separated format is not standardized for ISBNs - # this format is being used as it groups digits by meaning - # EAN - Group - Publisher - Title - Check - # https://en.wikipedia.org/wiki/International_Standard_Book_Number#/media/File:ISBN_Details.svg - 'path': '/{}{}{}-{}{}-{}{}{}{}-{}{}{}-{}'.format(*digits) - } - - -class IRILink(AbstractLink): - FALLBACK_FORMAT = 'urn:share:{source}:{id}' - - def __init__(self, urn_fallback=False): - super().__init__() - self._urn_fallback = urn_fallback - - @classmethod - def iri_links(cls, base=AbstractIRILink): - for link in base.__subclasses__(): - yield link - yield from cls.iri_links(link) - - def execute(self, obj): - if not isinstance(obj, str): - raise InvalidIRI('\'{}\' is not of type str.'.format(obj)) - - find_all = re.findall('|'.join( - '({})'.format(attr.pattern) - for link in self.iri_links() - for attr in link.__dict__.values() - if isinstance(attr, type(re.compile(''))) # Can't import the actual type of a compiled re - ), obj) - - if len(find_all) > 1: - raise InvalidIRI('\'{}\' contains multiple IRIs'.format(obj)) - - final = (None, 0.0) - for link in self.iri_links(): - hint = link.hint(obj) - if hint and hint > final[1]: - final = (link, hint) - if hint == 1.0: - break - - if not final[0]: - if self._urn_fallback: - urn = self.FALLBACK_FORMAT.format(source=Context()._config.label, id=urllib.parse.quote(obj)) - return URNLink().execute(urn) - else: - raise InvalidIRI('\'{}\' could not be identified as an Identifier.'.format(obj)) - return final[0]().execute(obj) - - -class GuessAgentTypeLink(AbstractLink): - """ - When executed on the name of an agent, guess the agent's type. 
- """ - - ORGANIZATION_KEYWORDS = ( - r'(^the\s|\sthe\s)', - r'^[-A-Z]+$', - 'bureau', - 'council', - 'center', - 'foundation', - 'group', - 'inc', - 'society', - ) - ORGANIZATION_RE = re.compile(r'\b({})\b'.format('|'.join(ORGANIZATION_KEYWORDS)), flags=re.I) - - INSTITUTION_KEYWORDS = ( - 'college', - 'institute', - 'institution', - 'school', - 'university', - 'univ', - ) - INSTITUTION_RE = re.compile(r'\b({})\b'.format('|'.join(INSTITUTION_KEYWORDS)), flags=re.I) - - def __init__(self, default=None): - super(GuessAgentTypeLink, self).__init__() - self._default = default - - def execute(self, obj): - # TODO smarter guessing - obj_text = force_text(obj) - if self.INSTITUTION_RE.search(obj_text): - return 'institution' - if self.ORGANIZATION_RE.search(obj_text): - return 'organization' - return (self._default or 'person').lower() - - -class FilterLink(AbstractLink): - - def __init__(self, func): - self._func = func - super().__init__() - - def execute(self, obj): - return list(filter(self._func, obj)) - - -class UniqueLink(AbstractLink): - - def execute(self, obj): - if not isinstance(obj, (list, tuple)): - obj = (obj, ) - return list(set(obj)) - - -class IntLink(AbstractLink): - def execute(self, obj): - return int(obj) if obj else None diff --git a/share/transform/chain/parsers.py b/share/transform/chain/parsers.py deleted file mode 100644 index 48c66ef0b..000000000 --- a/share/transform/chain/parsers.py +++ /dev/null @@ -1,146 +0,0 @@ -import re -import uuid -import logging -from functools import reduce - -from django.core.exceptions import FieldDoesNotExist - -from share.schema import ShareV2Schema -from share.schema.shapes import RelationShape -from share.transform.chain.exceptions import ChainError -from share.transform.chain.links import Context -from share.transform.chain.links import AbstractLink - - -# NOTE: Context is a thread local singleton -# It is asigned to ctx here just to keep a family interface -ctx = Context() -logger = logging.getLogger(__name__) - - -class ParserMeta(type): - - def __new__(cls, name, bases, attrs): - # Enabled inheritance in parsers. - parsers = reduce(lambda acc, val: {**acc, **getattr(val, 'parsers', {})}, bases[::-1], {}) - for key, value in tuple(attrs.items()): - if isinstance(value, AbstractLink) and key != 'schema': - parsers[key] = attrs.pop(key).chain()[0] - attrs['parsers'] = parsers - - attrs['_extra'] = reduce(lambda acc, val: {**acc, **getattr(val, '_extra', {})}, bases[::-1], {}) - attrs['_extra'].update({ - key: value.chain()[0] - for key, value - in attrs.pop('Extra', object).__dict__.items() - if isinstance(value, AbstractLink) - }) - - return super(ParserMeta, cls).__new__(cls, name, bases, attrs) - - -class Parser(metaclass=ParserMeta): - - @classmethod - def using(cls, **overrides): - if not all(isinstance(x, AbstractLink) for x in overrides.values()): - raise Exception('Found non-link values in {}. 
Maybe you need to wrap something in Delegate?'.format(overrides)) - return type( - cls.__name__ + 'Overridden', - (cls, ), { - 'schema': cls.schema if isinstance(cls.schema, (str, AbstractLink)) else cls.__name__.lower(), - **overrides - } - ) - - @property - def schema(self): - return self.__class__.__name__.lower() - - def __init__(self, context, config=None): - self.config = config or ctx._config - self.context = context - self.id = '_:' + uuid.uuid4().hex - - def validate(self, field, value): - if field.is_relation: - if field.relation_shape in (RelationShape.ONE_TO_MANY, RelationShape.MANY_TO_MANY): - assert isinstance(value, (list, tuple)), 'Values for field {} must be lists. Found {}'.format(field, value) - else: - assert isinstance(value, dict) and '@id' in value and '@type' in value, 'Values for field {} must be a dictionary with keys @id and @type. Found {}'.format(field, value) - else: - assert not isinstance(value, dict), 'Value for non-relational field {} must be a primitive type. Found {}'.format(field, value) - - def parse(self): - Context().parsers.append(self) - try: - return self._do_parse() - except ChainError as e: - e.push(repr(self.__class__)) - raise e - finally: - Context().parsers.pop(-1) - - def _do_parse(self): - if isinstance(self.schema, AbstractLink): - schema = self.schema.chain()[0].run(self.context).lower() - else: - schema = self.schema - - schema_type = ShareV2Schema().get_type(schema) - self.ref = {'@id': self.id, '@type': schema} - - inst = {**self.ref} # Shorthand for copying ref - - for key, chain in self.parsers.items(): - try: - field = ShareV2Schema().get_field(schema_type.name, key) - except FieldDoesNotExist: - raise Exception('Tried to parse value {} which does not exist on {}'.format(key, schema_type)) - - try: - value = chain.run(self.context) - except ChainError as e: - e.push('{}.{}'.format(self.__class__.__name__, key)) - raise e - - if ( - value - and field.is_relation - and field.relation_shape in (RelationShape.ONE_TO_MANY, RelationShape.MANY_TO_MANY) - ): - if field.relation_shape == RelationShape.ONE_TO_MANY: - field_to_set = field.inverse_relation - else: - field_to_set = field.incoming_through_relation - - for v in tuple(value): # Freeze list so we can modify it while iterating - # Allow filling out either side of recursive relations - if schema_type.concrete_type == field.related_concrete_type and field.name in ctx.pool[v]: - ctx.pool[v][field_to_set] = self.ref - value.remove(v) # Prevent CyclicalDependency error.
Only "subjects" should have related_works - else: - ctx.pool[v][field_to_set] = self.ref - - if value is not None: - self.validate(field, value) - inst[key] = self._normalize_white_space(value) - - inst['extra'] = {} - for key, chain in self._extra.items(): - val = chain.run(self.context) - if val: - inst['extra'][key] = val - if not inst['extra']: - del inst['extra'] - - ctx.pool[self.ref] = inst - ctx.graph.append(inst) - - # Return only a reference to the parsed object to avoid circular data structures - return self.ref - - def _normalize_white_space(self, value): - if not isinstance(value, str): - return value - return re.sub(r'\s+', ' ', value.strip()) diff --git a/share/transform/chain/soup.py b/share/transform/chain/soup.py deleted file mode 100644 index ef74f1b65..000000000 --- a/share/transform/chain/soup.py +++ /dev/null @@ -1,71 +0,0 @@ -from bs4 import BeautifulSoup - -from share.transform.chain.links import AbstractLink -from share.transform.chain import ChainTransformer - - -class SoupXMLDict: - def __init__(self, data=None, soup=None): - self.soup = soup or BeautifulSoup(data, 'lxml').html - - def __getitem__(self, key): - if key[0] == '@': - return self.soup[key[1:]] - - if key == '#text': - return self.soup.get_text() - - res = self.soup.find_all(key) - - if not res: - return None - - if isinstance(res, list): - if len(res) > 1: - return [type(self)(soup=el) for el in res] - res = res[0] - - return type(self)(soup=res) - - def __getattr__(self, key): - return self[key] - - def __repr__(self): - return '{}(\'{}\')'.format(self.__class__.__name__, self.soup) - - -class SoupLink(AbstractLink): - - def __init__(self, *args, **kwargs): - self._args = args - self._kwargs = kwargs - super().__init__() - - def execute(self, obj): - if not obj: - return None - - if isinstance(obj, list): - res = [r for o in obj for r in o.soup.find_all(*self._args, **self._kwargs)] - else: - res = obj.soup.find_all(*self._args, **self._kwargs) - - if not res: - return None - - if isinstance(res, list): - if len(res) > 1: - return [SoupXMLDict(soup=el) for el in res] - res = res[0] - return SoupXMLDict(soup=res) - - -def Soup(chain, *args, **kwargs): - return chain + SoupLink(*args, **kwargs) - - -class SoupXMLTransformer(ChainTransformer): - REMOVE_EMPTY = False - - def unwrap_data(self, data, **kwargs): - return SoupXMLDict(data) diff --git a/share/transform/chain/transformer.py b/share/transform/chain/transformer.py deleted file mode 100644 index 8acd66f1f..000000000 --- a/share/transform/chain/transformer.py +++ /dev/null @@ -1,87 +0,0 @@ -import re -from collections import OrderedDict - -import json -import xmltodict - -from share.schema import ShareV2Schema -from share.transform.base import BaseTransformer -from share.transform.chain.links import Context - - -# NOTE: Context is a thread local singleton -# It is assigned to ctx here just to keep a family interface -ctx = Context() - - -class ChainTransformer(BaseTransformer): - - EMPTY_RE = re.compile(r'\s*(|none|empty)\s*', flags=re.I) - - NAMESPACES = { - 'http://purl.org/dc/elements/1.1/': 'dc', - 'http://www.openarchives.org/OAI/2.0/': None, - 'http://www.openarchives.org/OAI/2.0/oai_dc/': None, - 'http://www.loc.gov/mods/v3': 'mods', - } - - REMOVE_EMPTY = True - - root_parser = None - - @property - def allowed_roots(self): - return ShareV2Schema().get_type_names('abstractcreativework') - - def do_transform(self, data, **kwargs): - # Parsed data will be loaded into ctx - ctx.clear() # Just in case - ctx._config = self.config - - unwrapped 
= self.unwrap_data(data, **kwargs) - if self.REMOVE_EMPTY: - self.remove_empty_values(unwrapped) - parser = self.get_root_parser(unwrapped, **kwargs) - - root_ref = parser(unwrapped).parse() - jsonld = ctx.jsonld - return jsonld, root_ref - - def transform(self, datum, clean_up=True): - ret = super().transform(datum) - - if clean_up: - ctx.clear() - - return ret - - def unwrap_data(self, data, namespaces=None, **kwargs): - if data.startswith('<'): - return xmltodict.parse(data, process_namespaces=True, namespaces=(namespaces or self.NAMESPACES)) - else: - return json.loads(data, object_pairs_hook=OrderedDict) - - def get_root_parser(self, unwrapped, **kwargs): - if self.root_parser: - return self.root_parser - raise NotImplementedError('ChainTransformers must implement root_parser or get_root_parser') - - def remove_empty_values(self, parsed): - if isinstance(parsed, dict): - ret = OrderedDict() - for k, v in parsed.items(): - if isinstance(v, (dict, list)): - v = self.remove_empty_values(v) - if isinstance(v, str) and self.EMPTY_RE.fullmatch(v): - continue - ret[k] = v - return ret - - ret = [] - for v in parsed: - if isinstance(v, (dict, list)): - v = self.remove_empty_values(v) - if isinstance(v, str) and self.EMPTY_RE.fullmatch(v): - continue - ret.append(v) - return ret diff --git a/share/transform/chain/utils.py b/share/transform/chain/utils.py deleted file mode 100644 index 35365701f..000000000 --- a/share/transform/chain/utils.py +++ /dev/null @@ -1,150 +0,0 @@ -import re -from lxml import etree - -import logging - -from share.transform.chain import exceptions - -logger = logging.getLogger(__name__) - - -def format_address(address1='', address2='', city='', state_or_province='', postal_code='', country=''): - if address1 and address2 and city and state_or_province and postal_code and country: - return '{}\n{}\n{}, {} {}\n{}'.format(address1, address2, city, state_or_province, postal_code, country) - - if address1 and city and state_or_province and postal_code and country: - return '{}\n{}, {} {}\n{}'.format(address1, city, state_or_province, postal_code, country) - - if address1 and address2 and city and state_or_province and postal_code: - return '{}\n{}\n{}, {} {}'.format(address1, address2, city, state_or_province, postal_code) - - if address1 and city and state_or_province and postal_code: - return '{}\n{}, {} {}'.format(address1, city, state_or_province, postal_code) - - if address1 and city and state_or_province and country: - return '{}\n{}, {}\n{}'.format(address1, city, state_or_province, country) - - if address1 and address2 and city and state_or_province: - return '{}\n{}\n{}, {}'.format(address1, address2, city, state_or_province) - - if address1 and city and state_or_province: - return '{}\n{}, {}'.format(address1, city, state_or_province) - - if address1 and address2 and city: - return '{}\n{}\n{}'.format(address1, address2, city) - - if address1 and city: - return '{}\n{}'.format(address1, city) - - if address1 and address2: - return '{}\n{}'.format(address1, address2) - - if city and state_or_province and postal_code and country: - return '{}, {} {}\n{}'.format(city, state_or_province, postal_code, country) - - if city and state_or_province and postal_code: - return '{}, {} {}'.format(city, state_or_province, postal_code) - - if city and state_or_province: - return '{}, {}'.format(city, state_or_province) - - return address1 - - -def force_text(data, list_sep=None, first_str=False): - if isinstance(data, dict): - return data.get('#text', '') - - if isinstance(data, 
str): - return data - - if isinstance(data, list): - text_list = [] - for datum in (data or []): - if datum is None: - continue - if isinstance(datum, dict): - if '#text' not in datum: - logger.warning('Skipping %s, no #text key exists', datum) - continue - text_list.append(datum['#text']) - elif isinstance(datum, str): - text_list.append(datum) - else: - raise exceptions.InvalidText(datum) - - if first_str and text_list: - return text_list[0] - if list_sep is not None: - return list_sep.join(text_list) - return text_list - - if data is None: - return '' - - raise exceptions.InvalidText(data) - - -def contact_extract(input_string): - contact_dict = {} - contact = input_string.replace('Contact:', '').strip() - contact_email = get_emails(contact) - contact_name = contact.split('(', 1)[0].strip() - remove_list = ['Science', 'Division', 'Chair', - 'Collections', 'Administrative', 'Mycologist and Director', - 'Director and Curator', 'Director', 'Collection', 'Manager', - 'Dr.', 'PhD', 'Ph.D.', 'MSc', 'Head', 'Curator', 'Jr.', ' and ', - 'assistant professor', 'professor', 'herbarium'] - separator_list = ['/', ','] - for item in remove_list: - insensitive_item = re.compile(re.escape(item), re.IGNORECASE) - contact_name = insensitive_item.sub('', contact_name) - if ',' in contact_name: - split_name = contact_name.split(',') - multiple_name = split_name[1].split() - if len(multiple_name) > 1: - contact_name = multiple_name[0] + ' ' + split_name[0] - else: - contact_name = split_name[1] + ' ' + split_name[0] - if '/' in contact_name: - contact_name = contact_name.split('/')[0] - contact_name = ' '.join([w for w in contact_name.split() if len(w) > 1 or w in separator_list]) - - if contact and contact_email: - contact_dict['email'] = contact_email.strip() - if contact_name: - contact_dict['name'] = contact_name.strip() - - return contact_dict - - -def get_emails(s): - """Returns first matched email found in string s.""" - # Removing lines that start with '//' because the regular expression - # mistakenly matches patterns like 'http://foo@bar.com' as '//foo@bar.com'. 
- # Adopted from code by Dennis Ideler ideler.dennis@gmail.com - regex = re.compile((r"([a-z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+\/=?^_`" - r"{|}~-]+)*(@|\sat\s)(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(\.|" - r"\sdot\s))+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)")) - s = s.lower() - result = re.findall(regex, s) - if result: - if not result[0][0].startswith('//'): - return result[0][0] - - -def oai_allowed_by_sets(data, blocked_sets=None, approved_sets=None): - # TODO do this in the Regulator, in a ValidationStep - blocked_sets = set(blocked_sets or []) - approved_sets = set(approved_sets or []) - if blocked_sets or approved_sets: - set_specs = set(x.replace('publication:', '') for x in etree.fromstring(data).xpath( - 'ns0:header/ns0:setSpec/node()', - namespaces={'ns0': 'http://www.openarchives.org/OAI/2.0/'} - )) - approved = not approved_sets or (set_specs & approved_sets) - blocked = blocked_sets and (set_specs & blocked_sets) - if blocked or not approved: - logger.warning('Discarding datum based on set specs: %s', ', '.join(set_specs)) - return False - return True diff --git a/share/transformers/__init__.py b/share/transformers/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/share/transformers/ca_lwbin.py b/share/transformers/ca_lwbin.py deleted file mode 100644 index c0183a26a..000000000 --- a/share/transformers/ca_lwbin.py +++ /dev/null @@ -1,113 +0,0 @@ -from share.transform.chain import * - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Tag(Parser): - name = ctx.name - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Maintainer(Parser): - schema = GuessAgentType(ctx.maintainer) - name = ctx.maintainer - identifiers = Map(Delegate(AgentIdentifier), ctx.maintainer_email) - - -class Author(Parser): - schema = GuessAgentType(ctx.author) - name = ctx.author - identifiers = Map(Delegate(AgentIdentifier), ctx.author_email) - - -class Organization(Parser): - schema = RunPython('org_or_consortium', ctx.is_organization) - - name = ctx.title - identifiers = Map(Delegate(AgentIdentifier), RunPython('get_urls', ctx)) - - def org_or_consortium(self, is_org): - return 'Organization' if is_org else 'Consortium' - - def get_urls(self, context): - return [ - 'http://130.179.67.140/{type}/{id}'.format(**context), - 'http://130.179.67.140/uploads/group/{image_url}'.format(**context), - ] - - class Extra: - description = ctx.description - - -class Creator(Parser): - agent = Delegate(Author, ctx) - - -class CreatorMaintainer(Parser): - schema = 'creator' - agent = Delegate(Maintainer, ctx) - - -class Contributor(Parser): - agent = Delegate(Organization, ctx) - - -class CreativeWork(Parser): - schema = RunPython('get_schema', ctx.type) - - title = ctx.title - description = ctx.notes - is_deleted = ctx.private - date_published = ParseDate(ctx.metadata_created) - date_updated = ParseDate(ctx.metadata_modified) - free_to_read_type = Try(IRI(ctx.license_url)) - # free_to_read_date - rights = ctx.license_title - # language - - tags = Map(Delegate(ThroughTags), ctx.tags) - identifiers = Map( - Delegate(WorkIdentifier), - RunPython('get_url', ctx), - RunPython('get_dois', ctx.extras), - Try(IRI(ctx.url), exceptions=(InvalidIRI, )), - ) - - related_agents = Concat( - Map(Delegate(Creator), Filter(lambda x: x.get('author'), ctx)), - Map(Delegate(CreatorMaintainer), Filter(lambda x: x.get('maintainer'), ctx)), - Map(Delegate(Contributor), ctx.organization, ctx.groups) - ) - # related_works = 
ShareManyToManyField('AbstractCreativeWork', through='AbstractWorkRelation', through_fields=('subject', 'related'), symmetrical=False) - - class Extra: - revision_timestamp = ParseDate(ctx.revision_timestamp) - state = ctx.state - version = ctx.version - - def get_url(self, context): - return 'http://130.179.67.140/{type}/{id}'.format(**context) - - def get_dois(self, context): - # Sometimes values can be "to be added" or similar - # There also seems to be a couple dx.doi.org/11.xxx/... floating around - return [x['value'] for x in context if x['key'] == 'DOI' and '10.0' in x['value']] - - def get_schema(self, type): - return { - 'dataset': 'DataSet', - }[type] - - -class LWBINTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/com_biomedcentral.py b/share/transformers/com_biomedcentral.py deleted file mode 100644 index 69cbf432e..000000000 --- a/share/transformers/com_biomedcentral.py +++ /dev/null @@ -1,79 +0,0 @@ -from share.transform.chain import * # noqa - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Person(Parser): - given_name = ParseName(ctx.creator).first - family_name = ParseName(ctx.creator).last - additional_name = ParseName(ctx.creator).middle - suffix = ParseName(ctx.creator).suffix - - -class Creator(Parser): - agent = Delegate(Person, ctx) - cited_as = ctx.creator - order_cited = ctx('index') - - -class Organization(Parser): - name = ctx.publisher - identifiers = Map(Delegate(AgentIdentifier), ctx.issn) - - -class Publisher(Parser): - agent = Delegate(Organization, ctx) - - class Extra: - publication_name = ctx.publicationName - - -class Article(Parser): - title = ctx.title - description = ctx.abstract - rights = ctx.copyright - date_published = ParseDate(ctx.publicationDate) - date_updated = ParseDate(ctx.publicationDate) - - identifiers = Map( - Delegate(WorkIdentifier), - ctx.doi, - ctx.identifier, - Map(ctx.value, ctx.url), - ) - - related_agents = Concat( - Map(Delegate(Creator), ctx.creators), - Map(Delegate(Publisher), ctx) - ) - - tags = Map(Delegate(ThroughTags), ctx.genre) - - class Extra: - openaccess = ctx.openaccess - ending_page = Try(ctx.endingPage) - issue_type = Try(ctx.issuetype) - number = ctx.number - starting_page = ctx.startingPage - topicalCollection = Try(ctx.topicalCollection) - journalid = Try(ctx.journalid) - issn = Try(ctx.issn) - - -class BioMedCentralTransformer(ChainTransformer): - VERSION = 1 - root_parser = Article diff --git a/share/transformers/com_dailyssrn.py b/share/transformers/com_dailyssrn.py deleted file mode 100644 index c068b173d..000000000 --- a/share/transformers/com_dailyssrn.py +++ /dev/null @@ -1,17 +0,0 @@ -from share.transform.chain import * # noqa - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class CreativeWork(Parser): - title = ctx.item.title - description = ctx.item.description - date_published = ParseDate(Try(ctx.item.pubDate)) - identifiers = Map(Delegate(WorkIdentifier), ctx.item.link) - - -class DailySSRNTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/com_figshare.py b/share/transformers/com_figshare.py deleted file mode 100644 index df20eeb80..000000000 --- a/share/transformers/com_figshare.py +++ /dev/null @@ -1,86 +0,0 @@ -from share.transform.chain import * # noqa - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class 
Tag(Parser): - name = ctx.name - - class Extra: - id = ctx.id - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Person(Parser): - given_name = ParseName(ctx.author_name).first - family_name = ParseName(ctx.author_name).last - - -class DataSetPerson(Parser): - schema = 'Person' - - given_name = ParseName(ctx.full_name).first - family_name = ParseName(ctx.full_name).last - suffix = ParseName(ctx.full_name).suffix - additional_name = ParseName(ctx.full_name).middle - - class Extra: - id = ctx.id - - -class DataSetCreator(Parser): - schema = 'Creator' - - order_cited = ctx('index') - cited_as = ctx.full_name - agent = Delegate(DataSetPerson, ctx) - - -class Creator(Parser): - order_cited = ctx('index') - cited_as = ctx.author_name - agent = Delegate(Person, ctx) - - -class CreativeWork(Parser): - title = ctx.title - description = ctx.description - related_agents = Map(Delegate(Creator), ctx.authors) - date_published = ParseDate(ctx.published_date) - identifiers = Map(Delegate(WorkIdentifier), Map(Try(IRI(), exceptions=(InvalidIRI, )), ctx.url, ctx.DOI, ctx.links)) - - class Extra: - modified = ParseDate(ctx.modified_date) - - -class DataSet(Parser): - schema = 'DataSet' - title = ctx.title - description = ctx.description_nohtml - date_published = ParseDate(ctx.published_date) - - tags = Map(Delegate(ThroughTags), ctx.categories, ctx.tags) - related_agents = Map(Delegate(DataSetCreator), ctx.owner, ctx.authors) - identifiers = Map(Delegate(WorkIdentifier), ctx.figshare_url, ctx.doi, ctx.publisher_doi) - - class Extra: - status = ctx.status - version = ctx.version - total_size = ctx.total_size - article_id = ctx.article_id - defined_type = ctx.defined_type - citation = ctx.publisher_citation - - -class FigshareTransformer(ChainTransformer): - VERSION = 1 - - def get_root_parser(self, unwrapped, **kwargs): - if 'files' in unwrapped: - return DataSet - return CreativeWork diff --git a/share/transformers/com_figshare_v2.py b/share/transformers/com_figshare_v2.py deleted file mode 100644 index 9d39e6db0..000000000 --- a/share/transformers/com_figshare_v2.py +++ /dev/null @@ -1,76 +0,0 @@ -from share.transform.chain import * # noqa - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Person(Parser): - name = ctx.full_name - identifiers = Map( - Delegate(AgentIdentifier), - ctx.orcid_id, - RunPython(lambda x: 'http://figshare.com/authors/{url_name}/{id}'.format(**x), ctx) - ) - - -class Creator(Parser): - agent = Delegate(Person, ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class CreativeWork(Parser): - schema = RunPython('get_schema', ctx.defined_type) - FIGSHARE_TYPES = ['figure', 'media', 'dataset', 'fileset', 'poster', 'paper', 'presentation', 'thesis', 'code', 'metadata'] - - title = ctx.title - description = ctx.description - is_deleted = RunPython(lambda x: not x, ctx.is_active) - date_published = ParseDate(ctx.published_date) - date_updated = ParseDate(ctx.modified_date) - free_to_read_type = IRI(ctx.license.url) - - related_agents = Map(Delegate(Creator), ctx.authors) - - identifiers = Map(Delegate(WorkIdentifier), ctx.doi, ctx.url, ctx.figshare_url) - - tags = Map( - Delegate(ThroughTags), - ctx.tags, - Map(ctx.title, ctx.categories) - ) - - class Extra: - files = ctx.files - version = ctx.version - thumb = ctx.thumb - embargo_date = ctx.embargo_date - embargo_reason = ctx.embargo_reason - embargo_type = ctx.embargo_type - citation = ctx.citation - 
defined_type = ctx.defined_type - - def get_schema(self, defined_type): - return { - 'fileset': 'Project', - 'figure': 'CreativeWork', - 'poster': 'Poster', - 'code': 'Software', - 'dataset': 'DataSet', - }[self.FIGSHARE_TYPES[defined_type - 1]] - - -class FigshareV2Transformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/com_mendeley_data.py b/share/transformers/com_mendeley_data.py deleted file mode 100644 index c93b5f0ce..000000000 --- a/share/transformers/com_mendeley_data.py +++ /dev/null @@ -1,441 +0,0 @@ -from share.transform.chain import * # noqa -from share.transform.chain.utils import format_address - - -def format_mendeley_address(ctx): - return format_address( - address1=ctx['name'], - city=ctx['city'], - state_or_province=ctx['state'], - country=ctx['country'] - ) - - -RELATION_MAP = { - 'related_to': 'WorkRelation', - 'derived_from': 'IsDerivedFrom', - 'source_of': 'IsDerivedFrom', - 'compiles': 'Compiles', - 'compiled_by': 'Compiles', - 'cites': 'Cites', - 'cited_by': 'Cites', -} - -INVERSE_RELATIONS = { - 'cited_by', - 'compiled_by', - 'derived_from' -} - -RELATIONS = { - 'cites', - 'compiles', - 'source_of', - 'related_to', -} - - -def get_related_works(options, inverse): - results = [] - for option in options: - relation = option['rel'] - if inverse and relation in INVERSE_RELATIONS: - results.append(option) - elif not inverse and relation in RELATIONS: - results.append(option) - return results - - -def get_relation_type(relation_type): - return RELATION_MAP.get(relation_type, 'WorkRelation') - - -def get_related_work_type(work_type): - if work_type == 'other': - return 'creativework' - return work_type - - -class WorkIdentifier(Parser): - uri = ctx - - -class Tag(Parser): - name = ctx.label - - class Extra: - id = ctx.id - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class RelatedWork(Parser): - schema = RunPython(get_related_work_type, ctx.type) - identifiers = Map( - Delegate(WorkIdentifier), - Try( - IRI(ctx.href), - exceptions=(InvalidIRI,) - ) - ) - - -class WorkRelation(Parser): - schema = RunPython(get_relation_type, ctx.rel) - related = Delegate(RelatedWork, ctx) - - -class InverseWorkRelation(Parser): - schema = RunPython(get_relation_type, ctx.rel) - subject = Delegate(RelatedWork, ctx) - - -class RelatedArticle(Parser): - schema = 'Article' - title = Try(ctx.title) - identifiers = Map( - Delegate(WorkIdentifier), - Try( - IRI(ctx.doi), - exceptions=(InvalidIRI,) - ) - ) - - class Extra: - journal = Try(ctx.journal) - title = Try(ctx.title) - doi = Try(ctx.doi) - article_id = Try(ctx.id) - - -class UsesDataFrom(Parser): - subject = Delegate(RelatedArticle, ctx) - - -class AgentIdentifier(Parser): - uri = ctx - - -class AgentInstitution(Parser): - schema = GuessAgentType(ctx.name, default='organization') - - name = Try(ctx.name) - location = Try(RunPython(format_mendeley_address, ctx)) - identifiers = Map( - Delegate(AgentIdentifier), - Concat( - Try( - IRI(ctx.urls), - exceptions=(InvalidIRI,) - ), - Try( - IRI(ctx.profile_url), - exceptions=(InvalidIRI,) - ) - ) - ) - - class Extra: - name = Try(ctx.name) - scival_id = Try(ctx.scival_id) - instituion_id = Try(ctx.id) - city = Try(ctx.city) - state = Try(ctx.state) - country = Try(ctx.country) - parent_id = Try(ctx.parent_id) - urls = Try(ctx.urls) - profile_url = Try(ctx.profile_url) - alt_names = Try(ctx.alt_names) - - -class 
AgentWorkRelation(Parser): - agent = Delegate(AgentInstitution, ctx) - - -class IsAffiliatedWith(Parser): - related = Delegate(AgentInstitution, ctx) - - -class Person(Parser): - """ - { - "id": "", - "first_name": "", - "last_name": "", - "display_name": "", - "link": "", - "folder": "", - "institution": "", - "institution_details": { - "scival_id": 0, - "id": "", - "name": "", - "city": "", - "state": "", - "country": "", - "parent_id": "", - "urls": [ - "" - ], - "profile_url": "", - "alt_names": [ - { - "name": "" - } - ] - }, - "location": { - "id": "", - "latitude": 0, - "longitude": 0, - "name": "", - "city": "", - "state": "", - "country": "" - }, - "created": "", - "title": "", - "web_user_id": 0, - "scopus_author_ids": [ - "" - ], - "orcid_id": "", - } - """ - given_name = ctx.first_name - family_name = ctx.last_name - location = RunPython(format_mendeley_address, Try(ctx.full_profile.location)) - - identifiers = Map( - Delegate(AgentIdentifier), - Concat( - Try( - IRI(ctx.full_profile.orcid_id), - exceptions=(InvalidIRI,) - ), - Try( - IRI(ctx.full_profile.link), - exceptions=(InvalidIRI,) - ) - ) - ) - - related_agents = Concat( - Map(Delegate(IsAffiliatedWith), Try(ctx.full_profile.institution_details)), - Map(Delegate(IsAffiliatedWith), Try(ctx.institution)), - ) - - class Extra: - profile_id = Try(ctx.profile_id) - first_name = ctx.first_name - last_name = ctx.last_name - contribution = Try(ctx.contribution) - full_profile = Try(ctx.full_profile) - - -class Contributor(Parser): - agent = Delegate(Person, ctx) - - -class Creator(Contributor): - order_cited = ctx('index') - cited_as = RunPython('full_name', ctx) - - def full_name(self, ctx): - return '{} {}'.format(ctx['first_name'], ctx['last_name']) - - -class DataSet(Parser): - """ - { - "id": "", - "doi": { - "id": "", - "status": "" - }, - "name": "", - "description": "", - "contributors": [ - { - "contribution": "", - "institution": { - "scival_id": 0, - "id": "", - "name": "", - "city": "", - "state": "", - "country": "", - "parent_id": "", - "urls": [""], - "profile_url": "", - "alt_names": [{"name": ""}] - }, - "profile_id": "", - "first_name": "", - "last_name": "" - } - ], - "articles": [ - { - "journal": { - "url": "", - "issn": "", - "name": "" - }, - "title": "", - "doi": "", - "id": "" - } - ], - "institutions": [ - { - "scival_id": 0, - "id": "", - "name": "", - "city": "", - "state": "", - "country": "", - "parent_id": "", - "urls": [], - "profile_url": "", - "alt_names": [{"name": ""}] - } - ], - "related_links": [ - { - "type": "", - "rel": "", - "href": "" - } - ], - "publish_date": "", - "data_licence": { - "description": "", - "url": "", - "full_name": "", - "short_name": "", - "id": "" - }, - "embargo_date": "" - } - """ - - schema = 'DataSet' - title = Try(ctx.name) - description = Try(ctx.description) - - # publish_date "reflects the published date of the most recent version of the dataset" - date_published = ParseDate(Try(ctx.publish_date)) - date_updated = ParseDate(Try(ctx.publish_date)) - - tags = Map( - Delegate(ThroughTags), - Try(ctx.categories) - ) - subjects = Map( - Delegate(ThroughSubjects), - Subjects(Try(ctx.categories.label)) - ) - - rights = Try(ctx.data_licence.description) - free_to_read_type = Try(ctx.data_licence.url) - free_to_read_date = ParseDate(Try(ctx.embargo_date)) - - related_agents = Concat( - Map( - Delegate(Creator), RunPython('filter_contributors', Try(ctx.contributors), 'creator') - ), - Map( - Delegate(Contributor), RunPython('filter_contributors', 
Try(ctx.contributors), 'contributor') - ), - Map( - Delegate(AgentWorkRelation), Try(ctx.institutions) - ) - ) - - related_works = Concat( - Map( - Delegate(UsesDataFrom), - Try(ctx.articles) # Journal articles associated with the dataset - ), - Map( - Delegate(WorkRelation), - RunPython( - get_related_works, - Try(ctx.related_links), - False - ) - ), - Map( - Delegate(InverseWorkRelation), - RunPython( - get_related_works, - Try(ctx.related_links), - True - ) - ) - ) - - identifiers = Map( - Delegate(WorkIdentifier), - Concat( - RunPython(lambda mendeley_id: 'https://data.mendeley.com/datasets/{}'.format(mendeley_id) if mendeley_id else None, Try(ctx.id)), - Try( - IRI(ctx.doi.id), - exceptions=(InvalidIRI,) - ) - ) - ) - - def filter_contributors(self, contributor_list, contributor_type): - filtered = [] - for contributor in contributor_list: - try: - if not contributor['contribution'] and contributor_type == 'creator': - filtered.append(contributor) - elif contributor['contribution'] and contributor_type == 'contributor': - filtered.append(contributor) - except KeyError: - if contributor_type == 'creator': - filtered.append(contributor) - return filtered - - class Extra: - """ Documentation: - http://dev.mendeley.com/methods/#datasets - http://dev.mendeley.com/methods/#profile-attributes - """ - mendeley_id = Try(ctx.id) - doi = Try(ctx.doi) - name = Try(ctx.name) - description = Try(ctx.description) - version = Try(ctx.version) - contributors = Try(ctx.contributors) - versions = Try(ctx.versions) - files = Try(ctx.files) - articles = Try(ctx.articles) - categories = Try(ctx.categories) - institutions = Try(ctx.institutions) - metrics = Try(ctx.metrics) - available = Try(ctx.available) - method = Try(ctx.method) - related_links = Try(ctx.related_links) - publish_date = ctx.publish_date - data_licence = Try(ctx.data_licence) - owner_id = Try(ctx.owner_id) - embargo_date = Try(ctx.embargo_date) - - -class MendeleyTransformer(ChainTransformer): - VERSION = 1 - root_parser = DataSet diff --git a/share/transformers/com_peerj.py b/share/transformers/com_peerj.py deleted file mode 100644 index 43b9673b5..000000000 --- a/share/transformers/com_peerj.py +++ /dev/null @@ -1,91 +0,0 @@ -from share.transform.chain import ChainTransformer, Parser, Delegate, RunPython, ParseDate, ParseName, Map, ctx, Try, Subjects, IRI, Concat - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Organization(Parser): - name = ctx - - -class Publisher(Parser): - agent = Delegate(Organization, ctx) - - -class Person(Parser): - given_name = ParseName(ctx).first - family_name = ParseName(ctx).last - - -class Creator(Parser): - agent = Delegate(Person, ctx) - cited_as = ctx - order_cited = ctx('index') - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Article(Parser): - title = ctx.title - description = Try(ctx.description) - language = ctx.language - date_published = ParseDate(ctx.date) - date_updated = ParseDate(ctx.date) - - identifiers = Map( - Delegate(WorkIdentifier), - ctx.doi, - ctx.pdf_url, - ctx.fulltext_html_url, - RunPython(lambda x: 'https://www.ncbi.nlm.nih.gov/pubmed/{}'.format(x) if x else None, Try(ctx.identifiers.pubmed)), - RunPython(lambda x: 'https://www.ncbi.nlm.nih.gov/pmc/articles/{}'.format(x) if x else None, Try(ctx.identifiers.pmc)), - ) - - subjects = Map(Delegate(ThroughSubjects), Subjects(ctx.subjects)) - tags = 
Map(Delegate(ThroughTags), Try(ctx.keywords), Try(ctx.subjects)) - - related_agents = Concat( - Map(Delegate(Creator), ctx.author), - Map(Delegate(Publisher), ctx.publisher), - ) - - class Extra: - volume = Try(ctx.volume) - journal_title = Try(ctx.journal_title) - journal_abbrev = Try(ctx.journal_abbrev) - description_html = Try(ctx['description-html']) - issn = Try(ctx.issn) - - -class Preprint(Article): - - class Extra: - modified = ParseDate(ctx.date) - subjects = ctx.subjects - identifiers = Try(ctx.identifiers) - emails = Try(ctx.author_email) - description_html = Try(ctx['description-html']) - - -class PeerJTransformer(ChainTransformer): - VERSION = 1 - - def get_root_parser(self, unwrapped, emitted_type=None, **kwargs): - if emitted_type == 'preprint': - return Preprint - return Article diff --git a/share/transformers/com_peerj_xml.py b/share/transformers/com_peerj_xml.py deleted file mode 100644 index 0414b63f6..000000000 --- a/share/transformers/com_peerj_xml.py +++ /dev/null @@ -1,161 +0,0 @@ -from share.transform.chain import * # noqa -from share.transform.chain.soup import Soup, SoupXMLDict, SoupXMLTransformer - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class AgentIdentifier(Parser): - uri = IRI(ctx['#text']) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx['#text']) - - -class PublisherOrganization(Parser): - schema = 'Organization' # TODO Switch to OAI schema picker - - name = ctx['publisher']['publisher-name']['#text'] - identifiers = Map(Delegate(AgentIdentifier), Soup(ctx, 'issn', **{'pub-type': 'epub'})) - - class Extra: - location = ctx['publisher']['publisher-loc']['#text'] - - -class Publisher(Parser): - agent = Delegate(PublisherOrganization, ctx) - - -class FunderOrganization(Parser): - schema = 'Organization' # TODO Switch to OAI schema picker - name = ctx - - -class Funder(Parser): - agent = Delegate(FunderOrganization, ctx['funding-source']['#text']) - - class Extra: - award_id = Map(ctx['#text'], ctx['award-id']) - - -class Institution(Parser): - name = ctx.institution['#text'] - - class Extra: - addr_line = Try(ctx['addr-line']['#text']) - city = Try(ctx['city']['#text']) - country = ctx.country['#text'] - - -class IsAffiliatedWith(Parser): - related = Delegate(Institution, ctx) - - -class Person(Parser): - family_name = ctx.name['surname']['#text'] - given_name = ctx.name['given-names']['#text'] - - identifiers = Map(Delegate(AgentIdentifier), ctx['contrib-id'], ctx.email) - - related_agents = Map( - Delegate(IsAffiliatedWith), - RunPython('get_affiliations', ctx.xref) - ) - - def get_affiliations(self, refs): - if not isinstance(refs, list): - refs = [refs] - return [ - SoupXMLDict(soup=ctx.frames[0]['context'].soup.find(id=ref['@rid'])) - for ref in refs - if ref - ] - - -class Contributor(Parser): - agent = Delegate(Person, ctx) - cited_as = Join(Concat(ctx.name['given-names']['#text'], ctx.name['surname']['#text'])) - - class Extra: - contributions = RunPython('get_contributions', ctx) - - def get_contributions(self, context): - return [ - x.parent.text - for x in - ctx.frames[0]['context'].soup.find_all(**{'ref-type': 'contrib', 'rid': context.soup.attrs.get('@id')}) - ] - - -class Creator(Contributor): - order_cited = ctx('index') - - -class Article(Parser): - title = ctx.article.front['article-meta']['title-group']['article-title']['#text'] - description = 
ctx.article.front['article-meta'].abstract['#text'] - is_deleted = Static(False) - - date_published = ParseDate(ctx.article.front['article-meta']['pub-date']['@iso-8601-date']) - date_updated = ParseDate(ctx.article.front['article-meta']['pub-date']['@iso-8601-date']) - # free_to_read_type = IRI(ctx.article.front['article-meta']['license']['@xlink:href']) - # free_to_read_date = - rights = IRI(ctx.article.front['article-meta']['license']['@xlink:href']) - - identifiers = Map( - Delegate(WorkIdentifier), - ctx.article.front['article-meta']['self-uri']['@xlink:href'], - Soup(ctx.article.front['article-meta'], 'article-id', **{'pub-id-type': 'doi'})['#text'], - ) - - subjects = Map( - Delegate(ThroughSubjects), - Subjects(Map(ctx['#text'], ctx.article.front['article-meta']['article-categories']['subject'])) - ) - - tags = Map( - Delegate(ThroughTags), - ctx.article.front['article-meta']['article-categories']['subject'], - ctx.article.front['article-meta']['kwd-group']['kwd'], - ) - - related_agents = Concat( - Map(Delegate(Funder), ctx.article.front['article-meta']['funding-group']['award-group']), - Map(Delegate(Publisher), ctx.article.front['journal-meta']), - Map(Delegate(Creator), Soup(ctx.article.front['article-meta'], 'contrib-group', **{'content-type': 'authors'}).contrib), - Map(Delegate(Contributor), Try(Soup(ctx.article.front['article-meta'], 'contrib-group', **{'content-type': lambda x: x != 'authors'}).contrib)), - ) - - # TODO Maybe process references as well? - # related_works = Concat( - # ) - - class Extra: - funding_statement = ctx.article.front['article-meta']['funding-group']['funding-statement']['#text'] - - -class Preprint(Article): - pass - - -class PeerJXMLTransformer(SoupXMLTransformer): - VERSION = 1 - - def get_root_parser(self, unwrapped, emitted_type=None, **kwargs): - if emitted_type == 'preprint': - return Preprint - return Article diff --git a/share/transformers/com_researchregistry.py b/share/transformers/com_researchregistry.py deleted file mode 100644 index d93ab4191..000000000 --- a/share/transformers/com_researchregistry.py +++ /dev/null @@ -1,134 +0,0 @@ -from share.transform.chain import Parser, ctx, links as tools, ChainTransformer - -LINK_FORMAT = 'http://www.researchregistry.com/browse-the-registry.html#home/registrationdetails/{}/' - -FIELDS = { - 'uin': 'field_21', - 'registration date': 'field_2', - 'title': 'field_7', - 'questions and objectives': 'field_75', - 'summary': 'field_78', - 'study type': 'field_72', - 'study type other': 'field_14', - 'primary investigator': 'field_3', - 'other investigator': 'field_4', - 'additional investigators': 'field_94', - 'contact details': 'field_5', - 'email': 'field_97', - 'participating institutions': 'field_68', - 'countries of recruitment': 'field_10', - 'funders': 'field_9', - 'health conditions or problems studied': 'field_11', - 'patient population': 'field_29', - 'interventions': 'field_12', - 'inclusion criteria': 'field_13', - 'exclusion criteria': 'field_70', - 'control or comparators': 'field_28', - 'primary outcomes': 'field_18', - 'key secondary outcomes': 'field_19', - 'target sample size': 'field_16', - 'recruitment status': 'field_79', - 'other recruitment status': 'field_17', - 'first enrollment date': 'field_15', - 'expected enrollment completion date': 'field_80', - 'expected research completion date': 'field_73', - 'ethical approval': 'field_81', - 'ethical approval details': 'field_63', - 'ethical committee judgment': 'field_62', - 'data': 'field_64', - 'published paper identifier': 
'field_37', - 'study website': 'field_30', - 'study results': 'field_89', - 'user': 'field_66', -} - - -class Person(Parser): - family_name = ctx['last'] - given_name = ctx['first'] - - -class FullNamePerson(Parser): - schema = 'person' - name = ctx - - -class PrincipalInvestigator(Parser): - agent = tools.Delegate(Person, ctx) - - -class OtherInvestigator(Parser): - schema = 'contributor' - agent = tools.Delegate(Person, ctx) - - -class AdditionalInvestigator(Parser): - schema = 'contributor' - agent = tools.Delegate(FullNamePerson, ctx) - - -class WorkIdentifier(Parser): - uri = ctx - - -class Registration(Parser): - title = ctx[FIELDS['title']] - description = ctx[FIELDS['summary']] - date_published = tools.ParseDate(ctx[FIELDS['registration date']].timestamp) - date_updated = tools.ParseDate(ctx[FIELDS['registration date']].timestamp) - related_agents = tools.Concat( - tools.Delegate(PrincipalInvestigator, ctx[FIELDS['primary investigator']]), - tools.Delegate(OtherInvestigator, ctx[FIELDS['other investigator']]), - tools.Map( - tools.Delegate(AdditionalInvestigator), - tools.RunPython('split_names', ctx[FIELDS['additional investigators']]) - ) - ) - identifiers = tools.Map( - tools.Delegate(WorkIdentifier), - tools.RunPython('get_link', ctx.id) - ) - - class Extra: - registration_date = ctx[FIELDS['registration date']] - questions_and_objectives = ctx[FIELDS['questions and objectives']] - study_type = ctx[FIELDS['study type']] - study_type_detail = ctx[FIELDS['study type other']] - contact_details = ctx[FIELDS['contact details']] - participating_institutions = ctx[FIELDS['participating institutions']] - countries_of_recruitment = ctx[FIELDS['countries of recruitment']] - funders = ctx[FIELDS['funders']] - problems_studied = ctx[FIELDS['health conditions or problems studied']] - patient_population = ctx[FIELDS['patient population']] - interventions = ctx[FIELDS['interventions']] - inclusion_criteria = ctx[FIELDS['inclusion criteria']] - exclusion_criteria = ctx[FIELDS['exclusion criteria']] - control_or_comparators = ctx[FIELDS['control or comparators']] - primary_outcomes = ctx[FIELDS['primary outcomes']] - key_secondary_outcomes = ctx[FIELDS['key secondary outcomes']] - target_sample_size = ctx[FIELDS['target sample size']] - recruitment_status = ctx[FIELDS['recruitment status']] - other_recruitment_status = ctx[FIELDS['other recruitment status']] - first_enrollment_date = ctx[FIELDS['first enrollment date']] - expected_enrollment_completion_date = ctx[FIELDS['expected enrollment completion date']] - expected_research_completion_date = ctx[FIELDS['expected research completion date']] - ethical_approval = ctx[FIELDS['ethical approval']] - ethical_approval_details = ctx[FIELDS['ethical approval details']] - ethical_committee_judgment = ctx[FIELDS['ethical committee judgment']] - data = ctx[FIELDS['data']] - published_paper = ctx[FIELDS['published paper identifier']] - study_website = ctx[FIELDS['study website']] - study_results = ctx[FIELDS['study results']] - - def get_link(self, id): - return LINK_FORMAT.format(id) - - def split_names(self, obj): - if not obj: - return None - return obj.split(',') - - -class RRTransformer(ChainTransformer): - VERSION = 1 - root_parser = Registration diff --git a/share/transformers/com_springer.py b/share/transformers/com_springer.py deleted file mode 100644 index 54b855dd7..000000000 --- a/share/transformers/com_springer.py +++ /dev/null @@ -1,82 +0,0 @@ -from share.transform.chain import * # noqa - - -class AgentIdentifier(Parser): - uri = IRI(ctx) 
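For context on the DSL deleted throughout this patch: a declaration like uri = IRI(ctx) builds a chain of link objects at class-definition time (ParserMeta collects them), and each chain is run later against the unwrapped document. Below is a minimal, standalone sketch of that chaining idea only; the names Link and PathLink are invented for illustration, and the real AbstractLink in share/transform/chain/links.py additionally tracks context frames, anchoring, and error paths.

class Link:
    """One step in a chain; subclasses override execute()."""
    def __init__(self, prev=None):
        self._prev = prev

    def then(self, nxt):
        # Append a step; spiritually what AbstractLink's __add__ does.
        nxt._prev = self
        return nxt

    def run(self, value):
        # Resolve the chain head-first, then apply this step.
        if self._prev is not None:
            value = self._prev.run(value)
        return self.execute(value)

    def execute(self, value):
        return value


class PathLink(Link):
    """Roughly what attribute access on ctx produces: a keyed lookup."""
    def __init__(self, key, prev=None):
        super().__init__(prev)
        self._key = key

    def execute(self, value):
        return value[self._key]


# An expression like ctx.journal.issn would build something like:
chain = PathLink('journal').then(PathLink('issn'))
assert chain.run({'journal': {'issn': '1234-5678'}}) == '1234-5678'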
- - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Person(Parser): - given_name = ParseName(ctx.creator).first - family_name = ParseName(ctx.creator).last - additional_name = ParseName(ctx.creator).middle - suffix = ParseName(ctx.creator).suffix - - -class Creator(Parser): - agent = Delegate(Person, ctx) - cited_as = ctx.creator - order_cited = ctx('index') - - -class Organization(Parser): - name = ctx.publisher - identifiers = Map(Delegate(AgentIdentifier), Try(IRI(ctx.issn), exceptions=(InvalidIRI, ))) - - class Extra: - issn = Try(ctx.issn) - - -class Publisher(Parser): - agent = Delegate(Organization, ctx) - - class Extra: - publication_name = ctx.publicationName - - -class Article(Parser): - title = ctx.title - description = ctx.abstract - rights = ctx.copyright - date_published = ParseDate(ctx.publicationDate) - date_updated = ParseDate(ctx.publicationDate) - - identifiers = Map( - Delegate(WorkIdentifier), - ctx.doi, - ctx.identifier, - Map(ctx.value, ctx.url), - ) - - related_agents = Concat( - Map(Delegate(Creator), ctx.creators), - Map(Delegate(Publisher), ctx) - ) - - tags = Map(Delegate(ThroughTags), ctx.genre) - - class Extra: - openaccess = ctx.openaccess - ending_page = Try(ctx.endingPage) - issue_type = Try(ctx.issuetype) - number = ctx.number - starting_page = ctx.startingPage - topicalCollection = Try(ctx.topicalCollection) - journalid = Try(ctx.journalid) - issn = Try(ctx.issn) - - -class SpringerTransformer(ChainTransformer): - VERSION = 1 - root_parser = Article diff --git a/share/transformers/edu_ageconsearch.py b/share/transformers/edu_ageconsearch.py deleted file mode 100644 index 27606b6df..000000000 --- a/share/transformers/edu_ageconsearch.py +++ /dev/null @@ -1,138 +0,0 @@ -import re - -from share.transform.chain import * - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class AgentIdentifier(Parser): - uri = ctx - - -class Agent(Parser): - schema = GuessAgentType(ctx.name) - name = ctx.name - identifiers = Map(Delegate(AgentIdentifier), Try(IRI(ctx.email))) - - -class ContributorRelation(Parser): - schema = 'Contributor' - - agent = Delegate(Agent, ctx) - cited_as = ctx.name - - -class CreatorRelation(ContributorRelation): - schema = 'Creator' - - order_cited = ctx('index') - - -class AffiliatedAgent(Parser): - schema = GuessAgentType(ctx, default='organization') - name = ctx - - -class AgentWorkRelation(Parser): - agent = Delegate(AffiliatedAgent, ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class Preprint(Parser): - title = Try(ctx.title) - description = Try(ctx.abstract) - identifiers = Concat( - Map(Delegate(WorkIdentifier), ctx.primary_identifier), - Map(Delegate(WorkIdentifier), ctx.uri), - Map(Delegate(WorkIdentifier), Try(ctx.identifiers)), - ) - - related_agents = Concat( - Map( - Delegate(CreatorRelation), - RunPython('get_agent_emails', ctx, 'authors', 'authors_email') - ), - Map( - Delegate(ContributorRelation), - RunPython('get_agent_emails', ctx, 'editors', 'editors_email') - ), - Map( - Delegate(AgentWorkRelation), - RunPython('get_affiliated_organization', Try(ctx.institution_association)) - ) - ) - - tags = Map(Delegate(ThroughTags), Try(ctx.keywords)) - date_published = ParseDate(Try(ctx.issue_date)) - subjects = Map(Delegate(ThroughSubjects), 
Subjects(Try(ctx.jel_codes))) - - class Extra: - other_titles = Try(ctx.other_titles) - notes = Try(ctx.notes) - editors = Try(ctx.editors) - editors_email = Try(ctx.editors_email) - authors = Try(ctx.authors) - authors_email = Try(ctx.authors_email) - series_report_number = Try(ctx.series_report_number) - institution_association = Try(ctx.institution_association) - collections = Try(ctx.collections) - total_pages = Try(ctx.total_pages) - from_page = Try(ctx.from_page) - to_page = Try(ctx.to_page) - identifiers = Try(ctx.identifiers) - uri = ctx.uri - - def get_agent_emails(self, ctx, agent_key, email_key): - """ - emails format: [name (email), name (email)] - """ - try: - agents = ctx[agent_key] if isinstance(ctx[agent_key], list) else [ctx[agent_key]] - except KeyError: - agents = [] - - try: - emails = ctx[email_key] if isinstance(ctx[email_key], list) else [ctx[email_key]] - except KeyError: - emails = [] - - agent_objects = [] - - for agent in agents: - agent_object = {'name': agent} - - agent_email = next((x for x in emails if agent in x), None) - - if agent_email: - agent_object['email'] = re.compile(r'\((\S+?)\)').search(agent_email).group(1) - agent_objects.append(agent_object) - - return agent_objects - - def get_affiliated_organization(self, affiliation): - """ - affiliation format: 'name>volume issue etc' - """ - return affiliation.split('>')[0] - - -class AgeconTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/edu_gwu.py b/share/transformers/edu_gwu.py deleted file mode 100644 index 019e2176a..000000000 --- a/share/transformers/edu_gwu.py +++ /dev/null @@ -1,102 +0,0 @@ -from share.transform.chain import ctx -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser -from share.transform.chain.soup import SoupXMLTransformer, SoupXMLDict, Soup - - -class WorkIdentifier(Parser): - uri = tools.IRI(ctx['#text']) - - -class Agent(Parser): - schema = tools.RunPython('get_type', ctx) - name = Soup(ctx, itemprop='name')['#text'] - - def get_type(self, obj): - return { - 'http://schema.org/Person': 'Person', - 'http://schema.org/Organization': 'Organization', - }[obj.soup['itemtype']] - - -class Creator(Parser): - order_cited = ctx('index') - agent = tools.Delegate(Agent, ctx) - - -class Contributor(Parser): - agent = tools.Delegate(Agent, ctx) - - -class Publisher(Parser): - agent = tools.Delegate(Agent, ctx) - - -class Tag(Parser): - name = ctx['#text'] - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class CreativeWork(Parser): - schema = tools.RunPython('get_type', ctx) - - title = tools.RunPython('get_title', ctx) - description = Soup(ctx, 'p', class_='genericfile_description')['#text'] - date_published = tools.ParseDate(Soup(ctx, itemprop='datePublished')['#text']) - date_updated = tools.ParseDate(Soup(ctx, itemprop='dateModified')['#text']) - rights = tools.OneOf( - tools.RunPython('get_rights_url', ctx), - tools.RunPython('get_dd', ctx, 'Rights')['#text'], - tools.Static(None) - ) - language = tools.Try(tools.ParseLanguage(Soup(ctx, itemprop='inLanguage')['#text'])) - - tags = tools.Map(tools.Delegate(ThroughTags), Soup(ctx, itemprop='keywords')) - - identifiers = tools.Map( - tools.Delegate(WorkIdentifier), - tools.Try(tools.RunPython('get_dd', ctx, 'Permanent Link')), - ) - - related_agents = tools.Concat( - tools.Map(tools.Delegate(Creator), Soup(ctx, itemprop='creator')), - tools.Map(tools.Delegate(Contributor), Soup(ctx, itemprop='contributor')), - 
tools.Map(tools.Delegate(Publisher), Soup(ctx, itemprop='publisher')), - ) - - class Extra: - gwu_unit = tools.RunPython('get_dd', ctx, 'GW Unit')['#text'] - related_url = tools.RunPython('get_dd', ctx, 'Related URL')['#text'] - previous_publication_information = tools.RunPython('get_dd', ctx, 'Previous Publication Information')['#text'] - depositor = tools.RunPython('get_dd', ctx, 'Depositor')['#text'] - characterization = tools.RunPython('get_dd', ctx, 'Characterization')['#text'] - - def get_type(self, obj): - return { - 'http://schema.org/CreativeWork': 'CreativeWork', - 'http://schema.org/Article': 'Article', - 'http://schema.org/Book': 'Book', - }.get(obj.soup.find('div')['itemtype'], 'CreativeWork') - - def get_title(self, obj): - title = obj.h1.soup - title.find('span', class_='label').decompose() - return title.get_text() - - def get_dd(self, obj, dt): - dt_tag = obj.soup.find('dt', string=dt) - if dt_tag: - return SoupXMLDict(soup=dt_tag.find_next_sibling('dd')) - return None - - def get_rights_url(self, obj): - dd = self.get_dd(obj, 'Rights') - return dd.soup.find('i', class_='glyphicon-new-window').parent['href'] - - -class GWScholarSpaceTransformer(SoupXMLTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/edu_harvarddataverse.py b/share/transformers/edu_harvarddataverse.py deleted file mode 100644 index e13f088a4..000000000 --- a/share/transformers/edu_harvarddataverse.py +++ /dev/null @@ -1,36 +0,0 @@ -from share.transform.chain import * - - -class Person(Parser): - given_name = ParseName(ctx).first - family_name = ParseName(ctx).last - additional_name = ParseName(ctx).middle - suffix = ParseName(ctx).suffix - - -class Creator(Parser): - agent = Delegate(Person, ctx) - order_cited = ctx('index') - cited_as = ctx - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class CreativeWork(Parser): - schema = 'dataset' - title = ctx.name - description = Try(ctx.description) - date_published = ParseDate(ctx.published_at) - identifiers = Map(Delegate(WorkIdentifier), ctx.url) - - related_agents = Map(Delegate(Creator), Try(ctx.authors)) - - class Extra: - citation = ctx.citation - - -class HarvardTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/gov_clinicaltrials.py b/share/transformers/gov_clinicaltrials.py deleted file mode 100644 index 1c249a40c..000000000 --- a/share/transformers/gov_clinicaltrials.py +++ /dev/null @@ -1,111 +0,0 @@ -from share.transform.chain import * -from share.transform.chain.utils import force_text - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class AgentIdentifier(Parser): - # email address - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class AffiliatedAgent(Parser): - schema = GuessAgentType(ctx, default='organization') - name = ctx - - -class IsAffiliatedWith(Parser): - related = Delegate(AffiliatedAgent, ctx) - - -class Institution(Parser): - name = OneOf(ctx.agency, ctx.facility.name, ctx) - location = RunPython('get_location', Try(ctx.facility.address)) - - class Extra: - agency_class = Try(ctx.agency_class) - - def get_location(self, ctx): - location = "" - if 'country' in ctx: - location += ctx['country'] + ': ' - if 'city' in ctx: - location += ctx['city'] + ', ' - if 'state' in ctx: - location += ctx['state'] + ' ' - return location - - -class Person(Parser): - given_name = Maybe(ctx, 'first_name') - family_name = Maybe(ctx, 'last_name') - additional_name = Maybe(ctx, 
'middle_name') - - identifiers = Map(Delegate(AgentIdentifier), Try(ctx.email)) - related_agents = Map(Delegate(IsAffiliatedWith), Try(ctx.affiliation)) - - -class Contributor(Parser): - agent = Delegate(Person, ctx) - - -class Funder(Parser): - agent = Delegate(Institution, ctx) - - -class Registration(Parser): - title = OneOf( - ctx.clinical_study.official_title, - ctx.clinical_study.brief_title - ) - description = Maybe(ctx.clinical_study, 'brief_summary')['textblock'] - - date_published = Try(ParseDate(RunPython(force_text, ctx.clinical_study.firstreceived_date))) - date_updated = Try(ParseDate(RunPython(force_text, ctx.clinical_study.lastchanged_date))) - - related_agents = Concat( - Map(Delegate(Contributor), Maybe(ctx.clinical_study, 'overall_official')), - Map(Delegate(Contributor), Maybe(ctx.clinical_study, 'overall_contact')), - Map(Delegate(Contributor), Maybe(ctx.clinical_study, 'overall_contact_backup')), - Map(Delegate(Funder), - Concat(ctx.clinical_study.sponsors.lead_sponsor, - Maybe(ctx.clinical_study.sponsors, 'collaborator'))) - ) - - tags = Map(Delegate(ThroughTags), Maybe(ctx.clinical_study, 'keyword')) - - identifiers = Concat(Map(Delegate(WorkIdentifier), Concat( - ctx['clinical_study']['required_header']['url'], - RunPython('format_url', ctx.clinical_study.id_info.nct_id, 'http://www.bioportfolio.com/resources/trial/'), - RunPython('format_url', Try(ctx.clinical_study.reference.PMID), 'www.ncbi.nlm.nih.gov/pubmed/')))) - - class Extra: - share_harvest_date = ctx.clinical_study.required_header.download_date - org_study_id = ctx.clinical_study.id_info.org_study_id - status = ctx.clinical_study.overall_status - start_date = Try(ParseDate(RunPython(force_text, ctx.clinical_study.start_date))) - completion_date = Try(ParseDate(RunPython(force_text, ctx.clinical_study.completion_date))) - completion_date_type = Try(ctx.clinical_study.completion_date['@type']) - study_type = ctx.clinical_study.study_type - conditions = Try(ctx.clinical_study.condition) - is_fda_regulated = Try(ctx.clinical_study.is_fda_regulated) - is_section_801 = Try(ctx.clinical_study.is_section_801) - citation = Try(ctx.clinical_study.reference.citation) - - def format_url(self, id, base): - return base + id - - -class ClinicalTrialsTransformer(ChainTransformer): - VERSION = 1 - root_parser = Registration diff --git a/share/transformers/gov_nih.py b/share/transformers/gov_nih.py deleted file mode 100644 index 3e49cdbe7..000000000 --- a/share/transformers/gov_nih.py +++ /dev/null @@ -1,312 +0,0 @@ -import re - -from share.transform.chain import * -import share.transform.chain.links as tools -from share.transform.chain.utils import format_address - - -PROJECT_BASE_URL = 'https://projectreporter.nih.gov/project_info_description.cfm?aid={}' -FOA_BASE_URL = 'https://grants.nih.gov/grants/guide/pa-files/{}.html' - - -def filter_nil(obj): - if isinstance(obj, dict) and obj.get('@http://www.w3.org/2001/XMLSchema-instance:nil'): - return None - return obj - - -def format_org_address(doc): - org_city = doc.get('ORG_CITY', '') - org_state = doc.get('ORG_STATE', '') - org_zipcode = doc.get('ORG_ZIPCODE', '') - org_country = doc.get('ORG_COUNTRY', '') - if not any((org_city, org_state, org_zipcode, org_country)): - return None - return format_address( - city=org_city, - state_or_province=org_state, - postal_code=org_zipcode, - country=org_country - ) - - -class WorkIdentifier(Parser): - uri = RunPython('format_nih_url', ctx) - - def format_nih_url(self, id): - return PROJECT_BASE_URL.format(id) - - -class 
Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class AwardeeAgent(Parser): - schema = GuessAgentType( - ctx.ORG_NAME, - default='organization' - ) - - name = ctx.ORG_NAME - location = RunPython(format_org_address, ctx) - - class Extra: - awardee_organization_duns = ctx.ORG_DUNS - awardee_organization_fips = ctx.ORG_FIPS - awardee_organization_dept = ctx.ORG_DEPT - awardee_organization_district = ctx.ORG_DISTRICT - awardee_organization_city = ctx.ORG_CITY - awardee_organization_state = ctx.ORG_STATE - awardee_organization_zipcode = ctx.ORG_ZIPCODE - awardee_organization_country = ctx.ORG_COUNTRY - - -class AgentWorkRelation(Parser): - agent = Delegate(AwardeeAgent, ctx) - - -class FunderAgent(Parser): - schema = GuessAgentType( - ctx.IC_NAME, - default='organization' - ) - - # The full name of the IC, as defined here: http://grants.nih.gov/grants/glossary.htm#InstituteorCenter(IC) - name = ctx.IC_NAME - - # class Extra: - # The organizational code of the IC, as defined here: http://grants.nih.gov/grants/glossary.htm#InstituteorCenter(IC) - # acronym = RunPython(filter_nil, ctx.ADMINISTERING_IC) - # funding_ics = RunPython(filter_nil, ctx.FUNDING_ICs) - # funding_mechanism = RunPython(filter_nil, ctx.FUNDING_MECHANISM) - - -class Award(Parser): - name = ctx.PROJECT_TITLE - # The amount of the award provided by the funding NIH Institute(s) or Center(s) - description = RunPython(filter_nil, ctx.FUNDING_ICs) - award_amount = Int(RunPython(filter_nil, ctx.TOTAL_COST)) - date = Try( - ParseDate(RunPython(filter_nil, ctx.BUDGET_START)), - exceptions=(InvalidDate,), - ) - uri = RunPython('format_nih_url', RunPython(filter_nil, ctx.APPLICATION_ID)) - - class Extra: - awardee_name = RunPython(filter_nil, ctx.ORG_NAME) - awardee_organization_duns = RunPython(filter_nil, ctx.ORG_DUNS) - awardee_organization_fips = RunPython(filter_nil, ctx.ORG_FIPS) - awardee_organization_dept = RunPython(filter_nil, ctx.ORG_DEPT) - awardee_organization_district = RunPython(filter_nil, ctx.ORG_DISTRICT) - - arra_funded = RunPython(filter_nil, ctx.ARRA_FUNDED) - award_notice_date = RunPython(filter_nil, ctx.AWARD_NOTICE_DATE) - - support_year = RunPython(filter_nil, ctx.SUPPORT_YEAR) - foa_number = RunPython(filter_nil, ctx.FOA_NUMBER) - - def format_nih_url(self, id): - return PROJECT_BASE_URL.format(id) - - # FOA's are NOT unique per award. 
AFAIK only the projects, themselves, are - # def format_foa_url(self, foa_number): - # return FOA_BASE_URL.format(foa_number) - - -class ThroughAwards(Parser): - award = Delegate(Award, ctx) - - -class FunderRelation(Parser): - schema = 'Funder' - - agent = Delegate(FunderAgent, ctx) - awards = Map(Delegate(ThroughAwards), ctx) - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(AwardeeAgent, ctx) - - -class POAgent(Parser): - schema = 'Person' - - name = ctx - - -class PIAgent(Parser): - schema = 'Person' - - name = ctx.PI_NAME - related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), ctx['org_ctx']) - - class Extra: - pi_id = RunPython(filter_nil, ctx.PI_ID) - - -class PIContactAgent(Parser): - schema = 'Person' - - name = ctx.PI_NAME - related_agents = tools.Concat( - tools.Map(tools.Delegate(IsAffiliatedWith), ctx['org_ctx']), - ) - - class Extra: - pi_id = RunPython(filter_nil, ctx.PI_ID) - - -class PIRelation(Parser): - schema = 'PrincipalInvestigator' - - agent = Delegate(PIAgent, ctx) - cited_as = ctx.PI_NAME - - -class PIContactRelation(Parser): - schema = 'PrincipalInvestigatorContact' - - agent = Delegate(PIContactAgent, ctx) - cited_as = ctx.PI_NAME - - -class PORelation(Parser): - schema = 'Contributor' - - agent = Delegate(POAgent, ctx) - cited_as = ctx - - -class Project(Parser): - title = RunPython(filter_nil, ctx.row.PROJECT_TITLE) - related_agents = Concat( - Map( - Delegate(PIRelation), - RunPython( - 'get_pi', - RunPython(filter_nil, Try(ctx.row)), - primary=False, - ) - ), - Map( - Delegate(PIContactRelation), - RunPython( - 'get_pi', - RunPython(filter_nil, Try(ctx.row)), - ) - ), - Map(Delegate(PORelation), RunPython(filter_nil, ctx.row.PROGRAM_OFFICER_NAME)), - Map(Delegate(AgentWorkRelation), RunPython('get_organization_ctx', RunPython(filter_nil, ctx.row))), - Map(Delegate(FunderRelation), Filter(lambda x: isinstance(x['IC_NAME'], str) or x['IC_NAME'].get('@http://www.w3.org/2001/XMLSchema-instance:nil') != 'true', ctx.row)), - ) - - identifiers = Map( - Delegate(WorkIdentifier), RunPython(filter_nil, ctx.row.APPLICATION_ID) - ) - - subjects = Map( - Delegate(ThroughSubjects), - Subjects(RunPython(filter_nil, Try(ctx.row.PROJECT_TERMSX.TERM))) - ) - - tags = Map( - Delegate(ThroughTags), - RunPython(filter_nil, Try(ctx.row.PROJECT_TERMSX.TERM)), - RunPython(filter_nil, Try(ctx.row.ORG_DEPT)) - ) - - class Extra: - activity = RunPython(filter_nil, ctx.row.ACTIVITY) - application_id = RunPython(filter_nil, ctx.row.APPLICATION_ID) - budget_start = RunPython(filter_nil, ctx.row.BUDGET_START) - budget_end = RunPython(filter_nil, ctx.row.BUDGET_END) - cfda_code = RunPython(filter_nil, ctx.row.CFDA_CODE) - core_project_number = RunPython(filter_nil, ctx.row.CORE_PROJECT_NUM) - ed_inst_type = RunPython(filter_nil, ctx.row.ED_INST_TYPE) - fiscal_year = RunPython(filter_nil, ctx.row.FY) - full_project_number = RunPython(filter_nil, ctx.row.FULL_PROJECT_NUM) - nih_spending_cats = RunPython(filter_nil, ctx.row.NIH_SPENDING_CATS) - phr = RunPython(filter_nil, ctx.row.PHR) - project_start = RunPython(filter_nil, ctx.row.PROJECT_START) - project_end = RunPython(filter_nil, ctx.row.PROJECT_END) - serial_number = RunPython(filter_nil, ctx.row.SERIAL_NUMBER) - study_section = RunPython(filter_nil, ctx.row.STUDY_SECTION) - study_section_name = RunPython(filter_nil, ctx.row.STUDY_SECTION_NAME) - subproject_id = RunPython(filter_nil, ctx.row.SUBPROJECT_ID) - suffix = RunPython(filter_nil, ctx.row.SUFFIX) - total_cost = RunPython(filter_nil, ctx.row.TOTAL_COST) - 
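
Nearly every field in the NIH Project parser above is wrapped in filter_nil. Standalone, that guard (logic copied from the deleted module) behaves like this:

def filter_nil(obj):
    # xmltodict renders <FIELD xsi:nil="true"/> as a dict carrying the nil
    # attribute under the expanded namespace; treat that as a missing value.
    if isinstance(obj, dict) and obj.get('@http://www.w3.org/2001/XMLSchema-instance:nil'):
        return None
    return obj

assert filter_nil({'@http://www.w3.org/2001/XMLSchema-instance:nil': 'true'}) is None
assert filter_nil('10677041') == '10677041'
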
total_cost_subproject = RunPython(filter_nil, ctx.row.TOTAL_COST_SUB_PROJECT) - - def parse_awards(self, award_info): - return [award for award in award_info.split(';')] - - def maybe_org(self, obj): - if isinstance(obj.get('ORG_NAME'), dict) and obj.get('@http://www.w3.org/2001/XMLSchema-instance:nil'): - return None - return obj - - def get_organization_ctx(self, ctx): - org_ctx = { - 'ORG_NAME': filter_nil(ctx['ORG_NAME']), - 'ORG_FIPS': filter_nil(ctx['ORG_FIPS']), - 'ORG_DEPT': filter_nil(ctx['ORG_DEPT']), - 'ORG_DUNS': filter_nil(ctx['ORG_DUNS']), - 'ORG_DISTRICT': filter_nil(ctx['ORG_DISTRICT']), - 'ORG_CITY': filter_nil(ctx['ORG_CITY']), - 'ORG_STATE': filter_nil(ctx['ORG_STATE']), - 'ORG_ZIPCODE': filter_nil(ctx['ORG_ZIPCODE']), - 'ORG_COUNTRY': filter_nil(ctx['ORG_COUNTRY']) - } - return org_ctx - - def get_pi(self, ctx, primary=True): - ''' - <PI> - <PI_NAME>VIDAL, MARC (contact)</PI_NAME> - <PI_ID>2094159 (contact)</PI_ID> - </PI> - ''' - pi_list = ctx['PIS']['PI'] if isinstance(ctx['PIS']['PI'], list) else [ctx['PIS']['PI']] - org_ctx = self.get_organization_ctx(ctx) - # if only one primary contact is assumed - if len(pi_list) <= 1: - if not primary: - return None - pi_list[0]['org_ctx'] = org_ctx - return pi_list - # more than one, get the primary - if primary: - try: - pi = next(x for x in pi_list if '(contact)' in x['PI_NAME']) - except StopIteration: - return [] - - return { - 'PI_NAME': re.sub(r'(\(contact\))', '', pi['PI_NAME']).strip(), - 'PI_ID': re.sub(r'(\(contact\))', '', pi['PI_ID']).strip(), - 'org_ctx': org_ctx - } - # more than one, get the non-primary - non_primary_pi = [] - for pi in pi_list: - if '(contact)' not in pi['PI_NAME']: - pi['org_ctx'] = org_ctx - non_primary_pi.append(pi) - return non_primary_pi - - -class NIHTransformer(ChainTransformer): - VERSION = 1 - root_parser = Project diff --git a/share/transformers/gov_nsfawards.py b/share/transformers/gov_nsfawards.py deleted file mode 100644 index 0cedb46d3..000000000 --- a/share/transformers/gov_nsfawards.py +++ /dev/null @@ -1,262 +0,0 @@ -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain import links as tools -from share.transform.chain.exceptions import InvalidIRI -from share.transform.chain.parsers import Parser -from share.transform.chain.utils import format_address - - -def format_url(award_id): - return 'https://www.nsf.gov/awardsearch/showAward?AWD_ID={}'.format(award_id) - - -def format_org_address(ctx): - awardee_address = ctx.get('awardeeAddress', '') - awardee_city = ctx.get('awardeeCity', '') - awardee_state_code = ctx.get('awardeeStateCode', '') - awardee_country_code = ctx.get('awardeeCountryCode', '') - awardee_zip_code = ctx.get('awardeeZipCode', '') - - if not any((awardee_address, awardee_city, awardee_state_code, awardee_zip_code, awardee_country_code)): - return None - - return format_address( - address1=awardee_address, - city=awardee_city, - state_or_province=awardee_state_code, - postal_code=awardee_zip_code, - country=awardee_country_code - ) - - -class WorkIdentifier(Parser): - uri = tools.RunPython(format_url, ctx.id) - - -class FunderAgent(Parser): - schema = tools.GuessAgentType( - ctx.agency, - default='organization' - ) - - name = ctx.agency - - -class Award(Parser): - name = ctx.title - description = ctx.fundsObligatedAmt - award_amount = tools.Int(ctx.fundsObligatedAmt) - date = tools.ParseDate(ctx.date) - uri = tools.RunPython(format_url, ctx.id) - - class Extra: - funds_obligated_amt = ctx.fundsObligatedAmt - award_id = ctx.id - 
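
The NIH get_pi helper above keys the primary contact off a literal '(contact)' marker in PI_NAME. A condensed sketch of that split (not the full helper, which also attaches org_ctx to each record):

import re

def split_pi_contacts(pi_list):
    # NIH marks the primary PI by appending ' (contact)' to PI_NAME/PI_ID;
    # strip the marker from the primary and leave the rest as-is.
    primary = next((p for p in pi_list if '(contact)' in p['PI_NAME']), None)
    others = [p for p in pi_list if '(contact)' not in p['PI_NAME']]
    if primary is not None:
        primary = {k: re.sub(r'\(contact\)', '', v).strip() for k, v in primary.items()}
    return primary, others

primary, others = split_pi_contacts([
    {'PI_NAME': 'VIDAL, MARC (contact)', 'PI_ID': '2094159 (contact)'},
    {'PI_NAME': 'DOE, JANE', 'PI_ID': '1234567'},
])
assert primary == {'PI_NAME': 'VIDAL, MARC', 'PI_ID': '2094159'}
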
transaction_type = ctx.transType - estimated_total_amt = tools.Try(ctx.estimatedTotalAmt) - catalog_of_federal_domestic_assistance_number = tools.Try(ctx.cfdaNumber) - - date = ctx.date - date_start = tools.Try(ctx.startDate) - date_expiration = tools.Try(ctx.expDate) - - awardee = tools.Try(ctx.awardee) - awardee_address = tools.Try(ctx.awardeeAddress) - awardee_name = ctx.awardeeName - awardee_city = tools.Try(ctx.awardeeCity) - awardee_county = tools.Try(ctx.awardeeCounty) - awardee_state_code = tools.Try(ctx.awardeeStateCode) - awardee_country_code = tools.Try(ctx.awardeeCountryCode) - awardee_district_code = tools.Try(ctx.awardeeDistrictCode) - awardee_zip_code = tools.Try(ctx.awardeeZipCode) - - -class ThroughAwards(Parser): - award = tools.Delegate(Award, ctx) - - -class FunderRelation(Parser): - schema = 'Funder' - - agent = tools.Delegate(FunderAgent, ctx) - awards = tools.Map(tools.Delegate(ThroughAwards), ctx) - - -class AffiliatedAgent(Parser): - schema = tools.GuessAgentType( - ctx.awardeeName, - default='organization' - ) - - name = ctx.awardeeName - location = tools.RunPython(format_org_address, ctx) - - class Extra: - awardee = tools.Try(ctx.awardee) - awardee_address = tools.Try(ctx.awardeeAddress) - awardee_name = ctx.awardeeName - awardee_city = tools.Try(ctx.awardeeCity) - awardee_county = tools.Try(ctx.awardeeCounty) - awardee_state_code = tools.Try(ctx.awardeeStateCode) - awardee_country_code = tools.Try(ctx.awardeeCountryCode) - awardee_district_code = tools.Try(ctx.awardeeDistrictCode) - awardee_zip_code = tools.Try(ctx.awardeeZipCode) - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(AffiliatedAgent, ctx) - - -class AgentIdentifier(Parser): - uri = ctx - - -class POContributorAgent(Parser): - schema = 'Person' - - name = ctx.poName - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.Try( - tools.IRI(ctx.poEmail), - exceptions=(InvalidIRI,) - ) - ) - - related_agents = tools.Map( - tools.Delegate(IsAffiliatedWith), - tools.Filter(lambda x: 'awardeeName' in x, ctx) - ) - - class Extra: - po_name = tools.Try(ctx.poName) - po_email = tools.Try(ctx.poEmail) - - -class POContributorRelation(Parser): - schema = 'Contributor' - - agent = tools.Delegate(POContributorAgent, ctx) - cited_as = ctx.poName - - -class PIContributorAgent(Parser): - schema = 'Person' - - family_name = ctx.piLastName - given_name = ctx.piFirstName - additional_name = tools.Try(ctx.piMiddeInitial) - - related_agents = tools.Map( - tools.Delegate(IsAffiliatedWith), - tools.Filter(lambda x: 'awardeeName' in x, ctx) - ) - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.Try( - tools.IRI(ctx.piEmail), - exceptions=(InvalidIRI,) - ) - ) - - class Extra: - pi_last_name = ctx.piLastName - pi_first_name = ctx.piFirstName - pi_middle_initial = tools.Try(ctx.piMiddeInitial) - pi_email = tools.Try(ctx.piEmail) - - -class PIContributorRelation(Parser): - schema = 'PrincipalInvestigatorContact' - - agent = tools.Delegate(PIContributorAgent, ctx) - cited_as = tools.Join( - tools.Concat(ctx.piFirstName, ctx.piLastName), - joiner=' ' - ) - - -class AgentWorkRelation(Parser): - agent = tools.Delegate(AffiliatedAgent, ctx) - - -class CreativeWork(Parser): - # https://www.research.gov/common/webapi/awardapisearch-v1.htm#request-parameters - - title = ctx.title - description = ctx.abstractText - - identifiers = tools.Map(tools.Delegate(WorkIdentifier), ctx) - - related_agents = tools.Concat( - tools.Map(tools.Delegate(FunderRelation), ctx), - 
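
Both funder transformers guard address formatting the same way: if every component is blank, emit no location at all. A minimal sketch of the NSF variant above, with a simplified join standing in for share.transform.chain.utils.format_address (whose exact output format is not shown here):

def format_org_address(awardee):
    # Same all-blank guard as the deleted helper; the join below is a
    # simplified stand-in for format_address, not its real output.
    keys = ('awardeeAddress', 'awardeeCity', 'awardeeStateCode',
            'awardeeZipCode', 'awardeeCountryCode')
    parts = [awardee.get(k, '') for k in keys]
    if not any(parts):
        return None
    return ', '.join(p for p in parts if p)

assert format_org_address({}) is None
assert format_org_address({'awardeeCity': 'Arlington', 'awardeeStateCode': 'VA'}) == 'Arlington, VA'
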
tools.Map(tools.Delegate(PIContributorRelation), ctx), - tools.Map( - tools.Delegate(POContributorRelation), - tools.Filter(lambda x: x.get('poName') is not None, ctx) - ), - tools.Map( - tools.Delegate(AgentWorkRelation), - tools.Filter(lambda x: x.get('awardeeName') is not None, ctx) - ) - ) - - date_updated = tools.ParseDate(ctx.date) - - class Extra: - catalog_of_federal_domestic_assistance_number = tools.Try(ctx.cfdaNumber) - estimated_total_amt = tools.Try(ctx.estimatedTotalAmt) - fund_program_name = tools.Try(ctx.fundProgramName) - has_project_outcomes_report = tools.Try(ctx.projectOutComesReport) - primary_program = tools.Try(ctx.primaryProgram) - public_access_mandate = tools.Try(ctx.publicAccessMandate) - transaction_type = tools.Try(ctx.transType) - - co_pi_name = tools.Try(ctx.coPDPI) # irregular field (ex. [First Last ~<numbers>, ...]) - proj_dir_pi_name = tools.Try(ctx.pdPIName) - - duns_number = tools.Try(ctx.dunsNumber) - parent_duns_number = tools.Try(ctx.parentDunsNumber) - - fund_agency_code = tools.Try(ctx.fundAgencyCode) - award_agency_code = tools.Try(ctx.awardAgencyCode) - - publication_research = tools.Try(ctx.publicationResearch) - publication_conference = tools.Try(ctx.publicationConference) - - po_name = tools.Try(ctx.poName) - po_email = tools.Try(ctx.poEmail) - - date = ctx.date - date_start = tools.Try(ctx.startDate) - date_expiration = tools.Try(ctx.expDate) - - pi_last_name = ctx.piLastName - pi_first_name = ctx.piFirstName - pi_middle_initial = tools.Try(ctx.piMiddeInitial) - pi_email = tools.Try(ctx.piEmail) - - awardee = tools.Try(ctx.awardee) - awardee_address = tools.Try(ctx.awardeeAddress) - awardee_city = tools.Try(ctx.awardeeCity) - awardee_country_code = tools.Try(ctx.awardeeCountryCode) - awardee_county = tools.Try(ctx.awardeeCounty) - awardee_district_code = tools.Try(ctx.awardeeDistrictCode) - awardee_name = tools.Try(ctx.awardeeName) - awardee_state_code = tools.Try(ctx.awardeeStateCode) - awardee_zip_code = tools.Try(ctx.awardeeZipCode) - - performance_address = tools.Try(ctx.perfAddress) - performance_city = tools.Try(ctx.perfCity) - performance_country_code = tools.Try(ctx.perfCountryCode) - performance_county = tools.Try(ctx.perfCounty) - performance_district_code = tools.Try(ctx.perfDistrictCode) - performance_location = tools.Try(ctx.perfLocation) - performance_state_code = tools.Try(ctx.perfStateCode) - performance_zip_code = tools.Try(ctx.perfZipCode) - - -class NSFTransformer(ChainTransformer): - VERSION = 2 - root_parser = CreativeWork diff --git a/share/transformers/gov_pubmedcentral_pmc.py b/share/transformers/gov_pubmedcentral_pmc.py deleted file mode 100644 index c66062569..000000000 --- a/share/transformers/gov_pubmedcentral_pmc.py +++ /dev/null @@ -1,367 +0,0 @@ -import pendulum -import re - -from share.transform.chain import ctx -from share.transform.chain.links import * -from share.transform.chain.soup import SoupXMLTransformer, SoupXMLDict, Soup -from share.transform.chain.parsers import Parser - -PMCID_FORMAT = 'http://www.ncbi.nlm.nih.gov/pmc/articles/PMC{}/' -PMID_FORMAT = 'http://www.ncbi.nlm.nih.gov/pubmed/{}' - -RETRACTION_PATTERNS = [ - r'^retraction(:|$)', - r'^retracted(:|$)', - r': retraction$', - r': retracted$', - r'\[retraction\]', - r'\[retracted\]', - r'retraction note', - r'retraction notice', - r'retraction statement', - r'retraction announcement', - r'notice of retraction', - r'statement of retraction', - r'editorial retraction', - r'author-initiated retraction', - r'retracted manuscript', - r'retraction of 
the research article', -] -RETRACTION_RE = re.compile('|'.join(RETRACTION_PATTERNS), re.I) - - -def pmcid_uri(pmcid): - if isinstance(pmcid, SoupXMLDict): - pmcid = pmcid['#text'] - if pmcid.startswith('PMC'): - pmcid = pmcid[3:] - return PMCID_FORMAT.format(pmcid) - - -def pmid_uri(pmid): - if isinstance(pmid, SoupXMLDict): - pmid = pmid['#text'] - return PMID_FORMAT.format(pmid) - - -class WorkIdentifier(Parser): - uri = ctx - - -class AgentIdentifier(Parser): - uri = ctx - - -class PublisherOrganization(Parser): - schema = GuessAgentType(ctx['publisher-name']['#text'], 'organization') - name = ctx['publisher-name']['#text'] - location = ctx['publisher-loc']['#text'] - - -class Publisher(Parser): - agent = Delegate(PublisherOrganization, ctx) - - -class JournalOrganization(Parser): - schema = 'organization' - name = ctx['journal-title-group']['journal-title']['#text'] - identifiers = Map( - Delegate(AgentIdentifier), - Map( - IRI(), - RunPython('get_issns', ctx) - ) - ) - - def get_issns(self, obj): - return [t['#text'] for t in obj['issn']] - - -class Journal(Parser): - schema = 'publisher' - agent = Delegate(JournalOrganization, ctx) - - -class Person(Parser): - suffix = Try(ctx.name.suffix['#text']) - family_name = ctx.name.surname['#text'] - given_name = Try(ctx.name['given-names']['#text']) - - identifiers = Map( - Delegate(AgentIdentifier), - Map( - IRI(), - Try(Soup(ctx, 'contrib-id', **{'contrib-id-type': 'orcid'})['#text']), - Try(Soup(ctx, 'email')['#text']) - ) - ) - - class Extra: - role = Try(ctx.role['#text']) - degrees = Try(ctx.degrees['#text']) - - -class Consortium(Parser): - name = ctx - - -class Creator(Parser): - agent = Delegate(Person, ctx) - order_cited = ctx('index') - - cited_as = RunPython('get_cited_as', ctx.name) - - def get_cited_as(self, obj): - surname = obj.soup.surname - given = obj.soup.find('given-names') - if given: - return '{}, {}'.format(surname.get_text(), given.get_text()) - return surname.get_text() - - -class CollabCreator(Parser): - schema = 'creator' - agent = Delegate(Consortium, RunPython('collab_name', ctx)) - - def collab_name(self, obj): - nested_group = obj.soup.find('contrib-group') - if nested_group: - # TODO add ThroughContributors - nested_group.extract() - return obj['#text'] - - -class Tag(Parser): - name = ctx['#text'] - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class RelatedWork(Parser): - schema = 'creativework' - - title = Try(ctx['article-title']['#text']) - - identifiers = Map( - Delegate(WorkIdentifier), - IRI(RunPython('get_uri', ctx)) - ) - - def get_uri(self, soup): - id = soup['@xlink:href'] - id_type = soup['@ext-link-type'] - if id_type in ('pmid', 'pubmed'): - return pmid_uri(id) - if id_type == 'pmcid': - return pmcid_uri(id) - return id - - -class WorkRelation(Parser): - schema = RunPython('get_relation_type', ctx) - related = Delegate(RelatedWork, ctx) - - def get_relation_type(self, related): - return { - 'retracted-article': 'retracts', - 'corrected-article': 'corrects', - 'commentary-article': 'discusses', - 'commentary': 'discusses', - 'letter': 'repliesto', - 'letter-reply': 'repliesto', - 'object-of-concern': 'disputes', - }.get(related['@related-article-type'], 'references') - - -# Guidelines (largely unenforced): -# https://www.ncbi.nlm.nih.gov/pmc/pub/filespec-xml/ -# https://www.ncbi.nlm.nih.gov/pmc/pmcdoc/tagging-guidelines/article/style.html -class Article(Parser): - schema = OneOf( - RunPython('get_article_type', ctx.record.metadata.article['@article-type']), - 
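
The PMC identifier helpers above normalize prefixed and bare IDs to the same URI, and retraction detection is plain title matching against RETRACTION_RE. Reduced to plain strings (the deleted versions also accept SoupXMLDict nodes):

import re

PMCID_FORMAT = 'http://www.ncbi.nlm.nih.gov/pmc/articles/PMC{}/'

def pmcid_uri(pmcid):
    # Accept either 'PMC4979059' or bare '4979059'.
    if pmcid.startswith('PMC'):
        pmcid = pmcid[3:]
    return PMCID_FORMAT.format(pmcid)

assert pmcid_uri('PMC4979059') == pmcid_uri('4979059')
# One of the RETRACTION_PATTERNS above, applied case-insensitively:
assert re.search(r'retraction notice', 'Retraction notice to "..."', re.I)
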
RunPython('guess_type_from_related', Soup(ctx, 'related-article')), - RunPython('guess_type_from_title', ctx.record.metadata.article.front['article-meta']['title-group']['article-title']['#text']), - Static('publication') - ) - - title = ctx.record.metadata.article.front['article-meta']['title-group']['article-title']['#text'] - - description = Try(ctx.record.metadata.article.front['article-meta']['abstract']['#text']) - - related_agents = Concat( - Try(Delegate(Journal, ctx.record.metadata.article.front['journal-meta'])), - Try(Delegate(Publisher, ctx.record.metadata.article.front['journal-meta']['publisher'])), - Map( - Delegate(Creator), - Soup( - ctx.record.metadata.article.front['article-meta']['contrib-group'], - lambda tag: tag.name == 'contrib' and tag['contrib-type'] == 'author' and tag('name', recursive=False) - ) - ), - Map( - Delegate(CollabCreator), - Soup( - ctx.record.metadata.article.front['article-meta']['contrib-group'], - lambda tag: tag.name == 'contrib' and tag['contrib-type'] == 'author' and tag.collab - ) - ), - ) - - tags = Map( - Delegate(ThroughTags), - Concat(Try(ctx.record.metadata.article.front['article-meta']['kwd-group']['kwd'])) - ) - - date_published = RunPython( - 'get_date_published', - ctx.record.metadata.article.front['article-meta'], - ['epub', 'ppub', 'epub-ppub', 'epreprint', 'collection', 'pub'] - ) - - identifiers = Concat( - Map( - Delegate(WorkIdentifier), - Map( - IRI(), - Try(Soup( - ctx.record.metadata.article.front['article-meta'], - 'article-id', - **{'pub-id-type': 'doi'} - )['#text']), - Map( - RunPython(pmcid_uri), - Soup( - ctx.record.metadata.article.front['article-meta'], - 'article-id', - **{'pub-id-type': 'pmcid'} - ) - ), - Map( - RunPython(pmid_uri), - Soup( - ctx.record.metadata.article.front['article-meta'], - 'article-id', - **{'pub-id-type': ('pmid', 'pubmed')} - ) - ) - ) - ), - ) - - related_works = Concat( - Map( - Try(Delegate(WorkRelation)), - Soup(ctx, 'related-article', **{'ext-link-type': ['doi', 'pmid', 'pubmed', 'pmcid'], 'xlink:href': True}) - ) - ) - - rights = Try(ctx.record.metadata.article.front['article-meta']['permissions']['license']['license-p']['#text']) - - class Extra: - correspondence = Try(ctx.record.metadata.article.front['article-meta']['author-notes']['corresp']['email']['#text']) - journal = ctx.record.metadata.article.front['journal-meta']['journal-title-group']['journal-title']['#text'] - in_print = Try(RunPython('get_print_information', ctx.record.metadata.article.front['article-meta'])) - - copyright = Try(ctx.record.metadata.article.front['article-meta']['permissions']['copyright-statement']['#text']) - copyright_year = Try(ctx.record.metadata.article.front['article-meta']['permissions']['copyright-year']['#text']) - epub_date = RunPython( - 'get_year_month_day', - Soup(ctx.record.metadata.article.front['article-meta'], 'pub-date', **{'pub-type': 'epub'}) - ) - ppub_date = RunPython( - 'get_year_month_day', - Soup(ctx.record.metadata.article.front['article-meta'], 'pub-date', **{'pub-type': 'ppub'}) - ) - - def get_article_type(self, article_type): - article_type_map = { - # 'abstract' - # 'addendum' - # 'announcement' - # 'article-commentary' - # 'book-review' - # 'books-received' - 'brief-report': 'report', - # 'calendar' - 'case-report': 'report', - # 'correction' - 'data-paper': 'dataset', - # 'discussion' - # 'editorial' - # 'expression-of-concern' - # 'in-brief' - # 'introduciton' - # 'letter' - 'meeting-report': 'report', - # 'methods-article' - # 'news' - # 'obituary' - 'oration': 
'presentation', - # 'other' - # 'product-review' - # 'reply' - 'research-article': 'article', - 'retraction': 'retraction', - 'review-article': 'article', - # 'systematic-review' - } - try: - return article_type_map[article_type] - except KeyError: - raise TransformError - - def guess_type_from_related(self, related): - if not isinstance(related, list): - related = [related] - if any(r.soup['related-article-type'] == 'retracted-article' for r in related): - return 'retraction' - raise Exception() - - def guess_type_from_title(self, title): - if RETRACTION_RE.search(title): - return 'retraction' - raise Exception() - - def get_date_published(self, obj, types, type_attr='pub-type'): - for t in types: - pub_date = obj.soup.find('pub-date', **{type_attr: t}) - if pub_date: - year = pub_date.year - month = pub_date.month - day = pub_date.day - if year and month and day: - return str(pendulum.datetime(int(year.string), int(month.string), int(day.string))) - elif year and month: - return str(pendulum.datetime(int(year.string), int(month.string), 1)) - if type_attr == 'pub-type': - return self.get_date_published(obj, types, 'date-type') - return None - - def get_year_month_day(self, list_): - if not list_: - return None - if not isinstance(list_, list): - list_ = [list_] - for item in list_: - year = item['year'] - month = item['month'] - day = item['day'] - if year and month and day: - return year['#text'], month['#text'], day['#text'] - elif year and month: - return year['#text'], month['#text'] - return None - - def get_print_information(self, ctx): - volume = ctx['volume']['#text'] - issue = ctx['issue']['#text'] - fpage = ctx['fpage']['#text'] - lpage = ctx['lpage']['#text'] - return "This work appeared in volume {} issue {} from pages {} - {}.".format(volume, issue, fpage, lpage) - - -class PMCTransformer(SoupXMLTransformer): - VERSION = 1 - root_parser = Article diff --git a/share/transformers/gov_scitech.py b/share/transformers/gov_scitech.py deleted file mode 100644 index d9e66667d..000000000 --- a/share/transformers/gov_scitech.py +++ /dev/null @@ -1,137 +0,0 @@ -import re - -from share.transform.chain import * - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Organization(Parser): - schema = GuessAgentType(ctx) - name = ctx - - -class IsAffiliatedWith(Parser): - related = Delegate(Organization, ctx) - - -class Person(Parser): - name = ctx.name - identifiers = Map(Delegate(AgentIdentifier), ctx.identifiers) - related_agents = Map(Delegate(IsAffiliatedWith), ctx.institutions) - - -class Creator(Parser): - order_cited = ctx('index') - cited_as = ctx.name - agent = Delegate(Person, ctx) - - -class Contributor(Parser): - agent = Delegate(Organization, ctx) - - -class Funder(Parser): - agent = Delegate(Organization, ctx) - - -class Publisher(Parser): - agent = Delegate(Organization, ctx) - - -class CreativeWork(Parser): - DOE_CONTRIBUTOR_REGEX = re.compile(r'((.+?)(?:, E-mail: [^,\s]+)*(?: \[.+?\])?(?: \(ORCID:.{16}\))?(?:;|$))', re.IGNORECASE) - DOE_AFFILIATIONS_REGEX = re.compile(r'\s*\[(.*?)\]') - DOE_EMAIL_REGEX = re.compile(r'(?:,? 
E-?mail:\s*)?(\S+@\S+?\.\S+)', re.IGNORECASE) - DOE_ORCID_REGEX = re.compile(r'\(ORCID:\s*(\S*)\)') - - schema = RunPython('get_schema', ctx.record['dc:type']) - - title = ctx.record['dc:title'] - description = ctx.record['dc:description'] - # is_deleted - date_published = Try(ParseDate(ctx.record['dc:date']), exceptions=(InvalidDate, )) - date_updated = OneOf( - ParseDate(ctx.record['dc:dateentry']), - ParseDate(ctx.record['dc:date']), - Static(None) - ) - # free_to_read_type - # free_to_read_date - rights = Maybe(ctx.record, 'dc:rights') - language = ParseLanguage(ctx.record['dc:language']) - - tags = Map(Delegate(ThroughTags), RunPython('get_tags', ctx.record['dc:subject'])) - - identifiers = Map( - Delegate(WorkIdentifier), - Try(ctx.record['dc:doi']), - ctx.record['dcq:identifier-citation'], - Try(ctx.record['dcq:identifier-purl']['#text']), - ) - related_agents = Concat( - Map(Delegate(Publisher), RunPython(lambda x: x.split(', ') if x else None, ctx.record['dcq:publisher'])), - Map(Delegate(Funder), RunPython(lambda x: x.split(', ') if x else None, ctx.record['dcq:publisherSponsor'])), - Map(Delegate(Contributor), RunPython(lambda x: x.split(', ') if x else None, ctx.record['dcq:publisherResearch'])), - Map(Delegate(Creator), RunPython('get_contributors', ctx.record['dc:creator'])), - ) - - class Extra: - coverage = ctx.record['dc:coverage'] - format = ctx.record['dc:format'] - identifier = ctx.record['dc:identifier'] - identifier_doe_contract = ctx.record['dcq:identifierDOEcontract'] - identifier_other = ctx.record['dc:identifierOther'] - identifier_report = ctx.record['dc:identifierReport'] - publisher_availability = ctx.record['dcq:publisherAvailability'] - publisher_country = ctx.record['dcq:publisherCountry'] - relation = ctx.record['dc:relation'] - type_qualifier = ctx.record['dcq:typeQualifier'] - - def get_schema(self, type): - return { - 'Thesis/Dissertation': 'Thesis', - 'Technical Report': 'Report', - 'Journal Article': 'Article', - 'Patent': 'Patent', - None: 'CreativeWork', - 'Miscellaneous': 'CreativeWork', - 'Other': 'CreativeWork', - 'Program Document': 'CreativeWork', - 'Conference': 'ConferencePaper', - 'Dataset': 'DataSet', - 'Book': 'Book', - }[type].lower() - - def get_tags(self, tags): - return (tags or '').split('; ') - - def get_contributors(self, context): - contributors = [] - for (match, name) in self.DOE_CONTRIBUTOR_REGEX.findall(context or ''): - if not match or not name: - continue - contributors.append({ - 'name': name.strip(), - 'institutions': self.DOE_AFFILIATIONS_REGEX.findall(match), - 'identifiers': self.DOE_EMAIL_REGEX.findall(match) + self.DOE_ORCID_REGEX.findall(match) - }) - return contributors - - -class ScitechTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/gov_usgs.py b/share/transformers/gov_usgs.py deleted file mode 100644 index 41339f8b8..000000000 --- a/share/transformers/gov_usgs.py +++ /dev/null @@ -1,146 +0,0 @@ -import re -from share.transform.chain import * - -EMAIL_RE = re.compile(r'\S+@\S+') - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class AgentIdentifier(Parser): - uri = IRI(ctx, urn_fallback=True) - - -class RelatedAgent(Parser): - schema = GuessAgentType(ctx, default='organization') - - name = ctx - - -class IsAffiliatedWith(Parser): - related = Delegate(RelatedAgent, ctx) - - -class AbstractAgent(Parser): - identifiers = Map( - Delegate(AgentIdentifier), - Try(ctx.email), - RunPython('to_str', ctx.contributorId) - ) - - related_agents = Map( - 
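
The SciTech get_contributors above unpacks DOE's packed creator strings (name, optional bracketed affiliation, optional e-mail/ORCID) with three regexes. A condensed sketch using simplified patterns rather than the exact deleted ones:

import re

AFFIL_RE = re.compile(r'\s*\[(.*?)\]')    # e.g. ' [Univ. of Somewhere]'
EMAIL_RE = re.compile(r'(\S+@\S+\.\S+)')  # simplified e-mail pattern

def parse_doe_creators(raw):
    contributors = []
    for chunk in raw.split(';'):
        name = AFFIL_RE.sub('', EMAIL_RE.sub('', chunk))
        name = name.replace(', E-mail:', '').strip(' ,')
        contributors.append({
            'name': name,
            'institutions': AFFIL_RE.findall(chunk),
            'identifiers': EMAIL_RE.findall(chunk),
        })
    return contributors

parsed = parse_doe_creators('Smith, John [Univ. of Somewhere]; Doe, Jane, E-mail: jdoe@example.com')
assert parsed[0]['institutions'] == ['Univ. of Somewhere']
assert parsed[1]['identifiers'] == ['jdoe@example.com']
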
Delegate(IsAffiliatedWith), - Try(ctx.affiliation.text), - RunPython('maybe_usgs', Try(ctx.usgs)) - ) - - def to_str(self, obj): - return str(obj) - - def maybe_usgs(self, obj): - if obj: - # How USGS references itself as a work publisher - return 'U.S. Geological Survey' - return None - - -class Organization(AbstractAgent): - schema = GuessAgentType(ctx.text, default='organization') - - name = ctx.text - - -class Person(AbstractAgent): - given_name = Maybe(ctx, 'given') - family_name = Maybe(ctx, 'family') - - -class Creator(Parser): - order_cited = ctx('index') - cited_as = RunPython('strip_emails', ctx.text) - agent = Delegate(Person, ctx) - - def strip_emails(self, obj): - return EMAIL_RE.sub('', obj) - - -class PublisherAgent(Parser): - schema = GuessAgentType(ctx.publisher, default='organization') - - name = ctx.publisher - location = Try(ctx.publisherLocation) - - -class Publisher(Parser): - agent = Delegate(PublisherAgent, ctx) - - -class CreativeWork(Parser): - schema = RunPython('get_schema', ctx.publicationType.text) - - title = ctx.title - description = Maybe(ctx, 'docAbstract') - date_updated = ParseDate(ctx.lastModifiedDate) - date_published = ParseDate(ctx.displayToPublicDate) - language = Maybe(ctx, 'language') - - related_agents = Concat( - Map( - Delegate(Creator), - Filter(lambda a: not a['corporation'], Try(ctx.contributors.authors)) - ), - Map( - Delegate(Creator.using(agent=Delegate(Organization, ctx))), - Filter(lambda a: a['corporation'], Try(ctx.contributors.authors)) - ), - Try(Delegate(Publisher, ctx)) - ) - - identifiers = Map( - Delegate(WorkIdentifier), - RunPython('format_usgs_id_as_url', ctx.indexId), - Try(ctx.doi) - ) - - class Extra: - additional_online_files = Maybe(ctx, 'additionalOnlineFiles') - country = Maybe(ctx, 'country') - defined_type = Maybe(ctx, 'defined_type') - end_page = Maybe(ctx, 'endPage') - geographic_extents = Maybe(ctx, 'geographicExtents') - index_id = Maybe(ctx, 'indexId') - ipds_id = Maybe(ctx, 'ipdsId') - issue = Maybe(ctx, 'issue') - links = Maybe(ctx, 'links') - online_only = Maybe(ctx, 'onlineOnly') - other_geospatial = Maybe(ctx, 'otherGeospatial') - publication_subtype = Maybe(ctx, 'publicationSubtype') - publication_year = Maybe(ctx, 'publicationYear') - start_page = Maybe(ctx, 'startPage') - state = Maybe(ctx, 'state') - type = Maybe(ctx, 'type') - volume = Maybe(ctx, 'volume') - - def get_schema(self, publication_type): - return { - 'Article': 'Article', - 'Book': 'Book', - 'Book chapter': 'Book', - 'Conference Paper': 'ConferencePaper', - 'Dataset': 'DataSet', - # 'Pamphlet': - # 'Patent': - 'Report': 'Report', - 'Speech': 'Presentation', - 'Thesis': 'Thesis', - # 'Videorecording': - }.get(publication_type) or 'CreativeWork' - - def format_usgs_id_as_url(self, id): - return 'https://pubs.er.usgs.gov/publication/{}'.format(id) - - -class USGSTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/io_osf.py b/share/transformers/io_osf.py deleted file mode 100644 index e8ca917a4..000000000 --- a/share/transformers/io_osf.py +++ /dev/null @@ -1,131 +0,0 @@ -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser - - -class SimpleWorkIdentifier(Parser): - schema = 'WorkIdentifier' - - uri = tools.IRI(ctx) - - -class WorkIdentifier(Parser): - uri = ctx.attributes.value - - class Extra: - identifier_type = tools.Try(ctx.attributes.category) - - -class AgentIdentifier(Parser): - uri 
= tools.IRI(ctx) - - -# TODO At somepoint we'll need to get Institutions as well -class Person(Parser): - given_name = tools.OneOf( - ctx.embeds.users.data.attributes.given_name, - ctx.embeds.users.errors[0].meta.given_name, - ) - family_name = tools.OneOf( - ctx.embeds.users.data.attributes.family_name, - ctx.embeds.users.errors[0].meta.family_name, - ) - additional_name = tools.OneOf( - ctx.embeds.users.data.attributes.middle_names, - ctx.embeds.users.errors[0].meta.middle_names, - ) - suffix = tools.OneOf( - ctx.embeds.users.data.attributes.suffix, - ctx.embeds.users.errors[0].meta.suffix, - ) - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.RunPython('registered', ctx.embeds.users.data.links.html), - tools.Try(ctx.embeds.users.data.links.profile_image), - ) - - class Extra: - locale = tools.Try(ctx.embeds.users.data.attributes.locale) - date_registered = tools.Try(ctx.embeds.users.data.attributes.date_registered) - active = tools.Try(ctx.embeds.users.data.attributes.active) - timezone = tools.Try(ctx.embeds.users.data.attributes.timezone) - - def registered(self, context): - if self.context['attributes']['unregistered_contributor']: - return None - return context - - -class Contributor(Parser): - agent = tools.Delegate(Person, ctx) - cited_as = tools.OneOf( - ctx.embeds.users.data.attributes.full_name, - ctx.embeds.users.errors[0].meta.full_name, - ) - - -class Creator(Contributor): - order_cited = ctx.attributes.index - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class Institution(Parser): - name = ctx.attributes.name - identifiers = tools.Map(tools.Delegate(AgentIdentifier), ctx.links.self) - - class Extra: - description = ctx.attributes.description - - -class AgentWorkRelation(Parser): - agent = tools.Delegate(Institution, ctx) - - -class CreativeWork(Parser): - title = ctx.attributes.title - description = ctx.attributes.description - is_deleted = tools.Static(False) - # date_published = - date_updated = tools.ParseDate(ctx.attributes.date_modified) - # free_to_read_type = - # free_to_read_date = - # rights = tools.Try(ctx.attributes.node_license) Doesn't seem to have an useful information - # language = - - identifiers = tools.Concat( - tools.Map(tools.Delegate(SimpleWorkIdentifier), ctx.links.html, ctx.links.self), - tools.Map(tools.Delegate(WorkIdentifier), tools.Try(ctx.identifiers)) - ) - - tags = tools.Map(tools.Delegate(ThroughTags), ctx.attributes.category, ctx.attributes.tags) - - class Extra: - date_created = tools.ParseDate(ctx.attributes.date_created) - - -class IsPartOf(Parser): - subject = tools.Delegate(CreativeWork, ctx) - - -class Project(CreativeWork): - is_root = True - related_works = tools.Map(tools.Delegate(IsPartOf), tools.Try(ctx.children)) - - related_agents = tools.Concat( - tools.Map(tools.Delegate(Creator), tools.Filter(lambda x: x['attributes']['bibliographic'], ctx.contributors)), - tools.Map(tools.Delegate(Contributor), tools.Filter(lambda x: not x['attributes']['bibliographic'], ctx.contributors)), - tools.Map(tools.Delegate(AgentWorkRelation), tools.Try(ctx.institutions)), - ) - - -class OSFTransformer(ChainTransformer): - VERSION = 1 - root_parser = Project diff --git a/share/transformers/io_osf_preprints.py b/share/transformers/io_osf_preprints.py deleted file mode 100644 index 647128396..000000000 --- a/share/transformers/io_osf_preprints.py +++ /dev/null @@ -1,106 +0,0 @@ -from share.transform.chain import ctx, links as tools, ChainTransformer -from 
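
Several OSF parsers above read each user attribute with OneOf, falling back from the embedded user document to errors[0].meta (e.g. when the user embed fails to resolve). A simplified sketch of that fallback semantics, not the chain library's real OneOf link:

def one_of(*paths):
    # Try each accessor in order; the first one that resolves wins.
    def resolve(doc):
        for path in paths:
            try:
                return path(doc)
            except (KeyError, IndexError, TypeError):
                continue
        raise KeyError('no path matched')
    return resolve

full_name = one_of(
    lambda d: d['embeds']['users']['data']['attributes']['full_name'],
    lambda d: d['embeds']['users']['errors'][0]['meta']['full_name'],
)
assert full_name({'embeds': {'users': {'errors': [{'meta': {'full_name': 'A. Nony Mous'}}]}}}) == 'A. Nony Mous'
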
share.transform.chain.parsers import Parser - -from . import io_osf as osf - - -# class PersonIdentifier(Parser): -# uri = ctx - -# class Person(Parser): -# given_name = OneOf( -# ctx.embeds.users.data.attributes.given_name, -# ctx.embeds.users.errors[0].meta.given_name, -# ) -# family_name = OneOf( -# ctx.embeds.users.data.attributes.family_name, -# ctx.embeds.users.errors[0].meta.family_name, -# ) -# additional_name = OneOf( -# ctx.embeds.users.data.attributes.middle_names, -# ctx.embeds.users.errors[0].meta.middle_names, -# ) -# suffix = OneOf( -# ctx.embeds.users.data.attributes.suffix, -# ctx.embeds.users.errors[0].meta.suffix, -# ) -# personidentifiers = Map(Delegate(PersonIdentifier), Try(ctx.embeds.users.data.links.html)) - -# class Extra: -# nodes = Try(ctx.embeds.users.data.relationships.nodes.links.related.href) -# locale = Try(ctx.embeds.users.data.attributes.locale) -# date_registered = Try(ctx.embeds.users.data.attributes.date_registered) -# active = Try(ctx.embeds.users.data.attributes.active) -# timezone = Try(ctx.embeds.users.data.attributes.timezone) -# profile_image = OneOf( -# ctx.embeds.users.data.links.profile_image, -# ctx.embeds.users.errors[0].meta.profile_image -# ) - - -# class Contributor(Parser): -# person = Delegate(Person, ctx) -# order_cited = ctx.attributes.index -# bibliographic = ctx.attributes.bibliographic -# cited_name = OneOf( -# ctx.embeds.users.data.attributes.full_name, -# ctx.embeds.users.errors[0].meta.full_name, -# ) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -# class Institution(Parser): -# name = ctx.attributes.name -# url = ctx.links.self - -# class Extra: -# nodes = ctx.relationships.nodes.links.related.href -# users = ctx.relationships.users.links.related.href -# registrations = ctx.relationships.registrations.links.related.href -# description = ctx.attributes.description - - -class Subject(Parser): - name = ctx.text - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -class WorkIdentifier(Parser): - uri = tools.IRI(ctx) - - -class Preprint(osf.Project): - description = tools.Try(ctx.attributes.abstract) - date_updated = tools.ParseDate(ctx.attributes.date_modified) - date_published = tools.ParseDate(ctx.attributes.date_created) - # NOTE: OSF has a direct mapping to SHARE's taxonomy. 
Subjects() is not needed
-    subjects = tools.Map(tools.Delegate(ThroughSubjects), ctx.attributes.subjects)
-    identifiers = tools.Map(
-        tools.Delegate(WorkIdentifier),
-        ctx.links.self,
-        ctx.links.html,
-        tools.Try(ctx.links.doi)
-    )
-    tags = tools.Map(tools.Delegate(ThroughTags), tools.Try(ctx.attributes.tags))
-    rights = tools.Try(ctx.attributes.node_license)
-
-    related_works = tools.Static([])
-    related_agents = tools.Concat(
-        tools.Map(tools.Delegate(osf.Creator), tools.Filter(lambda x: x['attributes']['bibliographic'], ctx.contributors)),
-        tools.Map(tools.Delegate(osf.Contributor), tools.Filter(lambda x: not x['attributes']['bibliographic'], ctx.contributors)),
-    )
-
-
-class PreprintTransformer(ChainTransformer):
-    VERSION = 1
-    root_parser = Preprint
diff --git a/share/transformers/io_osf_registrations.py b/share/transformers/io_osf_registrations.py
deleted file mode 100644
index aa0501235..000000000
--- a/share/transformers/io_osf_registrations.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from share.transform.chain import exceptions
-from share.transform.chain import ctx, ChainTransformer
-from share.transform.chain.links import Delegate, Map, Maybe, Try, ParseDate
-from share.transform.chain.parsers import Parser
-
-from . import io_osf as osf
-
-
-class WorkIdentifier(Parser):
-    uri = ctx
-
-
-class Registration(osf.Project):
-    date_published = ParseDate(ctx.attributes.date_registered)
-    free_to_read_date = Try(ParseDate(ctx.attributes.embargo_end_date), exceptions=(exceptions.InvalidDate, ))
-    identifiers = Map(Delegate(WorkIdentifier), ctx.links.html, ctx.links.self)
-
-    class Extra:
-        registration_schema = Maybe(ctx.relationships, 'registration_schema').links.related.href
-        pending_registration_approval = Maybe(ctx.relationships, 'pending_registration_approval')
-        registration_supplement = Maybe(ctx.attributes, 'registration_supplement')
-        registered_meta_summary = Try(ctx.registered_meta.summary.value)
-        withdrawn = Maybe(ctx.attributes, 'withdrawn')
-        date_registered = Maybe(ctx.attributes, 'withdrawn')
-        pending_embargo_approval = Maybe(ctx.attributes, 'pending_embargo_approval')
-        withdrawal_justification = Maybe(ctx.attributes, 'withdrawal_justification')
-        pending_withdrawal = Maybe(ctx.attributes, 'pending_withdrawal')
-
-
-class OSFRegistrationsTransformer(ChainTransformer):
-    VERSION = 1
-    root_parser = Registration
diff --git a/share/transformers/mods.py b/share/transformers/mods.py
deleted file mode 100644
index 4a7c5d92a..000000000
--- a/share/transformers/mods.py
+++ /dev/null
@@ -1,526 +0,0 @@
-import re
-import logging
-
-import xmltodict
-
-from share.transform.chain import ChainTransformer, ctx, links as tools
-from share.transform.chain.exceptions import InvalidIRI
-from share.transform.chain.links import GuessAgentTypeLink
-from share.transform.chain.parsers import Parser
-from share.transform.chain.utils import force_text
-from share.transform.chain.utils import oai_allowed_by_sets
-
-
-logger = logging.getLogger(__name__)
-
-
-def get_list(dct, key):
-    val = dct.get(key, [])
-    return val if isinstance(val, list) else [val]
-
-
-#### Identifiers ####
-
-class MODSWorkIdentifier(Parser):
-    schema = 'WorkIdentifier'
-
-    uri = tools.RunPython(force_text, ctx)
-
-    class Extra:
-        identifier_type = tools.Try(ctx['@type'])
-
-
-class MODSAgentIdentifier(Parser):
-    schema = 'AgentIdentifier'
-
-    uri = ctx
-
-
-#### Agents ####
-
-class AffiliatedAgent(Parser):
-    schema = tools.GuessAgentType(ctx, default='organization')
-
-    name = ctx
-
-
-class IsAffiliatedWith(Parser):
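
mods.py's get_list above (copied verbatim here) papers over xmltodict's habit of returning a dict for a single child element but a list for repeated ones:

def get_list(dct, key):
    # Normalize scalar-or-list values to a list.
    val = dct.get(key, [])
    return val if isinstance(val, list) else [val]

assert get_list({'mods:namePart': 'Smith'}, 'mods:namePart') == ['Smith']
assert get_list({'mods:namePart': ['Smith', 'Jane']}, 'mods:namePart') == ['Smith', 'Jane']
assert get_list({}, 'mods:namePart') == []
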
- related = tools.Delegate(AffiliatedAgent, ctx) - - -class MODSAgent(Parser): - schema = tools.RunPython('get_agent_schema', ctx) - - name = tools.OneOf( - tools.RunPython(force_text, ctx['mods:displayForm']), - tools.RunPython('squash_name_parts', ctx) - ) - - related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), tools.Concat(tools.Try( - tools.Filter(lambda x: bool(x), tools.RunPython(force_text, ctx['mods:affiliation'])) - ))) - - identifiers = tools.Map( - tools.Delegate(MODSAgentIdentifier), - tools.Unique(tools.Map( - tools.Try(tools.IRI(), exceptions=(InvalidIRI, )), - tools.Map( - tools.RunPython(force_text), - tools.Filter( - lambda obj: 'invalid' not in obj, - tools.Try(ctx['mods:nameIdentifier']), - ) - ) - )) - ) - - class Extra: - name_type = tools.Try(ctx['@type']) - name_part = tools.Try(ctx['mods:namePart']) - affiliation = tools.Try(ctx['mods:affiliation']) - description = tools.Try(ctx['mods:description']) - display_form = tools.Try(ctx['mods:displayForm']) - etal = tools.Try(ctx['mods:etal']) - name_identifier = tools.Try(ctx['mods:nameIdentifier']) - - def squash_name_parts(self, name): - name_parts = get_list(name, 'mods:namePart') - return ' '.join([force_text(n) for n in name_parts]) - - def get_agent_schema(self, obj): - name_type = obj.get('@type') - if name_type == 'personal': - return 'person' - if name_type == 'conference': - return 'organization' - # TODO SHARE-718 - # if name_type == 'family': - # return 'family' - if name_type == 'corporate': - return GuessAgentTypeLink(default='organization').execute(self.squash_name_parts(obj)) - return GuessAgentTypeLink().execute(self.squash_name_parts(obj)) - - -class MODSPersonSplitName(MODSAgent): - schema = 'person' - - name = None - family_name = tools.RunPython('get_name_part', ctx, 'family') - given_name = tools.RunPython('get_name_part', ctx, 'given') - suffix = tools.RunPython('get_name_part', ctx, 'termsOfAddress') - - def get_name_part(self, obj, type): - name_parts = get_list(obj, 'mods:namePart') - return ' '.join([force_text(n) for n in name_parts if n.get('@type') == type]) - - -class MODSSimpleAgent(Parser): - schema = tools.GuessAgentType(ctx, default='organization') - - name = ctx - - -class MODSSimplePublisher(Parser): - schema = 'Publisher' - - agent = tools.Delegate(MODSSimpleAgent, ctx) - - -#### Tags/Subjects #### - -class MODSSubject(Parser): - schema = 'Subject' - - name = ctx - - -class MODSThroughSubjects(Parser): - schema = 'ThroughSubjects' - - subject = tools.Delegate(MODSSubject, ctx) - - -class MODSTag(Parser): - schema = 'Tag' - - name = ctx - - -class MODSThroughTags(Parser): - schema = 'ThroughTags' - - tag = tools.Delegate(MODSTag, ctx) - - -#### Work Relations #### - -RELATION_MAP = { - # 'preceding': - # 'succeeding': - 'original': 'IsDerivedFrom', - 'host': 'IsPartOf', - 'constituent': 'IsPartOf', - 'series': 'IsPartOf', - # 'otherVersion': - # 'otherFormat': - 'isReferencedBy': 'References', - 'references': 'References', - 'reviewOf': 'Reviews', -} -REVERSE_RELATIONS = { - 'isReferencedBy', - 'constituent', -} - - -# Finds the generated subclass of MODSCreativeWork -def related_work_parser(_): - return type(next(p for p in ctx.parsers if isinstance(p, MODSCreativeWork))) - - -def map_relation_type(obj): - return RELATION_MAP.get(obj['@type'], 'WorkRelation') - - -class MODSReverseWorkRelation(Parser): - schema = tools.RunPython(map_relation_type) - - subject = tools.Delegate(related_work_parser, ctx) - - -class MODSWorkRelation(Parser): - schema = 
tools.RunPython(map_relation_type) - - related = tools.Delegate(related_work_parser, ctx) - - -def work_relation_parser(obj): - if obj['@type'] in REVERSE_RELATIONS: - return MODSReverseWorkRelation - return MODSWorkRelation - - -#### Agent-work relations #### - -def agent_parser(name): - name_parts = get_list(name, 'mods:namePart') - split_name = any(isinstance(n, dict) and n.get('@type') in {'given', 'family'} for n in name_parts) - return MODSPersonSplitName if split_name else MODSAgent - - -class MODSAgentWorkRelation(Parser): - schema = 'AgentWorkRelation' - - agent = tools.Delegate(agent_parser, ctx) - cited_as = tools.RunPython(force_text, tools.Try(ctx['mods:displayForm'])) - - -class MODSHost(MODSAgentWorkRelation): - schema = 'Host' - - -class MODSFunder(MODSAgentWorkRelation): - schema = 'Funder' - - -class MODSContributor(MODSAgentWorkRelation): - schema = 'Contributor' - - -class MODSCreator(MODSContributor): - schema = 'Creator' - - order_cited = ctx('index') - - -class MODSPublisher(MODSAgentWorkRelation): - schema = 'Publisher' - - -#### Works #### - -class MODSCreativeWork(Parser): - default_type = 'CreativeWork' - type_map = None - role_map = None - - schema = tools.RunPython( - 'get_schema', - tools.OneOf( - tools.RunPython(force_text, ctx['mods:genre']), - tools.Static(None) - ) - ) - - title = tools.RunPython('join_title_info', ctx) - - # Abstracts have the optional attribute "shareable". Don't bother checking for it, because - # abstracts that are not shareable should not have been shared with SHARE. - description = tools.Join(tools.RunPython(force_text, tools.Try(ctx['mods:abstract']), '\n')) - - identifiers = tools.Map( - tools.Delegate(MODSWorkIdentifier), - tools.Filter( - lambda obj: 'invalid' not in obj, - tools.Concat( - tools.Try(ctx['mods:identifier']), - tools.Try(ctx.header['identifier']), - tools.Try(ctx['mods:location']['mods:url']), - ) - ) - ) - - related_works = tools.Concat( - tools.Map( - tools.Delegate(work_relation_parser), - tools.Try(ctx['mods:relatedItem']) - ) - ) - - related_agents = tools.Concat( - tools.Map( - tools.Delegate(MODSCreator), - tools.RunPython('filter_names', ctx, 'creator') - ), - tools.Map( - tools.Delegate(MODSFunder), - tools.RunPython('filter_names', ctx, 'funder') - ), - tools.Map( - tools.Delegate(MODSHost), - tools.RunPython('filter_names', ctx, 'host') - ), - tools.Map( - tools.Delegate(MODSPublisher), - tools.RunPython('filter_names', ctx, 'publisher') - ), - tools.Map( - tools.Delegate(MODSContributor), - tools.RunPython('filter_names', ctx, 'creator', 'funder', 'host', 'publisher', invert=True) - ), - tools.Map( - tools.Delegate(MODSSimplePublisher), - tools.Try(ctx['mods:originInfo']['mods:publisher']), - ), - ) - - rights = tools.RunPython(force_text, tools.Try(ctx['mods:accessCondition']), '\n') - - language = tools.ParseLanguage( - tools.Try(ctx['mods:language']['mods:languageTerm']), - ) - - subjects = tools.Map( - tools.Delegate(MODSThroughSubjects), - tools.Subjects( - tools.Concat( - tools.Try(ctx['mods:subject']['mods:topic']), - ) - ) - ) - - tags = tools.Map( - tools.Delegate(MODSThroughTags), - tools.Concat( - tools.Map( - tools.RunPython('tokenize'), - tools.Map( - tools.RunPython(force_text), - tools.Try(ctx.header.setSpec), - tools.Try(ctx['mods:genre']), - tools.Try(ctx['mods:classification']), - tools.Try(ctx['mods:subject']['mods:topic']), - ) - ), - deep=True - ) - ) - - date_updated = tools.ParseDate(tools.Try(ctx.header.datestamp)) - - # TODO (in regulator) handle date ranges, uncertain dates 
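
work_relation_parser above flips the direction of a MODS relatedItem relation when its @type names an inverse relationship; the dispatch reduces to:

REVERSE_RELATIONS = {'isReferencedBy', 'constituent'}

def relation_direction(related_item):
    # For reverse relation types, the related work becomes the *subject*
    # of the relation instead of its object.
    return 'reverse' if related_item['@type'] in REVERSE_RELATIONS else 'forward'

assert relation_direction({'@type': 'isReferencedBy'}) == 'reverse'
assert relation_direction({'@type': 'host'}) == 'forward'
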
('1904-1941', '1890?', '1980-', '19uu', etc.) - date_published = tools.OneOf( - tools.ParseDate(tools.RunPython(force_text, tools.Try(ctx['mods:originInfo']['mods:dateIssued']))), - tools.Static(None) - ) - - is_deleted = tools.RunPython(lambda status: status == 'deleted', tools.Try(ctx.record.header['@status'])) - - class Extra: - """ - Fields that are combined in the base parser are relisted as singular elements that match - their original entry to preserve raw data structure. - """ - - # (dc:description) http://www.loc.gov/standards/mods/userguide/abstract.html - abstract = tools.Try(ctx['mods:abstract']) - - # (dc:rights) http://www.loc.gov/standards/mods/userguide/accesscondition.html - accessConditions = tools.Try(ctx['mods:accessCondition']) - - # (dc:subject) http://www.loc.gov/standards/mods/userguide/classification.html - classification = tools.Try(ctx['mods:classification']) - - # (N/A) http://www.loc.gov/standards/mods/userguide/extension.html - extension = tools.Try(ctx['mods:extension']) - - # SHARE type - # (dc:type) http://www.loc.gov/standards/mods/userguide/genre.html - genre = tools.Try(ctx['mods:genre']) - - # (dc:identifier) http://www.loc.gov/standards/mods/userguide/identifier.html - identifier = tools.Try(ctx['mods:identifier']) - - # (dc:language) http://www.loc.gov/standards/mods/userguide/language.html - language = tools.Try(ctx['mods:language']) - - # (dc:identifier for url) http://www.loc.gov/standards/mods/userguide/location.html - location = tools.Try(ctx['mods:location']) - - # (dc:creator|dc:contributor) http://www.loc.gov/standards/mods/userguide/name.html - name = tools.Try(ctx['mods:name']) - - # (dc:description) http://www.loc.gov/standards/mods/userguide/note.html - note = tools.Try(ctx['mods:note']) - - # (dc:publisher|dc:date) http://www.loc.gov/standards/mods/userguide/origininfo.html - originInfo = tools.Try(ctx['mods:originInfo']) - - # Extra - # (dc:title) http://www.loc.gov/standards/mods/userguide/part.html - part = tools.Try(ctx['mods:part']) - - # (dc:format or N/A) http://www.loc.gov/standards/mods/userguide/physicaldescription.html - physicalDescription = tools.Try(ctx['mods:physicalDescription']) - - # Metadata information - # (N/A) http://www.loc.gov/standards/mods/userguide/recordinfo.html - recordInfo = tools.Try(ctx['mods:recordInfo']) - - # (dc:relation) http://www.loc.gov/standards/mods/userguide/relateditem.html - relatedItem = tools.Try(ctx['mods:relatedItem']) - - # (dc:subject|dc:type|dc:coverage|N/A) http://www.loc.gov/standards/mods/userguide/subject.html - subject = tools.Try(ctx['mods:subject']) - - # (dc:description) http://www.loc.gov/standards/mods/userguide/tableofcontents.html - tableOfContents = tools.Try(ctx['mods:tableOfContents']) - - # (N/A) http://www.loc.gov/standards/mods/userguide/targetaudience.html - targetAudience = tools.Try(ctx['mods:targetAudience']) - - # (dc:title) http://www.loc.gov/standards/mods/userguide/titleinfo.html - titleInfo = tools.Try(ctx['mods:titleInfo']) - - # Extra - # (dc:type) http://www.loc.gov/standards/mods/userguide/typeofresource.html - typeOfResource = tools.Try(ctx['mods:typeOfResource']) - - def get_schema(self, types): - if not types or not self.type_map: - return self.default_type - if isinstance(types, str): - types = [types] - for t in types: - if isinstance(t, dict): - t = t['#text'] - t = t.lower() - if t in self.type_map: - return self.type_map[t] - return self.default_type - - def tokenize(self, data): - if isinstance(data, str): - data = [data] - tokens = [] - for item 
in data:
-            tokens.extend([x.strip() for x in re.split(r'(?: - )|\.|,', item) if x])
-        return tokens
-
-    # Map titleInfos to a string: https://www.loc.gov/standards/mods/userguide/titleinfo.html#mappings
-    def join_title_info(self, obj):
-        def get_part(title_info, part_name, delimiter=''):
-            part = force_text(title_info.get(part_name, ''), ' ').strip()
-            return delimiter + part if part else ''
-
-        title_infos = get_list(obj, 'mods:titleInfo')
-        titles = []
-        for title_info in title_infos:
-            title = ''
-            title += get_part(title_info, 'mods:nonSort')
-            title += get_part(title_info, 'mods:title')
-            title += get_part(title_info, 'mods:subTitle', ': ')
-            title += get_part(title_info, 'mods:partNumber', '. ')
-            title += get_part(title_info, 'mods:partName', ': ')
-            if title:
-                titles.append(title)
-        return '. '.join(titles)
-
-    def filter_names(self, obj, *roles, invert=False):
-        names = get_list(obj, 'mods:name')
-        filtered = [*names] if invert else []
-        for name in names:
-            name_roles = get_list(name, 'mods:role')
-            for role in name_roles:
-                role_terms = get_list(role, 'mods:roleTerm')
-                name_roles = {force_text(r).lower() for r in role_terms}
-                name_roles.update({self.role_map[r] for r in name_roles if r in self.role_map})
-                if name_roles.intersection(roles):
-                    if invert:
-                        filtered.remove(name)
-                    else:
-                        filtered.append(name)
-        return filtered
-
-
-class MODSTransformer(ChainTransformer):
-    """Transformer for the MODS metadata format.
-
-    transformer_kwargs (TODO explain):
-        emitted_type
-        approved_sets
-        blocked_sets
-        type_map
-        role_map
-    """
-
-    VERSION = 1
-
-    marc_roles = {
-        'fnd': 'funder',
-        'hst': 'host',
-        'his': 'host',
-        'pbl': 'publisher',
-        'cre': 'creator',
-        'aut': 'creator',
-        'author': 'creator',
-    }
-
-    def get_root_parser(self, unwrapped, emitted_type='creativework', type_map=None, role_map=None, **kwargs):
-        root_type_map = {
-            **{r.lower(): r for r in self.allowed_roots},
-            **{t.lower(): v for t, v in (type_map or {}).items()}
-        }
-        root_role_map = {
-            **{k: v for k, v in self.marc_roles.items()},
-            **{k.lower(): v.lower() for k, v in (role_map or {}).items()}
-        }
-
-        class RootParser(MODSCreativeWork):
-            default_type = emitted_type.lower()
-            type_map = root_type_map
-            role_map = root_role_map
-
-        return RootParser
-
-    def do_transform(self, datum, approved_sets=None, blocked_sets=None, **kwargs):
-        if not oai_allowed_by_sets(datum, blocked_sets, approved_sets):
-            return (None, None)
-        return super().do_transform(datum, **kwargs)
-
-    def unwrap_data(self, data, namespaces=None, **kwargs):
-        unwrapped_data = xmltodict.parse(data, process_namespaces=True, namespaces=(namespaces or self.NAMESPACES))
-        return {
-            **unwrapped_data['record'].get('metadata', {}).get('mods:mods', {}),
-            'header': unwrapped_data['record']['header'],
-        }
diff --git a/share/transformers/oai.py b/share/transformers/oai.py
deleted file mode 100644
index d8a47db0f..000000000
--- a/share/transformers/oai.py
+++ /dev/null
@@ -1,325 +0,0 @@
-import re
-import logging
-
-from share.transform.chain import ctx, ChainTransformer, links as tools
-from share.transform.chain.exceptions import InvalidIRI
-from share.transform.chain.parsers import Parser
-from share.transform.chain.utils import force_text, oai_allowed_by_sets
-
-
-logger = logging.getLogger(__name__)
-
-
-def not_citation(identifier):
-    return re.search(r'(pp\. 
\d+\-\d+.)|(ISSN )|( +\(\d\d\d\d\))', identifier) is None - - -class OAIAgent(Parser): - schema = tools.GuessAgentType(ctx) - - name = ctx - - -class OAIAgentIdentifier(Parser): - schema = 'AgentIdentifier' - - uri = ctx - - -class OAIWorkIdentifier(Parser): - schema = 'WorkIdentifier' - - uri = ctx - - -class OAISubject(Parser): - schema = 'Subject' - - name = ctx - - -class OAIThroughSubjects(Parser): - schema = 'ThroughSubjects' - - subject = tools.Delegate(OAISubject, ctx) - - -class OAITag(Parser): - schema = 'Tag' - - name = ctx - - -class OAIThroughTags(Parser): - schema = 'ThroughTags' - - tag = tools.Delegate(OAITag, ctx) - - -class OAIRelatedWork(Parser): - schema = 'CreativeWork' - - identifiers = tools.Map(tools.Delegate(OAIWorkIdentifier), ctx) - - class Extra: - identifier = ctx - - -class OAIWorkRelation(Parser): - schema = 'WorkRelation' - - related = tools.Delegate(OAIRelatedWork, ctx) - - -class OAIAgentWorkRelation(Parser): - schema = 'AgentWorkRelation' - - agent = tools.Delegate(OAIAgent, tools.RunPython(force_text, ctx)) - cited_as = tools.RunPython(force_text, ctx) - - -class OAIContributor(OAIAgentWorkRelation): - schema = 'Contributor' - - -class OAICreator(OAIContributor): - schema = 'Creator' - - order_cited = ctx('index') - - -class OAIPublisher(Parser): - schema = 'Publisher' - - agent = tools.Delegate(OAIAgent.using(schema=tools.GuessAgentType(ctx, default='organization')), ctx) - - -class OAICreativeWork(Parser): - default_type = None - type_map = None - - schema = tools.RunPython( - 'get_schema', - tools.OneOf( - ctx.record.metadata.dc['dc:type'], - tools.Static(None) - ) - ) - - title = tools.Join(tools.RunPython(force_text, tools.Try(ctx.record.metadata.dc['dc:title']))) - description = tools.Join(tools.RunPython(force_text, tools.Try(ctx.record.metadata.dc['dc:description']))) - - identifiers = tools.Map( - tools.Delegate(OAIWorkIdentifier), - tools.Unique(tools.Map( - tools.Try(tools.IRI(), exceptions=(InvalidIRI, )), - tools.Filter( - not_citation, - tools.RunPython( - force_text, - tools.Concat( - tools.Try(ctx.record.metadata.dc['dc:identifier']), - tools.Try(ctx.record.header['identifier']) - ) - ) - ) - )) - ) - - related_works = tools.Concat( - tools.Map( - tools.Delegate(OAIWorkRelation), - tools.Unique(tools.Map( - tools.Try(tools.IRI(), exceptions=(InvalidIRI, )), - tools.RunPython('get_relation', ctx) - )) - ) - ) - - related_agents = tools.Concat( - tools.Map( - tools.Delegate(OAICreator), - tools.Try(ctx.record.metadata.dc['dc:creator']) - ), - tools.Map( - tools.Delegate(OAIContributor), - tools.Try(ctx.record.metadata.dc['dc:contributor']) - ), - tools.Map( - tools.Delegate(OAIPublisher), - tools.RunPython(force_text, tools.Try(ctx.record.metadata.dc['dc:publisher'])) - ), - ) - - rights = tools.Join(tools.Try(ctx.record.metadata.dc['dc:rights'])) - - # Note: this is only taking the first language in the case of multiple languages - language = tools.ParseLanguage( - tools.Try(ctx.record.metadata.dc['dc:language'][0]), - ) - - subjects = tools.Map( - tools.Delegate(OAIThroughSubjects), - tools.Subjects( - tools.Map( - tools.RunPython('tokenize'), - tools.RunPython( - force_text, - tools.Concat( - tools.Try(ctx.record.header.setSpec), - tools.Try(ctx.record.metadata.dc['dc:type']), - tools.Try(ctx.record.metadata.dc['dc:format']), - tools.Try(ctx.record.metadata.dc['dc:subject']), - ) - ) - ) - ) - ) - - tags = tools.Map( - tools.Delegate(OAIThroughTags), - tools.Concat( - tools.Map( - tools.RunPython('tokenize'), - tools.RunPython( - 
force_text, - tools.Concat( - tools.Try(ctx.record.header.setSpec), - tools.Try(ctx.record.metadata.dc['dc:type']), - tools.Try(ctx.record.metadata.dc['dc:format']), - tools.Try(ctx.record.metadata.dc['dc:subject']), - ) - ) - ), - deep=True - ) - ) - - date_updated = tools.ParseDate(ctx.record.header.datestamp) - - is_deleted = tools.RunPython('check_status', tools.Try(ctx.record.header['@status'])) - - class Extra: - """ - Fields that are combined in the base parser are relisted as singular elements that match - their original entry to preserve raw data structure. - """ - # An agent responsible for making contributions to the resource. - contributor = tools.Try(ctx.record.metadata.dc['dc:contributor']) - - # The spatial or temporal topic of the resource, the spatial applicability of the resource, - # or the jurisdiction under which the resource is relevant. - coverage = tools.Try(ctx.record.metadata.dc['dc:coverage']) - - # An agent primarily responsible for making the resource. - creator = tools.Try(ctx.record.metadata.dc['dc:creator']) - - # A point or period of time associated with an event in the lifecycle of the resource. - dates = tools.Try(ctx.record.metadata.dc['dc:date']) - - # The file format, physical medium, or dimensions of the resource. - resource_format = tools.Try(ctx.record.metadata.dc['dc:format']) - - # An unambiguous reference to the resource within a given context. - identifiers = tools.Concat( - tools.Try(ctx.record.metadata.dc['dc:identifier']), - tools.Try(ctx.record.header['identifier']) - ) - - # A related resource. - relation = tools.RunPython('get_relation', ctx) - - # A related resource from which the described resource is derived. - source = tools.Try(ctx.record.metadata.dc['dc:source']) - - # The nature or genre of the resource. - resource_type = tools.Try(ctx.record.metadata.dc['dc:type']) - - set_spec = tools.Try(ctx.record.header.setSpec) - - # Language also stored in the Extra class in case the language reported cannot be parsed by ParseLanguage - language = tools.Try(ctx.record.metadata.dc['dc:language']) - - # Status in the header, will exist if the resource is deleted - status = tools.Try(ctx.record.header['@status']) - - def check_status(self, status): - if status == 'deleted': - return True - return False - - def get_schema(self, types): - if not types or not self.type_map: - return self.default_type - if isinstance(types, str): - types = [types] - for t in types: - if isinstance(t, dict): - t = t['#text'] - t = t.lower() - if t in self.type_map: - return self.type_map[t] - return self.default_type - - def tokenize(self, data): - if isinstance(data, str): - data = [data] - tokens = [] - for item in data: - tokens.extend([x.strip() for x in re.split(r'(?: - )|\.|,', item) if x]) - return tokens - - def get_relation(self, ctx): - if not ctx['record'].get('metadata'): - return [] - relation = ctx['record']['metadata']['dc'].get('dc:relation') or [] - identifiers = ctx['record']['metadata']['dc'].get('dc:identifier') or [] - if isinstance(identifiers, dict): - identifiers = (identifiers, ) - identifiers = ''.join(i['#text'] if isinstance(i, dict) else i for i in identifiers if i) - - identifiers = re.sub('http|:|/', '', identifiers + ctx['record']['header']['identifier']) - - if isinstance(relation, dict): - relation = (relation['#text'], ) - - return [r for r in relation if r and re.sub('http|:|/', '', r) not in identifiers] - - -class OAITransformer(ChainTransformer): - """Transformer for oai_dc metadata format. 
- - transformer_kwargs: - emitted_type: default SHARE type for works when no type_map entry matches - property_list: extra dc:<property> elements to expose in Extra - approved_sets: OAI setSpecs to allow (all others skipped) - blocked_sets: OAI setSpecs to skip - type_map: mapping of source resource types onto SHARE types - """ - - VERSION = 1 - - def get_root_parser(self, unwrapped, emitted_type='creativework', type_map=None, property_list=None, **kwargs): - root_type_map = { - **{r.lower(): r for r in self.allowed_roots}, - **{t.lower(): v for t, v in (type_map or {}).items()} - } - - class RootParser(OAICreativeWork): - default_type = emitted_type.lower() - type_map = root_type_map - - if property_list: - logger.debug('Attaching additional properties %s to transformer for %s', property_list, self.config.label) - for prop in property_list: - if prop in RootParser._extra: - logger.warning('Skipping property %s, it already exists', prop) - continue - RootParser._extra[prop] = tools.Try(ctx.record.metadata.dc['dc:' + prop]).chain()[0] - - return RootParser - - def do_transform(self, datum, approved_sets=None, blocked_sets=None, **kwargs): - if not oai_allowed_by_sets(datum, blocked_sets, approved_sets): - return (None, None) - return super().do_transform(datum, **kwargs) diff --git a/share/transformers/org_arxiv.py b/share/transformers/org_arxiv.py deleted file mode 100644 index b6fd70684..000000000 --- a/share/transformers/org_arxiv.py +++ /dev/null @@ -1,94 +0,0 @@ -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser - - -class WorkIdentifier(Parser): - uri = tools.IRI(ctx) - - -class Organization(Parser): - schema = tools.GuessAgentType(ctx) - - name = tools.RunPython('get_name', ctx) - location = tools.RunPython('get_location', ctx) - - def get_name(self, context): - return context.split(',')[0] - - def get_location(self, context): - # str.partition always returns a 3-tuple, so check the separator, not the length - name, sep, location = context.partition(',') - if sep: - return location - return None - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(Organization, ctx) - - -class Person(Parser): - name = ctx.name - - related_agents = tools.Map( - tools.Delegate(IsAffiliatedWith), - tools.Try(ctx['arxiv:affiliation']) - ) - - -class Creator(Parser): - order_cited = ctx('index') - cited_as = ctx.name - agent = tools.Delegate(Person, ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -class Preprint(Parser): - title = ctx.entry.title - description = ctx.entry.summary - - date_published = tools.ParseDate(ctx.entry.published) - date_updated = tools.ParseDate(ctx.entry.updated) - # free_to_read_type - # free_to_read_date - # rights - # language - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Subjects(tools.Map(ctx['@term'], ctx.entry.category)), - ) - tags = tools.Map( - tools.Delegate(ThroughTags), - tools.Map(ctx['@term'], ctx.entry.category), - ) - related_agents = tools.Concat( - tools.Map(tools.Delegate(Creator), ctx.entry.author), - ) - # related_works - identifiers = tools.Map(tools.Delegate(WorkIdentifier), tools.Try(ctx.entry['arxiv:doi']), ctx.entry.id) - - class Extra: - resource_id = ctx.entry.id - journal_ref = tools.Try(ctx.entry['arxiv:journal_ref']) - comment = tools.Try(ctx.entry['arxiv:comment']) - primary_category = tools.Try(ctx.entry['arxiv:primary_category']) - - -class ArxivTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/org_biorxiv.py b/share/transformers/org_biorxiv.py deleted file mode 100644 index 
dac03517a..000000000 --- a/share/transformers/org_biorxiv.py +++ /dev/null @@ -1,109 +0,0 @@ -import itertools - -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser - - -class AgentIdentifier(Parser): - uri = tools.IRI(ctx) - - -class WorkIdentifier(Parser): - uri = tools.IRI(ctx) - - -class Organization(Parser): - name = ctx - - -class Publisher(Parser): - agent = tools.Delegate(Organization, ctx) - - -class Institution(Parser): - name = ctx - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(Institution) - - -class Person(Parser): - given_name = tools.ParseName(ctx.author).first - family_name = tools.ParseName(ctx.author).last - additional_name = tools.ParseName(ctx.author).middle - suffix = tools.ParseName(ctx.author).suffix - - identifiers = tools.Map(tools.Delegate(AgentIdentifier, tools.Try(ctx.email))) - related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), tools.Try(ctx.institution)) - - -class Creator(Parser): - order_cited = ctx('index') - agent = tools.Delegate(Person, ctx) - cited_as = ctx.author - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -class Preprint(Parser): - title = tools.Try(ctx['DC.Title']) - description = tools.Try(ctx['DC.Description']) - # is_deleted - date_published = tools.ParseDate(tools.Try(ctx['article:published_time'])) - date_updated = tools.ParseDate(tools.Try(ctx['DC.Date'])) - # free_to_read_type - # free_to_read_date - rights = tools.Try(ctx['DC.Rights']) - language = tools.Try(ctx['DC.Language']) - - subjects = tools.Map(tools.Delegate(ThroughSubjects), tools.Static('Biology'), tools.Subjects(tools.Try(ctx['subject-areas']))) - tags = tools.Map(tools.Delegate(ThroughTags), tools.Try(ctx['category']), tools.Try(ctx['subject-areas'])) - - identifiers = tools.Map(tools.Delegate(WorkIdentifier), tools.Try(ctx['og:url']), ctx['citation_public_url'], ctx['citation_doi']) - - related_agents = tools.Concat( - tools.Map(tools.Delegate(Publisher), tools.Try(ctx['DC.Publisher'])), - tools.Map(tools.Delegate(Creator), tools.RunPython('get_contributors', ctx)) - ) - # related_works - - class Extra: - identifiers = ctx['DC.Identifier'] - access_rights = ctx['DC.AccessRights'] - - def get_contributors(self, link): - authors = link.get('citation_author', []) if isinstance(link.get('citation_author', []), list) else [link['citation_author']] - institutions = link.get('citation_author_institution', []) if isinstance(link.get('citation_author_institution', []), list) else [link['citation_author_institution']] - emails = link.get('citation_author_email', []) if isinstance(link.get('citation_author_email', []), list) else [link['citation_author_email']] - - contribs = [] - for author, email, institution in itertools.zip_longest(authors, emails, institutions): - contrib = { - 'author': author, - 'institution': institution, - 'email': email, - } - contribs.append(contrib) - - return contribs - - -class BiorxivTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/org_biorxiv_html.py b/share/transformers/org_biorxiv_html.py deleted file mode 100644 index 4bf22c883..000000000 --- a/share/transformers/org_biorxiv_html.py +++ /dev/null @@ -1,115 +0,0 @@ -import re - -from share.transform.chain import * # noqa -from 
share.transform.chain.soup import Soup, SoupXMLTransformer - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class Tag(Parser): - name = ctx - - -class Subject(Parser): - name = ctx - - -class AffiliatedAgent(Parser): - schema = GuessAgentType(ctx, default='Organization') - name = ctx - - -class Organization(Parser): - name = ctx - - -class IsAffiliatedWith(Parser): - related = Delegate(AffiliatedAgent, ctx) - - -class Publisher(Parser): - agent = Delegate(Organization, ctx) - - -class Person(Parser): - name = ctx.name - identifiers = Map(Delegate(AgentIdentifier), ctx.identifiers) - related_agents = Map(Delegate(IsAffiliatedWith), ctx.institutions) - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx['#text']) - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class Creator(Parser): - cited_as = ctx.name - order_cited = ctx('index') - agent = Delegate(Person, ctx) - - -class Preprint(Parser): - title = Soup(ctx, 'meta', {'name': 'DC.Title'})['@content'] - description = Soup(ctx, 'meta', {'name': 'DC.Description'})['@content'] - is_deleted = Static(False) - - date_updated = ParseDate(Soup(ctx, 'meta', {'name': 'DC.Date'})['@content']) - date_published = ParseDate(Soup(ctx, 'meta', {'name': 'citation_publication_date'})['@content']) - - rights = Soup(ctx, 'meta', {'name': 'DC.Rights'})['@content'] - - identifiers = Map( - Delegate(WorkIdentifier), - Soup(ctx, 'meta', {'name': 'og:url'})['@content'], - Soup(ctx, 'meta', {'name': 'DC.Identifier'})['@content'], - ) - - subjects = Map(Delegate(ThroughSubjects), Subjects(Map(ctx['#text'], Soup(ctx, **{'class': 'highwire-article-collection-term'})))) - - tags = Map(Delegate(ThroughTags), Soup(ctx, **{'class': 'highwire-article-collection-term'})) - - related_agents = Concat( - Map( - Delegate(Creator), - RunPython( - 'parse_creators', - Soup(ctx, 'meta', {'name': re.compile('^citation_author')}) - ) - ), - Delegate(Publisher, Soup(ctx, 'meta', {'name': 'citation_publisher'})['@content']) - ) - - def parse_creators(self, soup): - # Creators and their related information comes in as: - # Person 1 - # Person 1's Email - # Person 2 - # Person 2's affiliation - # Person 2's Orcid - # Etc, Etc - creators = [] - for match in soup: - if match['@name'] == 'citation_author': - creators.append({'name': match['@content'], 'identifiers': [], 'institutions': []}) - elif match['@name'] == 'citation_author_institution': - creators[-1]['institutions'].extend(match['@content'].split(';')) - elif match['@name'] in ('citation_author_email', 'citation_author_orcid'): - creators[-1]['identifiers'].append(match['@content']) - else: - raise ValueError('Unknown @name "{}"'.format(match['@name'])) - return creators - - -class BiorxivHTMLTransformer(SoupXMLTransformer): - VERSION = 1 - - root_parser = Preprint diff --git a/share/transformers/org_biorxiv_rss.py b/share/transformers/org_biorxiv_rss.py deleted file mode 100644 index 063518d4f..000000000 --- a/share/transformers/org_biorxiv_rss.py +++ /dev/null @@ -1,61 +0,0 @@ -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser - - -class WorkIdentifier(Parser): - uri = tools.IRI(ctx) - - -class Organization(Parser): - schema = tools.GuessAgentType(ctx, default='Organization') - name = ctx - - -class Publisher(Parser): - agent = tools.Delegate(Organization, ctx) - - -class Person(Parser): - given_name = tools.ParseName(ctx).first - 
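# A minimal sketch of the split these ParseName links perform, assuming ParseName wraps nameparser.HumanName (the sample value is illustrative): - #   >>> from nameparser import HumanName - #   >>> name = HumanName('Jane Q. Public Jr.') - #   >>> (name.first, name.middle, name.last, name.suffix) - #   ('Jane', 'Q.', 'Public', 'Jr.') -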
family_name = tools.ParseName(ctx).last - additional_name = tools.ParseName(ctx).middle - suffix = tools.ParseName(ctx).suffix - - -class Creator(Parser): - order_cited = ctx('index') - agent = tools.Delegate(Person, ctx) - cited_as = ctx - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -class Preprint(Parser): - title = ctx.item['dc:title'] - description = ctx.item.description - date_published = tools.ParseDate(ctx.item['dc:date']) - date_updated = tools.ParseDate(ctx.item['dc:date']) - - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Concat(tools.Static('Biology')) - ) - - identifiers = tools.Map(tools.Delegate(WorkIdentifier), ctx.item['dc:identifier']) - - related_agents = tools.Concat( - tools.Delegate(Publisher, ctx.item['dc:publisher']), - tools.Map(tools.Delegate(Creator), ctx.item['dc:creator']), - ) - - -class BiorxivRSSTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/org_crossref.py b/share/transformers/org_crossref.py deleted file mode 100644 index a72fd57ce..000000000 --- a/share/transformers/org_crossref.py +++ /dev/null @@ -1,143 +0,0 @@ -from share.transform.chain import * -from share.transform.chain import links - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class RelatedAgent(Parser): - schema = GuessAgentType(OneOf(ctx.name, ctx), default='organization') - name = OneOf(ctx.name, ctx) - identifiers = Map( - Delegate(AgentIdentifier), - Try(ctx.DOI) - ) - - # class Extra: - # doi = Maybe(ctx, 'DOI') - # award = Maybe(ctx, 'award') - # doi_asserted_by = Maybe(ctx, 'doi-asserted-by') - - -class Funder(Parser): - agent = Delegate(RelatedAgent, ctx) - - -class Publisher(Parser): - agent = Delegate(RelatedAgent, ctx) - - -class IsAffiliatedWith(Parser): - related = Delegate(RelatedAgent, ctx) - - -class Person(Parser): - given_name = Maybe(ctx, 'given') - family_name = Maybe(ctx, 'family') - - identifiers = Map( - Delegate(AgentIdentifier), - Try(ctx.ORCID) - ) - - related_agents = Map(Delegate(IsAffiliatedWith), Maybe(ctx, 'affiliation')) - - -class Creator(Parser): - agent = Delegate(Person, ctx) - order_cited = ctx('index') - - cited_as = links.Join( - Concat( - Maybe(ctx, 'given'), - Maybe(ctx, 'family') - ), - joiner=' ' - ) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class CreativeWork(Parser): - """ - Documentation for CrossRef's metadata can be found here: - https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md - """ - - def get_schema(self, type): - return { - 'journal-article': 'Article', - 'book': 'Book', - 'proceedings-article': 'ConferencePaper', - 'dataset': 'Dataset', - 'dissertation': 'Dissertation', - 'preprint': 'Preprint', - 'report': 'Report', - }.get(type) or 'CreativeWork' - - schema = RunPython('get_schema', ctx.type) - - title = Maybe(ctx, 'title')[0] - description = Maybe(ctx, 'subtitle')[0] - date_updated = ParseDate(Try(ctx.deposited['date-time'])) - - identifiers = Map( - Delegate(WorkIdentifier), - ctx.DOI, - # Links do not appear to be unique - # Map(OneOf(ctx.URL, ctx), Try(ctx.link)), - # Try(IRI(ctx['alternative-id'])) - ) - - related_agents = Concat( - Map(Delegate(Creator), Try(ctx.author)), - Map(Delegate(Publisher), ctx.publisher), - Map(Delegate(Funder), Filter(lambda x: isinstance(x, str) or 'name' in x, Try(ctx.funder))), - ) - - # TODO These are "a controlled 
vocabulary from Sci-Val", map to Subjects! - tags = Map( - Delegate(ThroughTags), - Maybe(ctx, 'subject') - ) - - class Extra: - alternative_id = Maybe(ctx, 'alternative-id') - archive = Maybe(ctx, 'archive') - article_number = Maybe(ctx, 'article-number') - chair = Maybe(ctx, 'chair') - container_title = Maybe(ctx, 'container-title') - date_created = ParseDate(Try(ctx.created['date-time'])) - date_published = Maybe(ctx, 'issued') - editor = Maybe(ctx, 'editor') - licenses = Maybe(ctx, 'license') - isbn = Maybe(ctx, 'isbn') - issn = Maybe(ctx, 'issn') - issue = Maybe(ctx, 'issue') - member = Maybe(ctx, 'member') - page = Maybe(ctx, 'page') - published_online = Maybe(ctx, 'published-online') - published_print = Maybe(ctx, 'published-print') - reference_count = ctx['reference-count'] - subjects = Maybe(ctx, 'subject') - subtitles = Maybe(ctx, 'subtitle') - titles = ctx.title - translator = Maybe(ctx, 'translator') - type = ctx.type - volume = Maybe(ctx, 'volume') - - -class CrossrefTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/org_datacite.py b/share/transformers/org_datacite.py deleted file mode 100644 index 24a4f9047..000000000 --- a/share/transformers/org_datacite.py +++ /dev/null @@ -1,679 +0,0 @@ -import logging - -from share.exceptions import TransformError -from share.transform.chain import ctx, links as tools, ChainTransformer -from share.transform.chain.exceptions import InvalidIRI, InvalidPath -from share.transform.chain.parsers import Parser -from share.transform.chain.utils import force_text - - -logger = logging.getLogger(__name__) - -PEOPLE_TYPES = ( - 'ContactPerson', - 'DataCurator', - 'Editor', - 'ProjectLeader', - 'ProjectManager', - 'ProjectMember', - 'RelatedPerson', - 'Researcher', - 'Supervisor', - 'WorkPackageLeader' -) -NOT_PEOPLE_TYPES = ( - 'Distributor', - 'HostingInstitution', - 'RegistrationAgency', - 'RegistrationAuthority', - 'ResearchGroup' -) -# Other ambiguous types -# 'DataCollector', -# 'DataManager', -# 'Producer', -# 'RightsHolder', -# 'Sponsor', -# 'Other' - - -def try_contributor_type(value, target_list_types): - try: - contrib_type_item = value['@contributorType'] - if contrib_type_item in target_list_types: - return value - return None - except KeyError: - return None - - -def get_contributors(options, contrib_type): - """ - Returns list of contributors names based on their type. - """ - contribs = [] - for value in options: - val = try_contributor_type(value, contrib_type) - if val: - contribs.append(val) - return contribs - - -def get_agent_type(agent, person=False): - """ - Returns agent type based on contributor type. 
- """ - is_not_person = try_contributor_type(agent, NOT_PEOPLE_TYPES) - is_person = try_contributor_type(agent, PEOPLE_TYPES) - try: - agent_name = agent['creatorName'] - except KeyError: - agent_name = agent['contributorName'] - - if person and is_person: - return agent_name - elif not person and is_not_person: - return agent_name - # break OneOf option - raise TransformError - - -RELATION_MAP = { - 'IsCitedBy': 'Cites', - 'Cites': 'Cites', - 'IsSupplementedBy': 'IsSupplementTo', - 'IsSupplementTo': 'IsSupplementTo', - 'IsContinuedBy': 'Extends', - 'Continues': 'Extends', - 'IsNewVersionOf': '', - 'IsPreviousVersionOf': '', - 'References': 'References', - 'IsReferencedBy': 'References', - 'IsPartOf': 'IsPartOf', - 'HasPart': 'IsPartOf', - 'IsDocumentedBy': 'Documents', - 'Documents': 'Documents', - 'IsCompiledBy': 'Compiles', - 'Compiles': 'Compiles', - 'IsVariantFormOf': '', - 'IsOriginalFormOf': '', - 'IsReviewedBy': 'Reviews', - 'Reviews': 'Reviews', - 'IsDerivedFrom': 'IsDerivedFrom', - 'IsSourceOf': 'IsDerivedFrom', - 'IsMetadataFor': '', - 'HasMetadata': '', -} - -INVERSE_RELATIONS = ( - 'IsCitedBy', - 'IsSupplementedBy', - 'IsContinuedBy', - 'IsNewVersionOf', - 'IsReferencedBy', - 'IsPartOf', - 'IsDocumentedBy', - 'IsCompiledBy', - 'IsVariantFormOf', - 'IsReviewedBy', - 'IsDerivedFrom', - 'IsMetadataFor' -) - -RELATIONS = ( - 'Cites', - 'IsSupplementTo', - 'Continues', - 'References', - 'IsPreviousVersionOf', - 'HasPart', - 'Documents', - 'Compiles', - 'IsOriginalFormOf', - 'Reviews', - 'IsSourceOf', - 'HasMetadata', -) - - -def get_related_works(options, inverse): - results = [] - for option in options: - if not option.get('#text') or option['#text'].lower() == 'null': - continue - - if not option.get('@preprocessed'): - option['@preprocessed'] = True - option['#text'] = { - 'PMID': 'http://www.ncbi.nlm.nih.gov/pubmed/{}' - }.get(option.get('@relatedIdentifierType'), '{}').format(option['#text']) - - relation = option['@relationType'] - if inverse and relation in INVERSE_RELATIONS: - results.append(option) - elif not inverse and relation in RELATIONS: - results.append(option) - return results - - -def get_relation_type(relation_type): - normalized_relation = RELATION_MAP[relation_type] - return normalized_relation or 'WorkRelation' - - -class AgentIdentifier(Parser): - - uri = ctx - - -class AffiliatedAgent(Parser): - schema = tools.GuessAgentType(ctx, default='organization') - - name = tools.RunPython(force_text, ctx) - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(AffiliatedAgent, ctx) - - -class ContributorAgent(Parser): - schema = tools.OneOf( - tools.GuessAgentType( - tools.RunPython( - get_agent_type, - ctx, - person=False - ), - default='organization' - ), - tools.GuessAgentType( - tools.OneOf( - ctx.creatorName, - ctx.contributorName - ) - ) - ) - - name = tools.RunPython(force_text, tools.OneOf(ctx.creatorName, ctx.contributorName)) - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.Try( - tools.Map( - tools.IRI(ctx), - tools.RunPython( - force_text, - ctx.nameIdentifier - ) - ), - exceptions=(InvalidIRI,) - ) - ) - related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), tools.Concat(tools.Try( - tools.Filter(lambda x: bool(x), tools.RunPython(force_text, ctx.affiliation)) - ))) - - class Extra: - name_identifier = tools.Try(ctx.nameIdentifier) - name_identifier_scheme = tools.Try(ctx.nameIdentifier['@nameIdentifierScheme']) - name_identifier_scheme_uri = tools.Try(ctx.nameIdentifier['@schemeURI']) - - contributor_type = 
tools.Try(ctx.contributorType) - - # v.4 new givenName and familyName properties - given_name = tools.OneOf( - ctx.creatorName['@givenName'], - ctx.contributorName['@givenName'], - tools.Static(None) - ) - family_name = tools.OneOf( - ctx.creatorName['@familyName'], - ctx.contributorName['@familyName'], - tools.Static(None) - ) - - -class FunderAgent(Parser): - schema = tools.GuessAgentType( - tools.OneOf(ctx.funderName, ctx.contributorName), - default='organization' - ) - - name = tools.RunPython(force_text, tools.OneOf(ctx.funderName, ctx.contributorName)) - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.Try( - tools.IRI( - tools.OneOf( - ctx.funderIdentifier, - tools.RunPython( - force_text, - ctx.nameIdentifier - ), - tools.Static(None) - ) - ), - exceptions=(InvalidIRI,) - ) - ) - - class Extra: - name_identifier = tools.Try(ctx.nameIdentifier) - name_identifier_scheme = tools.Try(ctx.nameIdentifier['@nameIdentifierScheme']) - name_identifier_scheme_uri = tools.Try(ctx.nameIdentifier['@schemeURI']) - - funder_identifier = tools.Try(ctx.funderIdentifier) - funder_identifier_type = tools.Try(ctx.funderIdentifierType) - - contributor_type = tools.Try(ctx.contributorType) - - -class HostAgent(Parser): - schema = tools.GuessAgentType(ctx.contributorName, default='organization') - - name = tools.RunPython(force_text, tools.Try(ctx.contributorName)) - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.Try( - tools.IRI( - tools.RunPython( - force_text, - ctx.nameIdentifier - ) - ), - exceptions=(InvalidIRI,) - ) - ) - - class Extra: - name_identifier = tools.Try(ctx.nameIdentifier) - name_identifier_scheme = tools.Try(ctx.nameIdentifier['@nameIdentifierScheme']) - name_identifier_scheme_uri = tools.Try(ctx.nameIdentifier['@schemeURI']) - - contributor_type = tools.Try(ctx.contributorType) - - -class PublisherAgent(Parser): - schema = tools.GuessAgentType(ctx, default='organization') - - name = tools.RunPython(force_text, ctx) - - -class ContributorRelation(Parser): - schema = 'Contributor' - - agent = tools.Delegate(ContributorAgent, ctx) - cited_as = tools.RunPython(force_text, tools.OneOf(ctx.creatorName, ctx.contributorName)) - - -class CreatorRelation(ContributorRelation): - schema = 'Creator' - - order_cited = ctx('index') - - -class HostRelation(Parser): - schema = 'Host' - - agent = tools.Delegate(HostAgent, ctx) - - -class PublisherRelation(Parser): - schema = 'Publisher' - - agent = tools.Delegate(PublisherAgent, ctx) - - -class Award(Parser): - name = tools.Try(ctx.awardTitle) - description = tools.Try(ctx.awardNumber) - uri = tools.Try(ctx.awardURI) - - -class ThroughAwards(Parser): - award = tools.Delegate(Award, ctx) - - -class FunderRelation(Parser): - schema = 'Funder' - - agent = tools.Delegate(FunderAgent, ctx) - awards = tools.Map(tools.Delegate(ThroughAwards), tools.Try(tools.RunPython('get_award', ctx), exceptions=(KeyError,))) - - def get_award(self, obj): - obj['awardURI'] - return obj - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class WorkIdentifier(Parser): - uri = tools.IRI(tools.RunPython(force_text, ctx)) - - class Extra: - identifier_type = tools.Try(ctx['@identifierType']) - - -class RelatedWorkIdentifier(Parser): - schema = 'WorkIdentifier' - - uri = tools.IRI(tools.RunPython( - force_text, - ctx - )) - - class Extra: - related_identifier_type = ctx['@relatedIdentifierType'] - relation_type = tools.Try(ctx['@relationType']) - related_metadata_scheme = 
tools.Try(ctx['@relatedMetadataScheme']) - scheme_URI = tools.Try(ctx['@schemeURI']) - scheme_type = tools.Try(ctx['@schemeType']) - - -class RelatedWork(Parser): - schema = 'CreativeWork' - identifiers = tools.Map(tools.Delegate(RelatedWorkIdentifier), ctx) - - -class WorkRelation(Parser): - schema = tools.RunPython(get_relation_type, ctx['@relationType']) - related = tools.Delegate(RelatedWork, ctx) - - -class InverseWorkRelation(Parser): - schema = tools.RunPython(get_relation_type, ctx['@relationType']) - subject = tools.Delegate(RelatedWork, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -class CreativeWork(Parser): - ''' - Documentation for Datacite's metadata: - https://schema.labs.datacite.org/meta/kernel-4.0/doc/DataCite-MetadataKernel_v4.0.pdf - ''' - - def get_schema(self, type): - return { - 'dataset': 'DataSet', - 'software': 'Software', - 'text/book': 'Book', - 'text/book chapter': 'Book', - 'text/book prospectus': 'Book', - 'text/book series': 'Book', - 'text/conference abstract': 'ConferencePaper', - 'text/conference paper': 'ConferencePaper', - 'text/conference poster': 'Poster', - 'text/dissertation': 'Dissertation', - 'text/edited book': 'Book', - 'text/journal article': 'Article', - 'text/journal issue': 'Article', - 'text/patent': 'Patent', - 'text/report': 'Report', - 'text/supervised student publication': 'Thesis', - 'text/working paper': 'WorkingPaper' - - # 'audiovisual': '', - # 'collection': '', - # 'event': '', - # 'image': '', - # 'interactiveresource': '', - # 'model': '', - # 'physicalobject': '', - # 'service': '', - # 'sound': '', - # 'text15': '', - # 'workflow': '', - # 'text/book review': '', - # 'text/conference program': '', - # 'text/dictionary entry': '', - # 'text/disclosure': '', - # 'text/encyclopedia entry': '', - # 'text/Funding submission': '', - # 'text/license': '', - # 'text/magazine article': '', - # 'text/manual': '', - # 'text/newsletter article': '', - # 'text/newspaper article': '', - # 'text/online resource': '', - # 'text/registered copyright': '', - # 'text/research tool': '', - # 'text/tenure-promotion': '', - # 'text/test': '', - # 'text/trademark': '', - # 'text/translation': '', - # 'text/university academic unit': '', - # 'text/website': '', - }.get(type.lower()) or 'CreativeWork' - - schema = tools.RunPython( - 'get_schema', tools.Try( - ctx.record.metadata['oai_datacite'].payload.resource.resourceType['@resourceTypeGeneral'], - default='CreativeWork' - ) - ) - - title = tools.RunPython( - force_text, - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.titles.title), - first_str=True - ) - - description = tools.Try( - tools.Join( - tools.RunPython( - force_text, - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.descriptions.description) - ) - ) - ) - - rights = tools.Try( - tools.Join( - tools.RunPython( - force_text, - tools.Concat(ctx.record.metadata['oai_datacite'].payload.resource.rightsList.rights) - ) - ) - ) - - language = tools.ParseLanguage(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.language)) - - related_agents = tools.Concat( - tools.Map( - tools.Delegate(CreatorRelation), - tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.creators.creator)) - ), - tools.Map( - tools.Delegate(ContributorRelation), - tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.contributors.contributor)) - ), - tools.Map(tools.Delegate( - PublisherRelation), - 
tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.publisher) - ), - tools.Map(tools.Delegate(HostRelation), tools.RunPython( - get_contributors, - tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.contributors.contributor)), - ['HostingInstitution'] - )), - # v.3 Funder is a contributor type - # v.4 FundingReference replaces funder contributor type - tools.Map(tools.Delegate(FunderRelation), tools.RunPython( - get_contributors, - tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.contributors.contributor)), - ['Funder'] - )), - tools.Map( - tools.Delegate(FunderRelation), - tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.fundingReference)) - ) - ) - - # v.4 New, free text, 'subjectScheme' attribute on subject - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Subjects( - tools.RunPython( - force_text, - tools.Concat( - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject), - ) - ) - ) - ) - - tags = tools.Map( - tools.Delegate(ThroughTags), - tools.RunPython( - force_text, - tools.Concat( - tools.Try(ctx.record.metadata['oai_datacite'].type), - tools.RunPython( - force_text, - (tools.Concat(tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject))) - ), - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.formats.format), - tools.Try(ctx.record.metadata['oai_datacite'].datacentreSymbol), - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.resourceType['#text']), - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.resourceType['@resourceTypeGeneral']), - tools.Maybe(ctx.record.header, 'setSpec'), - tools.Maybe(ctx.record.header, '@status') - ) - ) - ) - - identifiers = tools.Concat( - tools.Map( - tools.Delegate(WorkIdentifier), - tools.Concat( - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.identifier) - ) - ), - tools.Map( - tools.Delegate(WorkIdentifier, tools.RunPython(force_text, ctx.alternateIdentifier)), - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.alternateIdentifiers), - ), - ) - - related_works = tools.Concat( - tools.Map( - tools.Delegate(WorkRelation), - tools.RunPython( - get_related_works, - tools.Concat( - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.relatedIdentifiers.relatedIdentifier) - ), - False - ) - ), - tools.Map( - tools.Delegate(InverseWorkRelation), - tools.RunPython( - get_related_works, - tools.Concat( - tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.relatedIdentifiers.relatedIdentifier) - ), - True - ) - ) - ) - - date_updated = tools.ParseDate(tools.Try(ctx.record.header.datestamp)) - date_published = tools.ParseDate(tools.Try(tools.RunPython('get_date_type', tools.Concat(ctx.record.metadata['oai_datacite'].payload.resource.dates.date), 'Issued'))) - free_to_read_type = tools.Try( - tools.IRI(ctx.record.metadata['oai_datacite'].payload.resource.rightsList.rights['@rightsURI']), - exceptions=(InvalidIRI,) - ) - free_to_read_date = tools.ParseDate(tools.Try(tools.RunPython('get_date_type', tools.Concat(ctx.record.metadata['oai_datacite'].payload.resource.dates.date), 'Available'))) - - is_deleted = tools.RunPython('check_status', tools.Try(ctx.record.header['@status'])) - - class Extra: - """ - Fields that are combined in the base parser are relisted as singular elements that match - their original entry to preserve raw data structure. 
- """ - status = tools.Try(ctx.record.header['@status']) - - datestamp = tools.ParseDate(ctx.record.header.datestamp) - - set_spec = tools.Try(ctx.record.header.setSpec) - - is_reference_quality = tools.Try(ctx.record.metadata['oai_datacite'].isReferenceQuality) - - schema_version = tools.Try(ctx.record.metadata['oai_datacite'].schemaVersion) - - datacentre_symbol = tools.Try(ctx.record.metadata['oai_datacite'].datacentreSymbol) - - identifiers = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.identifier) - - alternate_identifiers = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.alternateIdentifiers.alternateidentifier) - - titles = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.titles.title) - - publisher = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.publisher) - - publication_year = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.publicationYear) - - subject = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.subjects.subject) - - resourceType = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.resourceType) - - sizes = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.size) - - format_type = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.formats.format) - - version = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.version) - - rights = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.rights) - - rightsList = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.rightsList) - - related_identifiers = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.relatedIdentifiers.relatedIdentifier) - - description = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.descriptions) - - dates = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.dates.date) - - contributors = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.contributors.contributor) - - creators = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.creators) - - # v.4 new property geoLocationPolygon, in addition to geoLocationPoint and geoLocationBox - geolocations = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.geoLocations) - - funding_reference = tools.Try(ctx.record.metadata['oai_datacite'].payload.resource.fundingReference) - - def check_status(self, status): - if status == 'deleted': - return True - return False - - def get_date_type(self, date_obj, date_type): - date = None - for obj in date_obj: - if obj['@dateType'] == date_type: - date = obj['#text'] - if date and date != '0000': - return date - # raise to break TryLink - raise InvalidPath - - -class DataciteTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/org_dataone.py b/share/transformers/org_dataone.py deleted file mode 100644 index 6cd2922db..000000000 --- a/share/transformers/org_dataone.py +++ /dev/null @@ -1,123 +0,0 @@ -from share.transform.chain import * - - -class WorkIdentifier(Parser): - uri = ctx - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx) - - -class Agent(Parser): - schema = GuessAgentType(ctx) - - name = ctx - - -class Person(Parser): - name = ctx - - -class Contributor(Parser): - agent = Delegate(Person, ctx) - cited_as = ctx - - -class Creator(Contributor): - order_cited = ctx('index') - - -class RelatedWork(Parser): - schema = 'DataSet' - - identifiers = Map( - Delegate(WorkIdentifier), - IRI(ctx) - ) - 
- -class IsPartOf(Parser): - related = Delegate(RelatedWork, ctx) - - -class IsDocumentedBy(Parser): - schema = 'Documents' - - subject = Delegate(RelatedWork, ctx) - - -class Documents(Parser): - related = Delegate(RelatedWork, ctx) - - -class DataSet(Parser): - # https://releases.dataone.org/online/api-documentation-v2.0/design/SearchMetadata.html - title = Try(XPath(ctx, "str[@name='title']").str['#text']) - description = Try(XPath(ctx, "str[@name='abstract']").str['#text']) - date_updated = ParseDate(Try(XPath(ctx, "date[@name='dateModified']").date['#text'])) - date_published = ParseDate(Try(XPath(ctx, "date[@name='datePublished']").date['#text'])) - - related_agents = Concat( - Map( - Delegate(Creator), - Maybe(XPath(ctx, "str[@name='author']"), 'str')['#text'], - ), - Map( - Delegate(Contributor), - Maybe(XPath(ctx, "arr[@name='investigator']"), 'arr').str, - ), - Map( - Delegate(Contributor.using(agent=Delegate(Agent))), - Maybe(XPath(ctx, "arr[@name='origin']"), 'arr').str, - ) - ) - - related_works = Concat( - # TODO Maybe re introduce later with more research - # Map( - # Delegate(IsPartOf), - # Maybe(XPath(ctx, "arr[@name='resourceMap']"), 'arr').str - # ), - Map( - Delegate(Documents), - Maybe(XPath(ctx, "arr[@name='documents']"), 'arr').str - ), - Map( - Delegate(IsDocumentedBy), - Maybe(XPath(ctx, "arr[@name='isDocumentedBy']"), 'arr').str - ), - ) - - identifiers = Map( - Delegate(WorkIdentifier), - Map( - IRI(urn_fallback=True), - Maybe(XPath(ctx, "str[@name='dataUrl']"), 'str')['#text'], - Maybe(XPath(ctx, "str[@name='identifier']"), 'str')['#text'] - ) - ) - - tags = Map( - Delegate(ThroughTags), - Maybe(XPath(ctx, "arr[@name='keywords']"), 'arr').str - ) - - class Extra: - datasource = Maybe(XPath(ctx, "str[@name='datasource']"), 'str')['#text'] - datePublished = Maybe(XPath(ctx, "date[@name='datePublished']"), 'date')['#text'] - dateUploaded = Maybe(XPath(ctx, "date[@name='dateUploaded']"), 'date')['#text'] - fileID = Maybe(XPath(ctx, "str[@name='fileID']"), 'str')['#text'] - formatId = Maybe(XPath(ctx, "str[@name='formatId']"), 'str')['#text'] - formatType = Maybe(XPath(ctx, "str[@name='formatType']"), 'str')['#text'] - id = Maybe(XPath(ctx, "str[@name='id']"), 'str')['#text'] - identifier = Maybe(XPath(ctx, "str[@name='identifier']"), 'str')['#text'] - - -class DataoneTransformer(ChainTransformer): - VERSION = 1 - root_parser = DataSet diff --git a/share/transformers/org_elife.py b/share/transformers/org_elife.py deleted file mode 100644 index 706272664..000000000 --- a/share/transformers/org_elife.py +++ /dev/null @@ -1,161 +0,0 @@ -from share.transform.chain import * # noqa -from share.transform.chain.soup import Soup, SoupXMLDict, SoupXMLTransformer - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class AgentIdentifier(Parser): - uri = IRI(ctx['#text']) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = Delegate(Subject, ctx) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = Delegate(Tag, ctx['#text']) - - -class PublisherOrganization(Parser): - schema = 'Organization' # TODO Switch to OAI schema picker - - name = ctx['publisher']['publisher-name']['#text'] - identifiers = Map(Delegate(AgentIdentifier), Soup(ctx, 'issn', **{'pub-type': 'epub'})) - - class Extra: - location = Try(ctx['publisher']['publisher-loc']['#text']) - - -class Publisher(Parser): - agent = Delegate(PublisherOrganization, ctx) - - -class FunderOrganization(Parser): - schema = 'Organization' # TODO Switch to OAI schema 
picker - name = ctx - - -class Funder(Parser): - agent = Delegate(FunderOrganization, ctx['funding-source']['#text']) - - class Extra: - award_id = Map(ctx['#text'], ctx['award-id']) - - -class Institution(Parser): - name = OneOf( - Soup(ctx, 'institution', **{'content-type': None})['#text'], - Soup(ctx, 'institution', **{'content-type': None})[-1]['#text'], - Soup(ctx, 'institution')['#text'], - Soup(ctx, 'institution')[-1]['#text'], - ) - - class Extra: - addr_line = Try(ctx['addr-line']['#text']) - city = Try(ctx['city']['#text']) - country = Try(ctx.country['#text']) - - -class IsAffiliatedWith(Parser): - related = Delegate(Institution, ctx) - - -class Person(Parser): - family_name = ctx.name['surname']['#text'] - given_name = ctx.name['given-names']['#text'] - - identifiers = Map(Delegate(AgentIdentifier), Soup(ctx, 'contrib-id', **{'contrib-id-type': None}), ctx.email) - - related_agents = Map( - Delegate(IsAffiliatedWith), - RunPython('get_affiliations', Soup(ctx, 'xref', **{'ref-type': 'aff'})) - ) - - def get_affiliations(self, refs): - if not isinstance(refs, list): - refs = [refs] - return [ - SoupXMLDict(soup=ctx.frames[0]['context'].soup.find(id=ref['@rid'])) - for ref in refs - if ref - ] - - -class Consortium(Parser): - name = ctx.collab['#text'] - - -class Contributor(Parser): - agent = Delegate(Person, ctx) - cited_as = Join(Concat(ctx.name['given-names']['#text'], ctx.name['surname']['#text'])) - - class Extra: - contributions = RunPython('get_contributions', ctx) - - def get_contributions(self, context): - return [ - x.parent.text - for x in - ctx.frames[0]['context'].soup.find_all(**{'ref-type': 'contrib', 'rid': context.soup.attrs.get('@id')}) - ] - - -class Creator(Contributor): - order_cited = ctx('index') - - -class Article(Parser): - title = ctx.article.front['article-meta']['title-group']['article-title']['#text'] - description = Try(Soup(ctx.article.front['article-meta'], 'abstract', **{'abstract-type': None})['#text']) - is_deleted = Static(False) - - date_published = ParseDate(RunPython('make_date', Soup(ctx.article.front['article-meta'], 'pub-date', **{'publication-format': 'electronic'}))) - date_updated = ParseDate(RunPython('make_date', Soup(ctx.article.front['article-meta'], 'pub-date', **{'publication-format': 'electronic'}))) - rights = IRI(ctx.article.front['article-meta']['license']['@xlink:href']) - - identifiers = Map( - Delegate(WorkIdentifier), - Soup(ctx.article.front['article-meta'], 'article-id', **{'pub-id-type': 'doi'})['#text'], - ) - - subjects = Map( - Delegate(ThroughSubjects), - Subjects(Map(ctx['#text'], ctx.article.front['article-meta']['article-categories']['subject'])) - ) - - tags = Map( - Delegate(ThroughTags), - Concat( - ctx.article.front['article-meta']['article-categories']['subject'], - Map(ctx.kwd, ctx.article.front['article-meta']['kwd-group']), - deep=True - ), - ) - - related_agents = Concat( - Map(Delegate(Funder), Try(ctx.article.front['article-meta']['funding-group']['award-group'])), - Map(Delegate(Publisher), ctx.article.front['journal-meta']), - Map(Delegate(Creator), Filter(lambda x: x.collab is None, Concat(Map(ctx.contrib, ctx.article.front['article-meta']['contrib-group']), deep=True))), - Map(Delegate(Creator.using(agent=Delegate(Consortium), cited_as=ctx.collab['#text'])), Filter(lambda x: x.collab is not None, Concat(Map(ctx.contrib, ctx.article.front['article-meta']['contrib-group']), deep=True))), - ) - - class Extra: - executive_summary = Try(Soup(ctx.article.front['article-meta'], 'abstract', 
**{'abstract-type': 'executive-summary'})['#text']) - - def make_date(self, obj): - return '{}-{}-{}'.format(obj.year['#text'], obj.month['#text'], obj.day['#text']) - - -class ElifeTransformer(SoupXMLTransformer): - VERSION = 1 - root_parser = Article diff --git a/share/transformers/org_engrxiv.py b/share/transformers/org_engrxiv.py deleted file mode 100644 index 93591740a..000000000 --- a/share/transformers/org_engrxiv.py +++ /dev/null @@ -1,12 +0,0 @@ -from share.transform.chain import ChainTransformer -from . import io_osf - - -class Preprint(io_osf.Project): - pass - - -# TODO Could this just use the io.osf.preprints transformer instead? -class EngrxivTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/org_ncar.py b/share/transformers/org_ncar.py deleted file mode 100644 index e2ad567ce..000000000 --- a/share/transformers/org_ncar.py +++ /dev/null @@ -1,127 +0,0 @@ -from share.transform.chain import ctx, links as tools, ChainTransformer -from share.transform.chain.parsers import Parser -from share.transform.chain.utils import format_address - - -class WorkIdentifier(Parser): - uri = tools.RunPython('get_ncar_identifier', ctx) - - class Extra: - description = tools.Try(ctx.Related_URL.Description) - url_content_type = tools.Try(ctx.Related_URL.URL_Content_Type.Type) - - def get_ncar_identifier(self, ctx): - return 'https://www.earthsystemgrid.org/dataset/{}.html'.format(ctx['Entry_ID']) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class PersonnelAgent(Parser): - schema = tools.GuessAgentType( - tools.RunPython('combine_first_last_name', ctx) - ) - - name = tools.RunPython('combine_first_last_name', ctx) - location = tools.RunPython('get_address', ctx['Contact_Address']) - - class Extra: - role = tools.Try(ctx.Role) - url = tools.Try(ctx.Data_Center_URL) - - def combine_first_last_name(self, ctx): - return ctx['First_Name'] + ' ' + ctx['Last_Name'] - - def get_address(self, ctx): - address = ctx['Address'] - if isinstance(address, list): - address1 = address[0] - address2 = address[1] - return format_address( - address1=address1, - address2=address2, - city=ctx['City'], - state_or_province=ctx['Province_or_State'], - postal_code=ctx['Postal_Code'], - country=ctx['Country'] - ) - - # a single-string address has no second address line - return format_address( - address1=ctx['Address'], - address2='', - city=ctx['City'], - state_or_province=ctx['Province_or_State'], - postal_code=ctx['Postal_Code'], - country=ctx['Country'] - ) - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(PersonnelAgent, ctx) - - -class DataCenterAgent(Parser): - schema = tools.GuessAgentType( - ctx.Data_Center_Name.Long_Name, - default='organization' - ) - - name = ctx.Data_Center_Name.Long_Name - related_agents = tools.Map(tools.Delegate(IsAffiliatedWith), tools.Try(ctx.Personnel)) - - class Extra: - data_center_short_name = ctx.Data_Center_Name.Short_Name - - -class AgentWorkRelation(Parser): - agent = tools.Delegate(DataCenterAgent, ctx) - - -class DataSet(Parser): - title = tools.Join(tools.Try(ctx.record.metadata.DIF.Entry_Title)) - description = tools.Try(ctx.record.metadata.DIF.Summary.Abstract) - - related_agents = tools.Map( - tools.Delegate(AgentWorkRelation), - tools.Try(ctx.record.metadata.DIF.Data_Center) - ) - - tags = tools.Map( - tools.Delegate(ThroughTags), - tools.Try(ctx.record.metadata.DIF.Metadata_Name), - tools.Try(ctx.record.header.setSpec) - ) - - identifiers = tools.Map(tools.Delegate(WorkIdentifier), 
tools.Try(ctx.record.metadata.DIF)) - - date_updated = tools.ParseDate(ctx.record.header.datestamp) - - is_deleted = tools.RunPython('check_status', tools.Try(ctx.record.header['@status'])) - - class Extra: - status = tools.Try(ctx.record.header['@status']) - - entry_id = tools.Try(ctx.record.metadata.DIF.Entry_ID) - - metadata_name = tools.Try(ctx.record.metadata.DIF.Metadata_Name) - - metadata_version = tools.Try(ctx.record.metadata.DIF.Metadata_Version) - - last_dif_revision_date = tools.Try(ctx.record.metadata.DIF.Last_DIF_Revision_Date) - - set_spec = ctx.record.header.setSpec - - def check_status(self, status): - if status == 'deleted': - return True - return False - - -class NCARTransformer(ChainTransformer): - VERSION = 1 - root_parser = DataSet diff --git a/share/transformers/org_neurovault.py b/share/transformers/org_neurovault.py deleted file mode 100644 index 684e7f9e1..000000000 --- a/share/transformers/org_neurovault.py +++ /dev/null @@ -1,47 +0,0 @@ -import re - -from share.transform.chain import * - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Person(Parser): - given_name = ParseName(ctx).first - family_name = ParseName(ctx).last - additional_name = ParseName(ctx).middle - suffix = ParseName(ctx).suffix - - -class Creator(Parser): - order_cited = ctx('index') - cited_as = ctx - agent = Delegate(Person, ctx) - - -class CreativeWork(Parser): - title = Try(ctx.name) - description = Try(ctx.description) - date_published = ParseDate(Try(ctx.add_date)) - date_updated = ParseDate(Try(ctx.modify_date)) - - related_agents = Map(Delegate(Creator), RunPython('parse_names', Try(ctx.authors))) - - identifiers = Map( - Delegate(WorkIdentifier), - Try(ctx.DOI), - Try(ctx.full_dataset_url), - Try(ctx.paper_url), - Try(ctx.url), - ) - - def parse_names(self, authors): - if not authors: - return [] - return re.split(r',\s|\sand\s', authors) - - -class NeurovaultTransformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/org_plos.py b/share/transformers/org_plos.py deleted file mode 100644 index def6ec8e0..000000000 --- a/share/transformers/org_plos.py +++ /dev/null @@ -1,64 +0,0 @@ -from share.transform.chain import * # noqa - - -class AgentIdentifier(Parser): - uri = IRI(ctx) - - -class WorkIdentifier(Parser): - uri = IRI(ctx) - - -class Person(Parser): - name = ctx - - -class Creator(Parser): - agent = Delegate(Person, ctx) - cited_as = ctx - order_cited = ctx('index') - - -class Organization(Parser): - name = XPath(ctx, "str[@name='journal']").str['#text'] - identifiers = Map( - Delegate(AgentIdentifier), - Map(Try(IRI(), exceptions=(InvalidIRI, )), XPath(ctx, "str[@name='eissn']").str['#text']) - ) - - -class Publisher(Parser): - agent = Delegate(Organization, ctx) - - -class Article(Parser): - title = XPath(ctx, "str[@name='title_display']").str['#text'] - description = XPath(ctx, "arr[@name='abstract']/str").str - # is_deleted - date_published = ParseDate(XPath(ctx, "date[@name='publication_date']").date['#text']) - date_updated = ParseDate(XPath(ctx, "date[@name='publication_date']").date['#text']) - # free_to_read_type - # free_to_read_data - # rights - # language - - # subjects - # tags - - identifiers = Map( - Delegate(WorkIdentifier), - XPath(ctx, "str[@name='id']").str['#text'], - ) - related_agents = Concat( - Map(Delegate(Creator), Try(XPath(ctx, "arr[@name='author_display']").arr.str)), - Map(Delegate(Publisher), ctx) - ) - # related_works - - class Extra: - article_type = XPath(ctx, 
"str[@name='article_type']").str['#text'] - - -class PLoSTransformer(ChainTransformer): - VERSION = 1 - root_parser = Article diff --git a/share/transformers/org_psyarxiv.py b/share/transformers/org_psyarxiv.py deleted file mode 100644 index 0667c61e6..000000000 --- a/share/transformers/org_psyarxiv.py +++ /dev/null @@ -1,16 +0,0 @@ -from share.transform.chain import links as tools, ChainTransformer - -from . import io_osf -from .io_osf_preprints import ThroughSubjects - - -class Preprint(io_osf.Project): - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Concat(tools.Static({'text': 'Social and behavioral sciences'})) - ) - - -class PsyarxivTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/org_socarxiv.py b/share/transformers/org_socarxiv.py deleted file mode 100644 index 5baac1754..000000000 --- a/share/transformers/org_socarxiv.py +++ /dev/null @@ -1,16 +0,0 @@ -from share.transform.chain import links as tools, ChainTransformer - -from . import io_osf -from .io_osf_preprints import ThroughSubjects - - -class Preprint(io_osf.Project): - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Concat(tools.Static({'text': 'Social and behavioral sciences'})) - ) - - -class SocarxivTransformer(ChainTransformer): - VERSION = 1 - root_parser = Preprint diff --git a/share/transformers/org_socialscienceregistry.py b/share/transformers/org_socialscienceregistry.py deleted file mode 100644 index 26c0c43ae..000000000 --- a/share/transformers/org_socialscienceregistry.py +++ /dev/null @@ -1,224 +0,0 @@ -from collections import OrderedDict - -import json - -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser - - -class AgentIdentifier(Parser): - uri = tools.IRI(ctx) - - -class WorkIdentifier(Parser): - uri = ctx - - -class Organization(Parser): - name = ctx - - -class Publisher(Parser): - agent = tools.Delegate(Organization, ctx) - - -class Institution(Parser): - name = ctx - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(Institution) - - -class Person(Parser): - given_name = tools.ParseName(tools.Try(ctx.name)).first - family_name = tools.ParseName(tools.Try(ctx.name)).last - identifiers = tools.Map(tools.Delegate(AgentIdentifier), tools.Try(ctx.email)) - - -class Creator(Parser): - agent = tools.Delegate(Person, ctx) - - -class Subject(Parser): - name = ctx - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -def process_keywords(text): - text = json.loads(text) - text = [item for item in text if item] - return text - - -class Registration(Parser): - title = tools.Try(ctx['general-information']['title']) - description = tools.Try(ctx['additional-trial-info']['abstract']) - date_updated = tools.ParseDate(tools.Try(ctx['general-information']['last-updated'])) - date_published = tools.ParseDate(tools.Try(ctx['general-information']['published-at'])) - related_agents = tools.Map(tools.Delegate(Creator), tools.Try(ctx.pi)) - identifiers = tools.Map( - tools.Delegate(WorkIdentifier), - tools.Try(tools.IRI(ctx['general-information']['url'])), - ) - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Subjects( - tools.RunPython( - process_keywords, - tools.Try(ctx['additional-trial-info']['keywords']), - ) - ) - ) - tags = tools.Map( - tools.Delegate(ThroughTags), - 
tools.Concat( - tools.RunPython( - process_keywords, - tools.Try(ctx['additional-trial-info']['keywords']), - ), - tools.Try(ctx['additional-trial-info']['status']), - tools.Try(ctx['additional-trial-info']['jel-code']) - ) - ) - - class Extra: - general_information = tools.Try(ctx['general-information']) - additional_trial_information = tools.Try(ctx['additional-trial-info']) - publication_data = tools.Try(ctx['data-publication']) - primary_investigator = tools.Try(ctx['pi']) - interventions = tools.Try(ctx['interventions']) - outcomes = tools.Try(ctx['outcomes']) - experimental_design = tools.Try(ctx['experimental-design']) - experimental_characteristics = tools.Try(ctx['experimental-characteristics']) - supporting_document_material = tools.Try(ctx['supporting-doc-material']) - post_trial = tools.Try(ctx['post-trial']) - reports_papers = tools.Try(ctx['reports-papers']) - - -class SCTransformer(ChainTransformer): - VERSION = 1 - root_parser = Registration - - def unwrap_data(self, data): - loaded_data = json.loads(data, object_pairs_hook=OrderedDict) - return self.process_record(loaded_data['record']) - - def process_record(self, record): - data = {} - general_info = {} - if record[0]: - general_info['title'] = record[0] - if record[5]: - general_info['RCT-ID'] = record[5] - if record[4]: - general_info['registered-on'] = record[4] - if record[2]: - general_info['last-updated'] = record[2] - if record[1]: - general_info['url'] = record[1] - if record[3]: - general_info['published-at'] = record[3] - if general_info: - data['general-information'] = general_info - - if record[6]: - pi = record[6].split(',') - data['pi'] = {'name': pi[0].strip(), 'email': pi[1].strip()} - - additional_trial_info = {} - if record[7]: - additional_trial_info['status'] = record[7] - if record[8]: - additional_trial_info['start-date'] = record[8] - if record[9]: - additional_trial_info['end-date'] = record[9] - if record[10]: - additional_trial_info['keywords'] = record[10] - if record[11]: - additional_trial_info['jel-code'] = record[11] - if record[12]: - additional_trial_info['abstract'] = record[12] - if additional_trial_info: - data['additional-trial-info'] = additional_trial_info - - interventions = {} - if record[13]: - interventions['start-date'] = record[13] - if record[14]: - interventions['end-date'] = record[14] - if interventions: - data['interventions'] = interventions - - outcomes = {} - if record[15]: - outcomes['outcome-end-points'] = record[15] - if record[16]: - outcomes['outcome-explanation'] = record[16] - if outcomes: - data['outcomes'] = outcomes - - experimental_design = {} - if record[17]: - experimental_design['experimental-design'] = record[17] - if record[19]: - experimental_design['rand-method'] = record[19] - if record[20]: - experimental_design['rand-unit'] = record[20] - if experimental_design: - data['experimental-design'] = experimental_design - - experimental_characteristics = {} - if record[21]: - experimental_characteristics['sample-size-number-clusters'] = record[21] - if record[22]: - experimental_characteristics['sample-size-number-observations'] = record[22] - if record[23]: - experimental_characteristics['sample-size-number-arms'] = record[23] - if record[24]: - experimental_characteristics['min-effect-size'] = record[24] - if experimental_characteristics: - data['experimental-characteristics'] = experimental_characteristics - - if record[25]: - data['supporting-doc-material'] = record[25] - - post_trial = {} - if record[27]: - post_trial['intervention-complete-date'] = 
record[27] - if record[28]: - post_trial['data-collection-completion'] = record[28] - if record[37]: - post_trial['data-collection-completion-date'] = record[37] - if post_trial: - data['post-trial'] = post_trial - - data_publication = {} - if record[33]: - data_publication['public-data-url'] = record[33] - if record[36]: - data_publication['program-files-url'] = record[36] - if data_publication: - data['data-publication'] = data_publication - - reports_papers = {} - if record[38]: - reports_papers['relevant-reports'] = record[38] - if record[39]: - reports_papers['relevant-papers'] = record[39] - if reports_papers: - data['reports-papers'] = reports_papers - - return data diff --git a/share/transformers/org_swbiodiversity.py b/share/transformers/org_swbiodiversity.py deleted file mode 100644 index 6da3b7dbe..000000000 --- a/share/transformers/org_swbiodiversity.py +++ /dev/null @@ -1,128 +0,0 @@ -from bs4 import BeautifulSoup - -from share.transform.chain import ctx -from share.transform.chain import links as tools -from share.transform.chain.parsers import Parser -from share.transform.chain.soup import SoupXMLTransformer -from share.transform.chain.utils import contact_extract - - -class AgentIdentifier(Parser): - uri = tools.IRI(ctx) - - -class WorkIdentifier(Parser): - uri = tools.IRI(ctx) - - -class Organization(Parser): - name = ctx - - -class Publisher(Parser): - agent = tools.Delegate(Organization, ctx) - - -class Institution(Parser): - name = ctx - - -class IsAffiliatedWith(Parser): - related = tools.Delegate(Institution) - - -class Person(Parser): - given_name = tools.ParseName(tools.Try(ctx.name)).first - family_name = tools.ParseName(tools.Try(ctx.name)).last - identifiers = tools.Map(tools.Delegate(AgentIdentifier), tools.Try(ctx.email)) - - -class Creator(Parser): - agent = tools.Delegate(Person, ctx) - - -class Dataset(Parser): - title = tools.Try(ctx['title']) - description = tools.Try(ctx['description']) - - rights = tools.Try( - tools.Join( - tools.Concat( - tools.Try(ctx['access-rights']), - tools.Try(ctx['usage-rights']) - ) - ) - ) - - related_agents = tools.Map(tools.Delegate(Creator), tools.Try(ctx.contact)) - - class Extra: - access_rights = tools.Try(ctx['access-rights']) - usage_rights = tools.Try(ctx['usage-rights']) - collection_statistics = tools.Try(ctx['collection-statistics']) - management = tools.Try(ctx['management']) - collection_type = tools.Try(ctx['collection-type']) - last_update = tools.ParseDate(tools.Try(ctx['last-update'])) - - -class SWTransformer(SoupXMLTransformer): - VERSION = 1 - root_parser = Dataset - - def unwrap_data(self, input_data, **kwargs): - record = BeautifulSoup(input_data, 'lxml').html - data = {} - title = self.extract_text(record.h1) - if title: - data['title'] = title - start = record.div.div - description = self.extract_text(start.find_next()) - if description: - data['description'] = description - - if start: - body = start.find_all_next(style='margin-top:5px;') - body = list(map(self.extract_text, body)) - - for entry in body: - - if 'Contact:' in entry: - data['contact'] = contact_extract(entry) - - if 'Collection Type:' in entry: - collection_type = entry.replace('Collection Type: ', '') - data['collection-type'] = collection_type - - if 'Management:' in entry: - management = entry.replace('Management: ', '') - if 'Last Update:' in management: - management_update = management.split('Last Update:', 1) - management = management_update[0] - last_update = management_update[1] - if last_update: - data['last-update'] = 
last_update.strip() - data['management'] = management.strip() - - if 'Usage Rights:' in entry: - usage_rights = entry.replace('Usage Rights: ', '') - data['usage-rights'] = usage_rights - - if 'Access Rights' in entry or 'Rights Holder:' in entry: - access_rights = entry.replace('Access Rights: ', '').replace('Rights Holder: ', '') - data['access-rights'] = access_rights - - collection_statistics = start.find_all_next('li') - collection_statistics = list(map(self.extract_text, collection_statistics)) - data['collection-statistics'] = self.process_collection_stat(collection_statistics) - - return data - - def extract_text(self, text): - return text.text.strip() - - def process_collection_stat(self, list_values): - stat = {} - for item in list_values: - value = item.split() - stat[item.replace(str(value[0]), '').strip()] = value[0] - return stat diff --git a/share/transformers/v1_push.py b/share/transformers/v1_push.py deleted file mode 100644 index ea7c1ac28..000000000 --- a/share/transformers/v1_push.py +++ /dev/null @@ -1,198 +0,0 @@ -import re - -from share.transform.chain import links as tools -from share.transform.chain import ctx, ChainTransformer -from share.transform.chain.parsers import Parser - -THE_REGEX = re.compile(r'(^the\s|\sthe\s)') - - -class WorkIdentifier(Parser): - uri = ctx - - -class AgentIdentifier(Parser): - uri = ctx - - -class IsAffiliatedWith(Parser): - # Moved below Agent definition to resolve cyclical references - # related = tools.Delegate(OrgAgent) - pass - - -class Agent(Parser): - schema = tools.GuessAgentType(ctx.name) - - name = ctx.name - - related_agents = tools.Map( - tools.Delegate(IsAffiliatedWith), - tools.Try(ctx.affiliation) - ) - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.Map( - tools.IRI(), - tools.Try(ctx.sameAs), - tools.Try(ctx.email) - ) - ) - - class Extra: - givenName = tools.Try(ctx.givenName) - familyName = tools.Try(ctx.familyName) - additonalName = tools.Try(ctx.additionalName) - name = tools.Try(ctx.name) - - -class OrgAgent(Agent): - schema = tools.GuessAgentType(ctx.name, default='organization') - - -IsAffiliatedWith.related = tools.Delegate(OrgAgent) - - -class Creator(Parser): - agent = tools.Delegate(Agent, ctx) - cited_as = ctx.name - order_cited = ctx('index') - - -class Publisher(Parser): - agent = tools.Delegate(OrgAgent, ctx) - cited_as = ctx.name - - -class FundingAgent(Parser): - schema = tools.GuessAgentType(ctx.sponsorName, default='organization') - - name = ctx.sponsorName - - identifiers = tools.Map( - tools.Delegate(AgentIdentifier), - tools.IRI(tools.Try(ctx.sponsorIdentifier)) - ) - - -class Award(Parser): - name = ctx.awardName - uri = tools.IRI(tools.Try(ctx.awardIdentifier)) - - -class ThroughAwards(Parser): - award = tools.Delegate(Award, ctx) - - -class Funder(Parser): - agent = tools.Delegate(FundingAgent, ctx.sponsor) - cited_as = ctx.sponsor.sponsorName - - awards = tools.Map( - tools.Delegate(ThroughAwards), - tools.Try(ctx.award) - ) - - -class Tag(Parser): - name = ctx - - -class ThroughTags(Parser): - tag = tools.Delegate(Tag, ctx) - - -class Subject(Parser): - name = ctx - - -class ThroughSubjects(Parser): - subject = tools.Delegate(Subject, ctx) - - -class CreativeWork(Parser): - title = ctx.title - description = tools.Try(ctx.description) - is_deleted = tools.RunPython('_is_deleted', tools.Try(ctx.otherProperties)) - date_updated = tools.ParseDate(tools.Try(ctx.providerUpdatedDateTime)) - rights = tools.Join(tools.Try(ctx.licenses.uri)) - - # Note: this is only taking the 
first language in the case of multiple languages - language = tools.ParseLanguage( - tools.Try(ctx.languages[0]), - ) - - related_agents = tools.Concat( - tools.Map( - tools.Delegate(Creator), - tools.Try(ctx.contributors) - ), - tools.Map( - tools.Delegate(Publisher), - tools.Try(ctx.publisher) - ), - tools.Map( - tools.Delegate(Funder), - tools.Try(ctx.sponsorships) - ) - ) - - identifiers = tools.Map( - tools.Delegate(WorkIdentifier), - tools.Map( - tools.IRI(), - tools.RunPython( - 'unique', - tools.Concat( - tools.Try(ctx.uris.canonicalUri), - tools.Try(ctx.uris.providerUris), - tools.Try(ctx.uris.descriptorUris), - tools.Try(ctx.uris.objectUris) - ) - ) - ) - ) - - subjects = tools.Map( - tools.Delegate(ThroughSubjects), - tools.Subjects( - tools.Try(ctx.subjects) - ) - ) - - tags = tools.Map( - tools.Delegate(ThroughTags), - tools.Try(ctx.tags), - tools.Try(ctx.subjects) - ) - - class Extra: - """ - Fields that are combined in the base parser are relisted as singular elements that match - their original entry to preserve raw data structure. - """ - freeToRead = tools.Try(ctx.freeToRead) - languages = tools.Try(ctx.languages) - licenses = tools.Try(ctx.licenses) - otherProperties = tools.Try(ctx.otherProperties) - publisher = tools.Try(ctx.publisher) - subjects = tools.Try(ctx.subjects) - sponsorships = tools.Try(ctx.sponsorships) - tags = tools.Try(ctx.tags) - uris = tools.Try(ctx.uris) - version = tools.Try(ctx.version) - - def unique(self, items): - return list(sorted(set(items))) - - def _is_deleted(self, properties): - for prop in properties or []: - if prop['name'] == 'status': - return 'deleted' in prop['properties'].get('status', []) - return False - - -class V1Transformer(ChainTransformer): - VERSION = 1 - root_parser = CreativeWork diff --git a/share/transformers/v2_push.py b/share/transformers/v2_push.py deleted file mode 100644 index 8b45e6c2c..000000000 --- a/share/transformers/v2_push.py +++ /dev/null @@ -1,14 +0,0 @@ -import json - -from collections import OrderedDict - -from share.transform import BaseTransformer - - -# The v2 Push API requires pushing already transformed data, so do nothing but parse JSON -class V2PushTransformer(BaseTransformer): - VERSION = 1 - - def do_transform(self, datum): - parsed = json.loads(datum, object_pairs_hook=OrderedDict) - return parsed, None diff --git a/share/util/extensions.py b/share/util/extensions.py deleted file mode 100644 index 49340fcd1..000000000 --- a/share/util/extensions.py +++ /dev/null @@ -1,49 +0,0 @@ -from stevedore import extension - - -class ExtensionsError(Exception): - pass - - -def on_extension_load_error(manager, entrypoint, exception): - raise ExtensionsError(f'Error loading {entrypoint}') from exception - - -class Extensions: - """Lazy singleton container for stevedore extensions. - - Loads each namespace when requested for the first time. 
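- 
-    Example (an illustrative sketch; 'share.harvesters' is a namespace
-    registered elsewhere in this codebase, but any stevedore entry-point
-    namespace works the same way):
-    ```
-    names = Extensions.get_names('share.harvesters')
-    HarvesterClass = Extensions.get('share.harvesters', names[0])
-    ```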
- """ - - _managers = {} - - def __init__(self): - raise NotImplementedError() - - @classmethod - def get_names(cls, namespace): - manager = cls._get_manager(namespace) - return manager.names() - - @classmethod - def get(cls, namespace, name): - try: - return cls._get_manager(namespace)[name].plugin - except Exception as exc: - raise ExtensionsError(f'Error loading extension ("{namespace}", "{name}")') from exc - - @classmethod - def _get_manager(cls, namespace): - manager = cls._managers.get(namespace) - if manager is None: - manager = cls._load_namespace(namespace) - return manager - - @classmethod - def _load_namespace(cls, namespace): - try: - manager = extension.ExtensionManager(namespace, on_load_failure_callback=on_extension_load_error) - cls._managers[namespace] = manager - return manager - except Exception as exc: - raise ExtensionsError(f'Error loading extension namespace "{namespace}"') from exc diff --git a/share/util/graph.py b/share/util/graph.py deleted file mode 100644 index 52faa11f8..000000000 --- a/share/util/graph.py +++ /dev/null @@ -1,581 +0,0 @@ -from enum import Enum, auto -from operator import attrgetter - -import networkx as nx -import pendulum -import uuid - -from share.exceptions import ShareException -from share.schema import ShareV2Schema -from share.schema.exceptions import SchemaKeyError -from share.schema.shapes import AttributeDataType, RelationShape -from share.util import TopologicalSorter - - -class MutableGraphError(ShareException): - pass - - -class PrivateNodeAttrs(Enum): - TYPE = auto() - - -class EdgeAttrs(Enum): - FROM_NAME = auto() - TO_NAME = auto() - - -def resolve_field(type_name, field_name): - try: - return ShareV2Schema().get_field(type_name, field_name) - except SchemaKeyError: - return None - - -# TODO: ImmutableGraph (don't allow mutation and error on non-existent attr/relation) - -class MutableGraph(nx.DiGraph): - """NetworkX DiGraph with some SHARE-specific features. - - Nodes in the DiGraph are string IDs. Uses MutableNode as a convenience interface to access/manipulate nodes. - - Provides the abstraction of named edges: - * Each named edge has two names: `from_name` and `to_name` - * the "from" node knows the edge by its `from_name` - * the "to" node knows the edge by its `to_name` - * correspond to a foreign key and its related field - * All outgoing edges from a node must be unique on `from_name` - - - Example: Find all URIs identifying a work - ``` - work = graph.get_node(work_id) - uris = [identifier['uri'] for identifier in work['identifiers']] - ``` - - Example: Remove all orphan nodes (no incoming or outgoing edges) - ``` - orphans = graph.filter_nodes(lambda n: not graph.degree(n)) - for orphan in orphans: - graph.remove_node(orphan.id) - ``` - """ - - @classmethod - def from_jsonld(cls, nodes): - """Create a mutable graph from a list of JSON-LD-style dicts. 
- """ - central_node_id = None - if isinstance(nodes, dict): - central_node_id = nodes.get('central_node_id', None) - nodes = nodes['@graph'] - graph = cls() - if central_node_id: - graph.central_node_id = central_node_id - - for n in nodes: - node_id, node_type = None, None - attrs = {} - for k, v in n.items(): - if k == '@id': - node_id = v - elif k == '@type': - node_type = v - elif isinstance(v, dict) and k != 'extra': - graph.add_node(v['@id'], v['@type']) - attrs[k] = v['@id'] - elif isinstance(v, list): - pass # Don't bother with incoming edges, let the other node point here - else: - attrs[k] = v - if not node_id or not node_type: - raise MutableGraphError('Nodes must have id and type') - graph.add_node(node_id, node_type, attrs) - return graph - - def __init__(self): - super().__init__() - self.changed = False - self.central_node_id = None - - def to_jsonld(self, in_edges=True): - """Return a dictionary with '@graph' and 'central_node_id' keys that will serialize - to json-ld conforming with the SHARE schema - - in_edges (boolean): Include lists of incoming edges. Default True. - """ - return { - 'central_node_id': self.central_node_id, - '@graph': [ - node.to_jsonld(in_edges=in_edges) - for node in self.topologically_sorted() - ], - } - - def add_node(self, node_id, node_type, attrs=None): - """Create a node in the graph. - - node_id (hashable): Unique node ID. If None, generate a random ID. - node_type (str): The node's @type value - attrs: Dictionary of attributes or relations corresponding to fields on the node's model - - Returns a MutableNode wrapper for the new node. - """ - if node_type is None: - raise MutableGraphError('Must provide `node_type` to MutableGraph.add_node') - self.changed = True - - if node_id is None: - node_id = '_:{}'.format(uuid.uuid4()) - - super().add_node(node_id) - return MutableNode(self, node_id, node_type, attrs) - - def get_node(self, node_id): - """Get a node by ID. - - node_id (hashable): Unique node ID - - Returns a MutableNode wrapper for the node, or None. - """ - if node_id in self: - return MutableNode(self, node_id) - return None - - def remove_node(self, node_id, cascade=True): - """Remove a node and its incoming/outgoing edges. - - node_id (hashable): Unique node ID - cascade (boolean): Also remove nodes with edges which point to this node. Default True. - """ - self.changed = True - - to_remove = list(self.predecessors(node_id)) if cascade else [] - super().remove_node(node_id) - for from_id in to_remove: - self.remove_node(from_id, cascade) - - def filter_nodes(self, filter): - """Filter the nodes in the graph. - - filter (callable): When called with a MutableNode argument, return something truthy to - include it in the filtered list, or something falsy to omit it. - - Returns list of MutableNodes. - """ - # TODO figure out common sorts of filters, make kwargs for them and optimize - return [node for node in self if filter(node)] - - def filter_type(self, node_type): - # TODO make a sort of index dict, mapping type to nodes - return self.filter_nodes(lambda n: n.type == node_type.lower()) - - def filter_by_concrete_type(self, concrete_type): - # TODO make a sort of index dict, mapping concrete_type to nodes - lower_concrete_type = concrete_type.lower() - return self.filter_nodes(lambda n: n.concrete_type == lower_concrete_type) - - def add_named_edge(self, from_id, to_id, from_name, to_name): - """Add a named edge. 
- - from_id (hashable): Unique ID for the node this edge comes from - to_id (hashable): Unique ID for the node this edge points to - from_name (str): Name of the edge on its 'from' node (must be unique on the node) - to_name (str): Name of the edge on its 'to' node - """ - if any(data.get(EdgeAttrs.FROM_NAME) == from_name - for _, _, data in self.out_edges(from_id, data=True)): - raise MutableGraphError('Out-edge names must be unique on the node') - - self.changed = True - - self.add_edge(from_id, to_id) - self.edges[from_id, to_id][EdgeAttrs.FROM_NAME] = from_name - self.edges[from_id, to_id][EdgeAttrs.TO_NAME] = to_name - - def remove_named_edge(self, from_id, from_name): - """Remove a named edge. - - from_id (hashable): Unique ID for the node this edge comes from - from_name (str): Name of the edge on its 'from' node - """ - self.changed = True - try: - to_id = next( - to_id for _, to_id, data - in self.out_edges(from_id, data=True) - if data.get(EdgeAttrs.FROM_NAME) == from_name - ) - self.remove_edge(from_id, to_id) - except StopIteration: - pass - - def resolve_named_out_edge(self, from_id, from_name): - """Get the node a named edge points to. - - from_id (hashable): Unique ID for the node this edge comes from - from_name (str): Name of the edge on its 'from' node - - Returns a MutableNode wrapper for the node the edge points to. - """ - try: - return next( - MutableNode(self, to_id) for _, to_id, data - in self.out_edges(from_id, data=True) - if data.get(EdgeAttrs.FROM_NAME) == from_name - ) - except StopIteration: - return None - - def resolve_named_in_edges(self, to_id, to_name): - """Get all nodes which point to a node with the same named edges. - - to_id (hashable): Unique ID for the node these edges point to - to_name (str): Name of the edges on their 'to' node - - Returns list of MutableNode wrappers for the nodes these edges come from. - """ - return [ - MutableNode(self, from_id) for from_id, _, data - in self.in_edges(to_id, data=True) - if data.get(EdgeAttrs.TO_NAME) == to_name - ] - - def named_out_edges(self, from_id): - """Get all outgoing named edges from a node. - - from_id (hashable): Unique node ID - - Returns dict with: - keys: `from_name` of each outgoing edge - values: MutableNode wrapper for the node each edge points to - """ - return { - data[EdgeAttrs.FROM_NAME]: MutableNode(self, to_id) for _, to_id, data - in self.out_edges(from_id, data=True) - if data.get(EdgeAttrs.FROM_NAME) is not None - } - - def named_in_edges(self, to_id): - """Get all incoming named edges to a node. - - to_id (hashable): Unique node ID - - Returns dict of edges with: - keys: `to_name` of each incoming edge - values: list of MutableNode wrappers for the nodes each edge comes from - """ - in_edges = {} - for from_id, _, data in self.in_edges(to_id, data=True): - to_name = data.get(EdgeAttrs.TO_NAME) - if to_name is not None: - in_edges.setdefault(to_name, []).append(MutableNode(self, from_id)) - return in_edges - - def merge_nodes(self, from_node, into_node): - """Merge a nodes attrs and edges into another node. 
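- 
-        The node with the more specific type is kept; conflicting datetime
-        attrs resolve to the later value, other attrs to the longer value.
-        An illustrative sketch, assuming the two works are known duplicates:
-        ```
-        graph.merge_nodes(duplicate_work, canonical_work)
-        ```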
- """ - if from_node.concrete_type != into_node.concrete_type: - raise MutableGraphError('Cannot merge nodes of different types') - - self.changed = True - - # into_node will have the more specific typ - if from_node.schema_type.distance_from_concrete_type > into_node.schema_type.distance_from_concrete_type: - from_node, into_node = into_node, from_node - - self._merge_node_attrs(from_node, into_node) - self._merge_in_edges(from_node, into_node) - self._merge_out_edges(from_node, into_node) - - from_node.delete(cascade=False) - - def topologically_sorted(self): - return TopologicalSorter( - sorted(self, key=attrgetter('id')), - dependencies=lambda n: sorted(self.successors(n.id)), - key=attrgetter('id'), - ).sorted() - - def __iter__(self): - return (MutableNode(self, node_id) for node_id in super().__iter__()) - - def __contains__(self, n): - if isinstance(n, MutableNode): - n = n.id - return super().__contains__(n) - - def __bool__(self): - return bool(len(self)) - - def _merge_node_attrs(self, from_node, into_node): - into_attrs = into_node.attrs() - for k, new_val in from_node.attrs().items(): - if k in into_attrs: - old_val = into_attrs[k] - if new_val == old_val: - continue - - field = resolve_field(into_node.type, k) - if getattr(field, 'data_type', None) == AttributeDataType.DATETIME: - new_val = max(pendulum.parse(new_val), pendulum.parse(old_val)).isoformat() - else: - new_val = self._merge_value(new_val, old_val) - into_node[k] = new_val - - def _merge_value(self, value_a, value_b): - # use the longer value, or the first alphabetically if they're the same length - return sorted([value_a, value_b], key=lambda x: (-len(str(x)), str(x)))[0] - - def _merge_in_edges(self, from_node, into_node): - for in_edge_name, source_nodes in self.named_in_edges(from_node.id).items(): - inverse_relation = resolve_field(from_node.type, in_edge_name).inverse_relation - for source_node in source_nodes: - source_node[inverse_relation] = into_node - - def _merge_out_edges(self, from_node, into_node): - into_edges = self.named_out_edges(into_node.id) - for edge_name, from_target in self.named_out_edges(from_node.id).items(): - into_target = into_edges.get(edge_name) - if from_target != into_target: - self.merge_nodes(from_target, into_target) - - def get_central_node(self, guess=False): - if guess and self.central_node_id is None: - self._guess_central_node() - return self.get_node(self.central_node_id) - - def _guess_central_node(self): - # use a heuristic to guess the "central" node, when it's not given - # (the whole idea of guessing here is a hack to handle old data -- - # hopefully we can get away from it eventually) - - def centrality_heuristic(work_node): - # return a tuple of numbers (and booleans), where - # higher numbers (including `True`s) => more likely central - has_identifiers = bool(work_node['identifiers']) - has_contributor_info = bool(work_node['agent_relations']) - how_much_total_info = ( - len(work_node.attrs()) - + len(self.in_edges(work_node.id)) - + len(self.out_edges(work_node.id)) - ) - how_much_contributor_info = len(work_node['agent_relations']) - has_parent_work = any( - relation.type == 'ispartof' - for relation in work_node['outgoing_creative_work_relations'] - ) - return ( - has_identifiers, - has_contributor_info, - how_much_total_info, - how_much_contributor_info, - has_parent_work, - ) - - work_nodes = self.filter_by_concrete_type('abstractcreativework') - if work_nodes: - # get the work node with the most attrs+relations - work_nodes.sort(key=centrality_heuristic, 
reverse=True) - if ( - len(work_nodes) > 1 - and centrality_heuristic(work_nodes[0]) == centrality_heuristic(work_nodes[1]) - ): - raise MutableGraphError(f'cannot guess central node -- multiple candidates ({work_nodes[0].id}, {work_nodes[1].id})') - central_node = work_nodes[0] - self.central_node_id = central_node.id - - -class MutableNode: - """Convenience wrapper around a node in a MutableGraph. - """ - - def __new__(cls, graph, node_id, *args, **kwargs): - if node_id not in graph: - return graph.add_node(node_id, *args, **kwargs) - return super().__new__(cls) - - def __init__(self, graph, node_id, type_name=None, attrs=None): - self.__graph = graph - self.__id = node_id - self.__attrs = graph.nodes[node_id] - if type_name: - self.type = type_name - if attrs: - self.update(attrs) - - @property - def id(self): - return self.__id - - @property - def graph(self): - return self.__graph - - @property - def type(self): - return self.__attrs[PrivateNodeAttrs.TYPE] - - @type.setter - def type(self, value): - self.graph.changed = True - - schema_type = ShareV2Schema().get_type(value) - self.__attrs.update({ - PrivateNodeAttrs.TYPE: schema_type.name.lower(), - }) - - @property - def concrete_type(self): - return self.schema_type.concrete_type.lower() - - @property - def schema_type(self): - return ShareV2Schema().get_type(self.type) - - def attrs(self): - return { - k: v for k, v in self.__attrs.items() - if not isinstance(k, PrivateNodeAttrs) - } - - def relations(self, in_edges=True, jsonld=False): - relations = {} - for from_name, node in self.graph.named_out_edges(self.id).items(): - relations[from_name] = node.to_jsonld(ref=True) if jsonld else node - if in_edges: - for to_name, nodes in self.graph.named_in_edges(self.id).items(): - sorted_nodes = sorted(nodes, key=lambda n: n.id) - relations[to_name] = [n.to_jsonld(ref=True) for n in sorted_nodes] if jsonld else sorted_nodes - return relations - - def __getitem__(self, key): - """Get an attribute value or related node(s). - - key (str): Name of an attribute, outgoing named edge, or incoming named edge. - - If key is the name of a plain attribute in the SHARE schema, return that attribute's value. 
- If key is the name of an outgoing edge, return a MutableNode that edge points to - If key is the name of incoming edges, return a list of MutableNodes those edges come from - """ - field = resolve_field(self.type, key) - if field and field.is_relation and field.name != 'extra': - if field.relation_shape == RelationShape.MANY_TO_ONE: - return self.graph.resolve_named_out_edge(self.id, field.name) - if field.relation_shape == RelationShape.ONE_TO_MANY: - return self.graph.resolve_named_in_edges(self.id, field.name) - if field.relation_shape == RelationShape.MANY_TO_MANY: - m2m_related_nodes = self._resolve_many_to_many( - field.through_concrete_type, - field.incoming_through_relation, - field.outgoing_through_relation, - ) - is_reflexive = (field.related_concrete_type.lower() == self.concrete_type) - if is_reflexive: - # for a reflexive m2m, include nodes related in either direction - m2m_related_nodes.update(self._resolve_many_to_many( - field.through_concrete_type, - # outgoing/incoming swapped - field.outgoing_through_relation, - field.incoming_through_relation, - )) - return list(m2m_related_nodes) - - raise MutableGraphError('Only many-to-one, one-to-many, and non-reflexive many-to-many relations allowed') - return self.__attrs.get(field.name if field else key) - - def _resolve_many_to_many(self, through_concrete_type, incoming_through_relation, outgoing_through_relation): - incoming_edge_name = ShareV2Schema().get_field( - through_concrete_type, - incoming_through_relation - ).inverse_relation - - through_nodes = self.graph.resolve_named_in_edges(self.id, incoming_edge_name) - - return set( - self.graph.resolve_named_out_edge(through_node.id, outgoing_through_relation) - for through_node in through_nodes - ) - - def __setitem__(self, key, value): - """Set an attribute value or add an outgoing named edge. - - key (str): Name of an attribute or an outgoing edge. - - If key is the name of a plain attribute in the SHARE schema, set that attribute's value. - If key is the name of an outgoing edge, expect `value` to be a node ID or a MutableNode. Add an edge from this node to that one. - If key is the name of incoming edges, raise an error. - - If value is None, same as `del node[key]` - """ - self.graph.changed = True - - field = resolve_field(self.type, key) - field_name = field.name if field else key - - if value is None: - del self[field_name] - return - - if field and field.is_relation: - if field.relation_shape != RelationShape.MANY_TO_ONE: - raise MutableGraphError('Can set only many-to-one relations') - to_id = value.id if hasattr(value, 'id') else value - self.graph.remove_named_edge(self.id, field_name) - self.graph.add_named_edge(self.id, to_id, field_name, field.inverse_relation) - else: - self.__attrs[field_name] = value - - def __delitem__(self, key): - """Delete an attribute value or outgoing named edge. - - key (str): Name of an attribute or an outgoing edge. - - If key is the name of an attribute in the SHARE schema, delete that attribute from this node. - If key is the name of an outgoing edge, remove that edge. - If key is the name of incoming edges, raise an error. 
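- 
-        An illustrative sketch (field names assumed from the SHARE schema):
-        ```
-        del work_node['language']             # delete an attribute
-        del identifier_node['creative_work']  # delete a many-to-one edge
-        ```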
- """ - self.graph.changed = True - - field = resolve_field(self.type, key) - field_name = field.name if field else key - - if field and field.is_relation: - if field.relation_shape != RelationShape.MANY_TO_ONE: - raise MutableGraphError('Can delete only many-to-one relations') - self.graph.remove_named_edge(self.id, field_name) - elif field_name in self.__attrs: - del self.__attrs[field_name] - - def update(self, attrs): - for k, v in attrs.items(): - self[k] = v - - def delete(self, cascade=True): - """Remove this node from its graph. - - cascade (boolean): Also remove nodes with edges which point to this node. Default True. - """ - self.graph.changed = True - self.graph.remove_node(self.id, cascade) - self.__graph = None - - def to_jsonld(self, ref=False, in_edges=False): - ld_node = { - '@id': self.id, - '@type': self.type, - } - if not ref: - ld_node.update(self.relations(in_edges=in_edges, jsonld=True)) - ld_node.update(self.attrs()) - return ld_node - - def __eq__(self, other): - return isinstance(other, self.__class__) and other.graph is self.graph and other.id == self.id - - def __hash__(self): - return hash(self.id) - - def __str__(self): - return '<{} id({}) type({})>'.format(self.__class__.__name__, self.id, self.type) - __repr__ = __str__ diff --git a/share/util/iris.py b/share/util/iris.py deleted file mode 100644 index 792107bda..000000000 --- a/share/util/iris.py +++ /dev/null @@ -1,47 +0,0 @@ -from urllib.parse import urlparse - - -# TODO leave URNs alone, do scheme:authority:path instead of scheme://authority/path -URN_SCHEMES = frozenset({'urn', 'oai'}) - - -def parse(iri): - """Parse an IRI string into its constituent parts. - """ - scheme, _, remainder = iri.partition(':') - if scheme.lower() in URN_SCHEMES: - if remainder.startswith('//'): - # Handle our own brand of slashed up URNs - authority, _, remainder = remainder.lstrip('/').partition('/') - else: - # Technically, everything past 'urn:' is the path, but the next segment is usually an authority of some sort - authority, _, remainder = remainder.partition(':') - return { - 'scheme': scheme, - 'authority': authority, - 'path': '/{}'.format(remainder), - 'IRI': iri, - } - # If it doesn't have a URN scheme, assume it's a URL - parsed = urlparse(iri) - return { - 'scheme': parsed.scheme, - 'authority': parsed.netloc, - 'path': parsed.path, - 'query': parsed.query, - 'fragment': parsed.fragment, - 'IRI': iri, - } - - -def compose(scheme, authority, path, **kwargs): - """Build an IRI out of constituent parts. 
- """ - - return '{scheme}://{authority}{path}{query}{fragment}'.format( - scheme=scheme, - authority=authority, - path=path, - query='?{}'.format(kwargs['query']) if kwargs.get('query') else '', - fragment='#{}'.format(kwargs['fragment']) if kwargs.get('fragment') else '', - ) diff --git a/share/util/nameparser.py b/share/util/nameparser.py deleted file mode 100644 index 8a319cfdd..000000000 --- a/share/util/nameparser.py +++ /dev/null @@ -1,13 +0,0 @@ -from nameparser import HumanName as OriginalHumanName -from nameparser.config import Constants - -# Disable stripping emoji from names -# https://nameparser.readthedocs.io/en/latest/customize.html#don-t-remove-emojis - -constants = Constants() -constants.regexes.emoji = False - - -class HumanName(OriginalHumanName): - def __init__(self, *args, **kwargs): - super().__init__(*args, constants=constants, **kwargs) diff --git a/share/util/names.py b/share/util/names.py deleted file mode 100644 index e928517d0..000000000 --- a/share/util/names.py +++ /dev/null @@ -1,28 +0,0 @@ - -def build_name_from_parts(agent_node): - """construct some name from parts, making wild cultural assumptions - - @param agent_node: share.util.graph.MutableNode with concrete type 'abstractagent' - @returns string (possibly empty) - """ - # filter out falsy parts - name_parts = filter(None, [ - agent_node['given_name'], - agent_node['additional_name'], - agent_node['family_name'], - agent_node['suffix'], - ]) - return ' '.join(name_parts).strip() - - -def get_related_agent_name(relation_node): - """get the name to refer to a related agent - - @param relation_node: share.util.graph.MutableNode with concrete type 'abstractagentworkrelation' - @returns string (possibly empty) - """ - return ( - relation_node['cited_as'] - or relation_node['agent']['name'] - or build_name_from_parts(relation_node['agent']) - ) diff --git a/share/util/osf.py b/share/util/osf.py deleted file mode 100644 index 60e65b50b..000000000 --- a/share/util/osf.py +++ /dev/null @@ -1,39 +0,0 @@ -import re - -from django.conf import settings - -from share.models.ingest import Source -from share.util.graph import MutableGraph - - -def osf_sources(): - return Source.objects.filter( - canonical=True, - ).exclude( - name='org.arxiv', - ).exclude( - user__username=settings.APPLICATION_USERNAME, - ) - - -OSF_GUID_RE = re.compile(r'^https?://(?:[^.]+\.)?osf\.io/(?P<guid>[^/]+)/?$') - - -def get_guid_from_uri(uri: str): - match = OSF_GUID_RE.match(uri) - return match.group('guid') if match else None - - -def guess_osf_guid(mgraph: MutableGraph): - central_work = mgraph.get_central_node(guess=True) - if not central_work: - return None - - osf_guids = list(filter(bool, ( - get_guid_from_uri(identifier['uri']) - for identifier in central_work['identifiers'] - ))) - # if >1, too ambiguous - if len(osf_guids) == 1: - return osf_guids[0] - return None diff --git a/share/util/source_stat.py b/share/util/source_stat.py deleted file mode 100644 index 0f17bb46a..000000000 --- a/share/util/source_stat.py +++ /dev/null @@ -1,181 +0,0 @@ -import logging - -import requests -from furl import furl -from lxml import etree -import pendulum - -from share.models import SourceStat -from share.models import SourceConfig - -logger = logging.getLogger(__name__) - - -class SourceStatus: - - ACCEPTABLE_STATUS_CODES = (200, 401, 403) - - def __init__(self, config_id): - self.source_config = SourceConfig.objects.get(pk=config_id) - - def assert_no_exception(self, url, timeout=15.0): - try: - r = requests.get(url, timeout=timeout) - # 
catch any exception and log a warning - except Exception as e: - logger.warning('Exception received from source: %s', e) - return (None, e) - return (r, None) - - def get_source_stats(self): - base_url_config = self.source_config.base_url - response_elapsed_time = 0 - response_status_code = None - grade = 10 - - response, response_exception = self.assert_no_exception(base_url_config) - - if response is not None: - response_elapsed_time = response.elapsed.total_seconds() - response_status_code = response.status_code - if response_status_code not in self.ACCEPTABLE_STATUS_CODES or response_elapsed_time == 0: - grade = 0 - if response_elapsed_time > 1: - grade = 5 - - self.create_source_stat( - earliest_datestamp_config=str(self.source_config.earliest_date) if self.source_config.earliest_date else None, - base_url_config=base_url_config, - response_status_code=response_status_code, - response_elapsed_time=response_elapsed_time, - response_exception=response_exception, - grade=grade, - ) - - def create_source_stat(self, earliest_datestamp_source=None, - earliest_datestamps_match=True, base_url_source='', - base_urls_match=True, admin_note='', **kwargs): - SourceStat.objects.create( - config_id=self.source_config.id, - - earliest_datestamp_source=earliest_datestamp_source, - earliest_datestamp_config=kwargs.pop('earliest_datestamp_config'), - earliest_datestamps_match=earliest_datestamps_match, - - base_url_source=base_url_source, - base_url_config=kwargs.pop('base_url_config'), - base_urls_match=base_urls_match, - - response_status_code=kwargs.pop('response_status_code'), - response_elapsed_time=kwargs.pop('response_elapsed_time'), - response_exception=kwargs.pop('response_exception'), - - grade=kwargs.pop('grade'), - admin_note=admin_note, - ) - - -class OAISourceStatus(SourceStatus): - - NAMESPACES = { - 'dc': 'http://purl.org/dc/elements/1.1/', - 'ns0': 'http://www.openarchives.org/OAI/2.0/', - 'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/', - } - - # Known incorrect baseUrl: - INCORRECT_BASE_URLS = { - 'https://biblio.ugent.be/oai': 'Listed baseURL is their homepage.', - 'http://purr.purdue.edu/oaipmh': 'Listed baseURL is their homepage.', - 'https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi': 'Listed baseURL is incorrect.', - 'https://mla.hcommons.org/deposits/oai/': 'Listed baseURL is incorrect.', - 'http://oai.repec.org': 'Listed baseURL redirects.', - } - - # Known incorrect earliestDatestamp (all emailed): - INCORRECT_EARLIEST_DATESTAMP = { - 'edu.oaktrust.mods': 'Listed earliestDatestamp is the most recent datestamp.', - 'edu.scholarsarchiveosu.mods': 'Listed earliestDatestamp is 0011-01-01.', - 'edu.uwashington.mods': 'Listed earliestDatestamp is 2083-03-01.', - 'gov.nodc': 'Listed earliestDatestamp is 1996-10-09.', - 'org.philpapers': 'Listed earliestDatestamp is 1990-01-01T00:00:00Z.', - 'org.ttu.mods': 'Listed earliestDatestamp is 1989-05-01T05:00:00Z.', - 'edu.umich.mods': 'Listed earliestDatestamp is 1983-01-01T05:00:00Z.', - 'edu.citeseerx': 'Listed earliestDatestamp is 1970-01-01.', - 'br.pcurio': 'Listed earliestDatestamp is 1970-01-01.', - 'edu.vtech.mods': 'Listed earliestDatestamp is 1900-02-02T05:00:00Z.', - 'edu.icpsr': 'Listed earliestDatestamp is 01-01-1900', - 'pt.rcaap': 'Listed earliestDatestamp is 1900-01-01T00:00:00Z.', - 'com.nature': 'Listed earliestDatestamp is 1869-11-04.', - } - - def get_field_from_identify(self, response, field): - # TODO: record which sources are providing invalid XML, i.e., fail without recover=True - parsed = etree.fromstring(response.content, 
parser=etree.XMLParser(recover=True)) - return parsed.xpath('//ns0:Identify/ns0:{}'.format(field), namespaces=self.NAMESPACES)[0].text - - def get_source_stats(self): - base_url_config = self.source_config.base_url - base_url_source = '' - base_urls_match = False - earliest_datestamp_config = str(self.source_config.earliest_date) if self.source_config.earliest_date else None - earliest_datestamp_source = None - earliest_datestamps_match = False - response_elapsed_time = 0 - response_status_code = None - admin_note = '' - grade = 10 - - response, response_exception = self.assert_no_exception(furl(base_url_config).set({'verb': 'Identify'}).url) - - if response is not None: - response_elapsed_time = response.elapsed.total_seconds() - response_status_code = response.status_code - if response: - base_url_source = self.get_field_from_identify(response, 'baseURL') - # ignores http vs https - if len(base_url_source.split('://', 1)) > 1: - base_urls_match = base_url_source.split('://', 1)[1] == base_url_config.split('://', 1)[1] - else: - logger.warning('Source baseURL is improper: %s', base_url_source) - - if base_url_config in self.INCORRECT_BASE_URLS: - admin_note = self.INCORRECT_BASE_URLS[base_url_config] - if self.source_config.label in self.INCORRECT_EARLIEST_DATESTAMP: - admin_note = ' '.join(admin_note, self.INCORRECT_EARLIEST_DATESTAMP[self.source_config.label]) if admin_note else self.INCORRECT_EARLIEST_DATESTAMP[self.source_config.label] - - earliest_datestamp_identify = self.get_field_from_identify(response, 'earliestDatestamp') - earliest_datestamp_source = pendulum.parse(earliest_datestamp_identify).to_date_string() if earliest_datestamp_identify else None - earliest_datestamps_match = earliest_datestamp_config == earliest_datestamp_source - - if response_status_code not in self.ACCEPTABLE_STATUS_CODES or response_elapsed_time == 0: - grade = 0 - if response_elapsed_time > 1: - grade = 5 - if not earliest_datestamps_match: - if self.source_config.label in self.INCORRECT_EARLIEST_DATESTAMP: - grade = 5 - else: - grade = 0 - if not base_urls_match: - if base_url_config in self.INCORRECT_BASE_URLS: - grade = 5 - else: - grade = 0 - - self.create_source_stat( - earliest_datestamp_source=earliest_datestamp_source, - earliest_datestamp_config=earliest_datestamp_config, - earliest_datestamps_match=earliest_datestamps_match, - - base_url_source=base_url_source, - base_url_config=base_url_config, - base_urls_match=base_urls_match, - - response_status_code=response_status_code, - response_elapsed_time=response_elapsed_time, - response_exception=response_exception, - - grade=grade, - admin_note=admin_note, - ) diff --git a/tests/share/test_subject_synonyms.py b/tests/share/test_subject_synonyms.py deleted file mode 100644 index f049dcf4a..000000000 --- a/tests/share/test_subject_synonyms.py +++ /dev/null @@ -1,17 +0,0 @@ -import json -import yaml - -from django.conf import settings - - -def test_synonyms_valid(): - with open(settings.SUBJECTS_YAML) as f: - subjects = yaml.load(f, Loader=yaml.CLoader) - subject_names = set(s['name'] for s in subjects) - - with open(settings.SUBJECT_SYNONYMS_JSON) as f: - synonyms = json.load(f) - mapped_subjects = set(s for syns in synonyms.values() for s in syns) - - diff = mapped_subjects - subject_names - assert not diff From 5232fef397251af377bad25dc75d776196ba36cd Mon Sep 17 00:00:00 2001 From: abram axel booth <boothaa@gmail.com> Date: Wed, 5 Mar 2025 14:47:20 -0500 Subject: [PATCH 03/46] wip --- tests/api/test_feeds.py | 4 +- tests/conftest.py | 27 
+------------- tests/factories/__init__.py | 73 +------------------------------------ 3 files changed, 4 insertions(+), 100 deletions(-) diff --git a/tests/api/test_feeds.py b/tests/api/test_feeds.py index 218128baa..6ba452931 100644 --- a/tests/api/test_feeds.py +++ b/tests/api/test_feeds.py @@ -7,7 +7,7 @@ from share.metadata_formats.sharev2_elastic import ShareV2ElasticFormatter -from tests.factories import NormalizedDataFactory, RawDatumFactory +from tests.factories import RawDatumFactory from tests.share.normalize import factories as f @@ -22,7 +22,7 @@ class TestFeed: @pytest.fixture() - def fake_items(self, Graph): + def fake_items(self): records = [ Graph(f.CreativeWork( title=f'my fabulous work {i}', diff --git a/tests/conftest.py b/tests/conftest.py index 637e8a9f2..61eef76dd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,12 +12,11 @@ from oauth2_provider.models import AccessToken, Application -from share.models import NormalizedData, RawDatum +from share.models import RawDatum from share.models import ShareUser from share.models import SourceUniqueIdentifier from tests import factories -from tests.share.normalize.factories import GraphBuilder logger = logging.getLogger(__name__) @@ -109,30 +108,6 @@ def raw_data_id(raw_data): return raw_data.id -@pytest.fixture -def normalized_data(share_user): - normalized_data = NormalizedData(source=share_user, data={}) - normalized_data.save() - return normalized_data - - -@pytest.fixture -def normalized_data_id(normalized_data): - return normalized_data.id - - -@pytest.fixture -def Graph(): - return GraphBuilder() - - -@pytest.fixture -def ExpectedGraph(Graph): - def expected_graph(*args, **kwargs): - return Graph(*args, **kwargs, normalize_fields=True) - return expected_graph - - @contextlib.contextmanager def rolledback_transaction(loglabel): class ExpectedRollback(Exception): diff --git a/tests/factories/__init__.py b/tests/factories/__init__.py index 95c3c1fa6..ed5e649e6 100644 --- a/tests/factories/__init__.py +++ b/tests/factories/__init__.py @@ -1,10 +1,5 @@ -import datetime import hashlib import uuid -from unittest import mock - -import pkg_resources -import stevedore import factory from factory import fuzzy @@ -13,9 +8,6 @@ from project import celery_app -from share.harvest import BaseHarvester -from share.harvest.serialization import StringLikeSerializer -from share.util.extensions import Extensions from share import models as share_db from trove import models as trove_db @@ -31,26 +23,6 @@ class Meta: model = share_db.ShareUser -class NormalizedDataFactory(DjangoModelFactory): - data = {} - source = factory.SubFactory(ShareUserFactory) - - class Meta: - model = share_db.NormalizedData - - @classmethod - def _generate(cls, create, attrs): - normalized_datum = super()._generate(create, attrs) - - # HACK: allow overriding auto_now_add on created_at - created_at = attrs.pop('created_at', None) - if created_at is not None: - normalized_datum.created_at = created_at - normalized_datum.save() - - return normalized_datum - - class SourceFactory(DjangoModelFactory): name = factory.Sequence(lambda x: '{}{}'.format(fake.name(), x)) long_title = factory.Sequence(lambda x: '{}{}'.format(fake.sentence(), x)) @@ -70,41 +42,17 @@ def __call__(self, *args, **kwargs): return (x for x in self) -UNSET = object() # to distinguish unset value - - class SourceConfigFactory(DjangoModelFactory): label = factory.Faker('sentence') base_url = factory.Faker('url') harvest_after = '00:00' source = factory.SubFactory(SourceFactory) - 
harvester_key = UNSET + harvester_key = None transformer_key = None class Meta: model = share_db.SourceConfig - @factory.post_generation - def make_harvester(self, create, extracted, **kwargs): - if self.harvester_key is UNSET: - self.harvester_key = fake.word() - stevedore.ExtensionManager('share.harvesters') # Force extensions to load - - class MockHarvester(BaseHarvester): - KEY = self.harvester_key - VERSION = 1 - SERIALIZER_CLASS = StringLikeSerializer - - _do_fetch = ListGenerator() - - mock_entry = mock.create_autospec(pkg_resources.EntryPoint, instance=True) - mock_entry.name = self.harvester_key - mock_entry.module_name = self.harvester_key - mock_entry.resolve.return_value = MockHarvester - - stevedore.ExtensionManager.ENTRY_POINT_CACHE['share.harvesters'].append(mock_entry) - Extensions._load_namespace('share.harvesters') - class SourceUniqueIdentifierFactory(DjangoModelFactory): identifier = factory.Faker('sentence') @@ -135,18 +83,6 @@ def _generate(cls, create, attrs): return raw_datum -class HarvestJobFactory(DjangoModelFactory): - source_config = factory.SubFactory(SourceConfigFactory) - start_date = factory.Faker('date_object') - end_date = factory.LazyAttribute(lambda job: job.start_date + datetime.timedelta(days=1)) - - source_config_version = factory.SelfAttribute('source_config.version') - harvester_version = 1 - - class Meta: - model = share_db.HarvestJob - - class CeleryTaskResultFactory(DjangoModelFactory): task_id = factory.Sequence(lambda x: uuid.uuid4()) task_name = fuzzy.FuzzyChoice(list(celery_app.tasks.keys())) @@ -156,13 +92,6 @@ class Meta: model = share_db.CeleryTaskResult -class FormattedMetadataRecordFactory(DjangoModelFactory): - suid = factory.SubFactory(SourceUniqueIdentifierFactory) - - class Meta: - model = share_db.FormattedMetadataRecord - - ### # trove models From 134607334a150fc59a5e0563fbc48c94e8ad4df4 Mon Sep 17 00:00:00 2001 From: abram axel booth <boothaa@gmail.com> Date: Thu, 6 Mar 2025 13:27:56 -0500 Subject: [PATCH 04/46] wip --- tests/api/test_feeds.py | 606 ++++++++++++- tests/share/harvesters/test_oai.py | 66 -- .../test_socialscienceregistry_harvester.py | 30 - .../test_swbiodiversity_harvester.py | 124 --- tests/share/metadata_formats/base.py | 90 -- tests/share/metadata_formats/conftest.py | 385 -------- .../metadata_formats/test_oai_dc_formatter.py | 98 -- .../test_sharev2_elastic_formatter.py | 268 ------ tests/share/normalize/factories.py | 425 --------- tests/share/normalize/test_harness.py | 166 ---- tests/share/normalize/test_json.py | 61 -- tests/share/normalize/test_links.py | 840 ------------------ tests/share/normalize/test_models.py | 496 ----------- tests/share/normalize/test_v1.py | 202 ----- tests/share/normalize/test_xml.py | 111 --- tests/share/regulate/steps/test_bases.py | 13 - .../regulate/steps/test_block_extra_values.py | 55 -- .../share/regulate/steps/test_deduplicate.py | 66 -- .../regulate/steps/test_normalize_iris.py | 93 -- tests/share/regulate/test_regulator.py | 63 -- tests/share/schema/test_schema.py | 241 ----- tests/share/schema/test_schema_loader.py | 237 ----- tests/share/test_harvester.py | 171 ---- tests/share/transformers/test_mods.py | 41 - tests/share/transformers/test_oai_dc.py | 58 -- .../test_socialscienceregistry_transformer.py | 65 -- .../test_swbiodiversity_transformer.py | 81 -- 27 files changed, 567 insertions(+), 4585 deletions(-) delete mode 100644 tests/share/harvesters/test_oai.py delete mode 100644 tests/share/harvesters/test_socialscienceregistry_harvester.py delete mode 100644 
tests/share/harvesters/test_swbiodiversity_harvester.py delete mode 100644 tests/share/metadata_formats/base.py delete mode 100644 tests/share/metadata_formats/conftest.py delete mode 100644 tests/share/metadata_formats/test_oai_dc_formatter.py delete mode 100644 tests/share/metadata_formats/test_sharev2_elastic_formatter.py delete mode 100644 tests/share/normalize/factories.py delete mode 100644 tests/share/normalize/test_harness.py delete mode 100644 tests/share/normalize/test_json.py delete mode 100644 tests/share/normalize/test_links.py delete mode 100644 tests/share/normalize/test_models.py delete mode 100644 tests/share/normalize/test_v1.py delete mode 100644 tests/share/normalize/test_xml.py delete mode 100644 tests/share/regulate/steps/test_bases.py delete mode 100644 tests/share/regulate/steps/test_block_extra_values.py delete mode 100644 tests/share/regulate/steps/test_deduplicate.py delete mode 100644 tests/share/regulate/steps/test_normalize_iris.py delete mode 100644 tests/share/regulate/test_regulator.py delete mode 100644 tests/share/schema/test_schema.py delete mode 100644 tests/share/schema/test_schema_loader.py delete mode 100644 tests/share/test_harvester.py delete mode 100644 tests/share/transformers/test_mods.py delete mode 100644 tests/share/transformers/test_oai_dc.py delete mode 100644 tests/share/transformers/test_socialscienceregistry_transformer.py delete mode 100644 tests/share/transformers/test_swbiodiversity_transformer.py diff --git a/tests/api/test_feeds.py b/tests/api/test_feeds.py index 6ba452931..6a39480e2 100644 --- a/tests/api/test_feeds.py +++ b/tests/api/test_feeds.py @@ -1,17 +1,9 @@ -import json from unittest import mock import pytest -import faker from lxml import etree -from share.metadata_formats.sharev2_elastic import ShareV2ElasticFormatter -from tests.factories import RawDatumFactory -from tests.share.normalize import factories as f - - -fake = faker.Faker() NAMESPACES = {'atom': 'http://www.w3.org/2005/Atom'} @@ -23,46 +15,17 @@ class TestFeed: @pytest.fixture() def fake_items(self): - records = [ - Graph(f.CreativeWork( - title=f'my fabulous work {i}', - identifiers=[f.WorkIdentifier()], - agent_relations=[ - f.Creator(), - f.Creator(), - ], - )).to_jsonld() - for i in range(11) - ] - normds = [ - NormalizedDataFactory( - data=record, - raw=RawDatumFactory( - datum='', - ), - ) - for record in records - ] - formatter = ShareV2ElasticFormatter() - formatted_items = [ - formatter.format(normd) - for normd in normds - ] - json_items = [ - json.loads(formatted_item) - for formatted_item in formatted_items - ] with mock.patch('api.views.feeds.index_strategy.get_strategy_for_sharev2_search') as mock_get_for_searching: mock_strategy = mock_get_for_searching.return_value mock_strategy.pls_handle_search__passthru.return_value = { 'hits': { 'hits': [ {'_source': item, '_id': item['id']} - for item in json_items + for item in _FAKE_ITEMS ], }, } - yield json_items + yield _FAKE_ITEMS def test_atom(self, client, fake_items): resp = client.get('/api/v2/feeds/atom') @@ -91,3 +54,568 @@ def test_gone(self, client, fake_items): ): resp = client.get(feed_url) assert resp.status_code == expected_status + + +_FAKE_ITEMS = [ + { + "id": "2011F-F56-334", + "sources": ["Why hospital international impact car.22"], + "source_config": "Option appear him.", + "source_unique_id": "Family measure once seat religious.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.517102+00:00", + "date_modified": 
"2025-03-05T19:52:17.529787+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 0", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "201FE-A32-223", + "sources": ["Record material white piece.24"], + "source_config": "Tell around benefit receive this.", + "source_unique_id": "Seven color six society daughter home born.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.552298+00:00", + "date_modified": "2025-03-05T19:52:17.563387+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 1", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "20089-44F-D12", + "sources": ["Learn its quickly while develop traditional politics office.26"], + "source_config": "To another series better need away.", + "source_unique_id": "Lot popular energy worry up group company medical.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.590476+00:00", + "date_modified": "2025-03-05T19:52:17.602544+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. 
Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 2", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "20167-F2B-C01", + "sources": ["Must else live my toward eight we major.28"], + "source_config": "Weight hand without through sure.", + "source_unique_id": "Develop join the listen crime war.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.631057+00:00", + "date_modified": "2025-03-05T19:52:17.644379+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 3", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "20246-A07-AF0", + "sources": ["Street alone truth Mr still bar.30"], + "source_config": "Few water matter here miss bag defense.", + "source_unique_id": "Reduce personal begin series bank movement.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.669044+00:00", + "date_modified": "2025-03-05T19:52:17.681888+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. 
Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 4", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "200D1-425-5DF", + "sources": ["Level threat culture think guy success join.32"], + "source_config": "Before stay travel outside event walk.", + "source_unique_id": "Group learn public common some very big.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.704433+00:00", + "date_modified": "2025-03-05T19:52:17.717365+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 5", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "201AF-F01-4CE", + "sources": ["Back heavy animal.34"], + "source_config": "Direction east certainly we bring blue wide.", + "source_unique_id": "Measure field year its.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.740574+00:00", + "date_modified": "2025-03-05T19:52:17.754145+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. 
Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 6", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "2003A-91E-FBD", + "sources": ["Hope outside cup follow nearly natural.36"], + "source_config": "Hotel study team imagine raise raise.", + "source_unique_id": "Time reduce speak child.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.780641+00:00", + "date_modified": "2025-03-05T19:52:17.797155+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 7", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "20119-3FA-EAC", + "sources": ["Subject manage trip address.38"], + "source_config": "Food fly raise drop though ask.", + "source_unique_id": "Know reach image town.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.824233+00:00", + "date_modified": "2025-03-05T19:52:17.836944+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. 
Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 8", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "201F7-ED6-D9B", + "sources": ["Senior think myself between travel get cut.40"], + "source_config": "Amount word something long on alone level.", + "source_unique_id": "Operation kind which.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.859089+00:00", + "date_modified": "2025-03-05T19:52:17.871550+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 9", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, + { + "id": "20082-8F4-88A", + "sources": ["Size alone raise draw ok common.42"], + "source_config": "Writer two source newspaper bank lot figure.", + "source_unique_id": "Civil bill imagine finally.", + "type": "creative work", + "types": ["creative work"], + "date_created": "2025-03-05T19:52:17.895226+00:00", + "date_modified": "2025-03-05T19:52:17.908008+00:00", + "date_published": "2007-02-16T03:39:48Z", + "date_updated": "2003-09-12T09:37:20Z", + "description": "Discover pattern family ok face. 
Security whom during attention set political cell bad.", + "language": "sd", + "retracted": False, + "title": "my fabulous work 10", + "date": "2007-02-16T03:39:48Z", + "affiliations": [], + "contributors": ["Mcgee-Zimmerman", "Young, Hall and Jensen"], + "funders": [], + "publishers": [], + "hosts": [], + "identifiers": ["http://wagner.com/"], + "tags": [], + "subjects": [], + "subject_synonyms": [], + "lists": { + "affiliations": [], + "contributors": [ + { + "type": "institution", + "types": ["institution", "organization", "agent"], + "name": "Mcgee-Zimmerman", + "identifiers": [], + "relation": "creator", + "cited_as": "Mcgee-Zimmerman", + }, + { + "type": "organization", + "types": ["organization", "agent"], + "name": "Young, Hall and Jensen", + "identifiers": [], + "relation": "creator", + "cited_as": "Young, Hall and Jensen", + }, + ], + "funders": [], + "publishers": [], + "hosts": [], + "lineage": [], + }, + }, +] diff --git a/tests/share/harvesters/test_oai.py b/tests/share/harvesters/test_oai.py deleted file mode 100644 index 3d67e65f3..000000000 --- a/tests/share/harvesters/test_oai.py +++ /dev/null @@ -1,66 +0,0 @@ -import pytest -from unittest import mock - -from lxml import etree - -from share.harvesters.oai import OAIHarvester - -from tests import factories - - -@pytest.mark.django_db -class TestOAIHarvester: - - OAI_DC_RECORD = etree.fromstring(''' - <record xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" - xmlns:dc="http://purl.org/dc/elements/1.1/" - xmlns="http://www.openarchives.org/OAI/2.0/" - xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd" - xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> - <header> - <identifier>oai:philpapers.org/rec/SILGGG</identifier> - <datestamp>2017-01-29T15:25:15Z</datestamp> - </header> - <metadata> - <oai_dc:dc xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"> - <dc:title>Good government, Governance and Human Complexity. Luigi Einaudi’s Legacy and Contemporary Society</dc:title> - <dc:type>info:eu-repo/semantics/book</dc:type> - <dc:creator>Silvestri, Paolo</dc:creator> - <dc:creator>Heritier, Paolo</dc:creator> - <dc:subject>Philosophy</dc:subject> - <dc:description>The book presents an interdisciplinary exploration aimed at renewing interest in Luigi Einaudi’s search for “good government”, broadly understood as “good society”. 
Prompted by the Einaudian quest, the essays - exploring philosophy of law, economics, politics and epistemology - develop the issue of good government in several forms, including the relationship between public and private, public governance, the question of freedom and the complexity of the human in contemporary societies.</dc:description>
-            <dc:date>2012</dc:date>
-            <dc:identifier>https://philpapers.org/rec/SILGGG</dc:identifier>
-            <dc:language>en</dc:language>
-        </oai_dc:dc>
-    </metadata>
-    <about>
-        <rights xmlns="http://www.openarchives.org/OAI/2.0/rights/"
-                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-                xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/rights/ http://www.openarchives.org/OAI/2.0/rights.xsd">
-            <rightsReference xmlns="">https://philpapers.org/help/terms.html</rightsReference>
-        </rights>
-    </about>
-</record>
-''')
-
-    def test_duplicate_resumption_tokens(self, monkeypatch):
-        harvester = OAIHarvester(factories.SourceConfigFactory(harvester_kwargs={'metadata_prefix': 'oai_dc'}))
-        monkeypatch.setattr(harvester, 'fetch_page', mock.Mock(return_value=([self.OAI_DC_RECORD], 'token')))
-
-        records = []
-        with pytest.raises(ValueError) as e:
-            for x in harvester.fetch_records(''):
-                records.append(x)
-
-        assert len(records) == 1
-        assert e.value.args == ('Found duplicate resumption token "token" from {!r}'.format(harvester), )
-
-    def test_resumption_tokens(self, monkeypatch):
-        harvester = OAIHarvester(factories.SourceConfigFactory(harvester_kwargs={'metadata_prefix': 'oai_dc'}))
-        monkeypatch.setattr(harvester, 'fetch_page', mock.Mock(side_effect=(
-            ([self.OAI_DC_RECORD], 'token'),
-            ([self.OAI_DC_RECORD], None),
-        )))
-
-        assert len(list(harvester.fetch_records(''))) == 2
diff --git a/tests/share/harvesters/test_socialscienceregistry_harvester.py b/tests/share/harvesters/test_socialscienceregistry_harvester.py
deleted file mode 100644
index 5c4705f18..000000000
--- a/tests/share/harvesters/test_socialscienceregistry_harvester.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from datetime import timedelta
-
-from httpretty import httpretty
-import pendulum
-import pytest
-
-from share.models import SourceConfig
-
-csv_response = '''...first row is always ignored, it contains the columns' titles
-sample2,url2,"{last_update_date}",2017-05-02 16:17:17 -0400,2013-05-21,AEARCTR-0000005,"David James, david@gmail.com",completed,2013-01-26,2014-05-31,"[""electoral"", """"]","","abstract2",2013-03-02,2013-03-07,"information2",See pre-analysis plan.,"Treatment was randomly assigned at the group level, in a nationwide sample.","",Randomization done using Stata.,Individual,N/A,2500 individuals/1600 individuals,N/A,"MDE=5% change in perceived leakage, sd=28.8667, power=0.80, alpha=0.05",Public,This section is unavailable to the public.,,,"",,"","","",info,,"",,"","",
-'''.format(last_update_date=pendulum.today().strftime('%B %d, %Y'))
-
-
-@pytest.mark.django_db
-def test_AEA_harvester():
-    httpretty.enable()
-    httpretty.allow_net_connect = False
-    config = SourceConfig.objects.get(label='org.socialscienceregistry')
-    url = config.base_url + '/trials/search.csv'
-    harvester = config.get_harvester()
-
-    httpretty.register_uri(httpretty.GET, url,
-                           body=csv_response, content_type='text/html')
-    start = pendulum.now() - timedelta(days=3)
-    end = pendulum.now()
-    result = harvester._do_fetch(start, end)
-    for data in result:
-        assert data[0] == 'AEARCTR-0000005'
-        assert len(data[1]['record']) == 41
-    httpretty.disable()
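For reference, the harvester tests deleted in this patch all follow one pattern: stub HTTP with httpretty, run the harvester over a date range, and assert on the yielded (identifier, datum) tuples. A minimal sketch of that pattern follows; the 'example.source' label, the '/records.csv' endpoint, and the CSV layout are hypothetical stand-ins, not anything in this repo, and the exact shape of each yielded datum depends on the harvester under test:

    from datetime import timedelta

    import pendulum
    import pytest
    from httpretty import httpretty

    from share.models import SourceConfig


    @pytest.mark.django_db
    def test_example_harvester():
        httpretty.enable()
        httpretty.allow_net_connect = False  # fail fast if anything hits the real network
        config = SourceConfig.objects.get(label='example.source')  # hypothetical source config
        harvester = config.get_harvester()
        # Serve a canned CSV body so the harvester's fetch never leaves the process
        httpretty.register_uri(
            httpretty.GET,
            config.base_url + '/records.csv',  # hypothetical endpoint
            body='id,title\nrecord-1,Example title\n',
            content_type='text/csv',
        )
        start = pendulum.now() - timedelta(days=3)
        end = pendulum.now()
        # _do_fetch yields (identifier, datum) tuples, as the deleted tests above assert
        for identifier, datum in harvester._do_fetch(start, end):
            assert identifier == 'record-1'
        httpretty.disable()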
diff --git a/tests/share/harvesters/test_swbiodiversity_harvester.py b/tests/share/harvesters/test_swbiodiversity_harvester.py deleted file mode 100644 index 81fa8401c..000000000 --- a/tests/share/harvesters/test_swbiodiversity_harvester.py +++ /dev/null @@ -1,124 +0,0 @@ -from datetime import timedelta
-
-from furl import furl
-from httpretty import httpretty, httprettified
-import pendulum
-import pytest
-
-from share.models import SourceConfig
-
-main_page = '''
-<html>
-<head>
-    <title>SEINet - Arizona Chapter Collection Profiles</title>
-</head>
-<body>
-    <h1>SEINet - Arizona Chapter Collections</h1>
-    <div>
-        Select a collection to see full details.
-    </div>
-    <div>
-        <h3>
-            <a href="collprofiles.php?collid=223">
-                A. Michael Powell Herbarium
-            </a>
-        </h3>
-        <div>Sample description</div>
-        <div>
-            Contact:
-            Test Author (author@email.com)
-        </div>
-    </div>
-</body>
-</html>
-'''
-
-collection_detail = '''
-<div>
-    <h1>A. Michael Powell Herbarium (SRSC)</h1>
-    <div>
-        Sample description
-    </div>
-    <div>
-        Contact: Test Author (author@email.com)
-    </div>
-    <div>
-        Collection Type: Preserved Specimens
-    </div>
-    <div>
-        Management: Data snapshot of local collection database
-        Last Update: 1 October 2016
-    </div>
-    <div>
-        Rights Holder: Sul Ross University
-    </div>
-    <div>
-        Collection Statistics:
-        <ul>
-            <li>4,868 specimen records</li>
-            <li>1,195 (25%) georeferenced</li>
-            <li>2,954 (61%) with images</li>
-            <li>2,849 (59%) identified to species</li>
-            <li>104 families</li>
-            <li>361 genera</li>
-            <li>661 species</li>
-            <li>762 total taxa (including subsp. and var.)</li>
-        </ul>
-    </div>
-</div>
-'''
-
-collection_page = f'''
-<html>
-<head>
-    <title>SEINet - Arizona Chapter A. Michael Powell Herbarium Collection Profiles</title>
-</head>
-<body>
-{collection_detail}
- - -''' - - -@pytest.mark.django_db -@httprettified -def test_swbiodiversity_harvester(): - httpretty.enable() - httpretty.allow_net_connect = False - - config = SourceConfig.objects.get(label=('org.swbiodiversity')) - url = config.harvester_kwargs['list_url'] - harvester = config.get_harvester() - - httpretty.register_uri(httpretty.GET, url, - body=main_page, content_type='text/html', match_querystring=True) - collection = furl(url) - collection.args['collid'] = 223 - httpretty.register_uri(httpretty.GET, collection.url, - body=collection_page, content_type='text/html', match_querystring=True) - start = pendulum.now() - timedelta(days=3) - end = pendulum.now() - results = harvester.fetch_date_range(start, end) - for result in results: - assert result.identifier == collection.url - assert result.datum.split() == collection_detail.split() - - httpretty.disable() diff --git a/tests/share/metadata_formats/base.py b/tests/share/metadata_formats/base.py deleted file mode 100644 index 7b6aa5807..000000000 --- a/tests/share/metadata_formats/base.py +++ /dev/null @@ -1,90 +0,0 @@ -import pytest - -from share.models.core import FormattedMetadataRecord -from share.util.extensions import Extensions - -from tests import factories -from .conftest import FORMATTER_TEST_INPUTS - - -@pytest.mark.usefixtures('nested_django_db') -class BaseMetadataFormatterTest: - - ####### override these things ####### - - # formatter key, as registered in setup.py - formatter_key = None - - # dictionary with the same keys as `FORMATTER_TEST_INPUTS`, mapping to values - # that `assert_formatter_outputs_equal` will understand - expected_outputs = {} - - def assert_formatter_outputs_equal(self, actual_output, expected_output): - """raise AssertionError if the two outputs aren't equal - - @param actual_output (str): return value of the formatter's `.format()` method - @param expected_output: corresponding value from this class's `expected_outputs` dictionary - """ - raise NotImplementedError - - ####### don't override anything else ####### - - @pytest.fixture(scope='session', autouse=True) - def _sanity_check(self): - assert FORMATTER_TEST_INPUTS.keys() == self.expected_outputs.keys(), f'check the test class\'s `expected_outputs` matches {__name__}.FORMATTER_TEST_INPUTS' - - @pytest.fixture(scope='class') - def formatter(self): - return Extensions.get('share.metadata_formats', self.formatter_key)() - - @pytest.fixture(scope='class') - def expected_output(self, _test_key): - return self.expected_outputs[_test_key] - - @pytest.fixture(scope='class') - def source(self, formatter_test_input, class_scoped_django_db, request): - print(f'>>> source ({request.node})') - return factories.SourceFactory(long_title=formatter_test_input['source_name']) - - @pytest.fixture(scope='class') - def source_config(self, source, formatter_test_input, class_scoped_django_db): - return factories.SourceConfigFactory( - label=formatter_test_input['source_config_label'], - source=source, - ) - - @pytest.fixture(scope='class') - def suid(self, source_config, formatter_test_input, class_scoped_django_db): - return factories.SourceUniqueIdentifierFactory( - id=formatter_test_input['suid_id'], - identifier=formatter_test_input['suid_value'], - source_config=source_config, - ) - - @pytest.fixture(scope='class') - def normalized_datum(self, suid, source, formatter_test_input, class_scoped_django_db): - return factories.NormalizedDataFactory( - raw=factories.RawDatumFactory( - **formatter_test_input['raw_datum_kwargs'], - suid=suid, - ), - 
**formatter_test_input['normalized_datum_kwargs'], - source__source=source, - ) - - def test_formatter(self, formatter, normalized_datum, expected_output): - actual_output = formatter.format(normalized_datum) - self.assert_formatter_outputs_equal(actual_output, expected_output) - - def test_save_formatted_records(self, normalized_datum, expected_output): - saved_records = FormattedMetadataRecord.objects.save_formatted_records( - suid=normalized_datum.raw.suid, - record_formats=[self.formatter_key], - normalized_datum=normalized_datum, - ) - if expected_output is None: - assert len(saved_records) == 0 - else: - assert len(saved_records) == 1 - actual_output = saved_records[0].formatted_metadata - self.assert_formatter_outputs_equal(actual_output, expected_output) diff --git a/tests/share/metadata_formats/conftest.py b/tests/share/metadata_formats/conftest.py deleted file mode 100644 index dbcec3175..000000000 --- a/tests/share/metadata_formats/conftest.py +++ /dev/null @@ -1,385 +0,0 @@ -import dateutil - -import pytest - - -FORMATTER_TEST_INPUTS = { - 'mycorrhizas': { - 'suid_id': 7, - 'suid_value': 'oof', - 'source_name': 'SomeSource', - 'source_config_label': 'foo', - 'raw_datum_kwargs': { - 'date_created': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), - }, - 'normalized_datum_kwargs': { - 'created_at': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), - 'data': { - '@graph': [ - {'@id': '_:cfed87cc7294471eac2b67d9ce92f60b', '@type': 'person', 'given_name': 'Suzanne', 'family_name': 'Simard', 'identifiers': [], 'related_agents': []}, - {'@id': '_:c786ef414acb423f878522690453a6b8', '@type': 'creator', 'agent': {'@id': '_:cfed87cc7294471eac2b67d9ce92f60b', '@type': 'person'}, 'cited_as': 'Suzanne Simard', 'order_cited': 0, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:2afb5767c79c47c9ab6b87c7d5b3aa0a', '@type': 'person', 'given_name': 'Mary', 'family_name': 'Austi', 'identifiers': [], 'related_agents': []}, - {'@id': '_:44ec4e74e8ae487cbd86abcde5c2a075', '@type': 'creator', 'agent': {'@id': '_:2afb5767c79c47c9ab6b87c7d5b3aa0a', '@type': 'person'}, 'cited_as': 'Mary Austi', 'order_cited': 1, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:acfbb4f3c8314771ab718e1f42dead89', 'name': 'InTech', '@type': 'organization', 'identifiers': []}, - {'@id': '_:e0fdb4b7b6194b699078f26a799cd232', '@type': 'publisher', 'agent': {'@id': '_:acfbb4f3c8314771ab718e1f42dead89', '@type': 'organization'}, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', 'uri': 'http://dx.doi.org/10.5772/9813', '@type': 'workidentifier', 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:de04f3a34eb047e98891662b5345afd9', 'tags': [], '@type': 'creativework', 'extra': {'type': 'book-chapter', 'member': 'http://id.crossref.org/member/3774', 'titles': ['The Role of Mycorrhizas in Forest Soil Stability with Climate Change'], 'date_created': '2012-03-29T07:53:20+00:00', 'date_published': {'date_parts': [[2010, 8, 17]]}, 'container_title': ['Climate Change and Variability'], 'published_online': {'date_parts': [[2010, 8, 17]]}}, 'title': 'The Role of Mycorrhizas in Forest Soil Stability with Climate Change', 'identifiers': [{'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', '@type': 'workidentifier'}], 'date_updated': '2017-03-31T05:39:48+00:00', 'related_agents': 
[{'@id': '_:c786ef414acb423f878522690453a6b8', '@type': 'creator'}, {'@id': '_:44ec4e74e8ae487cbd86abcde5c2a075', '@type': 'creator'}, {'@id': '_:e0fdb4b7b6194b699078f26a799cd232', '@type': 'publisher'}]}, - ], - '@context': {} - }, - }, - }, - 'no-names-only-name-parts': { - 'suid_id': 8, - 'suid_value': 'rab', - 'source_name': 'SameSource', - 'source_config_label': 'bar', - 'raw_datum_kwargs': { - 'date_created': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), - }, - 'normalized_datum_kwargs': { - 'created_at': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), - 'data': { - '@graph': [ - {'@id': '_:cfed87cc7294471eac2b67d9ce92f60b', '@type': 'person', 'given_name': 'Suzanne', 'family_name': 'Simard', 'identifiers': [], 'related_agents': []}, - {'@id': '_:c786ef414acb423f878522690453a6b8', '@type': 'creator', 'agent': {'@id': '_:cfed87cc7294471eac2b67d9ce92f60b', '@type': 'person'}, 'order_cited': 0, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:2afb5767c79c47c9ab6b87c7d5b3aa0a', '@type': 'person', 'given_name': 'Mary', 'family_name': 'Austi', 'identifiers': [], 'related_agents': []}, - {'@id': '_:44ec4e74e8ae487cbd86abcde5c2a075', '@type': 'creator', 'agent': {'@id': '_:2afb5767c79c47c9ab6b87c7d5b3aa0a', '@type': 'person'}, 'order_cited': 1, 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', 'uri': 'http://dx.doi.org/10.5772/9813', '@type': 'workidentifier', 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:de04f3a34eb047e98891662b5345afd9', 'tags': [], '@type': 'creativework', 'extra': {'type': 'book-chapter', 'member': 'http://id.crossref.org/member/3774', 'titles': ['The Role of Mycorrhizas in Forest Soil Stability with Climate Change'], 'date_created': '2012-03-29T07:53:20+00:00', 'date_published': {'date_parts': [[2010, 8, 17]]}, 'container_title': ['Climate Change and Variability'], 'published_online': {'date_parts': [[2010, 8, 17]]}}, 'title': 'The Role of Mycorrhizas in Forest Soil Stability with Climate Change', 'identifiers': [{'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', '@type': 'workidentifier'}], 'date_updated': '2017-03-31T05:39:48+00:00', 'related_agents': [{'@id': '_:c786ef414acb423f878522690453a6b8', '@type': 'creator'}, {'@id': '_:44ec4e74e8ae487cbd86abcde5c2a075', '@type': 'creator'}, {'@id': '_:e0fdb4b7b6194b699078f26a799cd232', '@type': 'publisher'}]}, - ], - '@context': {} - }, - }, - }, - 'with-is_deleted': { - 'suid_id': 57, - 'suid_value': 'zab', - 'source_name': 'foo', - 'source_config_label': 'baz', - 'raw_datum_kwargs': {}, - 'normalized_datum_kwargs': { - 'data': { - '@graph': [ - {'@id': '_:8ae1b46cd2f341cb968fbf76c9a7f345', 'uri': 'http://dx.doi.org/10.5772/9813', '@type': 'workidentifier', 'creative_work': {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework'}}, - {'@id': '_:de04f3a34eb047e98891662b5345afd9', '@type': 'creativework', 'is_deleted': True}, - ], - }, - }, - }, - 'with-subjects': { - 'suid_id': 123, - 'suid_value': 'xuq', - 'source_name': 'osf reg', - 'source_config_label': 'subj', - 'raw_datum_kwargs': { - 'date_created': dateutil.parser.isoparse('2020-02-02T20:20:02.02+00:00'), - }, - 'normalized_datum_kwargs': { - 'created_at': dateutil.parser.isoparse('2020-02-02T20:20:02.02+00:00'), - 'data': { - '@graph': [ - {'@id': '_:d4723d06-063b-4b62-816b-45ae45356991', 'name': 'Some Rando', '@type': 'person'}, - 
{ - '@id': '_:0683a366-dce6-439d-8992-e96caf0c9d27', - 'uri': 'http://staging.osf.io/rando/', - 'host': 'staging.osf.io', - '@type': 'agentidentifier', - 'agent': { - '@id': '_:d4723d06-063b-4b62-816b-45ae45356991', - '@type': 'person' - }, - 'scheme': 'http' - }, - { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration', - 'title': 'Assorted chair', - 'withdrawn': False, - 'is_deleted': False, - 'date_published': '2019-01-23T20:34:21.633684+00:00', - 'registration_type': 'Open-Ended Registration' - }, - { - '@id': '_:759c7f4d-a0ba-42d3-aaa0-69ea11cc3cc7', - 'uri': 'http://staging.osf.io/chair/', - 'host': 'staging.osf.io', - '@type': 'workidentifier', - 'scheme': 'http', - 'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - } - }, - { - '@id': '_:79f1833e-273f-453e-9f33-8e41b4c06feb', - 'name': 'Wassamatter University', - '@type': 'institution' - }, - { - '@id': '_:98fec91a-57d5-4aac-828e-05cf53f8102c', - '@type': 'agentworkrelation', - 'agent': { - '@id': '_:79f1833e-273f-453e-9f33-8e41b4c06feb', - '@type': 'institution' - }, - 'cited_as': 'Wassamatter University', - 'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - } - }, - { - '@id': '_:bf8c6e89-2889-4b84-9e66-5d99843d4be4', - '@type': 'isaffiliatedwith', - 'related': { - '@id': '_:79f1833e-273f-453e-9f33-8e41b4c06feb', - '@type': 'institution' - }, - 'subject': { - '@id': '_:d4723d06-063b-4b62-816b-45ae45356991', - '@type': 'person' - } - }, - { - '@id': '_:c3ced6d4-9f80-4883-9a2c-8823cd9d3772', - 'uri': 'mailto:rando@example.com', - 'host': 'example.com', - '@type': 'agentidentifier', - 'agent': { - '@id': '_:d4723d06-063b-4b62-816b-45ae45356991', - '@type': 'person' - }, - 'scheme': 'mailto' - }, - { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-2d6c52372a6a', - '@type': 'registration', - 'title': 'Miscellaneous department', - }, - { - '@id': '_:ee56a463-dcde-41a6-9621-0ac45819a0c2', - 'uri': 'http://staging.osf.io/mdept/', - 'host': 'staging.osf.io', - '@type': 'workidentifier', - 'scheme': 'http', - 'creative_work': { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-2d6c52372a6a', - '@type': 'registration' - } - }, - { - '@id': '_:ef2e1a06-76a7-46ed-95cf-413b56c4a49d', - '@type': 'ispartof', - 'related': { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-2d6c52372a6a', - '@type': 'registration' - }, - 'subject': { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-1d6c52372a6a', - '@type': 'registration' - } - }, - { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-1d6c52372a6a', - '@type': 'registration', - 'title': 'Various room', - }, - { - '@id': '_:ee56a463-dcde-41a6-9621-9ac45819a0c2', - 'uri': 'http://staging.osf.io/vroom/', - 'host': 'staging.osf.io', - '@type': 'workidentifier', - 'scheme': 'http', - 'creative_work': { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-1d6c52372a6a', - '@type': 'registration' - } - }, - { - '@id': '_:ef2e1a06-76a7-46ed-95cf-313b56c4a49d', - '@type': 'ispartof', - 'related': { - '@id': '_:c99b622c-6c0a-4ce5-bdbc-1d6c52372a6a', - '@type': 'registration' - }, - 'subject': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - } - }, - { - '@id': '_:fc11d92a-9784-465d-9d43-80af2a7cd83c', - '@type': 'creator', - 'agent': { - '@id': '_:d4723d06-063b-4b62-816b-45ae45356991', - '@type': 'person' - }, - 'cited_as': 'Some Rando', - 'order_cited': 0, - 'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - } - }, - { - '@id': '_:through-subj-architecture', - '@type': 'throughsubjects', - 
'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - }, - 'subject': { - '@id': '_:subj-architecture', - '@type': 'subject' - }, - }, - { - '@id': '_:subj-architecture', - '@type': 'subject', - 'name': 'Architecture', - }, - { - '@id': '_:through-subj-business', - '@type': 'throughsubjects', - 'is_deleted': True, # back-compat with a prior hack - 'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - }, - 'subject': { - '@id': '_:subj-business', - '@type': 'subject' - }, - }, - { - '@id': '_:subj-business', - '@type': 'subject', - 'name': 'Business', - }, - { - '@id': '_:through-subj-education', - '@type': 'throughsubjects', - 'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - }, - 'subject': { - '@id': '_:subj-education', - '@type': 'subject' - }, - }, - { - '@id': '_:subj-education', - '@type': 'subject', - 'name': 'Education', - 'is_deleted': True, # back-compat with a prior hack - }, - { - '@id': '_:through-subj-custom-biology', - '@type': 'throughsubjects', - 'creative_work': { - '@id': '_:4058232c-106f-4a2f-8700-d8c14a6c6ece', - '@type': 'registration' - }, - 'subject': { - '@id': '_:subj-custom-biology', - '@type': 'subject' - }, - }, - { - '@id': '_:subj-custom-biology', - '@type': 'subject', - 'name': 'Custom biologyyyy', - 'parent': { - '@id': '_:subj-custom-life-sciences', - '@type': 'subject', - }, - 'central_synonym': { - '@id': '_:subj-central-biology', - '@type': 'subject', - }, - }, - { - '@id': '_:subj-custom-life-sciences', - '@type': 'subject', - 'name': 'Custom life sciencesssss', - 'central_synonym': { - '@id': '_:subj-central-life-sciences', - '@type': 'subject', - }, - }, - { - '@id': '_:subj-central-biology', - '@type': 'subject', - 'name': 'Biology', - 'parent': { - '@id': '_:subj-central-life-sciences', - '@type': 'subject', - }, - }, - { - '@id': '_:subj-central-life-sciences', - '@type': 'subject', - 'name': 'Life Sciences', - }, - ], - }, - }, - }, - 'with-osf-extra': { - 'suid_id': 99, - 'suid_value': 'guidz', - 'source_name': 'OsfProbably', - 'source_config_label': 'osf.io.v2_push', - 'raw_datum_kwargs': { - 'date_created': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), - }, - 'normalized_datum_kwargs': { - 'created_at': dateutil.parser.isoparse('2017-04-07T21:09:05.023090+00:00'), - 'data': { - '@graph': [ - { - '@id': '_:p', - '@type': 'person', - 'name': 'Open McOperton', - }, - { - '@id': '_:c', - '@type': 'creator', - 'agent': {'@id': '_:p', '@type': 'person'}, - 'creative_work': {'@id': '_:w', '@type': 'creativework'}, - 'cited_as': 'Open McOperton', - 'order_cited': 0, - }, - { - '@id': '_:i', - '@type': 'workidentifier', - 'creative_work': {'@id': '_:w', '@type': 'creativework'}, - 'uri': 'https://example.com/open', - }, - { - '@id': '_:w', - '@type': 'creativework', - 'title': 'So open', - 'date_updated': '2017-03-31T05:39:48+00:00', - 'extra': { - 'osf_related_resource_types': {'foo': True, 'bar': False}, - }, - }, - ], - '@context': {} - }, - }, - }, -} - - -@pytest.fixture(scope='module', params=FORMATTER_TEST_INPUTS.keys()) -def _test_key(request): - return request.param - - -@pytest.fixture(scope='module') -def formatter_test_input(_test_key): - return FORMATTER_TEST_INPUTS[_test_key] diff --git a/tests/share/metadata_formats/test_oai_dc_formatter.py b/tests/share/metadata_formats/test_oai_dc_formatter.py deleted file mode 100644 index 0b77c2180..000000000 --- 
a/tests/share/metadata_formats/test_oai_dc_formatter.py +++ /dev/null @@ -1,98 +0,0 @@ -from lxml import etree
-
-from tests.share.metadata_formats.base import BaseMetadataFormatterTest
-
-
-def xml_elements_equal(element_1, element_2):
-    return (
-        element_1.tag == element_2.tag
-        and element_1.text == element_2.text
-        and element_1.tail == element_2.tail
-        and element_1.attrib == element_2.attrib
-        and len(element_1) == len(element_2)
-        and all(
-            xml_elements_equal(child_1, child_2)
-            for child_1, child_2 in zip(element_1, element_2)
-        )
-    )
-
-
-class TestOaiDcFormatter(BaseMetadataFormatterTest):
-    formatter_key = 'oai_dc'
-
-    def assert_formatter_outputs_equal(self, actual_output, expected_output):
-        if expected_output is None:
-            assert actual_output is None
-        else:
-            xml_parser = etree.XMLParser(remove_blank_text=True)
-            actual_xml = etree.fromstring(actual_output, parser=xml_parser)
-            expected_xml = etree.fromstring(expected_output, parser=xml_parser)
-            assert xml_elements_equal(actual_xml, expected_xml), f"actual: {etree.tostring(actual_xml, encoding='unicode', pretty_print=True)}\nexpected: {etree.tostring(expected_xml, encoding='unicode', pretty_print=True)}"
-
-    expected_outputs = {
-        'mycorrhizas': '''
-        <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
-            <dc:title>The Role of Mycorrhizas in Forest Soil Stability with Climate Change</dc:title>
-            <dc:creator>Suzanne Simard</dc:creator>
-            <dc:creator>Mary Austi</dc:creator>
-            <dc:publisher>InTech</dc:publisher>
-            <dc:date>2017-03-31T05:39:48Z</dc:date>
-            <dc:type>creativework</dc:type>
-            <dc:identifier>http://dx.doi.org/10.5772/9813</dc:identifier>
-        </oai_dc:dc>
-        ''',
-        'no-names-only-name-parts': '''
-        <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
-            <dc:title>The Role of Mycorrhizas in Forest Soil Stability with Climate Change</dc:title>
-            <dc:creator>Suzanne Simard</dc:creator>
-            <dc:creator>Mary Austi</dc:creator>
-            <dc:date>2017-03-31T05:39:48Z</dc:date>
-            <dc:type>creativework</dc:type>
-            <dc:identifier>http://dx.doi.org/10.5772/9813</dc:identifier>
-        </oai_dc:dc>
-        ''',
-        'with-is_deleted': None,
-        'with-subjects': '''
-        <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
-            <dc:title>Assorted chair</dc:title>
-            <dc:creator>Some Rando</dc:creator>
-            <dc:subject>Architecture</dc:subject>
-            <dc:subject>Business</dc:subject>
-            <dc:subject>Custom biologyyyy</dc:subject>
-            <dc:subject>Education</dc:subject>
-            <dc:date>2019-01-23T20:34:21Z</dc:date>
-            <dc:type>registration</dc:type>
-            <dc:identifier>http://staging.osf.io/chair/</dc:identifier>
-            <dc:identifier>http://staging.osf.io/vroom/</dc:identifier>
-        </oai_dc:dc>
-        ''',
-        'with-osf-extra': '''
-        <oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
-            <dc:title>So open</dc:title>
-            <dc:creator>Open McOperton</dc:creator>
-            <dc:date>2017-03-31T05:39:48Z</dc:date>
-            <dc:type>creativework</dc:type>
-            <dc:identifier>https://example.com/open</dc:identifier>
-        </oai_dc:dc>
-        ''',
-    }
diff --git a/tests/share/metadata_formats/test_sharev2_elastic_formatter.py b/tests/share/metadata_formats/test_sharev2_elastic_formatter.py
deleted file mode 100644
index a2c71c957..000000000
--- a/tests/share/metadata_formats/test_sharev2_elastic_formatter.py
+++ /dev/null
@@ -1,268 +0,0 @@
-import json
-import pytest
-from unittest.mock import patch
-
-from share.metadata_formats.sharev2_elastic import format_type
-
-from tests.share.metadata_formats.base import BaseMetadataFormatterTest
-
-
-@pytest.mark.parametrize('type_name,expected', [
-    ('Foo', 'foo'),
-    ('FooBar', 'foo bar'),
-])
-def test_format_type(type_name, expected):
-    actual = format_type(type_name)
-    assert actual == expected
-
-
-def fake_id_encode(obj):
-    return f'encoded-{obj.id}'
-
-
-class TestSharev2ElasticFormatter(BaseMetadataFormatterTest):
-    @pytest.fixture(scope='class', autouse=True)
-    def patch_encode(self):
-        with patch('share.util.IDObfuscator.encode', wraps=fake_id_encode):
-            yield
-
-    def assert_formatter_outputs_equal(self, actual_output, expected_output):
-        assert json.loads(actual_output) == expected_output
-
-    formatter_key = 'sharev2_elastic'
-
-    expected_outputs = {
-        'mycorrhizas': {
-            'contributors': ['Suzanne Simard', 'Mary Austi'],
-            'date': '2017-03-31T05:39:48+00:00',
-            'date_created': '2017-04-07T21:09:05.023090+00:00',
-            'date_modified': '2017-04-07T21:09:05.023090+00:00',
-            'date_updated': '2017-03-31T05:39:48+00:00',
-            'id': 'encoded-7',
-            'identifiers': ['http://dx.doi.org/10.5772/9813'],
-            'publishers': 
['InTech'], - 'retracted': False, - 'source_config': 'foo', - 'source_unique_id': 'oof', - 'sources': ['SomeSource'], - 'title': 'The Role of Mycorrhizas in Forest Soil Stability with Climate Change', - 'type': 'creative work', - 'types': ['creative work'], - 'affiliations': [], - 'funders': [], - 'hosts': [], - 'subject_synonyms': [], - 'subjects': [], - 'tags': [], - 'lists': { - 'affiliations': [], - 'contributors': [ - { - 'cited_as': 'Suzanne Simard', - 'family_name': 'Simard', - 'given_name': 'Suzanne', - 'identifiers': [], - 'name': 'Suzanne Simard', - 'order_cited': 0, - 'relation': 'creator', - 'type': 'person', - 'types': ['person', 'agent'], - }, - { - 'cited_as': 'Mary Austi', - 'family_name': 'Austi', - 'given_name': 'Mary', - 'identifiers': [], - 'name': 'Mary Austi', - 'order_cited': 1, - 'relation': 'creator', - 'type': 'person', - 'types': ['person', 'agent'], - }, - ], - 'funders': [], - 'hosts': [], - 'lineage': [], - 'publishers': [ - { - 'name': 'InTech', - 'identifiers': [], - 'relation': 'publisher', - 'type': 'organization', - 'types': ['organization', 'agent'], - }, - ], - }, - }, - 'no-names-only-name-parts': { - 'contributors': ['Suzanne Simard', 'Mary Austi'], - 'date': '2017-03-31T05:39:48+00:00', - 'date_created': '2017-04-07T21:09:05.023090+00:00', - 'date_modified': '2017-04-07T21:09:05.023090+00:00', - 'date_updated': '2017-03-31T05:39:48+00:00', - 'id': 'encoded-8', - 'identifiers': ['http://dx.doi.org/10.5772/9813'], - 'publishers': [], - 'retracted': False, - 'source_config': 'bar', - 'source_unique_id': 'rab', - 'sources': ['SameSource'], - 'title': 'The Role of Mycorrhizas in Forest Soil Stability with Climate Change', - 'type': 'creative work', - 'types': ['creative work'], - 'affiliations': [], - 'funders': [], - 'hosts': [], - 'subject_synonyms': [], - 'subjects': [], - 'tags': [], - 'lists': { - 'affiliations': [], - 'contributors': [ - { - 'family_name': 'Simard', - 'given_name': 'Suzanne', - 'identifiers': [], - 'name': 'Suzanne Simard', - 'order_cited': 0, - 'relation': 'creator', - 'type': 'person', - 'types': ['person', 'agent'], - }, - { - 'family_name': 'Austi', - 'given_name': 'Mary', - 'identifiers': [], - 'name': 'Mary Austi', - 'order_cited': 1, - 'relation': 'creator', - 'type': 'person', - 'types': ['person', 'agent'], - }, - ], - 'funders': [], - 'hosts': [], - 'lineage': [], - 'publishers': [], - }, - }, - 'with-is_deleted': { - 'id': 'encoded-57', - 'is_deleted': True, - }, - 'with-subjects': { - 'affiliations': ['Wassamatter University'], - 'contributors': ['Some Rando'], - 'date': '2019-01-23T20:34:21.633684+00:00', - 'date_created': '2020-02-02T20:20:02.020000+00:00', - 'date_modified': '2020-02-02T20:20:02.020000+00:00', - 'date_published': '2019-01-23T20:34:21.633684+00:00', - 'id': 'encoded-123', - 'identifiers': ['http://staging.osf.io/chair/'], - 'registration_type': 'Open-Ended Registration', - 'retracted': False, - 'source_config': 'subj', - 'source_unique_id': 'xuq', - 'sources': ['osf reg'], - 'subject_synonyms': [ - 'bepress|Life Sciences|Biology', - ], - 'subjects': [ - 'bepress|Architecture', - 'osf reg|Custom life sciencesssss|Custom biologyyyy', - ], - 'title': 'Assorted chair', - 'type': 'registration', - 'types': ['registration', 'publication', 'creative work'], - 'withdrawn': False, - 'funders': [], - 'hosts': [], - 'publishers': [], - 'tags': [], - 'lists': { - 'affiliations': [ - { - 'cited_as': 'Wassamatter University', - 'identifiers': [], - 'name': 'Wassamatter University', - 'relation': 'agent work 
relation', - 'type': 'institution', - 'types': ['institution', 'organization', 'agent'], - }, - ], - 'contributors': [ - { - 'cited_as': 'Some Rando', - 'identifiers': ['http://staging.osf.io/rando/', 'mailto:rando@example.com'], - 'name': 'Some Rando', - 'order_cited': 0, - 'relation': 'creator', - 'type': 'person', - 'types': ['person', 'agent'], - }, - ], - 'lineage': [ - { - 'identifiers': ['http://staging.osf.io/mdept/'], - 'title': 'Miscellaneous department', - 'type': 'registration', - 'types': ['registration', 'publication', 'creative work'], - }, - { - 'identifiers': ['http://staging.osf.io/vroom/'], - 'title': 'Various room', - 'type': 'registration', - 'types': ['registration', 'publication', 'creative work'], - }, - ], - 'funders': [], - 'hosts': [], - 'publishers': [], - }, - }, - 'with-osf-extra': { - 'affiliations': [], - 'contributors': ['Open McOperton'], - 'date': '2017-03-31T05:39:48+00:00', - 'date_created': '2017-04-07T21:09:05.023090+00:00', - 'date_modified': '2017-04-07T21:09:05.023090+00:00', - 'date_updated': '2017-03-31T05:39:48+00:00', - 'id': 'encoded-99', - 'identifiers': ['https://example.com/open'], - 'source_config': 'osf.io.v2_push', - 'source_unique_id': 'guidz', - 'sources': ['OsfProbably'], - 'subject_synonyms': [], - 'subjects': [], - 'title': 'So open', - 'type': 'creative work', - 'types': ['creative work'], - 'retracted': False, - 'funders': [], - 'hosts': [], - 'publishers': [], - 'tags': [], - 'osf_related_resource_types': { - 'foo': True, - 'bar': False, - }, - 'lists': { - 'affiliations': [], - 'contributors': [ - { - 'cited_as': 'Open McOperton', - 'identifiers': [], - 'name': 'Open McOperton', - 'order_cited': 0, - 'relation': 'creator', - 'type': 'person', - 'types': ['person', 'agent'], - }, - ], - 'lineage': [], - 'funders': [], - 'hosts': [], - 'publishers': [], - }, - }, - } diff --git a/tests/share/normalize/factories.py b/tests/share/normalize/factories.py deleted file mode 100644 index b405ba308..000000000 --- a/tests/share/normalize/factories.py +++ /dev/null @@ -1,425 +0,0 @@ -import contextlib -import functools -import json -import random -import logging -from operator import attrgetter - -import faker - -import factory -import factory.fuzzy - -from share.schema import ShareV2Schema -from share.schema.shapes import RelationShape -from share.transform.chain.links import IRILink -from share.util import TopologicalSorter -from share.util.graph import MutableGraph, MutableNode - - -logger = logging.getLogger(__name__) - - -sharev2_schema = ShareV2Schema() -used_ids = set() -_Faker = faker.Faker() - - -def format_id(type_name, id): - return '_:{}--{}'.format(type_name, id) - - -class FactoryGraph(MutableGraph): - # Override to ignore IDs - def topologically_sorted(self): - def sort_key(node): - return ( - node.type, - *(node.attrs().items()) - ) - - return TopologicalSorter( - sorted(self, key=sort_key), - dependencies=lambda n: sorted( - self.successors(n.id), - key=lambda id: sort_key(self.get_node(id)), - ), - key=attrgetter('id'), - ).sorted() - - # Within tests, `graph1 == graph2` compares their contents - def __eq__(self, other): - if not isinstance(other, self.__class__): - return False - return self.to_jsonld(in_edges=False) == other.to_jsonld(in_edges=False) - - # Normalize IDs to ease comparison - def to_jsonld(self, *args, **kwargs): - jsonld = super().to_jsonld(*args, **kwargs) - id_map = { - node['@id']: '_:__{}'.format(i) - for i, node in enumerate(jsonld['@graph']) - } - - def map_id(value): - if isinstance(value, dict): - 
value['@id'] = id_map[value['@id']] - elif isinstance(value, list): - for v in value: - map_id(v) - - for node in jsonld['@graph']: - for v in node.values(): - map_id(v) - map_id(node) - - return jsonld - - # More readable test output - def __repr__(self): - return '{}({})'.format( - self.__class__.__name__, - json.dumps(self.to_jsonld(in_edges=False), indent=4, sort_keys=True), - ) - - -class FactoryNode(MutableNode): - - def __new__(cls, graph, id, type=None, **attrs): - return super().__new__(cls, graph, id, type, attrs) - - def __init__(self, graph, id, type, **attrs): - super().__init__(graph, id, type, attrs) - - -class RandomStateManager: - def __init__(self, randoms, seed=None): - self._randoms = randoms - self._seed = seed or random.random() - self._states = {} - - def get_states(self): - return tuple(r.getstate() for r in self._randoms) - - def set_states(self, states): - for r, state in zip(self._randoms, states): - r.setstate(state) - - def reseed(self, seed=None): - self._seed = seed or random.random() - - for r in self._randoms: - r.seed(self._seed) - # factory.fuzzy.reseed_random(self._seed) - self._states = {} - - @contextlib.contextmanager - def seed(self, name=None, seed=None): - old_states = self.get_states() - - new_states = self._states.get(name) if name else None - - if new_states is None: - initial_seed = seed or self._seed - for r in self._randoms: - r.seed(initial_seed) - new_states = self.get_states() - if name: - self._states[name] = new_states - - self.set_states(new_states) - - yield hash(new_states) - - # Save the new state if it was advanced/used - if name: - self._states[name] = self.get_states() - - # Leave random(s) untouched upon exiting - self.set_states(old_states) - - -class GraphBuilder: - - def __init__(self): - self.random_states = RandomStateManager([random, _Faker.random]) - - def reseed(self): - self.random_states.reseed() - - def build(self, *nodes, normalize_fields=False): - # Reset all seeds at the being of each graph generation - # Ensures that graphs will be comparable - self.random_states.reseed(self.random_states._seed) - - graph = FactoryGraph() - NodeBuilder(graph, self.random_states, normalize_fields).build_nodes(nodes) - return graph - - def __call__(self, *args, **kwargs): - return self.build(*args, **kwargs) - - -class NodeBuilder: - def __init__(self, graph, random_states, normalize_fields=False): - self.graph = graph - self.random_states = random_states - self.normalize_fields = normalize_fields - - def get_factory(self, schema_type): - return { - 'AbstractCreativeWork': AbstractCreativeWorkFactory, - 'AbstractAgent': AbstractAgentFactory, - 'AbstractAgentWorkRelation': AbstractAgentWorkRelationFactory, - 'AbstractWorkRelation': AbstractWorkRelationFactory, - # 'AbstractAgentRelation': AbstractAgentRelationFactory, - 'WorkIdentifier': WorkIdentifierFactory, - 'AgentIdentifier': AgentIdentifierFactory, - 'Subject': SubjectFactory, - 'ThroughSubjects': ThroughSubjectsFactory, - 'Tag': TagFactory, - 'ThroughTags': ThroughTagsFactory, - # 'Award': AwardFactory, - # 'ThroughAwards': ThroughAwardsFactory, - }[schema_type.concrete_type] - - def build_nodes(self, nodes): - for n in nodes: - if isinstance(n, list): - self.build_nodes(n) - else: - self.build(n) - - def build(self, attrs): - assert 'type' in attrs, 'Must provide "type" when constructing a node' - - attrs = {**attrs} # make a copy to avoid mutating the arg - node_type = attrs.pop('type') - sparse = attrs.pop('sparse', False) - seed = attrs.pop('seed', None) - - if 'id' in 
attrs and attrs['id'] in self.graph: - id = attrs.pop('id') - assert not attrs, 'Cannot reference a previously defined node by id and set attrs' - return self.graph.get_node(id) - - if self.normalize_fields: - attrs['parse'] = True - - relations = {} - for key in tuple(attrs.keys()): - if isinstance(attrs[key], (dict, list)): - relations[key] = attrs.pop(key) - - schema_type = sharev2_schema.get_type(node_type.replace('Abstract', '')) - - # If it's a specific type, pass it along, otherwise let the factory choose a subtype - if node_type == schema_type.concrete_type: - attrs['type'] = random.choice( - list(sharev2_schema.get_type_names(schema_type.concrete_type)) - ) - else: - attrs['type'] = schema_type.name - - # Extract/generate required relations. - # e.g. WorkIdentifier requires a work, Creator requires work and agent - for field_name in schema_type.explicit_fields: - field = sharev2_schema.get_field(node_type, field_name) - if ( - field_name not in attrs - and field.is_relation - and field.is_required - ): - try: - relation = relations.pop(field_name) - except KeyError: - # Value missing for required relation; generate a fake one - relation = {'type': field.related_concrete_type} - attrs[field_name] = self.build(relation) - - if sparse: - # Don't generate fake data for missing fields - node = FactoryNode(self.graph, **attrs) - else: - if seed: - seed_ctx = self.random_states.seed(seed=str(seed) + schema_type.concrete_type) - else: - seed_ctx = self.random_states.seed(name=schema_type.concrete_type) - - with seed_ctx: - node = self.get_factory(schema_type)(graph=self.graph, **attrs) - - # Build specified *-to-many relations - for key, value in sorted(relations.items(), key=lambda x: x[0]): - field = sharev2_schema.get_field(node_type, key) - - if isinstance(value, list): - if field.relation_shape == RelationShape.MANY_TO_MANY: - related = [self.build(v) for v in value] - for rel in related: - self.build({ - 'type': field.through_concrete_type, - field.incoming_through_relation: node, - field.outgoing_through_relation: rel, - }) - else: - reverse_name = field.inverse_relation - for v in value: - v[reverse_name] = node - self.build(v) - else: - node[key] = self.build(value) - - return node - - -class GraphNodeFactory(factory.Factory): - - id = None # Let the graph generate an ID - graph = factory.SelfAttribute('..graph') # Subfactories use the parent's graph - - class Meta: - abstract = True - model = FactoryNode - inline_args = ('graph',) - - @factory.lazy_attribute - def type(self): - raise NotImplementedError('must give a `type`!') - - @factory.post_generation - def parse(self, _, parse, **kwargs): - # Override this to parse fields like the regulator is expected to - pass - - -class AbstractAgentFactory(GraphNodeFactory): - - @factory.lazy_attribute - def name(self): - if self.type == 'Person': - if any(getattr(self, n, None) for n in ('given_name', 'family_name', 'suffix', 'additional_name')): - return None - return _Faker.name() - return _Faker.company() - - class Meta: - model = FactoryNode - - @factory.post_generation - def parse(self, _, parse, **kwargs): - if not parse or self.type != 'Person': - return - - name = self['name'] - if not name: - self['name'] = ' '.join(filter(None, ( - self[k] - for k in ['given_name', 'additional_name', 'family_name', 'suffix'] - ))) - - -class TagFactory(GraphNodeFactory): - name = factory.Faker('word') - - -class SubjectFactory(GraphNodeFactory): - name = factory.Faker('word') - - -class AbstractCreativeWorkFactory(GraphNodeFactory): - title = 
factory.Faker('sentence') - description = factory.Faker('paragraph') - language = factory.Faker('language_code') - - # related_agents = factory.SubFactory(AgentWorkRelationFactory) - - # identifiers = factory.SubFactory('tests.share.normalize.factories.WorkIdentifierFactory') - # related_works = factory.SubFactory(RelatedWorkFactory) - date_updated = factory.Faker('date', pattern='%Y-%m-%dT%H:%M:%SZ') - date_published = factory.Faker('date', pattern='%Y-%m-%dT%H:%M:%SZ') - rights = factory.Faker('paragraph') - free_to_read_type = factory.Faker('url') - free_to_read_date = factory.Faker('date', pattern='%Y-%m-%dT%H:%M:%SZ') - is_deleted = False - - class Meta: - model = FactoryNode - - -class AbstractAgentWorkRelationFactory(GraphNodeFactory): - # lazy attr - # agent = factory.SubFactory(AbstractAgentFactory) - # creative_work = factory.SubFactory(AbstractCreativeWorkFactory) - # order_cited = factory.Faker('pyint') - - @factory.lazy_attribute - def cited_as(self): - return self.agent['name'] - - # lazy attr base on type - # award = factory.SubFactory(AwardFactory) - - class Meta: - model = FactoryNode - - -class AbstractWorkRelationFactory(GraphNodeFactory): - # related = factory.SubFactory(AbstractCreativeWorkFactory) - # subject = factory.SubFactory(AbstractCreativeWorkFactory) - - class Meta: - model = FactoryNode - - -class ThroughTagsFactory(GraphNodeFactory): - pass - # tag = factory.SubFactory(TagFactory) - # creative_work = factory.SubFactory(AbstractCreativeWorkFactory) - - -class ThroughSubjectsFactory(GraphNodeFactory): - pass - # subject = factory.SubFactory(SubjectFactory) - # creative_work = factory.SubFactory(AbstractCreativeWorkFactory) - - -class WorkIdentifierFactory(GraphNodeFactory): - uri = factory.Faker('url') - # creative_work = factory.SubFactory(AbstractCreativeWorkFactory) - - @factory.post_generation - def parse(self, _, parse, **kwargs): - if parse: - parsed = IRILink().execute(self['uri']) - self['uri'] = parsed['IRI'] - self['scheme'] = parsed['scheme'] - self['host'] = parsed['authority'] - - -class AgentIdentifierFactory(GraphNodeFactory): - uri = factory.Faker('url') - # agent = factory.SubFactory(AbstractAgentFactory) - - @factory.post_generation - def parse(self, _, parse, **kwargs): - if parse: - parsed = IRILink().execute(self['uri']) - self['uri'] = parsed['IRI'] - self['scheme'] = parsed['scheme'] - self['host'] = parsed['authority'] - - -def _get_node_builder_params(seed=None, id=None, schema_type=None, model=None, **kwargs): - ret = {'type': schema_type.name.lower(), **kwargs} - if id is not None: - ret['id'] = format_id(schema_type.concrete_type.lower().replace('abstract', ''), id) - if seed is not None: - ret['seed'] = seed - return ret - - -__all__ = () - -for schema_type in sharev2_schema.schema_types.values(): - locals()[schema_type.name] = functools.partial(_get_node_builder_params, schema_type=schema_type) diff --git a/tests/share/normalize/test_harness.py b/tests/share/normalize/test_harness.py deleted file mode 100644 index 53e386da4..000000000 --- a/tests/share/normalize/test_harness.py +++ /dev/null @@ -1,166 +0,0 @@ -from tests.share.normalize.factories import ( - Agent, - AgentIdentifier, - Article, - CreativeWork, - Institution, - Organization, - Patent, - Person, - Preprint, - Publication, - Tag, - ThroughTags, - WorkIdentifier, -) -from tests.share.normalize.factories import FactoryGraph - - -class TestShortHand: - - def test_seed(self): - assert Agent(0) == {'seed': 0, 'type': 'agent'} - assert Person(0) == {'seed': 0, 'type': 
'person'} - assert Organization(0) == {'seed': 0, 'type': 'organization'} - assert Institution(0) == {'seed': 0, 'type': 'institution'} - - def test_id(self): - assert Agent(id=0) == {'id': '_:agent--0', 'type': 'agent'} - assert Person(id=0) == {'id': '_:agent--0', 'type': 'person'} - assert Organization(id=0) == {'id': '_:agent--0', 'type': 'organization'} - assert Institution(id=0) == {'id': '_:agent--0', 'type': 'institution'} - - def test_anon(self): - assert CreativeWork() == {'type': 'creativework'} - assert Article() == {'type': 'article'} - assert Publication() == {'type': 'publication'} - assert Patent() == {'type': 'patent'} - - def test_kwargs(self): - kwargs = {'hello': 'World'} - assert CreativeWork(**kwargs) == {'type': 'creativework', **kwargs} - assert Article(**kwargs) == {'type': 'article', **kwargs} - assert Publication(**kwargs) == {'type': 'publication', **kwargs} - assert Patent(**kwargs) == {'type': 'patent', **kwargs} - - def test_nesting(self): - assert CreativeWork( - identifiers=[WorkIdentifier(id=0), WorkIdentifier(id=1)], - related_works=[Preprint(identifiers=[WorkIdentifier(id=0)])] - ) == { - 'type': 'creativework', - 'identifiers': [{'id': '_:workidentifier--0', 'type': 'workidentifier'}, {'id': '_:workidentifier--1', 'type': 'workidentifier'}], - 'related_works': [{ - 'type': 'preprint', - 'identifiers': [{'id': '_:workidentifier--0', 'type': 'workidentifier'}] - }] - } - - -class TestMakeGraph: - - def test_single_node(self, Graph): - graph = Graph( - CreativeWork(name='Foo') - ) - assert isinstance(graph, FactoryGraph) - assert len(graph) == 1 - node = list(graph)[0] - assert node['name'] == 'Foo' - assert node.type == 'creativework' - assert node.id.startswith('_:') - - def test_multiple_nodes(self, Graph): - graph = Graph( - CreativeWork(name='Foo'), - Tag(name='Bar'), - ) - - assert len(graph) == 2 - - tag = next(x for x in graph if x.type == 'tag') - work = next(x for x in graph if x.type == 'creativework') - - assert work['name'] == 'Foo' - assert work.type == 'creativework' - assert work.id.startswith('_:') - assert tag['name'] == 'Bar' - assert tag.type == 'tag' - assert tag.id.startswith('_:') - - def test_cross_graph_identity(self, Graph, ExpectedGraph): - assert Graph(CreativeWork(0)) == ExpectedGraph(CreativeWork(0)) - - def test_nested(self, Graph): - graph = Graph(CreativeWork(identifiers=[WorkIdentifier()])) - - work = next(x for x in graph if x.type == 'creativework') - identifier = next(x for x in graph if x.type == 'workidentifier') - - assert len(graph) == 2 - assert identifier['creative_work'].id == work.id - assert identifier['creative_work'].type == work.type - assert len(work['identifiers']) == 1 - assert work['identifiers'][0] == identifier - - def test_many_to_many(self, Graph): - graph = list(Graph(CreativeWork(tags=[Tag()]))) - - assert len(graph) == 3 - assert graph[0].type == 'creativework' - assert graph[1].type == 'tag' - assert graph[2].type == 'throughtags' - - def test_many_to_many_related(self, Graph): - graph = list(Graph(CreativeWork(tag_relations=[ThroughTags()]))) - - assert len(graph) == 3 - assert graph[0].type == 'creativework' - assert graph[1].type == 'tag' - assert graph[2].type == 'throughtags' - - def test_reseeds(self, Graph): - assert Graph(CreativeWork()) == Graph(CreativeWork()) - - def test_reseeds_many(self, Graph): - assert Graph(CreativeWork(), CreativeWork(), CreativeWork(), Tag(), WorkIdentifier()) == Graph(CreativeWork(), CreativeWork(), CreativeWork(), Tag(), WorkIdentifier()) - - def 
test_type_out_of_order(self, Graph): - assert Graph(Tag(), CreativeWork(), Tag()) == Graph(CreativeWork(), Tag(), Tag()) - - def test_ids_dont_effect(self, Graph): - assert Graph(Tag(), Tag(1, id=1), Tag()) == Graph(Tag(), Tag(), Tag(1, id=1)) - assert Graph(Tag(), Tag(1, id=2), Tag()) == Graph(Tag(), Tag(), Tag(1, id=1)) - assert Graph(Tag(id=7), Tag(1, id=2), Tag()) == Graph(Tag(), Tag(id=23), Tag(1, id=1)) - - def test_cases(self, Graph): - assert Graph(AgentIdentifier(1), AgentIdentifier(1), AgentIdentifier(1)) == Graph(AgentIdentifier(1), AgentIdentifier(1), AgentIdentifier(1)) - assert Graph(AgentIdentifier(seed=1), AgentIdentifier(seed=1), AgentIdentifier(seed=1)) == Graph(AgentIdentifier(seed=1), AgentIdentifier(seed=1), AgentIdentifier(seed=1)) - - data = Graph( - Person(0, name='Barb Dylan', identifiers=[AgentIdentifier(seed=1)]), - Person(1, name='Barb Dylan', identifiers=[AgentIdentifier(seed=1)]), - Person(2, name='Barb Dylan', identifiers=[AgentIdentifier(seed=1)]) - ) - assert len(data) == 6 - - assert data == Graph( - Person(0, name='Barb Dylan', identifiers=[AgentIdentifier(seed=1)]), - Person(1, name='Barb Dylan', identifiers=[AgentIdentifier(seed=1)]), - Person(2, name='Barb Dylan', identifiers=[AgentIdentifier(seed=1)]) - ) - - identifiers = list(x for x in data if x.type == 'agentidentifier') - assert len(identifiers) == 3 - assert len(set(i.id for i in identifiers)) == 3 - - identifiers = list(n.to_jsonld() for n in identifiers) - for i in identifiers: - i = {**i} - i.pop('@id') - i['agent'].pop('@id', None) - for j in identifiers: - j = {**j} - j.pop('@id') - j['agent'].pop('@id', None) - assert i == j diff --git a/tests/share/normalize/test_json.py b/tests/share/normalize/test_json.py deleted file mode 100644 index 5034bd64b..000000000 --- a/tests/share/normalize/test_json.py +++ /dev/null @@ -1,61 +0,0 @@ -from share.transform.chain import * # noqa - - -EXAMPLE = { - "article_id": 3436874, - "title": "Photochemical Carbon Dioxide Reduction on Mg-Doped\nGa(In)N Nanowire Arrays under Visible Light Irradiation", - "DOI": "https://dx.doi.org/10.1021/acsenergylett.6b00119.s001", - "description": "The photochemical reduction of carbon\ndioxide (CO2)\ninto energy-rich products can potentially address some of the critical\nchallenges we face today, including energy resource shortages and\ngreenhouse gas emissions. Our ab initio calculations show that CO2 molecules can be spontaneously activated on the clean nonpolar\nsurfaces of wurtzite metal nitrides, for example, Ga\u00ad(In)\u00adN. We have\nfurther demonstrated the photoreduction of CO2 into methanol\n(CH3OH) with sunlight as the only energy input. A conversion\nrate of CO2 into CH3OH (\u223c0.5 mmol gcat\u20131 h\u20131) is achieved\nunder visible light illumination (>400 nm). Moreover, we have discovered\nthat the photocatalytic activity for CO2 reduction can\nbe drastically enhanced by incorporating a small amount of Mg dopant.\nThe definitive role of Mg dopant in Ga\u00ad(In)\u00adN, at both the atomic and\ndevice levels, has been identified. This study reveals the potential\nof III-nitride semiconductor nanostructures in solar-powered reduction\nof CO2 into hydrocarbon fuels.", - "type": "paper", - "url": "https://api.figshare.com/v1/articles/3436874", - "published_date": "00:00, 08 Jun, 2016", - "authors": [ - {"author_name": "B. AlOtaibi"}, - {"author_name": "X. Kong"}, - {"author_name": "S. Vanka"}, - {"author_name": "S. Y. Woo"}, - {"author_name": "A. Pofelski"}, - {"author_name": "F. 
Oudjedi"}, - {"author_name": "S. Fan"}, - {"author_name": "M. G. Kibria"}, - {"author_name": "G. A. Botton"}, - {"author_name": "W. Ji"}, - {"author_name": "H. Guo"}, - {"author_name": "Z. Mi"} - ], - "links": [], - "defined_type": "paper", - "modified_date": "17:37, 14 Jun, 2016" -} - - -class Person(Parser): - given_name = ParseName(ctx.author_name).first - family_name = ParseName(ctx.author_name).last - - -class Creator(Parser): - agent = Delegate(Person, ctx) - - -class Article(Parser): - title = ctx.title - description = ctx.description - # publish_date = ParseDate(ctx.published_date) - related_agents = Map(Delegate(Creator, ctx.authors)) - - class Extra: - type = ctx.defined_type - defined_type = ctx.defined_type - - -class TestParser: - - def test_parser(self): - parsed = Article(EXAMPLE).parse() - normalized = ctx.pool[parsed] - assert normalized['extra'] == {'type': 'paper', 'defined_type': 'paper'} - - # no newlines, leading/trailing white space, or multiple spaces - assert normalized['title'] == 'Photochemical Carbon Dioxide Reduction on Mg-Doped Ga(In)N Nanowire Arrays under Visible Light Irradiation' - assert normalized['description'] == 'The photochemical reduction of carbon dioxide (CO2) into energy-rich products can potentially address some of the critical challenges we face today, including energy resource shortages and greenhouse gas emissions. Our ab initio calculations show that CO2 molecules can be spontaneously activated on the clean nonpolar surfaces of wurtzite metal nitrides, for example, Ga\u00ad(In)\u00adN. We have further demonstrated the photoreduction of CO2 into methanol (CH3OH) with sunlight as the only energy input. A conversion rate of CO2 into CH3OH (\u223c0.5 mmol gcat\u20131 h\u20131) is achieved under visible light illumination (>400 nm). Moreover, we have discovered that the photocatalytic activity for CO2 reduction can be drastically enhanced by incorporating a small amount of Mg dopant. The definitive role of Mg dopant in Ga\u00ad(In)\u00adN, at both the atomic and device levels, has been identified. This study reveals the potential of III-nitride semiconductor nanostructures in solar-powered reduction of CO2 into hydrocarbon fuels.' 
diff --git a/tests/share/normalize/test_links.py b/tests/share/normalize/test_links.py deleted file mode 100644 index b25b5833f..000000000 --- a/tests/share/normalize/test_links.py +++ /dev/null @@ -1,840 +0,0 @@ -import pytest -import pendulum - -from share.transform.chain import exceptions -from share.transform.chain.links import ( - ARKLink, - ArXivLink, - DOILink, - DateParserLink, - GuessAgentTypeLink, - IRILink, - ISNILink, - ISSNLink, - InfoURILink, - OrcidLink, - URNLink, -) - -UPPER_BOUND = pendulum.today().add(years=100).isoformat() - - -@pytest.mark.parametrize('date, result', [ - (None, exceptions.InvalidDate('None is not a valid date.')), - ('0059-11-01T00:00:00Z', exceptions.InvalidDate('0059-11-01T00:00:00Z is before the lower bound 1200-01-01T00:00:00+00:00.')), - ('20010101', '2001-01-01T00:00:00+00:00'), - ('2013', '2013-01-01T00:00:00+00:00'), - ('03:04, 19 Nov, 2014', '2014-11-19T03:04:00+00:00'), - ('invalid date', exceptions.InvalidDate('Unknown string format: invalid date')), - ('2001-1-01', '2001-01-01T00:00:00+00:00'), - ('2001-2-30', exceptions.InvalidDate('day is out of range for month: 2001-2-30')), - # skip until dateutil 2.8.2 with https://github.com/dateutil/dateutil/pull/987 - # ('14/2001', exceptions.InvalidDate(14)), - ('11/20/1990', '1990-11-20T00:00:00+00:00'), - ('1990-11-20T00:00:00Z', '1990-11-20T00:00:00+00:00'), - ('19 Nov, 2014', '2014-11-19T00:00:00+00:00'), - ('Nov 2012', '2012-11-01T00:00:00+00:00'), - ('January 1 2014', '2014-01-01T00:00:00+00:00'), - ('3009-11-01T00:00:00Z', exceptions.InvalidDate('3009-11-01T00:00:00Z is after the upper bound ' + UPPER_BOUND + '.')), - ('2016-01-01T15:03:04-05:00', '2016-01-01T20:03:04+00:00'), - ('2016-01-01T15:03:04+5:00', '2016-01-01T10:03:04+00:00'), - ('2016-01-01T15:03:04-3', '2016-01-01T18:03:04+00:00'), - ('2016-01-01T15:03:04-3:30', '2016-01-01T18:33:04+00:00'), - ('2016-01-01T15:03:04+99', exceptions.InvalidDate('offset must be a timedelta strictly between -timedelta(hours=24) and timedelta(hours=24).')), - # rolls over extra minutes - ('2016-01-01T15:03:04-3:70', '2016-01-01T19:13:04+00:00'), -]) -def test_dateparser_link(date, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - DateParserLink().execute(date) - assert e.value.args == result.args - else: - assert DateParserLink().execute(date) == result - - -@pytest.mark.parametrize('issn, result', [ - ('0378-5955', 'urn://issn/0378-5955'), - ('1534-0481', 'urn://issn/1534-0481'), - ('1476-4687', 'urn://issn/1476-4687'), - ('0028-0836', 'urn://issn/0028-0836'), - ('1144-875x', 'urn://issn/1144-875X'), - ('1144-875X', 'urn://issn/1144-875X'), - ('0378-5950', exceptions.InvalidIRI('\'03785950\' is not a valid ISSN; failed checksum.')), - ('0000-0002-4869-2419', exceptions.InvalidIRI('\'0000-0002-4869-2419\' cannot be expressed as an ISSN.')), -]) -def test_issn_link(issn, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - ISSNLink().execute(issn) - assert e.value.args == result.args - else: - assert ISSNLink().execute(issn)['IRI'] == result - - -@pytest.mark.parametrize('isni, result', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' cannot be expressed as an ISNI.')), - ('0000000121032683', 'http://isni.org/0000000121032683'), - ('0000000346249680', exceptions.InvalidIRI('\'0000000346249680\' is outside reserved ISNI range.')), - ('0000-0001-2103-2683', 'http://isni.org/0000000121032683'), - 
('http://isni.org/0000000121032683', 'http://isni.org/0000000121032683'), - ('000000012150090X', 'http://isni.org/000000012150090X'), - ('000000012150090x', 'http://isni.org/000000012150090X'), - ('0000-0001-2150-090X', 'http://isni.org/000000012150090X'), - ('0000-0001-2150-090x', 'http://isni.org/000000012150090X'), -]) -def test_isni_link(isni, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - ISNILink().execute(isni) - assert e.value.args == result.args - else: - assert ISNILink().execute(isni)['IRI'] == result - - -@pytest.mark.parametrize('orcid, result', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' cannot be expressed as an ORCID.')), - ('000000000248692419', exceptions.InvalidIRI('\'000000000248692419\' cannot be expressed as an ORCID.')), - ('0000000346249680', 'http://orcid.org/0000-0003-4624-9680'), - ('0000000317011251', 'http://orcid.org/0000-0003-1701-1251'), - ('0000000229129030', 'http://orcid.org/0000-0002-2912-9030'), - ('0000000248692412', exceptions.InvalidIRI('\'0000000248692412\' is not a valid ORCID; failed checksum.')), - ('0000000248692419', 'http://orcid.org/0000-0002-4869-2419'), - ('0000-0002-4869-2419', 'http://orcid.org/0000-0002-4869-2419'), - ('0000-0002-4869-2419', 'http://orcid.org/0000-0002-4869-2419'), - ('https://orcid.org/0000-0002-1694-233X', 'http://orcid.org/0000-0002-1694-233X'), - ('https://orcid.org/0000-0002-4869-2419', 'http://orcid.org/0000-0002-4869-2419'), - ('0000-0001-2150-090X', exceptions.InvalidIRI('\'000000012150090X\' is outside reserved ORCID range.')), -]) -def test_orcid_link(orcid, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - OrcidLink().execute(orcid) - assert e.value.args == result.args - else: - assert OrcidLink().execute(orcid)['IRI'] == result - - -@pytest.mark.parametrize('doi, result', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' is not a valid DOI.')), - ('105517/ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'105517/ccdc.csd.cc1lj81f\' is not a valid DOI.')), - ('0.5517/ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'0.5517/ccdc.csd.cc1lj81f\' is not a valid DOI.')), - ('10.5517ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'10.5517ccdc.csd.cc1lj81f\' is not a valid DOI.')), - ('10.517ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'10.517ccdc.csd.cc1lj81f\' is not a valid DOI.')), - ('10.517/ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'10.517/ccdc.csd.cc1lj81f\' is not a valid DOI.')), - ('10.517ccdc.csd.c>c1lj81f', exceptions.InvalidIRI('\'10.517ccdc.csd.c>c1lj81f\' is not a valid DOI.')), - ('http://www.scirp.org/journal/PaperDownload.aspx?DOI=10.4236/wjcd.2016.69035', exceptions.InvalidIRI('\'http://www.scirp.org/journal/PaperDownload.aspx?DOI=10.4236/wjcd.2016.69035\' is not a valid DOI.')), - ('10.5517/ccdc.csd.cc1lj81f', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - (' 10.5517/ccdc.csd.cc1lj81f', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - (' 10.5517/ccdc.csd.cc1lj81f ', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - ('DOI:10.5517/ccdc.csd.cc1lj81f', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - ('doi:10.5517/ccdc.csd.cc1lj81f', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - ('The DOI is 10.5517/ccdc.csd.cc1lj81f', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - ('10.5517/ccdc.csd.cc1lj81f\n', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - ('https://dx.doi.org/10.1674/0003-0031(1998)140[0358:CAPWBS]2.0.CO;2', 
'http://dx.doi.org/10.1674/0003-0031(1998)140%5B0358:CAPWBS%5D2.0.CO;2'), - ('http://dx.doi.org/10.1002/1096-8644(200101)114:1<18::AID-AJPA1002>3.0.CO;2-2', 'http://dx.doi.org/10.1002/1096-8644(200101)114:1%3C18::AID-AJPA1002%3E3.0.CO;2-2'), - ('10.18142/8', 'http://dx.doi.org/10.18142/8'), - ('10.3389%2Ffpls.2014.00388', 'http://dx.doi.org/10.3389/FPLS.2014.00388'), -]) -def test_doi_link(doi, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - DOILink().execute(doi) - assert e.value.args == result.args - else: - assert DOILink().execute(doi)['IRI'] == result - - -@pytest.mark.parametrize('arxiv_id, result', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' is not a valid ArXiv Identifier.')), - ('arXiv:1023..20382', exceptions.InvalidIRI('\'arXiv:1023..20382\' is not a valid ArXiv Identifier.')), - ('something else', exceptions.InvalidIRI('\'something else\' is not a valid ArXiv Identifier.')), - ('arXiv//1234.34543', exceptions.InvalidIRI('\'arXiv//1234.34543\' is not a valid ArXiv Identifier.')), - ('arXiv:101022232', exceptions.InvalidIRI('\'arXiv:101022232\' is not a valid ArXiv Identifier.')), - ('arXiv:10102.22322', exceptions.InvalidIRI('\'arXiv:10102.22322\' is not a valid ArXiv Identifier.')), - ('arXiv:2.2', exceptions.InvalidIRI('\'arXiv:2.2\' is not a valid ArXiv Identifier.')), - ('arxiv:1212.20282', 'http://arxiv.org/abs/1212.20282'), - (' arxiv:1212.20282', 'http://arxiv.org/abs/1212.20282'), - (' arxiv:1212.20282 ', 'http://arxiv.org/abs/1212.20282'), - ('arxiv:arXiv:1212.20282', 'http://arxiv.org/abs/1212.20282'), -]) -def test_arxiv_link(arxiv_id, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - ArXivLink().execute(arxiv_id) - assert e.value.args == result.args - else: - assert ArXivLink().execute(arxiv_id)['IRI'] == result - - -@pytest.mark.parametrize('ark_id, result', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' is not a valid ARK Identifier.')), - ('ark:/blah-blah-blah', exceptions.InvalidIRI('\'ark:/blah-blah-blah\' is not a valid ARK Identifier.')), - ('something else', exceptions.InvalidIRI('\'something else\' is not a valid ARK Identifier.')), - ('ark//1234/blah-blah-blah', exceptions.InvalidIRI('\'ark//1234/blah-blah-blah\' is not a valid ARK Identifier.')), - ('ark:/1234', exceptions.InvalidIRI('\'ark:/1234\' is not a valid ARK Identifier.')), - ('bark:/1234/blah-blah', exceptions.InvalidIRI('\'bark:/1234/blah-blah\' is not a valid ARK Identifier.')), - ('ark:/1234a/blah-blah', exceptions.InvalidIRI('\'ark:/1234a/blah-blah\' is not a valid ARK Identifier.')), - ('ark:/1234/blah-blah-blah', 'ark://1234/blah-blah-blah'), - (' ark:/1234/blah-blah-blah', 'ark://1234/blah-blah-blah'), - ('ark:/1234/blah-blah-blah ', 'ark://1234/blah-blah-blah'), - ('http://namemappingauthority.org/ark:/1234/blah-blah-blah', 'ark://1234/blah-blah-blah'), - ('ark:/383838/this/one/has/path', 'ark://383838/this/one/has/path'), -]) -def test_ark_link(ark_id, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - ARKLink().execute(ark_id) - assert e.value.args == result.args - else: - assert ARKLink().execute(ark_id)['IRI'] == result - - -@pytest.mark.parametrize('urn, result', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' is not a valid URN.')), - ('something else', exceptions.InvalidIRI('\'something else\' is not a 
valid URN.')), - ('oai:missing.path', exceptions.InvalidIRI('\'oai:missing.path\' is not a valid URN.')), - ('oai::blank', exceptions.InvalidIRI('\'oai::blank\' is not a valid URN.')), - ('oai://cos.io/fun', 'oai://cos.io/fun'), - ('oai://cos.io/fun/times', 'oai://cos.io/fun/times'), - ('oai://cos.io/fun/times/with/slashes', 'oai://cos.io/fun/times/with/slashes'), - ('oai://cos.io/fun/ti mes', exceptions.InvalidIRI('\'oai://cos.io/fun/ti mes\' is not a valid URN.')), - ('zenodo.com', exceptions.InvalidIRI('\'zenodo.com\' is not a valid URN.')), - ('oai:invalid domain:this.is.stuff', exceptions.InvalidIRI('\'oai:invalid domain:this.is.stuff\' is not a valid URN.')), - ('oai:domain.com:', exceptions.InvalidIRI('\'oai:domain.com:\' is not a valid URN.')), - ('urn:missing.path', exceptions.InvalidIRI('\'urn:missing.path\' is not a valid URN.')), - ('urn::blank', exceptions.InvalidIRI('\'urn::blank\' is not a valid URN.')), - ('urn://cos.io/fun', 'urn://cos.io/fun'), - ('urn:invalid domain:this.is.stuff', exceptions.InvalidIRI('\'urn:invalid domain:this.is.stuff\' is not a valid URN.')), - ('urn:domain.com:', exceptions.InvalidIRI('\'urn:domain.com:\' is not a valid URN.')), - ('oai:cos.io:this.is.stuff', 'oai://cos.io/this.is.stuff'), - ('oai:subdomain.cos.io:this.is.stuff', 'oai://subdomain.cos.io/this.is.stuff'), - (' oai:cos.io:stuff', 'oai://cos.io/stuff'), - (' oai:cos.io:stuff ', 'oai://cos.io/stuff'), - ('oai:cos.io:long:list:of:things', 'oai://cos.io/long:list:of:things'), - ('urn:share:this.is.stuff', 'urn://share/this.is.stuff'), - (' urn:share:stuff', 'urn://share/stuff'), - (' urn:share:stuff ', 'urn://share/stuff'), - ('urn:share:long:list:of/things', 'urn://share/long:list:of/things'), -]) -def test_urn_link(urn, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - URNLink().execute(urn) - assert e.value.args == result.args - else: - assert URNLink().execute(urn)['IRI'] == result - - -@pytest.mark.parametrize('uri, result', [ - ('info:eu-repo/grantAgreement/EC/FP7/280632/', 'info://eu-repo/grantAgreement/EC/FP7/280632/'), - ('info:eu-repo/semantics/objectFile', 'info://eu-repo/semantics/objectFile'), - (' info:eu-repo/dai/nl/12345', 'info://eu-repo/dai/nl/12345'), - ('\tinfo:eu-repo/dai/nl/12345\n', 'info://eu-repo/dai/nl/12345'), - ('info:ddc/22/eng//004.678', 'info://ddc/22/eng//004.678'), - ('info:lccn/2002022641', 'info://lccn/2002022641'), - ('info:sici/0363-0277(19950315)120:5%3C%3E1.0.TX;2-V', 'info://sici/0363-0277(19950315)120:5%3C%3E1.0.TX;2-V'), - ('fo:eu-repo/dai/nl/12345\n', exceptions.InvalidIRI("'fo:eu-repo/dai/nl/12345\n' is not a valid Info URI.")), -]) -def test_info_link(uri, result): - if isinstance(result, Exception): - with pytest.raises(type(result)) as e: - InfoURILink().execute(uri) - assert e.value.args == result.args - else: - assert InfoURILink().execute(uri)['IRI'] == result - - -class TestIRILink: - - def _do_test(self, input, output, urn_fallback=False): - if isinstance(output, Exception): - with pytest.raises(type(output)) as e: - IRILink().execute(input) - assert e.value.args == output.args - else: - assert {k: v for k, v in IRILink(urn_fallback=urn_fallback).execute(input).items() if k in output} == output - - @pytest.mark.parametrize('input, output', [ - ('trexy@dinosaurs.sexy', { - 'scheme': 'mailto', - 'authority': 'dinosaurs.sexy', - 'IRI': 'mailto:trexy@dinosaurs.sexy', - }), - ('mailto:trexy@dinosaurs.sexy', { - 'scheme': 'mailto', - 'authority': 'dinosaurs.sexy', - 'IRI': 'mailto:trexy@dinosaurs.sexy', - 
}), - ('mailto:trexy@dinosaurs.sexy?subject=Dinosaurs', { - 'scheme': 'mailto', - 'authority': 'dinosaurs.sexy', - 'IRI': 'mailto:trexy@dinosaurs.sexy', - }), - ('rééééééé@french-place.fr', { - 'scheme': 'mailto', - 'authority': 'french-place.fr', - 'IRI': 'mailto:rééééééé@french-place.fr', - }), - # This has a unicode hyphen "‐" - ('JamesBond@chuchu\u2010train.fr', { - 'scheme': 'mailto', - 'authority': 'chuchu-train.fr', - 'IRI': 'mailto:JamesBond@chuchu-train.fr', - }), - - ]) - def test_emails(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('http://api.elsevier.com/content/article/PII:B9780081005965212365?httpAccept=text/xml', { - 'scheme': 'http', - 'authority': 'api.elsevier.com', - 'IRI': 'http://api.elsevier.com/content/article/PII:B9780081005965212365?httpAccept=text/xml', - }), - ('https://google.com/', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/', - }), - ('https://GOOGLE.com/', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/', - }), - ('https://GOOGLE.com/MixedCases', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/MixedCases', - }), - ('https://GOOGLE.com:80/MixedCases', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/MixedCases', - }), - ('https://GOOGLE.com:443/MixedCases', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/MixedCases', - }), - ('www.GOOGLE.com:443/MixedCases', { - 'scheme': 'http', - 'authority': 'www.google.com', - 'IRI': 'http://www.google.com/MixedCases', - }), - ('https://google.com/#', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/', - }), - ('https://google.com/#flooby', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/#flooby', - }), - ('https://google.com/#fr/ag/ment', { - 'scheme': 'http', - 'authority': 'google.com', - 'IRI': 'http://google.com/#fr/ag/ment', - }), - ('https://google.com:666/', { - 'scheme': 'http', - 'authority': 'google.com:666', - 'IRI': 'http://google.com:666/', - }), - ('https://google.com:666/yay/path#yay/fragment', { - 'scheme': 'http', - 'authority': 'google.com:666', - 'IRI': 'http://google.com:666/yay/path#yay/fragment', - }), - ('http://www.scirp.org/journal/PaperDownload.aspx?DOI=10.4236/wjcd.2016.69035', { - 'scheme': 'http', - 'authority': 'www.scirp.org', - 'IRI': 'http://www.scirp.org/journal/PaperDownload.aspx?DOI=10.4236/wjcd.2016.69035', - }), - ('http://linkinghub.elsevier.com/retrieve/pii/s1053811912011895', { - 'scheme': 'http', - 'authority': 'linkinghub.elsevier.com', - 'IRI': 'http://linkinghub.elsevier.com/retrieve/pii/s1053811912011895', - }), - ('http://api.elsevier.com/content/article/PII:0168952590900517?httpAccept=text/xml', { - 'scheme': 'http', - 'authority': 'api.elsevier.com', - 'IRI': 'http://api.elsevier.com/content/article/PII:0168952590900517?httpAccept=text/xml', - }), - ('http://api.elsevier.com/content/article/PII:0168952590901608?httpAccept=text/xml', { - 'scheme': 'http', - 'authority': 'api.elsevier.com', - 'IRI': 'http://api.elsevier.com/content/article/PII:0168952590901608?httpAccept=text/xml', - }), - ('https://cn.dataone.org/cn/v2/resolve/http%3A%2F%2Fdx.doi.org%2F10.5061%2Fdryad.34s63%3Fformat%3Dd1rem%26ver%3D2016-11-03T17%3A08%3A53.816-04%3A00', { - 'scheme': 'http', - 'authority': 'cn.dataone.org', - 'IRI': 
'http://cn.dataone.org/cn/v2/resolve/http%3A%2F%2Fdx.doi.org%2F10.5061%2Fdryad.34s63%3Fformat%3Dd1rem%26ver%3D2016-11-03T17%3A08%3A53.816-04%3A00' - }), - ('http://scitation.aip.org/deliver/fulltext/aip/journal/jcp/143/18/1.4935171.pdf?itemId=/content/aip/journal/jcp/143/18/10.1063/1.4935171&mimeType=pdf&containerItemId=content/aip/journal/jcp', { - 'scheme': 'http', - 'authority': 'scitation.aip.org', - 'IRI': 'http://scitation.aip.org/deliver/fulltext/aip/journal/jcp/143/18/1.4935171.pdf?itemId=/content/aip/journal/jcp/143/18/10.1063/1.4935171&mimeType=pdf&containerItemId=content/aip/journal/jcp', - }), - # ('http://www.rcaap.pt/detail.jsp?id=oai:http://repositorio.utad.pt/:10348/5661', { - # 'scheme': 'http', - # 'authority': 'www.rcaap.pt', - # 'IRI': 'http://scitation.aip.org/deliver/fulltext/aip/journal/jcp/143/18/1.4935171.pdf?itemId=/content/aip/journal/jcp/143/18/10.1063/1.4935171&mimeType=pdf&containerItemId=content/aip/journal/jcp', - # }), - ('http://www.frontiersin.org/Behavioral_Neuroscience/10.3389/fnbeh.2014.00245/abstract', { - 'scheme': 'http', - 'authority': 'www.frontiersin.org', - 'IRI': 'http://www.frontiersin.org/Behavioral_Neuroscience/10.3389/fnbeh.2014.00245/abstract', - }), - ('https://cn.dataone.org/cn/v2/resolve/doi%3A10.18739%2FA2M37F', { - 'scheme': 'http', - 'authority': 'cn.dataone.org', - 'IRI': 'http://cn.dataone.org/cn/v2/resolve/doi%3A10.18739%2FA2M37F', - }), - ('http://scitation.aip.org/deliver/fulltext/aip/journal/apl/109/18/1.4966994.pdf?itemId=/content/aip/journal/apl/109/18/10.1063/1.4966994&mimeType=pdf&containerItemId=content/aip/journal/apl', { - 'scheme': 'http', - 'authority': 'scitation.aip.org', - 'IRI': 'http://scitation.aip.org/deliver/fulltext/aip/journal/apl/109/18/1.4966994.pdf?itemId=/content/aip/journal/apl/109/18/10.1063/1.4966994&mimeType=pdf&containerItemId=content/aip/journal/apl', - }) - ]) - def test_urls(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('10.5517/aadc.csd.cc1lj81f', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/AADC.CSD.CC1LJ81F', - }), - (' 10.5517/bbdc.csd.cc1lj81f', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/BBDC.CSD.CC1LJ81F', - }), - (' 10.5517/ccdc.csd.cc1lj81f ', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F', - }), - ('DOI:10.5517/dddc.csd.cc1lj81f', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/DDDC.CSD.CC1LJ81F', - }), - ('doi:10.5517/eedc.csd.cc1lj81f', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/EEDC.CSD.CC1LJ81F', - }), - ('The DOI is 10.5517/ffdc.csd.cc1lj81f', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/FFDC.CSD.CC1LJ81F', - }), - ('10.5517/ggdc.csd.cc1lj81f\n', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.5517/GGDC.CSD.CC1LJ81F', - }), - ('https://dx.doi.org/10.1674/0003-0031(1998)140[0358:CAPWBS]2.0.CO;2', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.1674/0003-0031(1998)140%5B0358:CAPWBS%5D2.0.CO;2', - }), - ('http://dx.doi.org/10.1002/1096-8644(200101)114:1<18::AID-AJPA1002>3.0.CO;2-2', { - 'scheme': 'http', - 'authority': 'dx.doi.org', - 'IRI': 'http://dx.doi.org/10.1002/1096-8644(200101)114:1%3C18::AID-AJPA1002%3E3.0.CO;2-2', - }), - ]) - def test_dois(self, input, output): - return 
self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('0000000346249680', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0003-4624-9680', - }), - ('0000000317011251', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0003-1701-1251', - }), - ('0000000229129030', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-2912-9030', - }), - ('0000000248692419', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-4869-2419', - }), - ('0000-0002-4869-2419', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-4869-2419', - }), - ('0000-0002-4869-2419', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-4869-2419', - }), - ('http://orcid.org/0000-0002-1694-233X', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-1694-233X', - }), - ('http://orcid.org/0000-0002-1694-233x', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-1694-233X', - }), - ('http://orcid.org/0000-0002-4869-2419', { - 'scheme': 'http', - 'authority': 'orcid.org', - 'IRI': 'http://orcid.org/0000-0002-4869-2419', - }), - ]) - def test_orcids(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('0000000121032683', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/0000000121032683' - }), - ('0000-0001-2103-2683', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/0000000121032683' - }), - ('http://isni.org/0000000121032683', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/0000000121032683' - }), - ('000000012150090X', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/000000012150090X', - }), - ('000000012150090x', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/000000012150090X', - }), - ('0000-0001-2150-090X', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/000000012150090X', - }), - ('0000-0001-2150-090x', { - 'scheme': 'http', - 'authority': 'isni.org', - 'IRI': 'http://isni.org/000000012150090X', - }), - ]) - def test_isnis(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('arxiv:1212.20282', { - 'scheme': 'http', - 'authority': 'arxiv.org', - 'IRI': 'http://arxiv.org/abs/1212.20282' - }), - (' arxiv:1212.20282', { - 'scheme': 'http', - 'authority': 'arxiv.org', - 'IRI': 'http://arxiv.org/abs/1212.20282' - }), - (' arxiv:1212.20282 ', { - 'scheme': 'http', - 'authority': 'arxiv.org', - 'IRI': 'http://arxiv.org/abs/1212.20282' - }), - ('arxiv:arXiv:1212.20282', { - 'scheme': 'http', - 'authority': 'arxiv.org', - 'IRI': 'http://arxiv.org/abs/1212.20282' - }), - ]) - def test_arxiv_ids(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('ark:/1234/blah-blah-blah', { - 'scheme': 'ark', - 'authority': '1234', - 'path': '/blah-blah-blah' - }), - (' ark:/1234/blah-blah-blah', { - 'scheme': 'ark', - 'authority': '1234', - 'path': '/blah-blah-blah' - }), - ('ark:/1234/blah-blah-blah ', { - 'scheme': 'ark', - 'authority': '1234', - 'path': '/blah-blah-blah' - }), - ('http://namemappingauthority.org/ark:/1234/blah-blah-blah', { - 'scheme': 'ark', - 'authority': '1234', - 'path': '/blah-blah-blah' - }), - 
('ark:/383838/this/one/has/path', { - 'scheme': 'ark', - 'authority': '383838', - 'path': '/this/one/has/path' - }), - ('ark://04030/p7833zk7g', { - 'scheme': 'ark', - 'authority': '04030', - 'path': '/p7833zk7g' - }), - ]) - def test_ark_ids(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('oai:cos.io:this.is.stuff', { - 'scheme': 'oai', - 'authority': 'cos.io', - 'IRI': 'oai://cos.io/this.is.stuff' - }), - ('oai:subdomain.cos.io:this.is.stuff', { - 'scheme': 'oai', - 'authority': 'subdomain.cos.io', - 'IRI': 'oai://subdomain.cos.io/this.is.stuff' - }), - (' oai:cos.io:stuff', { - 'scheme': 'oai', - 'authority': 'cos.io', - 'IRI': 'oai://cos.io/stuff' - }), - (' oai:cos.io:stuff ', { - 'scheme': 'oai', - 'authority': 'cos.io', - 'IRI': 'oai://cos.io/stuff' - }), - ('oai:cos.io:long:list:of:things', { - 'scheme': 'oai', - 'authority': 'cos.io', - 'IRI': 'oai://cos.io/long:list:of:things' - }), - ('urn:cos.io:this.is.stuff', { - 'scheme': 'urn', - 'authority': 'cos.io', - 'IRI': 'urn://cos.io/this.is.stuff' - }), - (' urn:cos.io:stuff', { - 'scheme': 'urn', - 'authority': 'cos.io', - 'IRI': 'urn://cos.io/stuff' - }), - (' urn:cos.io:stuff ', { - 'scheme': 'urn', - 'authority': 'cos.io', - 'IRI': 'urn://cos.io/stuff' - }), - ('urn:cos.io:long:list:of/things', { - 'scheme': 'urn', - 'authority': 'cos.io', - 'IRI': 'urn://cos.io/long:list:of/things' - }), - ]) - def test_urn_ids(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('info:eu-repo/grantAgreement/EC/FP7/280632/', { - 'scheme': 'info', - 'authority': 'eu-repo', - 'IRI': 'info://eu-repo/grantAgreement/EC/FP7/280632/', - }), - ('info:eu-repo/semantics/objectFile', { - 'scheme': 'info', - 'authority': 'eu-repo', - 'IRI': 'info://eu-repo/semantics/objectFile', - }), - (' info:eu-repo/dai/nl/12345', { - 'scheme': 'info', - 'authority': 'eu-repo', - 'IRI': 'info://eu-repo/dai/nl/12345', - }), - ('\tinfo:eu-repo/dai/nl/12345\n', { - 'scheme': 'info', - 'authority': 'eu-repo', - 'IRI': 'info://eu-repo/dai/nl/12345', - }), - ('info:ddc/22/eng//004.678', { - 'scheme': 'info', - 'authority': 'ddc', - 'IRI': 'info://ddc/22/eng//004.678', - }), - ('info:lccn/2002022641', { - 'scheme': 'info', - 'authority': 'lccn', - 'IRI': 'info://lccn/2002022641' - }), - ('info:sici/0363-0277(19950315)120:5%3C%3E1.0.TX;2-V', { - 'scheme': 'info', - 'authority': 'sici', - 'IRI': 'info://sici/0363-0277(19950315)120:5%3C%3E1.0.TX;2-V' - }), - ]) - def test_info_uri(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - ('978-91-89673-31-1', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-91-8967-331-1', - }), - ('urn://isbn/978-91-8967-331-1', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-91-8967-331-1', - }), - ('urn://isbn/978-91-89673-31-5', exceptions.InvalidIRI("'urn://isbn/978-91-89673-31-5' is not a valid ISBN; failed checksum.")), - ('ISBN: 978-91-89673-31-1', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-91-8967-331-1', - }), - ('ISBN 978-0-306-40615-7', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-03-0640-615-7', - }), - ('978-91-7402-405-0', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-91-7402-405-0', - }), - ('91-7192-550-3', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-91-7192-550-3', - }), - ('ISBN 0-201-53082-1', { - 'scheme': 'urn', - 
'authority': 'isbn', - 'IRI': 'urn://isbn/978-02-0153-082-7', - }), - ('0-9752298-0-X', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-09-7522-980-4', - }), - ('0-9752298-0-x', { - 'scheme': 'urn', - 'authority': 'isbn', - 'IRI': 'urn://isbn/978-09-7522-980-4', - }), - ('http://arxiv.org/index.php?view&id=12', { - 'scheme': 'http', - 'authority': 'arxiv.org', - 'IRI': 'http://arxiv.org/index.php?view&id=12', - }), - ('ISSN 978-0-306-40615-7', exceptions.InvalidIRI("'ISSN 978-0-306-40615-7' could not be identified as an Identifier.")), - ('ISBN 978-0-306-40615-0', exceptions.InvalidIRI("'ISBN 978-0-306-40615-0' is not a valid ISBN; failed checksum.")), - ]) - def test_isbn(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input, output', [ - (None, exceptions.InvalidIRI('\'None\' is not of type str.')), - ('', exceptions.InvalidIRI('\'\' could not be identified as an Identifier.')), - ('105517/ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'105517/ccdc.csd.cc1lj81f\' could not be identified as an Identifier.')), - ('0.5517/ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'0.5517/ccdc.csd.cc1lj81f\' could not be identified as an Identifier.')), - ('10.5517ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'10.5517ccdc.csd.cc1lj81f\' could not be identified as an Identifier.')), - ('10.517ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'10.517ccdc.csd.cc1lj81f\' could not be identified as an Identifier.')), - ('10.517/ccdc.csd.cc1lj81f', exceptions.InvalidIRI('\'10.517/ccdc.csd.cc1lj81f\' could not be identified as an Identifier.')), - ('10.517ccdc.csd.c>c1lj81f', exceptions.InvalidIRI('\'10.517ccdc.csd.c>c1lj81f\' could not be identified as an Identifier.')), - ('0000000248692412', exceptions.InvalidIRI('\'0000000248692412\' could not be identified as an Identifier.')), - ('0000000000000000', exceptions.InvalidIRI('\'0000000000000000\' could not be identified as an Identifier.')), - ('arXiv:1023..20382', exceptions.InvalidIRI('\'arXiv:1023..20382\' could not be identified as an Identifier.')), - ('arXiv:10102.22322', exceptions.InvalidIRI('\'arXiv:10102.22322\' could not be identified as an Identifier.')), - ('arXiv:2.2', exceptions.InvalidIRI('\'arXiv:2.2\' could not be identified as an Identifier.')), - ('fo:eu-repo/dai/nl/12345\n', exceptions.InvalidIRI("'fo:eu-repo/dai/nl/12345\n' could not be identified as an Identifier.")), - ]) - def test_malformed(self, input, output): - return self._do_test(input, output) - - @pytest.mark.parametrize('input', [ - '10.5517/ggdc.csd.cc1lj81f', - 'The DOI is 10.5517/ffdc.csd.cc1lj81f', - 'https://dx.doi.org/10.1674/0003-0031(1998)140[0358:CAPWBS]2.0.CO;2', - 'https://orcid.org/0000-0002-1694-233X', - '0000-0002-4869-2419', - '0000000317011251', - 'trexy@dinosaurs.sexy', - 'mailto:trexy@dinosaurs.sexy', - '0000-0001-2150-090X', - ]) - def test_benchmark(self, input, benchmark): - benchmark(IRILink().execute, input) - - -class TestGuessAgentTypeLink: - @pytest.mark.parametrize('name, result', [ - ('University of Whales', 'institution'), - ('Thomas Jefferson', 'person'), - ('The Thomas Jefferson thing', 'organization'), - ('Center For Open Science', 'organization'), - ('Science Council', 'organization'), - ('Open Science Foundation', 'organization'), - ('American Chemical Society', 'organization'), - ('School for Clowns', 'institution'), - ('Clown College', 'institution'), - ('Clowning Institute', 'institution'), - ('The Clown Institution', 'institution'), - ('Clowns and Clown Accessories, Inc.', 'organization'), 
- ('All of the clowns', 'organization'), - ('Clown Group', 'organization'), - ('CLWN', 'organization'), - ('Mr. Clown', 'person'), - ('Ronald McDonald', 'person'), - ]) - def test_without_explicit_default(self, name, result): - assert GuessAgentTypeLink().execute(name) == result - - @pytest.mark.parametrize('name, default, result', [ - ('University of Whales', 'organization', 'institution'), - ('Thomas Jefferson', 'person', 'person'), - ('Thomas Jefferson', 'organization', 'organization'), - ('Thomas Jefferson', 'institution', 'institution'), - ('The Thomas Jefferson thing', 'institution', 'organization'), - ('Center For Open Science', 'person', 'organization'), - ('Science Council', 'person', 'organization'), - ('Open Science Foundation', 'person', 'organization'), - ('American Chemical Society', 'person', 'organization'), - ('School for Clowns', 'person', 'institution'), - ('Clown College', 'person', 'institution'), - ('Clowning Institute', 'person', 'institution'), - ('The Clown Institution', 'person', 'institution'), - ('Clowns and Clown Accessories, Inc.', 'person', 'organization'), - ('All of the clowns', 'person', 'organization'), - ('Clown Group', 'person', 'organization'), - ('CLWN', 'person', 'organization'), - ('Mr. Clown', 'organization', 'organization'), - ('Ronald McDonald', 'person', 'person'), - ('Ronald McDonald', 'organization', 'organization'), - ('Ronald McDonald', 'institution', 'institution'), - ]) - def test_with_default(self, name, default, result): - assert GuessAgentTypeLink(default=default).execute(name) == result diff --git a/tests/share/normalize/test_models.py b/tests/share/normalize/test_models.py deleted file mode 100644 index 2bf1afd4a..000000000 --- a/tests/share/normalize/test_models.py +++ /dev/null @@ -1,496 +0,0 @@ -import pytest - -from share.regulate import Regulator - -from tests.share.normalize.factories import ( - Agent, - AgentIdentifier, - Contributor, - CreativeWork, - Creator, - Funder, - Host, - Institution, - IsPartOf, - Organization, - Person, - Publisher, - Tag, - WorkIdentifier, -) - - -class TestModelNormalization: - - # test each tag resolves to lowercased, tokenized name - @pytest.mark.parametrize('input, output', [(i, o) for input, o in [ - ([ - Tag(name=''), - Tag(name=' '), - Tag(name='\n\n\n'), - ], []), - ([ - Tag(name='foo'), - Tag(name='foO'), - Tag(name='Foo'), - Tag(name='FOO'), - Tag(name=' FOO'), - Tag(name=' foo\n\n\n'), - ], [Tag(name='foo')]), - ([ - Tag(name='Rocket League'), - Tag(name='rocket league'), - Tag(name='ROCKET LEAGUE'), - Tag(name='Rocket League'), - Tag(name='\nRocket \n League\t'), - Tag(name='rocket\nleague'), - ], [Tag(name='rocket league')]), - ([ - Tag(name='Crash; Bandicoot'), - Tag(name='Crash; Bandicoot'), - Tag(name='\nCrash; Bandicoot'), - Tag(name='crash, bandicoot'), - Tag(name='Crash ,Bandicoot '), - ], [Tag(name='bandicoot'), Tag(name='crash')]), - ] for i in input]) - def test_normalize_tag(self, input, output, Graph, ExpectedGraph): - graph = Graph(CreativeWork(tags=[input])) - Regulator(validate=False).regulate(graph) - - assert graph == ExpectedGraph(CreativeWork(tags=output)) - - # test tags with the same name are merged on a work - @pytest.mark.parametrize('input, output', [ - ([ - Tag(name=''), - Tag(name=' '), - Tag(name='\n\n\n'), - ], []), - ([ - Tag(name='foo'), - Tag(name='foO'), - Tag(name='Foo'), - Tag(name='FOO'), - Tag(name=' FOO'), - Tag(name=' foo\n\n\n'), - ], [Tag(name='foo')]), - ([ - Tag(name='Rocket League'), - Tag(name='rocket league'), - Tag(name='ROCKET LEAGUE'), - 
Tag(name='Rocket League'), - Tag(name='\nRocket \n League\t'), - Tag(name='rocket\nleague'), - ], [Tag(name='rocket league')]), - ([ - Tag(name='Crash; Bandicoot'), - Tag(name='Crash; Bandicoot'), - Tag(name='\nCrash; Bandicoot'), - Tag(name='crash, bandicoot'), - Tag(name='Crash ,Bandicoot '), - ], [Tag(name='bandicoot'), Tag(name='crash')]), - ]) - @pytest.mark.skip - def test_normalize_tags_on_work(self, input, output, Graph, ExpectedGraph): - graph = Graph(CreativeWork(tags=input)) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(CreativeWork(tags=output)) - - @pytest.mark.parametrize('input, output', [(i, o) for input, o in [ - ([ - Person(name='Smith, J'), - Person(name='Smith, J'), - ], Person(name='Smith, J')), - ([ - Person(name='J Smith '), - ], Person(name='J Smith')), - ([ - Person(given_name='J', family_name='Smith'), - Person(given_name=' J', family_name='\n\nSmith'), - ], Person(name='J Smith', family_name='Smith', given_name='J')), - ([ - Person(name='Johnathan James Doe'), - ], Person(name='Johnathan James Doe')), - ([ - Person(name='johnathan james doe'), - ], Person(name='johnathan james doe')), - ([ - Person(name='johnathan james doe JR'), - ], Person(name='johnathan james doe JR')), - ([ - Person(name='none'), - Person(name=''), - Person(name='NULL'), - Person(name='None'), - Person(name=' '), - Person(name=' None '), - ], None) - ] for i in input]) - def test_normalize_person(self, input, output, Graph, ExpectedGraph): - graph = Graph(input) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(output or []) - - # test two people with the same identifier are merged - # sort by length and then alphabetize name field - @pytest.mark.parametrize('input, output', [ - # same name, same identifier - ([ - Person(0, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(1, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(2, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - ], [Person(2, name='Barb Dylan', identifiers=[AgentIdentifier(1)])]), - ([ - Person(0, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(1, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(2, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(3, name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - ], [Person(3, name='Barb Dylan', identifiers=[AgentIdentifier(1)])]), - # same name, different identifiers - ([ - Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(name='Barb Dylan', identifiers=[AgentIdentifier(2)]) - ], [ - Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(name='Barb Dylan', identifiers=[AgentIdentifier(2)]) - ]), - # no name - name, same identifier - ([ - Person(name='', identifiers=[AgentIdentifier(1)]), - Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)]) - ], [Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)])]), - # two names, same identifier, take longer name - ([ - Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(name='Barbra Dylan', identifiers=[AgentIdentifier(1)]) - ], [Person(name='Barbra Dylan', identifiers=[AgentIdentifier(1)])]), - # two names, same length, same identifier, alphabetize and take first - ([ - Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(name='Aarb Dylan', identifiers=[AgentIdentifier(1)]) - ], [Person(name='Aarb Dylan', identifiers=[AgentIdentifier(1)])]), - # 3 different names, take longest of each name field - ([ - # Below case WILL FAIL.
Haven't seen just a last name... yet - # Person(name='Dylan', identifiers=[AgentIdentifier(1)]), - Person(name='Dylan, B', identifiers=[AgentIdentifier(1)]), - Person(name='Barb Dylan', identifiers=[AgentIdentifier(1)]), - Person(name='B. D. Dylan', identifiers=[AgentIdentifier(1)]) - ], [Person(name='B. D. Dylan', identifiers=[AgentIdentifier(1)])]), - ]) - @pytest.mark.skip - def test_normalize_person_relation(self, input, output, Graph, ExpectedGraph): - graph = Graph(*input) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(*output) - - @pytest.mark.parametrize('input, output', [ - (Agent(name='none'), None), - (Agent(name=''), None), - (Agent(name='NULL'), None), - (Agent(name='None'), None), - (Agent(name=' '), None), - (Agent(name=' None '), None), - (Agent(name=' Empty Foundation '), Organization(name='Empty Foundation')), - (Agent(name='University \n of Arizona '), Institution(name='University of Arizona')), - (Agent(name='NMRC, University College, Cork, Ireland'), Institution(name='NMRC, University College', location='Cork, Ireland')), - (Agent(name='Ioffe Physico-Technical Institute'), Institution(name='Ioffe Physico-Technical Institute')), - (Agent(name='DPTA'), Organization(name='DPTA')), - (Agent(name='B. Verkin Institute for Low Temperatures Physics & Engineering, Kharkov, Ukraine'), Institution(name='B. Verkin Institute for Low Temperatures Physics & Engineering', location='Kharkov, Ukraine', type='institution')), - (Agent(name='Physikalisches Institut, University Wuerzburg, Germany'), Agent(name='Physikalisches Institut', location='University Wuerzburg, Germany', type='institution')), - (Agent(name='Centro de Biotecnologia e Departamento de Biofísica; UFRGS; Av Bento Goncalves 9500, Predio 43431 sala 213 91501-970 Porto Alegre Rio Grande do Sul Brazi'), Agent(name='UFRGS - Centro de Biotecnologia e Departamento de Biofísica', location='Av Bento Goncalves 9500, Predio 43431 sala 213 91501-970 Porto Alegre Rio Grande do Sul Brazi')), - (Agent(name='Department of Chemistry; ZheJiang University; HangZhou ZheJiang CHINA'), Institution(name='ZheJiang University - Department of Chemistry', location='HangZhou ZheJiang CHINA')), - (Agent(name='Marine Evolution and Conservation; Groningen Institute for Evolutionary Life Sciences; University of Groningen; Nijenborgh 7, 9747 AG Groningen The Netherlands'), Institution(name='University of Groningen - Marine Evolution and Conservation; Groningen Institute for Evolutionary Life Sciences', location='Nijenborgh 7, 9747 AG Groningen The Netherlands')), - (Agent(name='Institute of Marine Research; PO Box 1870 Nordnes, 5817 Bergen Norway'), Institution(name='Institute of Marine Research', location='PO Box 1870 Nordnes, 5817 Bergen Norway')), - (Agent(name=' PeerJ Inc. 
'), Organization(name='PeerJ Inc.')), - (Agent(name=' Clinton Foundation\n '), Organization(name='Clinton Foundation')), - ]) - def test_normalize_agent(self, input, output, Graph, ExpectedGraph): - graph = Graph(input) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(output or []) - - # test two organizations/institutions with the same name are merged - # sort by length and then alphabetize name field - @pytest.mark.parametrize('input, output', [ - # same name, same identifiers - ([ - Organization(name='American Heart Association', identifiers=[AgentIdentifier(1)]), - Organization(name='American Heart Association', identifiers=[AgentIdentifier(1)]) - ], [Organization(name='American Heart Association', identifiers=[AgentIdentifier(1)])]), - # same name, different identifiers - ([ - Organization(name='Money Foundation', identifiers=[AgentIdentifier(1)]), - Organization(name='Money Foundation', identifiers=[AgentIdentifier(2)]) - ], [ - Organization(name='Money Foundation', identifiers=[AgentIdentifier(1)]), - Organization(name='Money Foundation', identifiers=[AgentIdentifier(2)]), - ]), - # same name, different identifiers, different capitalization - ([ - Organization(name='Money Foundation', identifiers=[AgentIdentifier(1)]), - Organization(name='MONEY FOUNDATION', identifiers=[AgentIdentifier(2)]) - ], [ - Organization(name='Money Foundation', identifiers=[AgentIdentifier(1)]), - Organization(name='MONEY FOUNDATION', identifiers=[AgentIdentifier(2)]) - ]), - # same identifier, different type, accept more specific type - ([ - Institution(name='University of Virginia', identifiers=[AgentIdentifier(1)]), - Organization(name='University of Virginia', identifiers=[AgentIdentifier(1)]), - ], [ - Institution(name='University of Virginia', identifiers=[AgentIdentifier(1)]) - ]), - # same identifier, same name, same length, different capitalization, alphabetize - ([ - Organization(name='Share', identifiers=[AgentIdentifier(1)]), - Organization(name='SHARE', identifiers=[AgentIdentifier(1)]) - ], [Organization(name='SHARE', identifiers=[AgentIdentifier(1)])]), - # same name, one identifier, add identifier - ([ - Organization(name='Timetables Inc.'), - Organization(name='Timetables Inc.', identifiers=[AgentIdentifier(1)]) - ], [ - Organization(name='Timetables Inc.'), - Organization(name='Timetables Inc.', identifiers=[AgentIdentifier(1)]) - ]), - # same identifier, different name, accept longest alphabetize - ([ - Institution(name='Cooking Institute', identifiers=[AgentIdentifier(1)]), - Institution(name='Cooking Instituze', identifiers=[AgentIdentifier(1)]), - Institution(name='Cook Institute', identifiers=[AgentIdentifier(1)]) - ], [Institution(name='Cooking Institute', identifiers=[AgentIdentifier(1)])]), - ]) - @pytest.mark.skip - def test_normalize_organization_institution_name(self, input, output, Graph, ExpectedGraph): - graph = Graph(*input) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(*output) - - # test different types of agent work relations - # Funder, Publisher, Host - @pytest.mark.parametrize('input, output', [ - # same name, same identifiers - ([ - Host(cited_as='American Heart Association', agent=Organization(1, name='American Heart Association', identifiers=[AgentIdentifier(1)])), - Funder(cited_as='American Heart Association', agent=Organization(1, id=1, name='American Heart Association', identifiers=[AgentIdentifier(1, id=0)])), - ], [ - Host(cited_as='American Heart Association', agent=Organization(1, id=1, name='American
Heart Association', identifiers=[AgentIdentifier(1, id=0)])), - Funder(cited_as='American Heart Association', agent=Organization(id=1)), - ]), - # same name, different identifiers - ([ - Host(cited_as='Money Foundation', agent=Organization(name='Money Foundation', identifiers=[AgentIdentifier(1)])), - Funder(cited_as='Money Foundation', agent=Organization(id=1, name='Money Foundation', identifiers=[AgentIdentifier(2)])), - ], [ - Host(cited_as='Money Foundation', agent=Organization(name='Money Foundation', identifiers=[AgentIdentifier(1)])), - Funder(cited_as='Money Foundation', agent=Organization(id=1, name='Money Foundation', identifiers=[AgentIdentifier(2)])), - ]), - # same identifier, different type - ([ - Publisher(cited_as='University of Virginia', agent=Institution(name='University of Virginia', identifiers=[AgentIdentifier(1)])), - Funder(cited_as='University of Virginia', agent=Institution(id=1, name='University of Virginia', identifiers=[AgentIdentifier(1, id=0)])), - ], [ - Publisher(cited_as='University of Virginia', agent=Institution(id=1, name='University of Virginia', identifiers=[AgentIdentifier(1, id=0)])), - Funder(cited_as='University of Virginia', agent=Institution(id=1)), - ]), - # same identifier, same name, same length, different capitalization, alphabetize - ([ - Publisher(cited_as='Share', agent=Organization(id=0, name='Share', identifiers=[AgentIdentifier(1, id=2)])), - Host(cited_as='SHARE', agent=Organization(id=1, name='SHARE', identifiers=[AgentIdentifier(1, id=3)])) - ], [ - Publisher(cited_as='Share', agent=Organization(id=0, name='SHARE', identifiers=[AgentIdentifier(1, id=3)])), - Host(cited_as='SHARE', agent=Organization(id=0)) - ]), - # same name, one identifier, add identifier - ([ - Funder(cited_as='Timetables Inc.', agent=Organization(id=1, name='Timetables Inc.')), - Publisher(cited_as='Timetables Inc.', agent=Organization(id=2, name='Timetables Inc.', identifiers=[AgentIdentifier(1)])) - ], [ - Funder(cited_as='Timetables Inc.', agent=Organization(id=1, name='Timetables Inc.')), - Publisher(cited_as='Timetables Inc.', agent=Organization(id=2, name='Timetables Inc.', identifiers=[AgentIdentifier(1)])) - ]), - # same identifier, different name, accept longest alphabetize - ([ - Funder(cited_as='Cooking Institute', agent=Organization(id=1, name='Cooking Notaninstitute', identifiers=[AgentIdentifier(1)])), - Publisher(cited_as='Cooking Instituze', agent=Organization(id=2, name='Cooking Notaninstituze', identifiers=[AgentIdentifier(1)])), - Host(cited_as='Cook Institute', agent=Organization(id=3, name='Cook Notaninstitute', identifiers=[AgentIdentifier(1)])) - ], [ - Funder(cited_as='Cooking Institute', agent=Organization(id=1, name='Cooking Notaninstitute', identifiers=[AgentIdentifier(1)])), - Publisher(cited_as='Cooking Instituze', agent=Organization(id=1)), - Host(cited_as='Cook Institute', agent=Organization(id=1)) - ]), - # same identifier, different name, different type, accept longest alphabetize, more specific - ([ - Funder(cited_as='Cooking Institute', agent=Institution(id=1, name='Cooking Notaninstitute', identifiers=[AgentIdentifier(1)])), - Publisher(cited_as='Cooking Instituze', agent=Organization(id=2, name='Cooking Notaninstituze', identifiers=[AgentIdentifier(1)])), - Host(cited_as='Cook Institute', agent=Institution(id=3, name='Cook Notaninstitute', identifiers=[AgentIdentifier(1)])) - ], [ - Funder(cited_as='Cooking Institute', agent=Institution(id=1, name='Cooking Notaninstitute', identifiers=[AgentIdentifier(1)])), -
Publisher(cited_as='Cooking Instituze', agent=Institution(id=1)), - Host(cited_as='Cook Institute', agent=Institution(id=1)) - ]), - ]) - @pytest.mark.skip - def test_normalize_mixed_agent_relation(self, input, output, Graph, ExpectedGraph): - graph = Graph(CreativeWork(agent_relations=input)) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(CreativeWork(agent_relations=output)) - - # test different types of agent work relations - # Contributor, Creator - @pytest.mark.parametrize('input, output', [ - # same name, same identifiers, different type, same type tree, organization - ([ - Creator(cited_as='American Heart Association', agent=Organization(id=0, name='American Heart Association', identifiers=[AgentIdentifier(1, id=1)])), - Contributor(cited_as='American Heart Association', agent=Organization(id=1, name='American Heart Association', identifiers=[AgentIdentifier(1, id=2)])) - ], [ - Creator(cited_as='American Heart Association', agent=Organization(id=1, name='American Heart Association', identifiers=[AgentIdentifier(1, id=2)])), - Contributor(cited_as='American Heart Association', agent=Organization(id=1)), - ]), - # same name, different identifiers, different type, same type tree - ([ - Creator(cited_as='Money Foundation', agent=Organization(id=1, name='Money Foundation', identifiers=[AgentIdentifier()])), - Contributor(cited_as='Money Foundation', agent=Organization(id=2, name='Money Foundation', identifiers=[AgentIdentifier()])), - ], [ - Creator(cited_as='Money Foundation', agent=Organization(id=1, name='Money Foundation', identifiers=[AgentIdentifier()])), - Contributor(cited_as='Money Foundation', agent=Organization(id=2, name='Money Foundation', identifiers=[AgentIdentifier()])), - ]), - # same identifier, same name, different type - ([ - Contributor(cited_as='University of Virginia', agent=Institution(id=0, name='University of Virginia', identifiers=[AgentIdentifier(1)])), - Publisher(cited_as='University of Virginia', agent=Institution(id=1, name='University of Virginia', identifiers=[AgentIdentifier(1)])) - ], [ - Contributor(cited_as='University of Virginia', agent=Institution(id=1, name='University of Virginia', identifiers=[AgentIdentifier(1)])), - Publisher(cited_as='University of Virginia', agent=Institution(id=1)) - ]), - # same identifier, same name, different type, same type tree, person - ([ - Creator(cited_as='Bob Dylan', agent=Person(id=0, name='Bob Dylan', identifiers=[AgentIdentifier(1, id=0)])), - Contributor(cited_as='Bob Dylan', agent=Person(id=1, name='Bob Dylan', identifiers=[AgentIdentifier(1, id=1)])), - ], [ - Creator(cited_as='Bob Dylan', agent=Person(id=0, name='Bob Dylan', identifiers=[AgentIdentifier(1, id=0)])), - Contributor(cited_as='Bob Dylan', agent=Person(id=0)), - ]), - # same identifier, different name, different type - ([ - Creator(cited_as='B. Dylan', agent=Person(id=0, name='B. Dylan', identifiers=[AgentIdentifier(1, id=0)])), - Contributor(cited_as='Bob Dylan', agent=Person(id=1, name='Bob Dylan', identifiers=[AgentIdentifier(1, id=1)])), - ], [ - Creator(cited_as='B. 
Dylan', agent=Person(id=0, name='Bob Dylan', identifiers=[AgentIdentifier(1, id=0)])), - Contributor(cited_as='Bob Dylan', agent=Person(id=0)), - ]), - # same name, one identifier, add identifier - ([ - Creator(1, id=0, order_cited=4, cited_as='Timetables Inc.', agent=Organization(id=0, name='Timetables Inc.')), - Creator(1, id=1, order_cited=20, cited_as='Timetables Inc.', agent=Organization(id=1, name='Timetables Inc.', identifiers=[AgentIdentifier()])) - ], [ - Creator(1, id=0, order_cited=4, cited_as='Timetables Inc.', agent=Organization(id=0, name='Timetables Inc.')), - Creator(1, id=1, order_cited=20, cited_as='Timetables Inc.', agent=Organization(id=1, name='Timetables Inc.', identifiers=[AgentIdentifier()])) - ]), - # same identifier, different name, accept longest alphabetize - ([ - Creator(cited_as='Cooking Institute', agent=Organization(id=1, name='Cooking Institute', identifiers=[AgentIdentifier(1, id=1)])), - Contributor(cited_as='Cooking Instituze', agent=Organization(id=2, name='Cooking Instituze', identifiers=[AgentIdentifier(1, id=2)])), - Funder(cited_as='Cook Institute', agent=Organization(id=3, name='Cook Institute', identifiers=[AgentIdentifier(1, id=3)])) - ], [ - Creator(cited_as='Cooking Institute', agent=Institution(id=1, name='Cooking Institute', identifiers=[AgentIdentifier(1, id=3)])), - Contributor(cited_as='Cooking Instituze', agent=Organization(id=1)), - Funder(cited_as='Cook Institute', agent=Institution(id=1)) - ]), - # same identifier, different name, different type, accept longest alphabetize, more specific - ([ - Creator(cited_as='Cooking Institute', order_cited=10, agent=Institution(id=0, name='Cooking Institute', identifiers=[AgentIdentifier(1, id=1)])), - Contributor(cited_as='Cooking Instituze', agent=Organization(id=1, name='Cooking Instituze', identifiers=[AgentIdentifier(1, id=2)])), - Funder(cited_as='Cook Institute', agent=Institution(id=2, name='Cook Institute', identifiers=[AgentIdentifier(1, id=3)])) - ], [ - Creator(cited_as='Cooking Institute', order_cited=10, agent=Institution(id=0, name='Cooking Institute', identifiers=[AgentIdentifier(1, id=3)])), - Contributor(cited_as='Cooking Instituze', agent=Institution(id=0)), - Funder(cited_as='Cook Institute', agent=Institution(id=0)) - ]), - # Related agent removed - ([ - Creator(cited_as='', agent=Person(id=0, name='None', identifiers=[AgentIdentifier(1, id=1)])), - ], [ - ]), - # Nameless agent with cited_as - ([ - Creator(cited_as='Magpie', agent=Person(id=0, name='', identifiers=[AgentIdentifier(1, id=1)])), - ], [ - Creator(cited_as='Magpie', agent=Person(id=0, name='Magpie', identifiers=[AgentIdentifier(1, id=1)])), - ]), - ]) - @pytest.mark.skip - def test_normalize_contributor_creator_relation(self, input, output, Graph, ExpectedGraph): - graph = Graph(CreativeWork(agent_relations=input)) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(CreativeWork(agent_relations=output)) - - # test work with related work - @pytest.mark.parametrize('input, output', [ - # different identifiers - ( - CreativeWork(1, identifiers=[WorkIdentifier(1)], related_works=[ - CreativeWork(2, identifiers=[WorkIdentifier(2)]), - ]), - CreativeWork(1, identifiers=[WorkIdentifier(1)], related_works=[ - CreativeWork(2, identifiers=[WorkIdentifier(2)]), - ]), - ), - # same and different identifiers - ( - CreativeWork(1, identifiers=[WorkIdentifier(1)], outgoing_creative_work_relations=[ - IsPartOf(1, related=CreativeWork(1, identifiers=[WorkIdentifier(1)])), - IsPartOf(2, 
related=CreativeWork(2, identifiers=[WorkIdentifier(2)])), - ]), - CreativeWork(1, identifiers=[WorkIdentifier(1)], outgoing_creative_work_relations=[ - IsPartOf(2, related=CreativeWork(2, identifiers=[WorkIdentifier(2)])), - ]), - ), - # circular relation - ( - CreativeWork(1, id=1, related_works=[CreativeWork(id=1)]), - CreativeWork(1, id=1), - ), - ]) - @pytest.mark.skip - def test_normalize_related_work(self, input, output, Graph, ExpectedGraph): - graph = Graph(input) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(output) - - @pytest.mark.parametrize('input, output', [ - ({'title': '', 'description': ''}, {'title': '', 'description': ''}), - ({'title': ' ', 'description': ' '}, {'title': '', 'description': ''}), - ({'title': 'Title\nLine'}, {'title': 'Title Line'}), - ({'description': 'Line\nAfter\nLine\nAfter\nLine'}, {'description': 'Line After Line After Line'}), - ({'description': 'null'}, {'description': ''}), - ]) - def test_normalize_creativework(self, input, output, Graph, ExpectedGraph): - graph = Graph(CreativeWork(**input)) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(CreativeWork(**output)) - - @pytest.mark.parametrize('input, output', [ - (input, Creator(cited_as='James Bond', agent=Person(name='James Bond')),) - for input in [ - Creator(cited_as=' \t James\n Bond \t ', agent=Person(name='James Bond')), - Creator(cited_as='', agent=Person(name='James Bond')), - Creator(cited_as='', agent=Person(name='James Bond')), - ] - ] + [ - (input, Contributor(cited_as='James Bond', agent=Person(name='James Bond')),) - for input in [ - Contributor(cited_as=' \t James\n Bond \t ', agent=Person(name='James Bond')), - Contributor(cited_as='', agent=Person(name='James Bond')), - ] - ] + [ - ( - Creator(cited_as='', agent=Person(given_name='James', family_name='Bond')), - Creator(cited_as='James Bond', agent=Person(name='James Bond', given_name='James', family_name='Bond')), - ), - ]) - def test_normalize_agentworkrelation(self, input, output, Graph, ExpectedGraph): - graph = Graph(input) - Regulator(validate=False).regulate(graph) - assert graph == ExpectedGraph(output) diff --git a/tests/share/normalize/test_v1.py b/tests/share/normalize/test_v1.py deleted file mode 100644 index d4e92d1f8..000000000 --- a/tests/share/normalize/test_v1.py +++ /dev/null @@ -1,202 +0,0 @@ -import json -import pytest - -from share.transform.chain import ctx -from share.transformers.v1_push import V1Transformer - - -class TestV1Transformer: - - @pytest.mark.parametrize('input, expected', [ - ({ - "contributors": [{ - "name": "Roger Movies Ebert", - "sameAs": ["https://osf.io/thing"], - "familyName": "Ebert", - "givenName": "Roger", - "additionalName": "Danger", - "email": "rogerebert@example.com" - }, { - "name": "Roger Madness Ebert" - }], - "languages": ["eng"], - "description": "This is a thing", - "providerUpdatedDateTime": "2014-12-12T00:00:00Z", - "freeToRead": { - "startDate": "2014-09-12", - "endDate": "2014-10-12" - }, - "licenses": [{ - "uri": "http://www.mitlicense.com", - "startDate": "2014-10-12T00:00:00Z", - "endDate": "2014-11-12T00:00:00Z" - }], - "publisher": { - "name": "Roger Ebert Inc", - "email": "roger@example.com" - }, - "sponsorships": [{ - "award": { - "awardName": "Participation", - "awardIdentifier": "http://example.com" - }, - "sponsor": { - "sponsorName": "Orange", - "sponsorIdentifier": "http://example.com/orange" - } - }], - "title": "Interesting research", - "version": {"versionId": "someID"}, - "uris": { - 
"canonicalUri": "http://example.com/document1", - "providerUris": [ - "http://example.com/document1uri1", - "http://example.com/document1uri2" - ] - } - }, { - '@type': 'creativework', - 'date_updated': '2014-12-12T00:00:00+00:00', - 'description': 'This is a thing', - 'language': 'eng', - 'identifiers': [ - {'@type': 'workidentifier', 'uri': 'http://example.com/document1'}, - {'@type': 'workidentifier', 'uri': 'http://example.com/document1uri1'}, - {'@type': 'workidentifier', 'uri': 'http://example.com/document1uri2'}, - ], - 'related_agents': [{ - '@type': 'creator', - 'cited_as': 'Roger Movies Ebert', - 'order_cited': 0, - 'agent': { - '@type': 'person', - 'name': 'Roger Movies Ebert', - 'related_agents': [], - 'identifiers': [ - {'@type': 'agentidentifier', 'uri': 'http://osf.io/thing'}, - {'@type': 'agentidentifier', 'uri': 'mailto:rogerebert@example.com'} - ], - }, - }, { - '@type': 'creator', - 'cited_as': 'Roger Madness Ebert', - 'order_cited': 1, - 'agent': { - '@type': 'person', - 'name': 'Roger Madness Ebert', - 'related_agents': [], - 'identifiers': [] - } - }, { - '@type': 'publisher', - 'cited_as': 'Roger Ebert Inc', - 'agent': { - '@type': 'organization', - 'name': 'Roger Ebert Inc', - 'related_agents': [], - 'identifiers': [ - {'@type': 'agentidentifier', 'uri': 'mailto:roger@example.com'}, - ] - } - }, { - '@type': 'funder', - 'awards': [ - { - '@type': 'throughawards', - 'award': {'@type': 'award', 'name': 'Participation', 'uri': 'http://example.com'} - } - ], - 'cited_as': 'Orange', - 'agent': { - '@type': 'organization', - 'name': 'Orange', - 'identifiers': [ - {'@type': 'agentidentifier', 'uri': 'http://example.com/orange'}, - ] - } - }], - 'subjects': [], - 'tags': [], - 'title': 'Interesting research', - }), ({ - "contributors": [], - "languages": ["eng"], - "description": "This is a thing", - "providerUpdatedDateTime": "2014-12-12T00:00:00Z", - "title": "Interesting research", - "uris": { - "canonicalUri": "http://example.com/document1", - "providerUris": [ - "http://example.com/document1uri1", - "http://example.com/document1uri2", - "http://example.com/document1uri2", - 'http://example.com/document1', - ] - } - }, { - '@type': 'creativework', - 'date_updated': '2014-12-12T00:00:00+00:00', - 'description': 'This is a thing', - 'language': 'eng', - 'identifiers': [ - {'@type': 'workidentifier', 'uri': 'http://example.com/document1'}, - {'@type': 'workidentifier', 'uri': 'http://example.com/document1uri1'}, - {'@type': 'workidentifier', 'uri': 'http://example.com/document1uri2'}, - ], - 'related_agents': [], - 'subjects': [], - 'tags': [], - 'title': 'Interesting research', - }), ({ - "contributors": [], - "languages": ["eng"], - "description": "This is a thing", - "providerUpdatedDateTime": "2014-12-12T00:00:00Z", - "title": "Interesting research", - "otherProperties": [{"name": "status", "properties": {"status": "deleted"}}], - "uris": { - "canonicalUri": "http://example.com/document1", - "providerUris": [ - 'http://example.com/document1', - "http://example.com/document1uri1", - "http://example.com/document1uri2", - "http://example.com/document1uri2", - ] - } - }, { - '@type': 'creativework', - 'date_updated': '2014-12-12T00:00:00+00:00', - 'description': 'This is a thing', - 'is_deleted': True, - 'language': 'eng', - 'identifiers': [ - {'@type': 'workidentifier', 'uri': 'http://example.com/document1'}, - {'@type': 'workidentifier', 'uri': 'http://example.com/document1uri1'}, - {'@type': 'workidentifier', 'uri': 'http://example.com/document1uri2'}, - ], - 
'related_agents': [],
-            'subjects': [],
-            'tags': [],
-            'title': 'Interesting research',
-        })
-    ])
-    def test_normalize(self, input, expected):
-        ctx.clear()
-        _, root_ref = V1Transformer({}).do_transform(json.dumps(input), clean_up=False)
-        actual = self.reconstruct(ctx.pool.pop(root_ref))
-        assert expected == actual
-
-    def reconstruct(self, document, extra=False):
-        for key, val in tuple(document.items()):
-            if isinstance(val, dict) and key != 'extra':
-                related = ctx.pool.pop(val, None)
-                if related:
-                    document[key] = self.reconstruct(related, extra=extra)
-                else:
-                    document.pop(key)
-            if isinstance(val, list):
-                document[key] = [self.reconstruct(ctx.pool.pop(v), extra=extra) for v in val]
-        del document['@id']
-        if not extra:
-            document.pop('extra', None)
-        return document
diff --git a/tests/share/normalize/test_xml.py b/tests/share/normalize/test_xml.py
deleted file mode 100644
index fc92382e8..000000000
--- a/tests/share/normalize/test_xml.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import xmltodict
-
-from share.transform.chain import *  # noqa
-
-
-EXAMPLE = '''
-<entry xmlns="http://www.w3.org/2005/Atom" xmlns:arxiv="http://arxiv.org/schemas/atom">
-    <id>http://arxiv.org/abs/cond-mat/0102536v1</id>
-    <updated>2001-02-28T20:12:09Z</updated>
-    <published>2001-02-28T20:12:09Z</published>
-    <title>Impact of Electron-Electron Cusp
-on Configuration Interaction Energies</title>
-    <summary>The effect of the electron-electron cusp on the convergence of configuration
-interaction (CI) wave functions is examined. By analogy with the
-pseudopotential approach for electron-ion interactions, an effective
-electron-electron interaction is developed which closely reproduces the
-scattering of the Coulomb interaction but is smooth and finite at zero
-electron-electron separation. The exact many-electron wave function for this
-smooth effective interaction has no cusp at zero electron-electron separation.
-We perform CI and quantum Monte Carlo calculations for He and Be atoms, both
-with the Coulomb electron-electron interaction and with the smooth effective
-electron-electron interaction. We find that convergence of the CI expansion of
-the wave function for the smooth electron-electron interaction is not
-significantly improved compared with that for the divergent Coulomb interaction
-for energy differences on the order of 1 mHartree. This shows that, contrary to
-popular belief, description of the electron-electron cusp is not a limiting
-factor, to within chemical accuracy, for CI calculations.
-</summary>
-    <author>
-        <name>David Prendergast</name>
-        <arxiv:affiliation>Department of Physics</arxiv:affiliation>
-    </author>
-    <author>
-        <name>M. Nolan</name>
-        <arxiv:affiliation>NMRC, University College, Cork, Ireland</arxiv:affiliation>
-    </author>
-    <author>
-        <name>Claudia Filippi</name>
-        <arxiv:affiliation>Department of Physics</arxiv:affiliation>
-    </author>
-    <author>
-        <name>Stephen Fahy</name>
-        <arxiv:affiliation>Department of Physics</arxiv:affiliation>
-    </author>
-    <author>
-        <name>J. C. Greer</name>
-        <arxiv:affiliation>NMRC, University College, Cork, Ireland</arxiv:affiliation>
-    </author>
-    <arxiv:doi>10.1063/1.1383585</arxiv:doi>
-    <arxiv:comment>11 pages, 6 figures, 3 tables, LaTeX209, submitted to The Journal of
- Chemical Physics</arxiv:comment>
-    <arxiv:journal_ref>J. Chem. Phys. 115, 1626 (2001)</arxiv:journal_ref>
-</entry>
-'''
-
-
-class Organization(Parser):
-    name = ctx
-
-
-class IsAffiliatedWith(Parser):
-    related = Delegate(Organization, ctx)
-
-
-class Person(Parser):
-    related_agents = Map(Delegate(IsAffiliatedWith), ctx.affiliation)
-    given_name = ParseName(ctx.name).first
-    family_name = ParseName(ctx.name).last
-
-
-class Creator(Parser):
-    agent = Delegate(Person, ctx)
-
-
-class Preprint(Parser):
-    title = ctx.entry.title
-    description = ctx.entry.summary
-    related_agents = Map(Delegate(Creator), ctx.entry.author)
-
-    class Extra:
-        comment = ctx.entry.comment
-        journal_ref = ctx.entry.journal_ref
-
-
-class TestParser:
-
-    def test_preprint_parser(self):
-        parsed = Preprint(
-            xmltodict.parse(
-                EXAMPLE,
-                process_namespaces=True,
-                namespaces={
-                    'http://www.w3.org/2005/Atom': None,
-                    'http://arxiv.org/schemas/atom': None,
-                }
-            )
-        ).parse()
-
-        assert isinstance(parsed, dict)
-        assert parsed['@type'] == 'preprint'
-        normalized = ctx.pool[parsed]
-        assert normalized['extra'] == {'comment': '11 pages, 6 figures, 3 tables, LaTeX209, submitted to The Journal of\n Chemical Physics', 'journal_ref': 'J. Chem. Phys. 115, 1626 (2001)'}
-
-        # no newlines, leading/trailing white space, or multiple spaces
-        assert normalized['title'] == 'Impact of Electron-Electron Cusp on Configuration Interaction Energies'
-        assert normalized['description'] == 'The effect of the electron-electron cusp on the convergence of configuration interaction (CI) wave functions is examined. By analogy with the pseudopotential approach for electron-ion interactions, an effective electron-electron interaction is developed which closely reproduces the scattering of the Coulomb interaction but is smooth and finite at zero electron-electron separation. The exact many-electron wave function for this smooth effective interaction has no cusp at zero electron-electron separation. We perform CI and quantum Monte Carlo calculations for He and Be atoms, both with the Coulomb electron-electron interaction and with the smooth effective electron-electron interaction. We find that convergence of the CI expansion of the wave function for the smooth electron-electron interaction is not significantly improved compared with that for the divergent Coulomb interaction for energy differences on the order of 1 mHartree. This shows that, contrary to popular belief, description of the electron-electron cusp is not a limiting factor, to within chemical accuracy, for CI calculations.'
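
A note on the namespace handling the deleted test above relied on: in xmltodict, any namespace URI mapped to None in the `namespaces` argument is collapsed, so parsed keys come out unprefixed ('entry', 'author', 'affiliation', ...). A minimal, standalone sketch of just that behavior, using a made-up one-element document rather than the arXiv fixture:

import xmltodict

# Namespaces mapped to None are dropped from key names entirely;
# mapped to a string, they would appear as that prefix instead.
parsed = xmltodict.parse(
    '<entry xmlns="http://www.w3.org/2005/Atom"><title>Example</title></entry>',
    process_namespaces=True,
    namespaces={'http://www.w3.org/2005/Atom': None},
)
assert parsed['entry']['title'] == 'Example'
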
diff --git a/tests/share/regulate/steps/test_bases.py b/tests/share/regulate/steps/test_bases.py deleted file mode 100644 index d962bc8e4..000000000 --- a/tests/share/regulate/steps/test_bases.py +++ /dev/null @@ -1,13 +0,0 @@ -import pytest - -from share.regulate.steps import NodeStep, GraphStep, ValidationStep -from share.util.extensions import Extensions - - -@pytest.mark.parametrize('namespace, base_class', [ - ('share.regulate.steps.node', NodeStep), - ('share.regulate.steps.graph', GraphStep), - ('share.regulate.steps.validate', ValidationStep), -]) -def test_step_bases(namespace, base_class): - assert all(issubclass(e.plugin, base_class) for e in Extensions._load_namespace(namespace)) diff --git a/tests/share/regulate/steps/test_block_extra_values.py b/tests/share/regulate/steps/test_block_extra_values.py deleted file mode 100644 index c068fa053..000000000 --- a/tests/share/regulate/steps/test_block_extra_values.py +++ /dev/null @@ -1,55 +0,0 @@ -import pytest - -from share.regulate.steps.block_extra_values import BlockExtraValues -from share.util.graph import MutableGraph - - -class TestBlockExtraValuesStep: - @pytest.fixture - def graph(self): - g = MutableGraph() - g.add_node(1, 'creativework', { - 'title': 'A work!', - 'extra': { - 'foo': 'flooby', - 'bah': 'hab', - }, - }) - g.add_node(2, 'creativework', { - 'title': 'Another work!', - 'extra': { - 'extra': 'extra', - 'bah': 'hab', - }, - }) - g.add_node(3, 'creativework', {'title': 'No extra :('}) - return g - - @pytest.mark.parametrize('blocked_values, expected_nodes', [ - ({'foo': 'flooby'}, {2, 3}), - ({'foo': 'flooby', 'match': 'nothing'}, {1, 2, 3}), - ({'extra': 'extra'}, {1, 3}), - ({'bah': 'hab'}, {3}), - ]) - def test_block_extras(self, graph, blocked_values, expected_nodes): - step = BlockExtraValues(blocked_values=blocked_values) - for node in list(graph): - step.regulate_node(node) - if node.id in expected_nodes: - assert node in graph - else: - assert node not in graph - assert len(graph) == len(expected_nodes) - - def test_error_on_bad_setting(self): - with pytest.raises(TypeError): - BlockExtraValues(bad_setting=True) - - # blocked_values required, must be non-empty dict - with pytest.raises(TypeError): - BlockExtraValues() - with pytest.raises(TypeError): - BlockExtraValues(blocked_values=['bad']) - with pytest.raises(TypeError): - BlockExtraValues(blocked_values={}) - BlockExtraValues(blocked_values={'this': 'works'}) diff --git a/tests/share/regulate/steps/test_deduplicate.py b/tests/share/regulate/steps/test_deduplicate.py deleted file mode 100644 index f5f002de3..000000000 --- a/tests/share/regulate/steps/test_deduplicate.py +++ /dev/null @@ -1,66 +0,0 @@ -import pytest - -from share.regulate.steps.deduplicate import Deduplicate - -from tests.share.normalize.factories import ( - CreativeWork, - Preprint, - Registration, - Subject, - WorkIdentifier, -) - - -class TestDeduplicate: - @pytest.mark.parametrize('input', [ - [Preprint(0, identifiers=[WorkIdentifier(1)])] - ]) - def test_no_change(self, Graph, input): - graph = Graph(input) - Deduplicate().run(graph) - assert graph == Graph(input) - - @pytest.mark.parametrize('input, output', [ - ([ - Preprint(0, identifiers=[WorkIdentifier(id=1, uri='http://osf.io/guidguid')]), - CreativeWork(id=1, sparse=True, identifiers=[WorkIdentifier(uri='http://osf.io/guidguid')]) - ], [ - Preprint(0, identifiers=[WorkIdentifier(uri='http://osf.io/guidguid')]), - ]), - ([ - Preprint(0, identifiers=[ - WorkIdentifier(uri='http://osf.io/guidguid'), - WorkIdentifier(4), - ]), - 
CreativeWork(id=1, sparse=True, identifiers=[WorkIdentifier(uri='http://osf.io/guidguid')]) - ], [ - Preprint(0, identifiers=[ - WorkIdentifier(4), - WorkIdentifier(uri='http://osf.io/guidguid'), - ]), - ]), - ([ - Registration(0, subjects=[ - Subject( - 0, - name='custom-child', - central_synonym=Subject(1, name='central-child', parent=Subject(3, name='central-parent')), - parent=Subject(2, name='custom-parent', central_synonym=Subject(3, name='central-parent')), - ) - for _ in range(3) - ]), - ], [ - Registration(0, subjects=[ - Subject( - 0, - name='custom-child', - central_synonym=Subject(1, name='central-child', parent=Subject(3, id='central-parent', name='central-parent')), - parent=Subject(2, name='custom-parent', central_synonym=Subject(id='central-parent')), - ) - ]), - ]), - ]) - def test_deduplicate(self, Graph, input, output): - graph = Graph(input) - Deduplicate().run(graph) - assert graph == Graph(output) diff --git a/tests/share/regulate/steps/test_normalize_iris.py b/tests/share/regulate/steps/test_normalize_iris.py deleted file mode 100644 index 4618571d1..000000000 --- a/tests/share/regulate/steps/test_normalize_iris.py +++ /dev/null @@ -1,93 +0,0 @@ -import pytest - -from share.regulate.steps.normalize_iris import NormalizeIRIs -from share.util.graph import MutableGraph - - -class TestNormalizeIRIsStep: - @pytest.mark.parametrize('schemes, authorities, expected_identifiers', [ - ([], [], 4), - (['mailto'], [], 3), - (['mailto', 'http'], [], 1), - ([], ['issn'], 3), - ([], ['osf.io', 'foo'], 3), - (['nothing'], ['everything'], 4), - (['http'], ['example.com', 'issn'], 0), - ]) - def test_blocks(self, schemes, authorities, expected_identifiers): - identifiers = [ - # (uri, scheme, authority) - ('http://osf.io/mst3k/', 'http', 'osf.io'), - ('mailto:foo@example.com', 'mailto', 'example.com'), - ('2049-3630', 'urn', 'issn'), - ('0000-0002-1825-0097', 'http', 'orcid.org'), - ] - - step = NormalizeIRIs(blocked_schemes=schemes, blocked_authorities=authorities) - graph = MutableGraph() - - for uri, scheme, authority in identifiers: - node = graph.add_node('id_{}'.format(authority), 'workidentifier', {'uri': uri}) - assert node['scheme'] is None - assert node['host'] is None - - step.regulate_node(node) - - if scheme not in schemes and authority not in authorities: - assert node['scheme'] == scheme - assert node['host'] == authority - - assert len(graph.filter_type('workidentifier')) == expected_identifiers - - @pytest.mark.parametrize('input, output', [ - ('', None), - (' ', None), - ('0000000248692412', None), - ('000000000248692419', None), - ('urn://issn/1476-4687', 'urn://issn/1476-4687'), - ('0000000248692419', 'http://orcid.org/0000-0002-4869-2419'), - ('0000-0002-4869-2419', 'http://orcid.org/0000-0002-4869-2419'), - ('0000-0002-4869-2419', 'http://orcid.org/0000-0002-4869-2419'), - ('Beau, R ; Douglas, I ; Evans, S ; Clayton, T ; Smeeth, L ; (2011) How Long Do Children Stay on Antiepileptic Treatments in the UK? 
[Conference or Workshop Item]', None), - ]) - def test_normalize_agentidentifier(self, input, output): - graph = MutableGraph() - node = graph.add_node('1', 'agentidentifier', {'uri': input}) - NormalizeIRIs().regulate_node(node) - if output: - assert node['uri'] == output - else: - assert len(graph) == 0 - - @pytest.mark.parametrize('input, output', [ - ('', None), - ('htp://google.com', None), - ('blackmagic://goat.hooves', None), - ('1476-4687 ', None), - ('urn://issn/1476-4687', None), - ('0000000248692412', None), - ('https://orcid.org/0000-0002-1694-233X', None), - ('aperson@dinosaurs.sexy', None), - ('10.517ccdc.csd.c>c1lj81f', None), - ('http://arxiv.org/index.php?view&id=12', 'http://arxiv.org/index.php?view&id=12'), - ('10.5517/ccdc.csd.cc1lj81f', 'http://dx.doi.org/10.5517/CCDC.CSD.CC1LJ81F'), - (' arxiv:1212.20282 ', 'http://arxiv.org/abs/1212.20282'), - ('oai:subdomain.cos.io:this.is.stuff', 'oai://subdomain.cos.io/this.is.stuff'), - ('Beau, R ; Douglas, I ; Evans, S ; Clayton, T ; Smeeth, L ; (2011) How Long Do Children Stay on Antiepileptic Treatments in the UK? [Conference or Workshop Item]', None), - ]) - def test_normalize_workidentifier(self, input, output): - graph = MutableGraph() - node = graph.add_node('1', 'workidentifier', {'uri': input}) - step = NormalizeIRIs(blocked_schemes=['mailto'], blocked_authorities=['issn', 'orcid.org']) - step.regulate_node(node) - if output: - assert node['uri'] == output - else: - assert len(graph) == 0 - - def test_error_on_bad_settings(self): - with pytest.raises(TypeError): - NormalizeIRIs(bad_setting=True) - - # No required settings - NormalizeIRIs() diff --git a/tests/share/regulate/test_regulator.py b/tests/share/regulate/test_regulator.py deleted file mode 100644 index 83a51458b..000000000 --- a/tests/share/regulate/test_regulator.py +++ /dev/null @@ -1,63 +0,0 @@ -import pytest -from unittest import mock - -from share.regulate.regulator import Regulator, Steps, InfiniteRegulationError, RegulatorConfigError -from share.regulate.steps import NodeStep, GraphStep, ValidationStep -from share.util.graph import MutableGraph - - -@pytest.mark.parametrize('num_node_steps', [0, 1, 5]) -@pytest.mark.parametrize('num_graph_steps', [0, 1, 5]) -@pytest.mark.parametrize('num_validation_steps', [0, 1, 5]) -@pytest.mark.parametrize('num_nodes', range(0, 100, 20)) -class TestRegulatorCallsRun: - - @pytest.fixture - def mock_steps(self, monkeypatch, num_node_steps, num_graph_steps, num_validation_steps): - mock_steps = { - 'node': [mock.Mock(NodeStep, logs=[]) for _ in range(num_node_steps)], - 'graph': [mock.Mock(GraphStep, logs=[]) for _ in range(num_graph_steps)], - 'validate': [mock.Mock(ValidationStep, logs=[]) for _ in range(num_validation_steps)], - } - - def patched_steps(self, _, namespace): - return mock_steps[namespace.split('.')[-1]] - - monkeypatch.setattr(Steps, '_load_steps', patched_steps) - return mock_steps - - def test_calls_run(self, mock_steps, num_nodes): - graph = MutableGraph() - for i in range(num_nodes): - graph.add_node(i, 'creativework') - Regulator(regulator_config={'not': 'empty'}).regulate(graph) - assert all(s.run.call_count == 1 for st in mock_steps.values() for s in st) - - -class InfiniteGraphStep(GraphStep): - counter = 0 - - def regulate_graph(self, graph): - node = next(n for n in graph) - node['foo'] = self.counter - self.counter += 1 - - -class TestRegulatorError: - - def test_infinite_regulate(self): - reg = Regulator() - reg._default_steps.graph_steps = (InfiniteGraphStep(),) - graph = MutableGraph() - 
graph.add_node(None, 'agent', {'name': 'Agent Agent'}) - with pytest.raises(InfiniteRegulationError): - reg.regulate(graph) - - @pytest.mark.parametrize('config', [ - {'NODE_STEPS': 7}, - {'NODE_STEPS': [7]}, - {'GRAPH_STEPS': 'NODE_STEPS'}, - ]) - def test_broken_config(self, config): - with pytest.raises(RegulatorConfigError): - Regulator(regulator_config=config) diff --git a/tests/share/schema/test_schema.py b/tests/share/schema/test_schema.py deleted file mode 100644 index 05eb6cb7a..000000000 --- a/tests/share/schema/test_schema.py +++ /dev/null @@ -1,241 +0,0 @@ -import pytest - -from share.schema import ShareV2Schema -from share.schema.exceptions import SchemaKeyError -from share.schema.shapes import AttributeDataType, AttributeDataFormat, RelationShape -from share.schema.shapes import ShareV2SchemaType, ShareV2SchemaAttribute, ShareV2SchemaRelation - -WORK_TYPES = { - 'CreativeWork', - 'DataSet', - 'Patent', - 'Poster', - 'Publication', - 'Article', - 'Book', - 'ConferencePaper', - 'Dissertation', - 'Preprint', - 'Project', - 'Registration', - 'Report', - 'Thesis', - 'WorkingPaper', - 'Presentation', - 'Repository', - 'Retraction', - 'Software', -} - -EXPLICIT_WORK_FIELDS = { - 'title', - 'description', - 'is_deleted', - 'date_published', - 'date_updated', - 'free_to_read_type', - 'free_to_read_date', - 'rights', - 'language', - 'registration_type', - 'withdrawn', - 'justification', - 'extra', - 'subjects', - 'tags', - 'related_agents', - 'related_works', -} - -AGENT_TYPES = { - 'Agent', - 'Organization', - 'Consortium', - 'Department', - 'Institution', - 'Person', -} - -EXPLICIT_AGENT_FIELDS = { - 'name', - 'location', - 'family_name', - 'given_name', - 'additional_name', - 'suffix', - 'extra', - 'related_agents', - 'related_works', -} - - -class TestStaticSchema: - @pytest.fixture(scope='class') - def schema(self): - return ShareV2Schema() - - @pytest.mark.parametrize('type_name, expected', [ - ('registration', ShareV2SchemaType( - 'Registration', - 'AbstractCreativeWork', - EXPLICIT_WORK_FIELDS, - ('Registration', 'Publication', 'CreativeWork'), - )), - ('publication', ShareV2SchemaType( - 'Publication', - 'AbstractCreativeWork', - EXPLICIT_WORK_FIELDS, - ('Publication', 'CreativeWork'), - )), - ('creativework', ShareV2SchemaType( - 'CreativeWork', - 'AbstractCreativeWork', - EXPLICIT_WORK_FIELDS, - ('CreativeWork',), - )), - ('consortium', ShareV2SchemaType( - 'Consortium', - 'AbstractAgent', - EXPLICIT_AGENT_FIELDS, - ('Consortium', 'Organization', 'Agent'), - )), - ('person', ShareV2SchemaType( - 'Person', - 'AbstractAgent', - EXPLICIT_AGENT_FIELDS, - ('Person', 'Agent'), - )), - ('agent', ShareV2SchemaType( - 'Agent', - 'AbstractAgent', - EXPLICIT_AGENT_FIELDS, - ('Agent',), - )), - ]) - def test_get_type(self, schema, type_name, expected): - actual = schema.get_type(type_name) - assert actual == expected - - @pytest.mark.parametrize('type_name', WORK_TYPES) - @pytest.mark.parametrize('field_name, expected', [ - ('title', ShareV2SchemaAttribute( - 'title', - data_type=AttributeDataType.STRING, - data_format=None, - is_required=False, - )), - ('free_to_read_type', ShareV2SchemaAttribute( - 'free_to_read_type', - data_type=AttributeDataType.STRING, - data_format=AttributeDataFormat.URI, - is_required=False, - )), - ('extra', ShareV2SchemaAttribute( - 'extra', - data_type=AttributeDataType.OBJECT, - data_format=None, - is_required=False, - )), - ('tags', ShareV2SchemaRelation( - 'tags', - relation_shape=RelationShape.MANY_TO_MANY, - related_concrete_type='Tag', - 
through_concrete_type='ThroughTags', - outgoing_through_relation='tag', - incoming_through_relation='creative_work', - inverse_relation='creative_works', - is_required=False, - is_implicit=False, - )), - ('agent_relations', ShareV2SchemaRelation( - 'agent_relations', - relation_shape=RelationShape.ONE_TO_MANY, - related_concrete_type='AbstractAgentWorkRelation', - inverse_relation='creative_work', - is_required=False, - is_implicit=True, - )), - ]) - def test_spot_check_work_fields(self, schema, type_name, field_name, expected): - actual = schema.get_field(type_name, field_name) - assert actual == expected - - @pytest.mark.parametrize('type_name', AGENT_TYPES) - @pytest.mark.parametrize('field_name, expected', [ - ('name', ShareV2SchemaAttribute( - 'name', - data_type=AttributeDataType.STRING, - data_format=None, - is_required=False, - )), - ('suffix', ShareV2SchemaAttribute( - 'suffix', - data_type=AttributeDataType.STRING, - data_format=None, - is_required=False, - )), - ('identifiers', ShareV2SchemaRelation( - 'identifiers', - relation_shape=RelationShape.ONE_TO_MANY, - related_concrete_type='AgentIdentifier', - inverse_relation='agent', - is_required=False, - is_implicit=True, - )), - ('related_works', ShareV2SchemaRelation( - 'related_works', - relation_shape=RelationShape.MANY_TO_MANY, - related_concrete_type='AbstractCreativeWork', - through_concrete_type='AbstractAgentWorkRelation', - outgoing_through_relation='creative_work', - incoming_through_relation='agent', - inverse_relation='related_agents', - is_required=False, - is_implicit=False, - )), - ('work_relations', ShareV2SchemaRelation( - 'work_relations', - relation_shape=RelationShape.ONE_TO_MANY, - related_concrete_type='AbstractAgentWorkRelation', - inverse_relation='agent', - is_required=False, - is_implicit=True, - )), - ]) - def test_spot_check_agent_fields(self, schema, type_name, field_name, expected): - actual = schema.get_field(type_name, field_name) - assert actual == expected - - @pytest.mark.parametrize('concrete_type, expected_type_names', ( - ('abstractcreativework', WORK_TYPES), - ('ABSTRACTCREATIVEWORK', WORK_TYPES), - ('abstractagent', AGENT_TYPES), - ('tag', {'Tag'}), - ('award', {'Award'}), - ('throughtags', {'ThroughTags'}), - ('Subject', {'Subject'}), - ('throughsubjects', {'ThroughSubjects'}), - ('throuGHAWards', {'ThroughAwards'}), - ('award', {'Award'}), - )) - def test_get_type_names(self, schema, concrete_type, expected_type_names): - type_names = schema.get_type_names(concrete_type) - assert set(type_names) == expected_type_names - - @pytest.mark.parametrize('type_name, field_name', ( - ('preprint', 'name'), - ('bad_type', 'bad_field'), - )) - def test_get_invalid_field(self, schema, type_name, field_name): - with pytest.raises(SchemaKeyError): - schema.get_field(type_name, field_name) - - @pytest.mark.parametrize('type_name', ( - 'abstractcreativework', - 'AbstractCreativeWork', - 'bad', - 'abstractagent', - )) - def test_get_invalid_type(self, type_name, schema): - with pytest.raises(SchemaKeyError): - schema.get_type('bad') diff --git a/tests/share/schema/test_schema_loader.py b/tests/share/schema/test_schema_loader.py deleted file mode 100644 index 685c3b360..000000000 --- a/tests/share/schema/test_schema_loader.py +++ /dev/null @@ -1,237 +0,0 @@ -import pytest - -from share.schema.exceptions import SchemaLoadError -from share.schema.loader import SchemaLoader -from share.schema.shapes import ( - ShareV2SchemaType, - ShareV2SchemaAttribute, - ShareV2SchemaRelation, - AttributeDataType, - 
RelationShape, -) - - -@pytest.mark.parametrize('bad_attribute', [ - {}, - {'name': 'foo'}, - {'name': 'foo', 'data_type': 'bad_data_type'}, - {'name': 'foo', 'data_type': 'string', 'data_format': 'bad_data_format'}, -]) -def test_bad_attributes(bad_attribute): - type_spec_list = [{ - 'concrete_type': 'cement', - 'attributes': [bad_attribute], - }] - with pytest.raises(SchemaLoadError): - SchemaLoader(type_spec_list) - - -@pytest.mark.parametrize('bad_relations', [ - [{}], - [{'name': 'foo'}], - [{'name': 'foo', 'relation_shape': 'one_to_many'}], - [{ - 'name': 'foo', - 'relation_shape': 'one_to_many', - 'related_concrete_type': 'bad_cement', - }], - [ - {'name': 'foo', 'relation_shape': 'one_to_many', 'related_concrete_type': 'cement', 'inverse_relation': 'bar'}, - {'name': 'bar', 'relation_shape': 'one_to_many', 'related_concrete_type': 'cement', 'inverse_relation': 'foo'}, - ], -]) -def test_bad_relations(bad_relations): - type_spec_list = [ - { - 'concrete_type': 'cement', - 'relations': bad_relations, - } - ] - with pytest.raises(SchemaLoadError): - SchemaLoader(type_spec_list) - - -conflictly_type_spec_list = [{ - 'concrete_type': 'cement', - 'attributes': [ - {'name': 'foo', 'data_type': 'string'}, - ], - 'relations': [ - {'name': 'foo', 'relation_shape': 'one_to_many', 'related_concrete_type': 'cement', 'inverse_relation': 'bar'}, - ] -}] - - -def test_conflicts(): - with pytest.raises(SchemaLoadError): - SchemaLoader(conflictly_type_spec_list) - - -class TestGoodSchema: - @pytest.fixture(scope='class') - def loader(self): - return SchemaLoader([{ - 'concrete_type': 'cement', - 'attributes': [ - {'name': 'ash', 'data_type': 'string'}, - ], - 'relations': [ - { - 'name': 'foo', - 'relation_shape': 'many_to_many', - 'related_concrete_type': 'cement', - 'through_concrete_type': 'foobar', - 'incoming_through_relation': 'inverse_bar', - 'outgoing_through_relation': 'inverse_foo', - 'inverse_relation': 'bar', - }, - { - 'name': 'bar', - 'relation_shape': 'many_to_many', - 'related_concrete_type': 'cement', - 'through_concrete_type': 'foobar', - 'incoming_through_relation': 'inverse_foo', - 'outgoing_through_relation': 'inverse_bar', - 'inverse_relation': 'foo', - }, - ], - }, { - 'concrete_type': 'asphalt', - 'type_tree': { - 'bitumen': { - 'dilbit': {}, - }, - 'tarmac': None, - }, - 'attributes': [ - {'name': 'gravel', 'data_type': 'integer'}, - ], - 'relations': [ - { - 'name': 'cement', - 'relation_shape': 'many_to_one', - 'related_concrete_type': 'cement', - 'inverse_relation': 'implicit_asphalts', - }, - { - 'name': 'cements', - 'relation_shape': 'one_to_many', - 'related_concrete_type': 'cement', - 'inverse_relation': 'implicit_asphalt', - }, - ], - }, { - 'concrete_type': 'foobar', - 'relations': [ - { - 'name': 'inverse_foo', - 'relation_shape': 'many_to_one', - 'related_concrete_type': 'cement', - 'inverse_relation': 'foo_bars', - }, - { - 'name': 'inverse_bar', - 'relation_shape': 'many_to_one', - 'related_concrete_type': 'cement', - 'inverse_relation': 'bar_foos', - }, - ], - }]) - - def test_type_names(self, loader): - assert loader.concrete_types == {'cement', 'asphalt', 'foobar'} - - # concrete type 'asphalt' has subtypes, so shouldn't be in schema_types - actual_type_names = set(st.name for st in loader.schema_types.values()) - assert actual_type_names == {'cement', 'bitumen', 'dilbit', 'tarmac', 'foobar'} - - @pytest.mark.parametrize('type_name, expected', [ - ('cement', ShareV2SchemaType( - 'cement', - 'cement', - {'ash', 'foo', 'bar'}, - )), - ('bitumen', 
ShareV2SchemaType( - 'bitumen', - 'asphalt', - {'gravel', 'cement', 'cements'}, - ('bitumen',), - )), - ('dilbit', ShareV2SchemaType( - 'dilbit', - 'asphalt', - {'gravel', 'cement', 'cements'}, - ('dilbit', 'bitumen'), - )), - ('tarmac', ShareV2SchemaType( - 'tarmac', - 'asphalt', - {'gravel', 'cement', 'cements'}, - ('tarmac',), - )), - ]) - def test_schema_types(self, loader, type_name, expected): - actual = loader.schema_types[type_name] - assert actual == expected - - @pytest.mark.parametrize('type_name, field_name, expected', [ - ('cement', 'ash', ShareV2SchemaAttribute( - 'ash', - data_type=AttributeDataType.STRING, - data_format=None, - is_required=False, - )), - ('cement', 'foo', ShareV2SchemaRelation( - 'foo', - relation_shape=RelationShape.MANY_TO_MANY, - related_concrete_type='cement', - through_concrete_type='foobar', - incoming_through_relation='inverse_bar', - outgoing_through_relation='inverse_foo', - inverse_relation='bar', - )), - ('cement', 'bar', ShareV2SchemaRelation( - 'bar', - relation_shape=RelationShape.MANY_TO_MANY, - related_concrete_type='cement', - through_concrete_type='foobar', - incoming_through_relation='inverse_foo', - outgoing_through_relation='inverse_bar', - inverse_relation='foo', - )), - ('cement', 'implicit_asphalt', ShareV2SchemaRelation( - 'implicit_asphalt', - relation_shape=RelationShape.MANY_TO_ONE, - related_concrete_type='asphalt', - inverse_relation='cements', - is_implicit=True, - )), - ('cement', 'implicit_asphalts', ShareV2SchemaRelation( - 'implicit_asphalts', - relation_shape=RelationShape.ONE_TO_MANY, - related_concrete_type='asphalt', - inverse_relation='cement', - is_implicit=True, - )), - ('asphalt', 'gravel', ShareV2SchemaAttribute( - 'gravel', - data_type=AttributeDataType.INTEGER, - data_format=None, - is_required=False, - )), - ('asphalt', 'cement', ShareV2SchemaRelation( - 'cement', - relation_shape=RelationShape.MANY_TO_ONE, - related_concrete_type='cement', - inverse_relation='implicit_asphalts', - )), - ('asphalt', 'cements', ShareV2SchemaRelation( - 'cements', - relation_shape=RelationShape.ONE_TO_MANY, - related_concrete_type='cement', - inverse_relation='implicit_asphalt', - )), - ]) - def test_schema_fields(self, loader, type_name, field_name, expected): - actual = loader.schema_fields[(type_name, field_name)] - assert actual == expected diff --git a/tests/share/test_harvester.py b/tests/share/test_harvester.py deleted file mode 100644 index 6cab31f08..000000000 --- a/tests/share/test_harvester.py +++ /dev/null @@ -1,171 +0,0 @@ -from unittest import mock -import datetime -import pytest - -import pendulum -import stevedore -import pkg_resources - -from share.harvest.base import BaseHarvester -from share.harvest.serialization import DeprecatedDefaultSerializer, StringLikeSerializer -from share.util.extensions import Extensions - -from tests import factories - - -@pytest.fixture(scope='class') -def mock_harvester_key(): - stevedore.ExtensionManager('share.harvesters') # Force extensions to load - _harvester_key = 'mockmock' - - class MockHarvester(BaseHarvester): - KEY = _harvester_key - VERSION = 1 - SERIALIZER_CLASS = StringLikeSerializer - _do_fetch = factories.ListGenerator() - - mock_entry = mock.create_autospec(pkg_resources.EntryPoint, instance=True) - mock_entry.name = _harvester_key - mock_entry.module_name = _harvester_key - mock_entry.resolve.return_value = MockHarvester - stevedore.ExtensionManager.ENTRY_POINT_CACHE['share.harvesters'].append(mock_entry) - Extensions._load_namespace('share.harvesters') - return 
_harvester_key - - -@pytest.mark.usefixtures('nested_django_db') -class TestHarvesterInterface: - - @pytest.fixture(scope='class', params=[(True, True), (True, False), (False, True), (False, False)]) - def source_config(self, request, class_scoped_django_db, mock_harvester_key): - config_disabled, source_deleted = request.param - return factories.SourceConfigFactory( - disabled=config_disabled, - source__is_deleted=source_deleted, - harvester_key=mock_harvester_key, - ) - - @pytest.fixture(scope='class') - def harvester(self, source_config, class_scoped_django_db): - return source_config.get_harvester() - - def test_passes_kwargs(self, source_config): - config_kwargs = { - 'one': 'kwarg', - 'another': 'kwarg', - } - custom_kwargs = { - 'test': 'value', - 'one': 'overridden', - } - start = pendulum.parse('2017-07-01') - end = pendulum.parse('2017-07-05') - source_config.harvester_kwargs = config_kwargs - harvester = source_config.get_harvester() - harvester._do_fetch = mock.MagicMock() - - [x for x in harvester.fetch_date_range(start, end, **custom_kwargs)] - - harvester._do_fetch.assert_called_once_with(start, end, **{**config_kwargs, **custom_kwargs}) - - def test_no_do_harvest(self, harvester): - assert not hasattr(harvester, 'do_harvest') - - def test__do_fetch_not_implemented(self, harvester): - with pytest.raises(NotImplementedError): - BaseHarvester._do_fetch(harvester, None, None) - - def test_fetch_date(self, harvester, monkeypatch): - monkeypatch.setattr(harvester, 'fetch_date_range', mock.Mock(), raising=False) - - harvester.fetch_date(pendulum.parse('2016-01-05'), custom='kwarg') - - assert harvester.fetch_date_range.assert_called_once_with( - pendulum.parse('2016-01-04'), - pendulum.parse('2016-01-05'), - custom='kwarg' - ) is None - - @pytest.mark.parametrize('start, end', [ - (1, 2), - (0, None), - (None, None), - ('2016-01-01', '2015-01-01'), - (pendulum.parse('2016-01-01').date(), datetime.timedelta(days=1)), - ]) - def test_requires_dates(self, harvester, start, end): - with pytest.raises(TypeError): - list(harvester.fetch_date_range(start, end)) - - def test_start_must_be_before_end(self, harvester): - with pytest.raises(ValueError) as e: - list(harvester.fetch_date_range( - pendulum.parse('2016-01-05'), - pendulum.parse('2016-01-04'), - )) - assert e.value.args == ("start must be before end. 
DateTime(2016, 1, 5, 0, 0, 0, tzinfo=Timezone('UTC')) > DateTime(2016, 1, 4, 0, 0, 0, tzinfo=Timezone('UTC'))", ) - - def test__do_fetch_must_be_generator(self, harvester): - harvester._do_fetch = lambda *_, **__: [1, 2] - - with pytest.raises(TypeError) as e: - list(harvester.fetch()) - - assert e.value.args == ('{!r}._do_fetch must return a GeneratorType for optimal performance and memory usage'.format(harvester), ) - - def test_harvest_no_pretty(self, harvester): - assert harvester.serializer.pretty is False - harvester.serializer.pretty = True - - assert harvester.serializer.pretty is True - with pytest.raises(ValueError) as e: - list(harvester.harvest()) - - assert e.value.args == ('To ensure that data is optimally deduplicated, harvests may not occur while using a pretty serializer.', ) - - def fetch_pretty(self, harvester): - assert harvester.serializer.pretty is False - list(harvester.fetch()) - - harvester.serializer.pretty = True - - assert harvester.serializer.pretty is True - list(harvester.fetch()) - - -@pytest.mark.usefixtures('nested_django_db') -class TestHarvesterBackwardsCompat: - - @pytest.fixture(scope='class') - def source_config(self, class_scoped_django_db, mock_harvester_key): - return factories.SourceConfigFactory(harvester_key=mock_harvester_key) - - @pytest.fixture(scope='class') - def harvester(self, source_config, class_scoped_django_db): - harvester = source_config.get_harvester() - harvester.serializer = DeprecatedDefaultSerializer() - return harvester - - def test_fetch_date_range_calls_do_harvest(self, harvester, monkeypatch): - monkeypatch.setattr(harvester, 'do_harvest', mock.Mock(), raising=False) - - BaseHarvester._do_fetch( - harvester, - pendulum.parse('2017-01-01').date(), - pendulum.parse('2017-01-02').date(), - ) - - assert harvester.do_harvest.assert_called_once_with( - pendulum.parse('2017-01-01').date(), - pendulum.parse('2017-01-02').date(), - ) is None - - def test_default_serializer(self, harvester): - assert isinstance(harvester.serializer.serialize('data'), str) - assert isinstance(harvester.serializer.serialize(b'data'), str) - assert isinstance(harvester.serializer.serialize({'data': 'value'}), str) - - def test_calls_shift_range(self, harvester, monkeypatch): - monkeypatch.setattr(harvester, 'shift_range', mock.Mock(return_value=(1, 2)), raising=False) - list(harvester.fetch()) - assert harvester.shift_range.called is True diff --git a/tests/share/transformers/test_mods.py b/tests/share/transformers/test_mods.py deleted file mode 100644 index d2ec6b5d2..000000000 --- a/tests/share/transformers/test_mods.py +++ /dev/null @@ -1,41 +0,0 @@ -import pytest - -from share.transformers.mods import MODSTransformer - -from tests.share.transformers.test_oai_dc import TestSetsFilter - - -class TestModsSetsFilter(TestSetsFilter): - - TRANSFORMER_CLASS = MODSTransformer - - @pytest.fixture - def datum(self): - return ''' - -
-<record xmlns="http://www.openarchives.org/OAI/2.0/">
-    <header>
-        <identifier>urn:identifier</identifier>
-        <datestamp>2017-10-11T01:00:12Z</datestamp>
-        <setSpec>set1</setSpec>
-        <setSpec>set2</setSpec>
-    </header>
-    <metadata>
-        <mods xmlns="http://www.loc.gov/mods/v3">
-            <name>
-                <namePart>Greg Universe</namePart>
-            </name>
-            <originInfo>
-                <dateIssued>2017-03</dateIssued>
-            </originInfo>
-            <abstract>Abstract abstract</abstract>
-            <language>
-                <languageTerm>en</languageTerm>
-            </language>
-            <titleInfo>
-                <title>Title title title</title>
-            </titleInfo>
-            <genre>Presentation</genre>
-        </mods>
-    </metadata>
-</record>
- ''' diff --git a/tests/share/transformers/test_oai_dc.py b/tests/share/transformers/test_oai_dc.py deleted file mode 100644 index ab98b0a42..000000000 --- a/tests/share/transformers/test_oai_dc.py +++ /dev/null @@ -1,58 +0,0 @@ -import pytest - -from share.models import SourceConfig -from share.transformers.oai import OAITransformer - - -class TestSetsFilter: - - TRANSFORMER_CLASS = OAITransformer - - @pytest.fixture - def datum(self): - return ''' - -
-<record xmlns="http://www.openarchives.org/OAI/2.0/">
-    <header>
-        <identifier>urn:identifier</identifier>
-        <datestamp>2017-09-12T13:35:17Z</datestamp>
-        <setSpec>set1</setSpec>
-        <setSpec>set2</setSpec>
-    </header>
-    <metadata>
-        <oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/">
-            <dc:title>Title title title</dc:title>
-            <dc:creator>Greg Universe</dc:creator>
-            <dc:date>2017-09-11T03:13:21Z</dc:date>
-            <dc:type>article</dc:type>
-        </oai_dc:dc>
-    </metadata>
-</record>
- '''.strip() - - @pytest.mark.parametrize('approved_sets, blocked_sets, expect_allowed', [ - (None, None, True), - ([], [], True), - (['set1'], None, True), - (['set2'], None, True), - (None, ['set1'], False), - (None, ['set2'], False), - (['other'], [], False), - ([], ['foo', 'bar'], True), - (['set1'], ['set1'], False), - (['set1'], ['set2'], False), - (['one', 'two'], ['three', 'four'], False), - (['one', 'two', 'set2'], ['three', 'four'], True), - (['one', 'two'], ['three', 'four', 'set1'], False), - ]) - def test_sets(self, datum, approved_sets, blocked_sets, expect_allowed): - source_config = SourceConfig(transformer_kwargs={ - 'approved_sets': approved_sets, - 'blocked_sets': blocked_sets - }) - transformer = self.TRANSFORMER_CLASS(source_config) - res = transformer.transform(datum) - if expect_allowed: - assert res is not None - else: - assert res is None diff --git a/tests/share/transformers/test_socialscienceregistry_transformer.py b/tests/share/transformers/test_socialscienceregistry_transformer.py deleted file mode 100644 index a8a9d66ab..000000000 --- a/tests/share/transformers/test_socialscienceregistry_transformer.py +++ /dev/null @@ -1,65 +0,0 @@ -import pytest - -from share.models import SourceConfig - -data = r''' -{ - "record": [ - "OpenTeQ - Opening the black box of Teacher Quality", - "https://www.socialscienceregistry.org/trials/1638", - "June 06, 2017", - "2017-06-06 11:59:10 -0400", - "2017-06-06", - "AEARCTR-0001638", - "Daniel Adam, dan@gmail.com", - "on_going", - "2016-03-01", - "2018-09-22", - "[\"education\", \"\"]", - "", - "test description", - "2016-10-20", - "2017-07-15", - "demo text", - "description", - "plan", - "", - "Randomization done in office by a computer, by a researcher external to the project staff (Giovanni Abbiati - IRVAPP, abbiati@irvapp.it", - "schools (whithin blocks)", - "198 schools whithin 8 blocks", - "around 2.200 teachers teaching Math or Italian to 7th gradersaround 24.000 students for each grade (6th, 7th, 8th)", - "50 schools individual treatment50 schools collective treatment98 schools control", - "demo text", - "Private", - "This section is unavailable to the public.", - "", - "", - "", - "", - "", - "", - "", - "This section is unavailable to the public. Use the button below to request access to this information.", - "", - "", - "", - "", - "", - "" - ] -} -''' - - -@pytest.mark.django_db -def test_AEA_transformer(): - config = SourceConfig.objects.get(label='org.socialscienceregistry') - transformer = config.get_transformer() - graph = transformer.transform(data) - registration = graph.filter_nodes(lambda n: n.type == 'registration')[0] - - assert registration.type == 'registration' - assert registration['description'] == 'test description' - assert registration['title'] == 'OpenTeQ - Opening the black box of Teacher Quality' - assert registration['extra']['primary_investigator'] == {'email': 'dan@gmail.com', 'name': 'Daniel Adam'} - assert registration['extra']['interventions'] == {'end-date': '2017-07-15', 'start-date': '2016-10-20'} diff --git a/tests/share/transformers/test_swbiodiversity_transformer.py b/tests/share/transformers/test_swbiodiversity_transformer.py deleted file mode 100644 index c2a24c69e..000000000 --- a/tests/share/transformers/test_swbiodiversity_transformer.py +++ /dev/null @@ -1,81 +0,0 @@ -import pytest - -from share.harvest.base import FetchResult -from share.models import SourceConfig, RawDatum - -data = ''' -
-<div id="innertext">
-<h1>A. Michael Powell Herbarium (SRSC)</h1>
-<div>
-Sample description
-</div>
-<div>
-<b>Contact:</b> Test Author (<a href="mailto:author@email.com">author@email.com</a>)
-</div>
-<div>
-<b>Collection Type:</b> Preserved Specimens
-</div>
-<div>
-<b>Management:</b> Data snapshot of local collection database<br/>
-<b>Last Update:</b> 1 October 2016
-</div>
-<div>
-<b>Usage Rights:</b> <a href="http://creativecommons.org/licenses/by-nc/3.0/">CC BY-NC (Attribution-Non-Commercial)</a>
-</div>
-<div>
-<b>Rights Holder:</b> Sul Ross University
-</div>
-<div>
-<b>Collection Statistics:</b>
-<ul>
-<li>4,868 specimen records</li>
-<li>1,195 (25%) georeferenced</li>
-<li>2,954 (61%) with images</li>
-<li>2,849 (59%) identified to species</li>
-<li>104 families</li>
-<li>361 genera</li>
-<li>661 species</li>
-<li>762 total taxa (including subsp. and var.)</li>
-</ul>
-</div>
-</div>
-''' - - -@pytest.mark.django_db -def test_swbiodiversity_transformer(): - config = SourceConfig.objects.get(label=('org.swbiodiversity')) - transformer = config.get_transformer() - fetch_result = FetchResult('http://swbiodiversity.org/seinet/collections/misc/collprofiles.php?collid=187', data) - raw_datum = RawDatum.objects.store_data(config, fetch_result) - - graph = transformer.transform(raw_datum) - - dataset = graph.filter_nodes(lambda n: n.type == 'dataset')[0] - - assert dataset.type == 'dataset' - assert dataset['description'] == 'Sample description' - assert dataset['title'] == 'A. Michael Powell Herbarium (SRSC)' - assert dataset['extra']['usage_rights'] == 'CC BY-NC (Attribution-Non-Commercial)' - assert dataset['extra']['access_rights'] == 'Sul Ross University' - assert dataset['extra']['collection_statistics'] == { - "(25%) georeferenced": "1,195", - "(59%) identified to species": "2,849", - "(61%) with images": "2,954", - "families": "104", - "genera": "361", - "species": "661", - "specimen records": "4,868", - "total taxa (including subsp. and var.)": "762" - } - - agent_relations = dataset['agent_relations'] - assert len(agent_relations) == 1 - agent = agent_relations[0]['agent'] - assert agent['given_name'] == 'Test' - assert agent['identifiers'][0]['uri'] == 'mailto:author@email.com' - - identifiers = dataset['identifiers'] - assert len(identifiers) == 1 - assert identifiers[0]['uri'] == 'http://swbiodiversity.org/seinet/collections/misc/collprofiles.php?collid=187' From ba7ac2f5426b625708913abc2616786bc49ca3cb Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 6 Mar 2025 13:28:10 -0500 Subject: [PATCH 05/46] wip --- share/admin/__init__.py | 134 +----------- share/management/commands/loadsources.py | 125 ----------- .../commands/maketriggermigrations.py | 104 --------- .../migrations/0001_squashed_0058_big_rend.py | 8 +- share/models/__init__.py | 32 +-- share/models/celery.py | 2 +- share/models/ingest.py | 199 +----------------- .../search/index_strategy/sharev2_elastic5.py | 28 ++- share/version.py | 4 + .../_common_trovesearch_tests.py | 47 +---- trove/digestive_tract.py | 73 ++----- trove/exceptions.py | 4 + 12 files changed, 68 insertions(+), 692 deletions(-) delete mode 100644 share/management/commands/loadsources.py delete mode 100644 share/management/commands/maketriggermigrations.py diff --git a/share/admin/__init__.py b/share/admin/__init__.py index 7174cc418..24af3c3ce 100644 --- a/share/admin/__init__.py +++ b/share/admin/__init__.py @@ -1,40 +1,25 @@ -from prettyjson import PrettyJSONWidget - -from django import forms from django.apps import apps from django.urls import re_path as url from django.contrib import admin -from django.contrib.admin.widgets import AdminDateWidget from django.http import HttpResponseRedirect from django.template.response import TemplateResponse from django.urls import path, reverse -from django.utils import timezone from django.utils.html import format_html from oauth2_provider.models import AccessToken -from share import tasks from share.admin.celery import CeleryTaskResultAdmin -from share.admin.jobs import HarvestJobAdmin -from share.admin.readonly import ReadOnlyAdmin from share.admin.search import search_indexes_view, search_index_mappings_view from share.admin.util import TimeLimitedPaginator, linked_fk, linked_many, SourceConfigFilter -from share.harvest.scheduler import HarvestScheduler from share.models import ( CeleryTaskResult, - DateTimeAwareJSONField, FeatureFlag, - FormattedMetadataRecord, - HarvestJob, 
IndexBackfill, - NormalizedData, - ProviderRegistration, RawDatum, ShareUser, SiteBanner, Source, SourceConfig, - SourceStat, SourceUniqueIdentifier, ) from trove import digestive_tract @@ -66,24 +51,6 @@ class ShareUserAdmin(admin.ModelAdmin): search_fields = ['username'] -@linked_fk('raw') -@linked_fk('source') -class NormalizedDataAdmin(admin.ModelAdmin): - date_hierarchy = 'created_at' - list_filter = ['source', ] - raw_id_fields = ('tasks',) - paginator = TimeLimitedPaginator - formfield_overrides = { - DateTimeAwareJSONField: { - 'widget': PrettyJSONWidget(attrs={ - 'initial': 'parsed', - 'cols': 120, - 'rows': 20 - }) - } - } - - @linked_fk('suid') class RawDatumAdmin(admin.ModelAdmin): show_full_result_count = False @@ -102,7 +69,7 @@ def source_config_label(self, obj): def datum__pre(self, instance): return format_html('
<pre>{}</pre>', instance.datum)
-    datum__pre.short_description = 'datum'
+    datum__pre.short_description = 'datum'  # type: ignore[attr-defined]
 
 
 class AccessTokenAdmin(admin.ModelAdmin):
@@ -110,15 +77,6 @@ class AccessTokenAdmin(admin.ModelAdmin):
     list_display = ('token', 'user', 'scope')
 
 
-class ProviderRegistrationAdmin(ReadOnlyAdmin):
-    list_display = ('source_name', 'status_', 'submitted_at', 'submitted_by', 'direct_source')
-    list_filter = ('direct_source', 'status',)
-    readonly_fields = ('submitted_at', 'submitted_by',)
-
-    def status_(self, obj):
-        return ProviderRegistration.STATUS[obj.status].title()
-
-
 class SiteBannerAdmin(admin.ModelAdmin):
     list_display = ('title', 'color', 'icon', 'active')
     list_editable = ('active',)
@@ -132,17 +90,6 @@ def save_model(self, request, obj, form, change):
         super().save_model(request, obj, form, change)
 
 
-class HarvestForm(forms.Form):
-    start = forms.DateField(widget=AdminDateWidget())
-    end = forms.DateField(widget=AdminDateWidget())
-    superfluous = forms.BooleanField(required=False)
-
-    def clean(self):
-        super().clean()
-        if self.cleaned_data['start'] > self.cleaned_data['end']:
-            raise forms.ValidationError('Start date cannot be after end date.')
-
-
 @linked_fk('source')
 class SourceConfigAdmin(admin.ModelAdmin):
     list_display = ('label', 'source_', 'version', 'enabled', 'button_actions')
@@ -156,7 +103,7 @@ def source_(self, obj):
 
     def enabled(self, obj):
         return not obj.disabled
-    enabled.boolean = True
+    enabled.boolean = True  # type: ignore[attr-defined]
 
     @admin.action(description='schedule re-ingest of all raw data for each source config')
     def schedule_full_ingest(self, request, queryset):
@@ -165,11 +112,6 @@ def schedule_full_ingest(self, request, queryset):
 
     def get_urls(self):
         return [
-            url(
-                r'^(?P<config_id>.+)/harvest/$',
-                self.admin_site.admin_view(self.harvest),
-                name='source-config-harvest'
-            ),
             url(
                 r'^(?P<config_id>.+)/ingest/$',
                 self.admin_site.admin_view(self.start_ingest),
@@ -180,38 +122,11 @@ def button_actions(self, obj):
         return format_html(
             ' '.join((
-                ('<a class="button" href="{harvest_href}">Harvest</a>' if obj.harvester_key else ''),
                 ('<a class="button" href="{ingest_href}">Ingest</a>' if not obj.disabled else ''),
             )),
-            harvest_href=reverse('admin:source-config-harvest', args=[obj.pk]),
             ingest_href=reverse('admin:source-config-ingest', args=[obj.pk]),
         )
-    button_actions.short_description = 'Buttons'
-
-    def harvest(self, request, config_id):
-        config = self.get_object(request, config_id)
-        if config.harvester_key is None:
-            raise ValueError('You need a harvester to harvest.')
-        if request.method == 'POST':
-            form = HarvestForm(request.POST)
-            if form.is_valid():
-                for job in HarvestScheduler(config, claim_jobs=True).range(form.cleaned_data['start'], form.cleaned_data['end']):
-                    tasks.harvest.apply_async((), {'job_id': job.id, 'superfluous': form.cleaned_data['superfluous']})
-                self.message_user(request, 'Started harvesting {}!'.format(config.label))
-                url = reverse('admin:share_harvestjob_changelist', current_app=self.admin_site.name)
-                return HttpResponseRedirect(url)
-        else:
-            initial = {'start': config.earliest_date, 'end': timezone.now().date()}
-            for field in HarvestForm.base_fields.keys():
-                if field in request.GET:
-                    initial[field] = request.GET[field]
-            form = HarvestForm(initial=initial)
-        context = self.admin_site.each_context(request)
-        context['opts'] = self.model._meta
-        context['form'] = form
-        context['source_config'] = config
-        context['title'] = 'Harvest {}'.format(config.label)
-        return TemplateResponse(request, 'admin/harvest.html', context)
+    button_actions.short_description = 'Buttons'  # type:
ignore[attr-defined] def start_ingest(self, request, config_id): config = self.get_object(request, config_id) @@ -240,36 +155,6 @@ def access_token(self, obj): return None -class SourceStatAdmin(admin.ModelAdmin): - search_fields = ('config__label', 'config__source__long_title') - list_display = ('label', 'date_created', 'base_urls_match', 'earliest_datestamps_match', 'response_elapsed_time', 'response_status_code', 'grade_') - list_filter = ('grade', 'response_status_code', 'config__label') - - GRADE_COLORS = { - 0: 'red', - 5: 'orange', - 10: 'green', - } - GRADE_LETTERS = { - 0: 'F', - 5: 'C', - 10: 'A', - } - - def source(self, obj): - return obj.config.source.long_title - - def label(self, obj): - return obj.config.label - - def grade_(self, obj): - return format_html( - '{}', - self.GRADE_COLORS[obj.grade], - self.GRADE_LETTERS[obj.grade], - ) - - @linked_fk('source_config') @linked_fk('focus_identifier') @linked_many('formattedmetadatarecord_set', defer=('formatted_metadata',)) @@ -295,7 +180,6 @@ def reingest(self, request, queryset): def delete_cards_for_suid(self, request, queryset): for suid in queryset: - FormattedMetadataRecord.objects.delete_formatted_records(suid) digestive_tract.expel_suid(suid) def get_search_results(self, request, queryset, search_term): @@ -308,13 +192,6 @@ def get_search_results(self, request, queryset, search_term): ) -@linked_fk('suid') -class FormattedMetadataRecordAdmin(admin.ModelAdmin): - readonly_fields = ('record_format',) - paginator = TimeLimitedPaginator - show_full_result_count = False - - class IndexBackfillAdmin(admin.ModelAdmin): readonly_fields = ( 'index_strategy_name', @@ -342,15 +219,10 @@ class FeatureFlagAdmin(admin.ModelAdmin): admin_site.register(AccessToken, AccessTokenAdmin) admin_site.register(CeleryTaskResult, CeleryTaskResultAdmin) admin_site.register(FeatureFlag, FeatureFlagAdmin) -admin_site.register(FormattedMetadataRecord, FormattedMetadataRecordAdmin) -admin_site.register(HarvestJob, HarvestJobAdmin) admin_site.register(IndexBackfill, IndexBackfillAdmin) -admin_site.register(NormalizedData, NormalizedDataAdmin) -admin_site.register(ProviderRegistration, ProviderRegistrationAdmin) admin_site.register(RawDatum, RawDatumAdmin) admin_site.register(ShareUser, ShareUserAdmin) admin_site.register(SiteBanner, SiteBannerAdmin) admin_site.register(Source, SourceAdmin) admin_site.register(SourceConfig, SourceConfigAdmin) -admin_site.register(SourceStat, SourceStatAdmin) admin_site.register(SourceUniqueIdentifier, SourceUniqueIdentifierAdmin) diff --git a/share/management/commands/loadsources.py b/share/management/commands/loadsources.py deleted file mode 100644 index c793b475f..000000000 --- a/share/management/commands/loadsources.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -import yaml - -from django.apps import apps -from django.core.files import File -from django.core.management.base import BaseCommand -from django.db import transaction -from django.dispatch import receiver -from django.core.exceptions import FieldDoesNotExist -from django.db.models.signals import post_save - -import share -from share.models.core import user_post_save -from share.util.extensions import Extensions - -SOURCES_DIR = 'sources' - - -class Command(BaseCommand): - def add_arguments(self, parser): - parser.add_argument('sources', nargs='*', type=str, help='Names of the sources to load (if omitted, load all)') - parser.add_argument('--overwrite', action='store_true', help='Overwrite existing sources and source configs') - - def handle(self, *args, 
**options): - # If we're running in a migrations we need to use the correct apps - self.apps = options.get('apps', apps) - - sources = options.get('sources') - sources_dir = os.path.join(share.__path__[0], SOURCES_DIR) - if sources: - source_dirs = [os.path.join(sources_dir, s) for s in sources] - else: - source_dirs = [os.path.join(sources_dir, s) for s in os.listdir(sources_dir)] - - if self.apps.get_model('share.ShareUser').__module__ == '__fake__': - receiver(post_save, sender=self.apps.get_model('share.ShareUser'), dispatch_uid='__fake__.share.models.share_user_post_save_handler')(user_post_save) - - with transaction.atomic(): - self.known_harvesters = set(Extensions.get_names('share.harvesters')) - self.known_transformers = set(Extensions.get_names('share.transformers')) - self.update_sources(source_dirs, overwrite=options.get('overwrite')) - - def update_sources(self, source_dirs, overwrite): - Source = self.apps.get_model('share.Source') - loaded_sources = set() - loaded_configs = set() - for source_dir in source_dirs: - with open(os.path.join(source_dir, 'source.yaml')) as fobj: - serialized = yaml.load(fobj, Loader=yaml.CLoader) - configs = serialized.pop('configs') - name = serialized.pop('name') - assert name not in loaded_sources - loaded_sources.add(name) - - user = self.get_or_create_user(serialized.pop('user')) - source_defaults = { - 'user': user, - **self.process_defaults(Source, serialized) - } - if overwrite: - source, _ = Source.objects.update_or_create(name=name, defaults=source_defaults) - else: - source, _ = Source.objects.get_or_create(name=name, defaults=source_defaults) - - with open(os.path.join(source_dir, 'icon.ico'), 'rb') as fobj: - source.icon.save(name, File(fobj)) - for config in configs: - assert config['label'] not in loaded_configs - loaded_configs.add(config['label']) - self.update_source_config(source, config, overwrite) - - def update_source_config(self, source, serialized, overwrite): - label = serialized.pop('label') - _harvester_key = serialized['harvester_key'] = serialized.pop('harvester', None) - _transformer_key = serialized['transformer_key'] = serialized.pop('transformer', None) - if _harvester_key and _harvester_key not in self.known_harvesters: - print('Unknown harvester {}! Skipping source config {}'.format(_harvester_key, label)) - return - if _transformer_key and _transformer_key not in self.known_transformers: - print('Unknown transformer {}! 
Skipping source config {}'.format(_transformer_key, label)) - return - - SourceConfig = self.apps.get_model('share.SourceConfig') - config_defaults = { - 'source': source, - **self.process_defaults(SourceConfig, serialized) - } - - # Shouldn't reenable a disabled source config - if config_defaults.get('disabled') is False: - config_defaults.pop('disabled') - - if overwrite: - source_config, created = SourceConfig.objects.update_or_create(label=label, defaults=config_defaults) - else: - source_config, created = SourceConfig.objects.get_or_create(label=label, defaults=config_defaults) - - def get_or_create_user(self, username): - ShareUser = self.apps.get_model('share.ShareUser') - - try: - return ShareUser.objects.get(username=username) - except ShareUser.DoesNotExist: - return ShareUser.objects.create_robot_user( - username=username, - robot=username, - ) - - def process_defaults(self, model, defaults): - ret = {} - for k, v in defaults.items(): - try: - field = model._meta.get_field(k) - except FieldDoesNotExist: - # This script gets run by the migrations fairly early on - # If new fields have been added the original run of this script will - # fail unless we ignore those fields. - self.stderr.write('Found extra field {}, skipping...'.format(k)) - continue - if field.is_relation and v is not None: - natural_key = tuple(v) if isinstance(v, list) else (v,) - ret[k] = field.related_model.objects.get_by_natural_key(natural_key) - else: - ret[k] = v - return ret diff --git a/share/management/commands/maketriggermigrations.py b/share/management/commands/maketriggermigrations.py deleted file mode 100644 index 23a3bddde..000000000 --- a/share/management/commands/maketriggermigrations.py +++ /dev/null @@ -1,104 +0,0 @@ -import datetime -import os - -from django.apps import apps -from django.core.management.base import BaseCommand -from django.db import connection -from django.db.migrations import Migration -from django.db.migrations import operations -from django.db.migrations.loader import MigrationLoader -from django.db.migrations.writer import MigrationWriter - - -# Triggers are Faster and will run in any insert/update situation -# Model based logic will not run in certain scenarios. IE Bulk operations -class Command(BaseCommand): - can_import_settings = True - - PROCEDURE = ''' - CREATE OR REPLACE FUNCTION before_{concrete}_change() RETURNS trigger AS $$ - DECLARE - vid INTEGER; - BEGIN - INSERT INTO {version}({columns}) VALUES ({new_columns}) RETURNING (id) INTO vid; - NEW.version_id = vid; - RETURN NEW; - END; - $$ LANGUAGE plpgsql; - ''' - - PROCEDURE_REVERSE = ''' - DROP FUNCTION before_{concrete}_change(); - ''' - - TRIGGER = ''' - DROP TRIGGER IF EXISTS {concrete}_change ON {concrete}; - - CREATE TRIGGER {concrete}_change - BEFORE INSERT OR UPDATE ON {concrete} - FOR EACH ROW - EXECUTE PROCEDURE before_{concrete}_change(); - ''' - - TRIGGER_REVERSE = ''' - DROP TRIGGER {concrete}_change - ''' - - def collect_fields(self, model): - concrete_fields = ['NEW.' 
+ f.column for f in model._meta.fields] - version_fields = [f.column for f in model.VersionModel._meta.fields] - - version_fields.remove('id') - version_fields.remove('action') - version_fields.remove('persistent_id') - concrete_fields.remove('NEW.id') - concrete_fields.remove('NEW.version_id') - - assert len(version_fields) == len(concrete_fields) - - return concrete_fields, version_fields - - def build_operations(self, model): - concrete_fields, version_fields = self.collect_fields(model) - - params = { - 'concrete': model._meta.db_table, - 'version': model.VersionModel._meta.db_table, - 'columns': ', '.join(['persistent_id', 'action'] + sorted(version_fields)), - 'new_columns': ', '.join(['NEW.id', 'TG_OP'] + sorted(concrete_fields)), - } - - return [ - operations.RunSQL(self.PROCEDURE.format(**params).strip(), reverse_sql=self.PROCEDURE_REVERSE.format(**params).strip()), - operations.RunSQL(self.TRIGGER.format(**params).strip(), reverse_sql=self.TRIGGER_REVERSE.format(**params).strip()), - ] - - def write_migration(self, migration): - writer = MigrationWriter(migration) - os.makedirs(os.path.dirname(writer.path), exist_ok=True) - with open(writer.path, 'w') as fp: - fp.write(writer.as_string()) - - def handle(self, *args, **options): - ops = [] - - for model in apps.get_models(include_auto_created=True): - if not hasattr(model, 'VersionModel') or model._meta.proxy: - continue - ops.extend(self.build_operations(model)) - if options['initial']: - m = Migration('0003_triggers', 'share') - m.dependencies = [('share', '0002_create_share_user')] - else: - ml = MigrationLoader(connection=connection) - ml.build_graph() - last_share_migration = [x[1] for x in ml.graph.leaf_nodes() if x[0] == 'share'][0] - next_number = '{0:04d}'.format(int(last_share_migration[0:4]) + 1) - m = Migration('{}_update_trigger_migrations_{}'.format(next_number, datetime.datetime.now().strftime("%Y%m%d_%H%M")), 'share') - m.dependencies = [('share', '0002_create_share_user'), ('share', last_share_migration)] - m.operations = ops - self.write_migration(m) - - def add_arguments(self, parser): - parser.add_argument('--initial', action='store_true', help='Create initial trigger migrations') - parser.add_argument('--update', action='store_true', help='Update trigger migrations after schema change') diff --git a/share/migrations/0001_squashed_0058_big_rend.py b/share/migrations/0001_squashed_0058_big_rend.py index 9b7000fad..501fe8044 100644 --- a/share/migrations/0001_squashed_0058_big_rend.py +++ b/share/migrations/0001_squashed_0058_big_rend.py @@ -12,8 +12,8 @@ import share.models.core import share.models.fields import share.models.ingest -import share.models.jobs import share.models.validators +import share.version class Migration(migrations.Migration): @@ -150,7 +150,7 @@ class Migration(migrations.Migration): ('date_started', models.DateTimeField(blank=True, null=True)), ('date_created', models.DateTimeField(auto_now_add=True)), ('date_modified', models.DateTimeField(auto_now=True, db_index=True)), - ('share_version', models.TextField(default=share.models.jobs.get_share_version, editable=False)), + ('share_version', models.TextField(default=share.version.get_share_version, editable=False)), ('source_config_version', models.PositiveIntegerField()), ('end_date', models.DateTimeField(db_index=True)), ('start_date', models.DateTimeField(db_index=True)), @@ -323,7 +323,7 @@ class Migration(migrations.Migration): ('traceback', models.TextField(blank=True, editable=False, null=True)), ('date_created', 
models.DateTimeField(auto_now_add=True)), ('date_modified', models.DateTimeField(auto_now=True, db_index=True)), - ('share_version', models.TextField(default=share.models.jobs.get_share_version, editable=False)), + ('share_version', models.TextField(default=share.version.get_share_version, editable=False)), ], options={ 'verbose_name': 'Celery Task Result', @@ -440,7 +440,7 @@ class Migration(migrations.Migration): ('date_started', models.DateTimeField(blank=True, null=True)), ('date_created', models.DateTimeField(auto_now_add=True)), ('date_modified', models.DateTimeField(auto_now=True, db_index=True)), - ('share_version', models.TextField(default=share.models.jobs.get_share_version, editable=False)), + ('share_version', models.TextField(default=share.version.get_share_version, editable=False)), ('source_config_version', models.PositiveIntegerField()), ('transformer_version', models.PositiveIntegerField()), ('regulator_version', models.PositiveIntegerField()), diff --git a/share/models/__init__.py b/share/models/__init__.py index 5eb67ee18..338e34ecf 100644 --- a/share/models/__init__.py +++ b/share/models/__init__.py @@ -1,23 +1,25 @@ -# NOTE: The order of these imports actually matter from share.models.source_unique_identifier import SourceUniqueIdentifier from share.models.index_backfill import IndexBackfill from share.models.feature_flag import FeatureFlag -from share.models.core import ShareUser, NormalizedData, FormattedMetadataRecord -from share.models.ingest import * # noqa -from share.models.registration import * # noqa -from share.models.banner import * # noqa -from share.models.jobs import * # noqa -from share.models.sources import * # noqa -from share.models.celery import * # noqa - -# TODO: replace all the `import * # noqa` above with explicit imports and a full __all__ +from share.models.core import ShareUser +from share.models.ingest import ( + Source, + SourceConfig, + RawDatum, +) +from share.models.banner import SiteBanner +from share.models.celery import CeleryTaskResult +from share.models.fields import DateTimeAwareJSONField __all__ = ( + 'CeleryTaskResult', + 'FeatureFlag', + 'IndexBackfill', + 'RawDatum', 'ShareUser', - 'NormalizedData', - 'FormattedMetadataRecord', + 'SiteBanner', + 'Source', + 'SourceConfig', 'SourceUniqueIdentifier', - 'IndexBackfill', - 'FeatureFlag', - # ... 
+ 'DateTimeAwareJSONField', ) diff --git a/share/models/celery.py b/share/models/celery.py index 071932d6d..df5aba191 100644 --- a/share/models/celery.py +++ b/share/models/celery.py @@ -3,7 +3,7 @@ from django.db import models from share.models.fields import DateTimeAwareJSONField -from share.models.jobs import get_share_version +from share.version import get_share_version ALL_STATES = sorted(states.ALL_STATES) diff --git a/share/models/ingest.py b/share/models/ingest.py index 1ccd8befd..bf93b6d60 100644 --- a/share/models/ingest.py +++ b/share/models/ingest.py @@ -1,4 +1,3 @@ -import contextlib import datetime import hashlib import logging @@ -6,22 +5,17 @@ from django.core import validators from django.core.files.base import ContentFile from django.core.files.storage import Storage -from django.db import DEFAULT_DB_ALIAS from django.db import connection -from django.db import connections from django.db import models from django.db.models.functions import Coalesce from django.urls import reverse -from django.utils import timezone from django.utils.deconstruct import deconstructible import sentry_sdk from share.models.core import ShareUser -from share.models.fields import EncryptedJSONField from share.models.fuzzycount import FuzzyCountManager from share.models.source_unique_identifier import SourceUniqueIdentifier -from share.util import chunked, placeholders, BaseJSONAPIMeta -from share.util.extensions import Extensions +from share.util import chunked, BaseJSONAPIMeta logger = logging.getLogger(__name__) @@ -138,32 +132,9 @@ class SourceConfig(models.Model): source = models.ForeignKey('Source', on_delete=models.CASCADE, related_name='source_configs') base_url = models.URLField(null=True) - earliest_date = models.DateField(null=True, blank=True) - rate_limit_allowance = models.PositiveIntegerField(default=5) - rate_limit_period = models.PositiveIntegerField(default=1) - - # Allow null for push sources - harvester_key = models.TextField(null=True) - harvester_kwargs = models.JSONField(null=True, blank=True) - harvest_interval = models.DurationField(default=datetime.timedelta(days=1)) - harvest_after = models.TimeField(default='02:00') - full_harvest = models.BooleanField(default=False, help_text=( - 'Whether or not this SourceConfig should be fully harvested. ' - 'Requires earliest_date to be set. ' - 'The schedule harvests task will create all jobs necessary if this flag is set. ' - 'This should never be set to True by default. ' - )) - - transformer_key = models.TextField(null=True) - transformer_kwargs = models.JSONField(null=True, blank=True) - - regulator_steps = models.JSONField(null=True, blank=True) disabled = models.BooleanField(default=False) - private_harvester_kwargs = EncryptedJSONField(blank=True, null=True) - private_transformer_kwargs = EncryptedJSONField(blank=True, null=True) - objects = SourceConfigManager('label') class JSONAPIMeta(BaseJSONAPIMeta): @@ -172,49 +143,6 @@ class JSONAPIMeta(BaseJSONAPIMeta): def natural_key(self): return (self.label,) - def get_harvester_class(self): - return Extensions.get('share.harvesters', self.harvester_key) - - def get_transformer_class(self): - return Extensions.get('share.transformers', self.transformer_key) - - def get_harvester(self, **kwargs): - """Return a harvester instance configured for this SourceConfig. - - **kwargs: passed to the harvester's initializer - """ - return self.get_harvester_class()(self, **kwargs) - - def get_transformer(self, **kwargs): - """Return a transformer instance configured for this SourceConfig. 
- - **kwargs: passed to the transformer's initializer - """ - return self.get_transformer_class()(self, **kwargs) - - @contextlib.contextmanager - def acquire_lock(self, required=True, using='default'): - from share.harvest.exceptions import HarvesterConcurrencyError - - # NOTE: Must be in transaction - logger.debug('Attempting to lock %r', self) - with connections[using].cursor() as cursor: - cursor.execute("SELECT pg_try_advisory_lock(%s::regclass::integer, %s);", (self._meta.db_table, self.id)) - locked = cursor.fetchone()[0] - if not locked and required: - logger.warning('Lock failed; another task is already harvesting %r.', self) - raise HarvesterConcurrencyError('Unable to lock {!r}'.format(self)) - elif locked: - logger.debug('Lock acquired on %r', self) - else: - logger.warning('Lock not acquired on %r', self) - try: - yield - finally: - if locked: - cursor.execute("SELECT pg_advisory_unlock(%s::regclass::integer, %s);", (self._meta.db_table, self.id)) - logger.debug('Lock released on %r', self) - def __repr__(self): return '<{}({}, {})>'.format(self.__class__.__name__, self.pk, self.label) @@ -245,122 +173,12 @@ def link_to_job(self, job, datum_ids): ), [(raw_id, job.id) for raw_id in chunk]) return True - # TODO: remove `store_chunk` (and the rest of the non-rdf legacy pipeline) - def store_chunk(self, source_config, data, limit=None, db=DEFAULT_DB_ALIAS): - """Store a large amount of data for a single source_config. - - Data MUST be a utf-8 encoded string (Just a str type). - Take special care to make sure you aren't destroying data by mis-encoding it. - - Args: - source_config (SourceConfig): - data Generator[FetchResult]: - - Returns: - Generator[RawDatum] - """ - hashes = {} - identifiers = {} - now = timezone.now() - - if limit == 0: - return [] - - for chunk in chunked(data, 500): - if not chunk: - break - - new = [] - new_identifiers = set() - for fr in chunk: - if limit and len(hashes) >= limit: - break - - if fr.sha256 in hashes: - if hashes[fr.sha256] != fr.identifier: - raise ValueError( - '{!r} has already been seen or stored with identifier "{}". ' - 'Perhaps your identifier extraction is incorrect?'.format(fr, hashes[fr.sha256]) - ) - logger.warning('Recieved duplicate datum %s from %s', fr, source_config) - continue - - new.append(fr) - hashes[fr.sha256] = fr.identifier - new_identifiers.add(fr.identifier) - - if new_identifiers: - suids = SourceUniqueIdentifier.objects.raw(''' - INSERT INTO "{table}" - ("{identifier}", "{source_config}") - VALUES - {values} - ON CONFLICT - ("{identifier}", "{source_config}") - DO UPDATE SET - id = "{table}".id - RETURNING {fields} - '''.format( - table=SourceUniqueIdentifier._meta.db_table, - identifier=SourceUniqueIdentifier._meta.get_field('identifier').column, - source_config=SourceUniqueIdentifier._meta.get_field('source_config').column, - values=placeholders(len(new_identifiers)), # Nasty hack. 
Fix when psycopg2 2.7 is released with execute_values - fields=', '.join('"{}"'.format(field.column) for field in SourceUniqueIdentifier._meta.concrete_fields), - ), [(identifier, source_config.id) for identifier in new_identifiers]) - - for suid in suids: - identifiers[suid.identifier] = suid.pk - - if new: - # Defer 'datum' by omitting it from the returned fields - yield from RawDatum.objects.raw( - ''' - INSERT INTO "{table}" - ("{suid}", "{hash}", "{datum}", "{datestamp}", "{date_modified}", "{date_created}") - VALUES - {values} - ON CONFLICT - ("{suid}", "{hash}") - DO UPDATE SET - "{datestamp}" = EXCLUDED."{datestamp}", - "{date_modified}" = EXCLUDED."{date_modified}" - RETURNING id, "{suid}", "{hash}", "{datestamp}", "{date_modified}", "{date_created}" - '''.format( - table=RawDatum._meta.db_table, - suid=RawDatum._meta.get_field('suid').column, - hash=RawDatum._meta.get_field('sha256').column, - datum=RawDatum._meta.get_field('datum').column, - datestamp=RawDatum._meta.get_field('datestamp').column, - date_modified=RawDatum._meta.get_field('date_modified').column, - date_created=RawDatum._meta.get_field('date_created').column, - values=', '.join('%s' for _ in range(len(new))), # Nasty hack. Fix when psycopg2 2.7 is released with execute_values - ), [ - (identifiers[fr.identifier], fr.sha256, fr.datum, fr.datestamp or now, now, now) - for fr in new - ] - ) - - if limit and len(hashes) >= limit: - break - - def store_data(self, config, fetch_result): - """ - """ - (rd, ) = self.store_chunk(config, [fetch_result]) - - if rd.created: - logger.debug('New %r', rd) - else: - logger.debug('Found existing %r', rd) - - return rd - def store_datum_for_suid( self, *, suid, datum: str, - mediatype: str | None, # `None` indicates sharev2-legacy ingestion + mediatype: str, datestamp: datetime.datetime, expiration_date: datetime.date | None = None, ): @@ -406,15 +224,6 @@ def latest_by_suid_queryset(self, suid_queryset) -> models.QuerySet: )) -# Explicit through table to match legacy names -class RawDatumJob(models.Model): - datum = models.ForeignKey('RawDatum', db_column='rawdatum_id', on_delete=models.CASCADE) - job = models.ForeignKey('HarvestJob', db_column='harvestlog_id', on_delete=models.CASCADE) - - class Meta: - db_table = 'share_rawdatum_logs' - - class RawDatum(models.Model): datum = models.TextField() @@ -442,12 +251,10 @@ class RawDatum(models.Model): no_output = models.BooleanField(null=True, help_text=( 'Indicates that this RawDatum resulted in an empty graph when transformed. ' 'This allows the RawDataJanitor to find records that have not been processed. ' - 'Records that result in an empty graph will not have a NormalizedData associated with them, ' + 'Records that result in an empty graph will not have an Indexcard associated with them, ' 'which would otherwise look like data that has not yet been processed.' 
)) - jobs = models.ManyToManyField('HarvestJob', related_name='raw_data', through=RawDatumJob) - objects = RawDatumManager() @property diff --git a/share/search/index_strategy/sharev2_elastic5.py b/share/search/index_strategy/sharev2_elastic5.py index 8e775569c..8fea36be9 100644 --- a/share/search/index_strategy/sharev2_elastic5.py +++ b/share/search/index_strategy/sharev2_elastic5.py @@ -7,13 +7,15 @@ import elasticsearch5 import elasticsearch5.helpers -from share.models import FormattedMetadataRecord, SourceUniqueIdentifier +from share.models import SourceUniqueIdentifier from share.search import exceptions, messages from share.search.index_status import IndexStatus from share.search.index_strategy._base import IndexStrategy from share.search.index_strategy._util import timestamp_to_readable_datetime from share.util import IDObfuscator from share.util.checksum_iri import ChecksumIri +from trove import models as trove_db +from trove.vocab.namespaces import SHAREv2 logger = logging.getLogger(__name__) @@ -300,14 +302,20 @@ def _build_elastic_actions(self, messages_chunk): '_type': 'creativeworks', } suid_ids = set(messages_chunk.target_ids_chunk) - record_qs = FormattedMetadataRecord.objects.filter( - suid_id__in=suid_ids, - record_format='sharev2_elastic', # TODO specify in config? or don't + _derived_qs = ( + trove_db.DerivedIndexcard.objects + .filter(upriver_indexcard__source_record_suid_id=suid_ids) + .filter(deriver_identifier__in=( + trove_db.ResourceIdentifier.objects + .queryset_for_iri(SHAREv2.sharev2_elastic) + )) + .select_related('upriver_indexcard') ) - for record in record_qs: - doc_id = get_doc_id(record.suid_id) - suid_ids.remove(record.suid_id) - source_doc = json.loads(record.formatted_metadata) + for _derived_card in _derived_qs: + _suid_id = _derived_card.upriver_indexcard.source_record_suid_id + doc_id = get_doc_id(_suid_id) + suid_ids.remove(_suid_id) + source_doc = json.loads(_derived_card.derived_text) assert source_doc['id'] == doc_id if source_doc.pop('is_deleted', False): action = { @@ -322,9 +330,9 @@ def _build_elastic_actions(self, messages_chunk): '_op_type': 'index', '_source': source_doc, } - logger.debug('built action for suid_id=%s: %s', record.suid_id, action) + logger.debug('built action for suid_id=%s: %s', _suid_id, action) yield action - # delete any that don't have the expected FormattedMetadataRecord + # delete any that don't have the expected DerivedIndexcard for leftover_suid_id in suid_ids: logger.debug('deleting suid_id=%s', leftover_suid_id) action = { diff --git a/share/version.py b/share/version.py index e740fd289..27ae3d93c 100644 --- a/share/version.py +++ b/share/version.py @@ -1 +1,5 @@ __version__ = '25.1.1' + + +def get_share_version() -> str: + return __version__ diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index b237d150c..c40509b80 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -214,6 +214,7 @@ def _assert_cardsearch_iris(self, queryparams: dict, expected_focus_iris: Iterab assert isinstance(_cardsearch_params, CardsearchParams) _cardsearch_handle = self.index_strategy.pls_handle_cardsearch(_cardsearch_params) # assumes all results fit on one page + breakpoint() # TODO: _indexcard_focus_by_uuid _actual_result_iris: set[str] | list[str] = [ self._indexcard_focus_by_uuid[_result.card_uuid] for _result in 
_cardsearch_handle.search_result_page @@ -621,49 +622,3 @@ def _delete_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): for _indexcard in indexcards: _indexcard.pls_delete(notify_indexes=False) # notify by hand to know when done self._index_indexcards(indexcards) - - def _create_indexcard(self, focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: - _suid = factories.SourceUniqueIdentifierFactory() - _indexcard = trove_db.Indexcard.objects.create(source_record_suid=_suid) - self._update_indexcard_content(_indexcard, focus_iri, rdf_tripledict) - # an osfmap_json card is required for indexing, but not used in these tests - trove_db.DerivedIndexcard.objects.get_or_create( - upriver_indexcard=_indexcard, - deriver_identifier=trove_db.ResourceIdentifier.objects.get_or_create_for_iri(TROVE['derive/osfmap_json']), - ) - return _indexcard - - def _update_indexcard_content( - self, - indexcard: trove_db.Indexcard, - focus_iri: str, - rdf_tripledict: rdf.RdfTripleDictionary, - ) -> None: - _raw = factories.RawDatumFactory(suid=indexcard.source_record_suid) - trove_db.LatestIndexcardRdf.objects.update_or_create( - indexcard=indexcard, - defaults={ - 'from_raw_datum': _raw, - 'focus_iri': focus_iri, - 'rdf_as_turtle': rdf.turtle_from_tripledict(rdf_tripledict), - 'turtle_checksum_iri': 'foo', # not enforced - }, - ) - self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri - - def _create_supplement( - self, - indexcard: trove_db.Indexcard, - focus_iri: str, - rdf_tripledict: rdf.RdfTripleDictionary, - ) -> trove_db.SupplementaryIndexcardRdf: - _supp_suid = factories.SourceUniqueIdentifierFactory() - _supp_raw = factories.RawDatumFactory(suid=_supp_suid) - return trove_db.SupplementaryIndexcardRdf.objects.create( - from_raw_datum=_supp_raw, - indexcard=indexcard, - supplementary_suid=_supp_suid, - focus_iri=focus_iri, - rdf_as_turtle=rdf.turtle_from_tripledict(rdf_tripledict), - turtle_checksum_iri='sup', # not enforced - ) diff --git a/trove/digestive_tract.py b/trove/digestive_tract.py index 2a95fb056..3099a585d 100644 --- a/trove/digestive_tract.py +++ b/trove/digestive_tract.py @@ -11,7 +11,6 @@ import copy import datetime import logging -import typing import celery from django.db import transaction @@ -21,7 +20,11 @@ from share.search import IndexMessenger from share.util.checksum_iri import ChecksumIri from trove import models as trove_db -from trove.exceptions import DigestiveError, CannotDigestExpiredDatum +from trove.exceptions import ( + CannotDigestExpiredDatum, + DigestiveError, + MissingMediatype, +) from trove.extract import get_rdf_extractor_class from trove.derive import get_deriver_classes from trove.vocab.namespaces import RDFS, RDF, OWL @@ -81,34 +84,6 @@ def swallow( return _task.id -@transaction.atomic -def swallow__sharev2_legacy( - *, # all keyword-args - from_user: share_db.ShareUser, - record: str, - record_identifier: str, - transformer_key: str, - datestamp=None, # default "now" - urgent=False, -): - _source_config = ( - share_db.SourceConfig.objects - .get_or_create_push_config(from_user, transformer_key) - ) - _suid, _suid_created = share_db.SourceUniqueIdentifier.objects.get_or_create( - source_config=_source_config, - identifier=record_identifier, - ) - _raw = share_db.RawDatum.objects.store_datum_for_suid( - suid=_suid, - datum=record, - mediatype=None, # indicate sharev2-legacy flow - datestamp=(datestamp or datetime.datetime.now(tz=datetime.timezone.utc)), - ) - _task = task__extract_and_derive.delay(_raw.id, urgent=urgent) 
- return _task.id - - def extract(raw: share_db.RawDatum, *, undelete_indexcards=False) -> list[trove_db.Indexcard]: '''extract: gather rdf graph from a record; store as index card(s) @@ -253,15 +228,14 @@ def task__extract_and_derive(task: celery.Task, raw_id: int, urgent=False): if _source_config.disabled or _source_config.source.is_deleted: expel_suid(_raw.suid) else: - if _raw.mediatype: - _indexcards = extract(_raw, undelete_indexcards=urgent) - if _raw.is_latest(): - _messenger = IndexMessenger(celery_app=task.app) - for _indexcard in _indexcards: - derive(_indexcard) - _messenger.notify_indexcard_update(_indexcards, urgent=urgent) - else: # no mediatype => legacy flow - _sharev2_legacy_ingest(_raw, urgent=urgent) + if not _raw.mediatype: + raise MissingMediatype(_raw) + _indexcards = extract(_raw, undelete_indexcards=urgent) + if _raw.is_latest(): + _messenger = IndexMessenger(celery_app=task.app) + for _indexcard in _indexcards: + derive(_indexcard) + _messenger.notify_indexcard_update(_indexcards, urgent=urgent) @celery.shared_task(acks_late=True, bind=True) @@ -311,24 +285,3 @@ def task__schedule_all_for_deriver(deriver_iri: str, notify_index=False): @celery.shared_task(acks_late=True) def task__expel_expired_data(): expel_expired_data(datetime.date.today()) - - -# TODO: remove legacy ingest -def _sharev2_legacy_ingest(raw, urgent: bool): - assert raw.mediatype is None, 'raw datum has a mediatype -- did you mean to call non-legacy extract?' - _extractor = get_rdf_extractor_class(None)(raw.suid.source_config) - if typing.TYPE_CHECKING: - from trove.extract.legacy_sharev2 import LegacySharev2Extractor - assert isinstance(_extractor, LegacySharev2Extractor) - _sharev2graph = _extractor.extract_sharev2_graph(raw.datum) - _centralnode = _sharev2graph.get_central_node(guess=True) - _normd = share_db.NormalizedData.objects.create( - data=_centralnode.graph.to_jsonld(), - source=raw.suid.source_config.source.user, - raw=raw, - ) - share_db.FormattedMetadataRecord.objects.save_formatted_records( - raw.suid, - normalized_datum=_normd, - ) - IndexMessenger().notify_suid_update([raw.suid_id], urgent=urgent) diff --git a/trove/exceptions.py b/trove/exceptions.py index 4a9ddd286..37cd4bfd7 100644 --- a/trove/exceptions.py +++ b/trove/exceptions.py @@ -19,6 +19,10 @@ class DigestiveError(TroveError): pass +class MissingMediatype(DigestiveError): + pass + + class CannotDigestMediatype(DigestiveError): pass From 8cb4987db35e46d3270065621b17ab2386edc856 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 6 Mar 2025 14:05:54 -0500 Subject: [PATCH 06/46] wipwip --- trove/derive/sharev2_elastic.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/trove/derive/sharev2_elastic.py b/trove/derive/sharev2_elastic.py index 064cf736b..dea6ae924 100644 --- a/trove/derive/sharev2_elastic.py +++ b/trove/derive/sharev2_elastic.py @@ -217,7 +217,19 @@ def _related_agent(self, relation_iri, agent_iri): } def _sharev2_type(self, type_iri): - if type_iri in SHAREv2: + try: + return ShareV2Schema().get_type(_typename) + except SchemaKeyError: + return None + + def _single_type(self, focus_iri): + _type_iris = set(self.data.q(focus_iri, RDF.type)) + _sharev2_types = set( + _type_iri + for _type_iri in _type_iris + if _type_iri in SHAREv2 + ) + if _sharev2_types: _typename = primitive_rdf.iri_minus_namespace(type_iri, namespace=SHAREv2) elif type_iri in OSFMAP: _typename = primitive_rdf.iri_minus_namespace(type_iri, namespace=OSFMAP) @@ -227,20 +239,10 @@ def 
_sharev2_type(self, type_iri): _typename = 'Project' else: return None - try: - return ShareV2Schema().get_type(_typename) - except SchemaKeyError: - return None - - def _single_type(self, focus_iri): def _type_sortkey(sharev2_type): return sharev2_type.distance_from_concrete_type _types = filter(None, ( self._sharev2_type(_type_iri) - for _type_iri in self.data.q( - focus_iri, - RDF.type, - ) )) _sorted_types = sorted(_types, key=_type_sortkey, reverse=True) if not _sorted_types: From 1f3da718253d0454e727182b7428ed2f918930a6 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 6 Mar 2025 14:43:05 -0500 Subject: [PATCH 07/46] wip --- trove/derive/sharev2_elastic.py | 87 +++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 31 deletions(-) diff --git a/trove/derive/sharev2_elastic.py b/trove/derive/sharev2_elastic.py index dea6ae924..2b29376e8 100644 --- a/trove/derive/sharev2_elastic.py +++ b/trove/derive/sharev2_elastic.py @@ -4,8 +4,6 @@ from primitive_metadata import primitive_rdf -from share.schema import ShareV2Schema -from share.schema.exceptions import SchemaKeyError from share.util import IDObfuscator from share import models as share_db from trove.vocab.namespaces import ( @@ -21,8 +19,38 @@ from ._base import IndexcardDeriver +# the sharev2 type hierarchy is limited but nested +_SHAREv2_TYPES_BY_SPECIFICITY = ( + { # most specific + SHAREv2.Article, + SHAREv2.Book, + SHAREv2.ConferencePaper, + SHAREv2.Dissertation, + SHAREv2.Preprint, + SHAREv2.Project, + SHAREv2.Registration, + SHAREv2.Report, + SHAREv2.Thesis, + SHAREv2.WorkingPaper, + }, + { # middling specific + SHAREv2.DataSet, + SHAREv2.Patent, + SHAREv2.Poster, + SHAREv2.Publication, + SHAREv2.Presentation, + SHAREv2.Repository, + SHAREv2.Retraction, + SHAREv2.Software, + }, + { # least specific + SHAREv2.CreativeWork, + }, +) + + # values that, for the purpose of indexing in elasticsearch, are equivalent to absence -EMPTY_VALUES = (None, '', []) +EMPTY_VALUES = (None, '', []) # type: ignore[var-annotated] def strip_empty_values(thing): @@ -216,38 +244,35 @@ def _related_agent(self, relation_iri, agent_iri): # TODO 'order_cited': } - def _sharev2_type(self, type_iri): - try: - return ShareV2Schema().get_type(_typename) - except SchemaKeyError: - return None - - def _single_type(self, focus_iri): - _type_iris = set(self.data.q(focus_iri, RDF.type)) - _sharev2_types = set( - _type_iri - for _type_iri in _type_iris - if _type_iri in SHAREv2 - ) - if _sharev2_types: - _typename = primitive_rdf.iri_minus_namespace(type_iri, namespace=SHAREv2) - elif type_iri in OSFMAP: - _typename = primitive_rdf.iri_minus_namespace(type_iri, namespace=OSFMAP) + def _single_type_iri(self, type_iris) -> str | None: + # try SHAREv2 types + _sharev2_type_iris = set(filter(SHAREv2.__contains__, type_iris)) + if _sharev2_type_iris: + for _sharev2_typeset in _SHAREv2_TYPES_BY_SPECIFICITY: + _matching_type_iris = _sharev2_type_iris.intersection(_sharev2_typeset) + if _matching_type_iris: + return _matching_type_iris.pop() # take any one + # try for an OSFMAP type + _osfmap_type_iri = next(filter(OSFMAP.__contains__, type_iris), None) + if _osfmap_type_iri: + _typename = primitive_rdf.iri_minus_namespace(_osfmap_type_iri, namespace=OSFMAP) + # sharev2 backcompat: components are not a separate type if _typename == 'RegistrationComponent': _typename = 'Registration' elif _typename == 'ProjectComponent': _typename = 'Project' - else: - return None - def _type_sortkey(sharev2_type): - return sharev2_type.distance_from_concrete_type - 
_types = filter(None, ( - self._sharev2_type(_type_iri) - )) - _sorted_types = sorted(_types, key=_type_sortkey, reverse=True) - if not _sorted_types: - return None - return self._format_typename(_sorted_types[0].name) + # try for a matching type in SHAREv2, but fall back to CreativeWork + return self._single_type_iri([SHAREv2[_typename], SHAREv2.CreativeWork]) + return None + + def _single_type(self, focus_iri): + _type_iris = set(self.data.q(focus_iri, RDF.type)) + _type_iri = self._single_type_iri(_type_iris) + return ( + self._format_type_iri(_type_iri) + if _type_iri + else None + ) def _type_list(self, focus_iri): return sorted( From f3ed6fc673a2ae6e93a026e67e054267cac51082 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 6 Mar 2025 14:43:12 -0500 Subject: [PATCH 08/46] wip --- .../search/index_strategy/sharev2_elastic5.py | 28 ++++++++++--------- .../search/index_strategy/sharev2_elastic8.py | 2 ++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/share/search/index_strategy/sharev2_elastic5.py b/share/search/index_strategy/sharev2_elastic5.py index 8fea36be9..c8cb990dd 100644 --- a/share/search/index_strategy/sharev2_elastic5.py +++ b/share/search/index_strategy/sharev2_elastic5.py @@ -342,6 +342,21 @@ def _build_elastic_actions(self, messages_chunk): } yield action + # optional method from IndexStrategy + def pls_handle_search__passthru(self, request_body=None, request_queryparams=None) -> dict: + '''the definitive sharev2-search api: passthru to elasticsearch version 5 + ''' + if request_queryparams: + request_queryparams.pop('indexStrategy', None) + try: + return self.es5_client.search( + index=self.STATIC_INDEXNAME, + body=request_body or {}, + params=request_queryparams or {}, + ) + except elasticsearch5.TransportError as error: + raise exceptions.IndexStrategyError() from error # TODO: error messaging + class SpecificIndex(IndexStrategy.SpecificIndex): index_strategy: Sharev2Elastic5IndexStrategy # narrow type @@ -439,16 +454,3 @@ def pls_get_status(self) -> IndexStatus: ), doc_count=index_stats['primaries']['docs']['count'], ) - - # optional method from IndexStrategy.SpecificIndex - def pls_handle_search__passthru(self, request_body=None, request_queryparams=None) -> dict: - '''the definitive sharev2-search api: passthru to elasticsearch version 5 - ''' - try: - return self.index_strategy.es5_client.search( - index=self.full_index_name, - body=request_body or {}, - params=request_queryparams or {}, - ) - except elasticsearch5.TransportError as error: - raise exceptions.IndexStrategyError() from error # TODO: error messaging diff --git a/share/search/index_strategy/sharev2_elastic8.py b/share/search/index_strategy/sharev2_elastic8.py index 868b72981..886353eb1 100644 --- a/share/search/index_strategy/sharev2_elastic8.py +++ b/share/search/index_strategy/sharev2_elastic8.py @@ -179,6 +179,8 @@ def _load_docs(self, suid_ids) -> typing.Iterable[tuple[int, str]]: # optional method from IndexStrategy def pls_handle_search__passthru(self, request_body=None, request_queryparams=None) -> dict: + if request_queryparams: + request_queryparams.pop('indexStrategy', None) try: json_response = self.es8_client.search( index=self.get_index('').full_index_name, From a8d6daa64f3192bb8b7814e78843e770985ae616 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 6 Mar 2025 14:57:19 -0500 Subject: [PATCH 09/46] wip --- share/apps.py | 3 - share/models/validators.py | 7 + share/signals.py | 11 - tests/factories/__init__.py | 3 - tests/features/environment.py | 39 -- 
tests/features/harvesting/laziness.feature | 43 -- tests/features/harvesting/scheduling.feature | 66 --- tests/features/steps/harvesting.py | 76 ---- tests/features/steps/source.py | 78 ---- tests/share/search/_util.py | 53 +++ tests/share/tasks/__init__.py | 23 - tests/share/tasks/test_harvest.py | 429 ------------------- tests/share/tasks/test_job_consumers.py | 85 ---- trove/extract/__init__.py | 3 - trove/extract/legacy_sharev2.py | 266 ------------ 15 files changed, 60 insertions(+), 1125 deletions(-) create mode 100644 share/models/validators.py delete mode 100644 share/signals.py delete mode 100644 tests/features/environment.py delete mode 100644 tests/features/harvesting/laziness.feature delete mode 100644 tests/features/harvesting/scheduling.feature delete mode 100644 tests/features/steps/harvesting.py delete mode 100644 tests/features/steps/source.py create mode 100644 tests/share/search/_util.py delete mode 100644 tests/share/tasks/__init__.py delete mode 100644 tests/share/tasks/test_harvest.py delete mode 100644 tests/share/tasks/test_job_consumers.py delete mode 100644 trove/extract/legacy_sharev2.py diff --git a/share/apps.py b/share/apps.py index 910fdd491..88107bac6 100644 --- a/share/apps.py +++ b/share/apps.py @@ -1,7 +1,5 @@ from django.apps import AppConfig from django.core import checks -from django.db.models.signals import post_migrate -from share.signals import post_migrate_load_sources from share.checks import check_all_index_strategies_current @@ -9,5 +7,4 @@ class ShareConfig(AppConfig): name = 'share' def ready(self): - post_migrate.connect(post_migrate_load_sources, sender=self) checks.register(check_all_index_strategies_current) diff --git a/share/models/validators.py b/share/models/validators.py new file mode 100644 index 000000000..394a279fc --- /dev/null +++ b/share/models/validators.py @@ -0,0 +1,7 @@ +from django.utils.deconstruct import deconstructible + + +@deconstructible +class JSONLDValidator: + def __call__(self, *args, **kwargs): + raise Exception('Deprecated; stop doing sharev2 stuff') diff --git a/share/signals.py b/share/signals.py deleted file mode 100644 index afaae1e30..000000000 --- a/share/signals.py +++ /dev/null @@ -1,11 +0,0 @@ -from django.core import management -from django.db.utils import ProgrammingError - - -def post_migrate_load_sources(sender, **kwargs): - Source = sender.get_model('Source') - try: - Source.objects.all()[0] - except ProgrammingError: - return - management.call_command('loadsources') diff --git a/tests/factories/__init__.py b/tests/factories/__init__.py index ed5e649e6..b5eaab2f2 100644 --- a/tests/factories/__init__.py +++ b/tests/factories/__init__.py @@ -45,10 +45,7 @@ def __call__(self, *args, **kwargs): class SourceConfigFactory(DjangoModelFactory): label = factory.Faker('sentence') base_url = factory.Faker('url') - harvest_after = '00:00' source = factory.SubFactory(SourceFactory) - harvester_key = None - transformer_key = None class Meta: model = share_db.SourceConfig diff --git a/tests/features/environment.py b/tests/features/environment.py deleted file mode 100644 index 519903f04..000000000 --- a/tests/features/environment.py +++ /dev/null @@ -1,39 +0,0 @@ -import os - -import django -from django.test import TestCase -from django.test.utils import ( - setup_test_environment, - teardown_test_environment, - setup_databases, - teardown_databases, -) - - -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings') -django.setup() - - -# Run with -D RESETDB=1 to rebuild the SQL database -def 
before_all(context):
-    setup_test_environment()
-    context.db_cfg = setup_databases(
-        verbosity=False,
-        interactive=False,
-        keepdb=not context.config.userdata.getbool('RESETDB')
-    )
-
-
-def before_scenario(context, scenario):
-    context.test_case = TestCase(methodName='__init__')
-    context.test_case._pre_setup()
-
-
-def after_scenario(context, scenario):
-    context.test_case._post_teardown()
-
-
-def after_all(context):
-    if context.config.userdata.getbool('RESETDB'):
-        teardown_databases(context.db_cfg, verbosity=False)
-    teardown_test_environment()
diff --git a/tests/features/harvesting/laziness.feature b/tests/features/harvesting/laziness.feature
deleted file mode 100644
index bf5ee8328..000000000
--- a/tests/features/harvesting/laziness.feature
+++ /dev/null
@@ -1,43 +0,0 @@
-Feature: Harvester Laziness
-    Harvesting will be as lazy as possible.
-    If it can be determined that the system already has
-    all the data, the task will be marked as "skipped"
-
-    Background:
-        Given the source Neat.io
-        And Neat.io has a source config, io.neat
-        And a succeeded harvest of io.neat for 2012-11-10 to 2012-11-11
-        And a succeeded harvest of io.neat for 2012-11-12 to 2012-11-15
-
-    Scenario Outline: Skippable harvest tasks
-        When io.neat is harvested for <START_DATE> to <END_DATE>
-        Then io.neat's latest harvest job's status will be skipped
-        And it will be completed 2 times
-        And it's error_context will be <REASON>
-
-        Examples:
-            | START_DATE | END_DATE   | REASON               |
-            | 2012-11-10 | 2012-11-11 | Previously Succeeded |
-            # Future improvements
-            # | 2012-11-13 | 2012-11-14 | Encompassing task succeeded  |
-            # | 2012-11-13 | 2012-11-15 | Encompassing task succeeded  |
-            # | 2012-11-12 | 2012-11-14 | Encompassing task succeeded  |
-            # | 2012-11-10 | 2012-11-15 | Comprised of succeeded tasks |
-
-    Scenario: Version's must match
-        Given io.neat is updated to version 2
-        When io.neat is harvested for 2012-11-10 to 2012-11-11
-        Then io.neat will have 2 harvest jobs for 2012-11-10 to 2012-11-11
-        And io.neat's latest harvest job's status will be succeeded
-
-    Scenario Outline: Past harvests must have been successful
-        Given a <STATUS> harvest of io.neat for 2012-11-01 to 2012-11-02
-        When io.neat is harvested for 2012-11-01 to 2012-11-02
-        Then io.neat will have 1 harvest job for 2012-11-01 to 2012-11-02
-        And io.neat's latest harvest job's status will be succeeded
-
-        Examples:
-            | STATUS  |
-            | failed  |
-            | skipped |
-            | forced  |
diff --git a/tests/features/harvesting/scheduling.feature b/tests/features/harvesting/scheduling.feature
deleted file mode 100644
index 5dc36bba7..000000000
--- a/tests/features/harvesting/scheduling.feature
+++ /dev/null
@@ -1,66 +0,0 @@
-Feature: Harvester Scheduling
-
-    Scenario Outline: Scheduling harvests
-        Given a source config, neat.o, that harvests <INTERVAL>
-        And the last harvest of neat.o was <PREVIOUS END DATE>
-        When harvests are scheduled on <DATE>
-        Then neat.o will have <NUM> harvest jobs
-
-        Examples:
-            | INTERVAL    | PREVIOUS END DATE | DATE       | NUM |
-            | daily       | 2017-01-01        | 2017-01-02 | 2   |
-            | daily       | 2017-01-01        | 2017-01-03 | 3   |
-            | daily       | 2016-01-01        | 2017-01-01 | 367 |
-            | weekly      | 2017-01-01        | 2017-01-03 | 1   |
-            | weekly      | 2017-01-01        | 2017-01-08 | 2   |
-            | weekly      | 2017-01-01        | 2017-01-09 | 2   |
-            | monthly     | 2017-01-01        | 2017-01-09 | 1   |
-            | monthly     | 2017-01-01        | 2017-02-09 | 2   |
-            | monthly     | 2017-01-01        | 2017-03-02 | 3   |
-            | fortnightly | 2017-01-01        | 2017-01-15 | 2   |
-            | fortnightly | 2016-12-28        | 2017-01-01 | 1   |
-            | fortnightly | 2016-12-28        | 2017-02-01 | 3   |
-            | yearly      | 2016-02-01        | 2017-02-01 | 2   |
-            | yearly      | 2016-02-01        | 2017-01-29 | 1   |
-
-    # We need a new term for backharvest
-    Scenario Outline: Automatically scheduling back harvests
-        Given a source config, neat.o, that harvests <INTERVAL>
-        And neat.o is allowed to be backharvested
-        And neat.o's earliest record is <EARLIEST RECORD>
-        When harvests are scheduled on 2017-01-01
-        Then neat.o will have <NUM> harvest jobs
-
-        Examples:
-            | INTERVAL | EARLIEST RECORD | NUM  |
-            | daily    | 2000-01-01      | 6210 |
-            | weekly   | 1990-01-01      | 1408 |
-            | monthly  | 2014-05-07      | 32   |
-            | yearly   | 2001-01-01      | 16   |
-
-    Scenario Outline: Scheduling first time harvests
-        Given a source config, neat.o, that harvests <INTERVAL>
-        When harvests are scheduled on 2017-01-01
-        Then neat.o will have 1 harvest jobs
-
-        Examples:
-            | INTERVAL    |
-            | daily       |
-            | weekly      |
-            | fortnightly |
-            | yearly      |
-
-    Scenario Outline: Scheduling idempotency
-        Given a source config, neat.o, that harvests <INTERVAL>
-        When harvests are scheduled on 2017-01-02
-        And harvests are scheduled on 2017-01-02
-        And harvests are scheduled on 2017-01-01
-        And harvests are scheduled on 2017-01-01
-        Then neat.o will have 1 harvest jobs
-
-        Examples:
-            | INTERVAL    |
-            | daily       |
-            | weekly      |
-            | fortnightly |
-            | yearly      |
diff --git a/tests/features/steps/harvesting.py b/tests/features/steps/harvesting.py
deleted file mode 100644
index 8b335a0e5..000000000
--- a/tests/features/steps/harvesting.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import behave
-
-import pendulum
-
-from share import models
-from share import tasks
-
-
-@behave.given('{label} is allowed to be backharvested')
-def allow_backharvesting(context, label):
-    models.SourceConfig.objects.filter(label=label).update(full_harvest=True)
-
-
-@behave.given('{label}\'s earliest record is {date}')
-def set_earliest_record(context, label, date):
-    models.SourceConfig.objects.filter(label=label).update(earliest_date=date)
-
-
-@behave.given('a {status} harvest of {label}')
-@behave.given('the last harvest of {label} was {end}')
-@behave.given('a {status} harvest of {label} for {start} to {end}')
-def make_harvest_job(context, label, status='succeeded', start=None, end=None):
-    source_config = models.SourceConfig.objects.get(label=label)
-
-    if end:
-        end = pendulum.parse(end)
-
-    if start:
-        start = pendulum.parse(start)
-
-    if end and not start:
-        start = end - source_config.harvest_interval
-
-    models.HarvestJob.objects.create(
-        completions=1,
-        end_date=end,
-        start_date=start,
-        status=getattr(models.HarvestJob.STATUS, status),
-        source_config=source_config,
-        source_config_version=source_config.version,
-        harvester_version=source_config.get_harvester_class().VERSION,
-    )
-
-
-@behave.when('harvests are scheduled at {time} on {date}')
-@behave.when('harvests are scheduled on {date}')
-def schedule_harvests(context, date, time='00:00'):
-    tasks.schedule_harvests(cutoff=pendulum.parse('{}T{}+00:00'.format(date, time)))
-
-
-@behave.then('{label} will have {number} harvest job')
-@behave.then('{label} will have {number} harvest jobs')
-@behave.then('{label} will have {number} harvest job for {start} to {end}')
-@behave.then('{label} will have {number} harvest jobs for {start} to {end}')
-def assert_num_harvest_jobs(context, label, number, start=None, end=None):
-    qs = models.HarvestJob.objects.filter(source_config__label=label)
-
-    if start:
-        qs = qs.filter(start_date=start)
-
-    if end:
-        qs = qs.filter(end_date=end)
-
-    assert qs.count() == int(number), '{!r} has {} jobs not {}'.format(models.SourceConfig.objects.get(label=label), qs.count(), number)
-
-
-@behave.then('{label}\'s latest harvest job\'s {field} will be {value}')
-def assert_latest_harvestjob_value(context, label, field, value): - context.subject = job = models.HarvestJob.objects.filter( - source_config__label=label - ).first() - - if field == 'status': - value = getattr(models.HarvestJob.STATUS, value) - - assert getattr(job, field) == value, '{!r}.{} ({!r}) != {!r}'.format(job, field, getattr(job, field), value) diff --git a/tests/features/steps/source.py b/tests/features/steps/source.py deleted file mode 100644 index a0bbe9aff..000000000 --- a/tests/features/steps/source.py +++ /dev/null @@ -1,78 +0,0 @@ -import behave - -import pendulum - -from share import models -from share import tasks -from share.harvest.scheduler import HarvestScheduler - -from tests import factories - - -@behave.given('the source {name}') -def make_source(context, name): - if not hasattr(context, 'source'): - context.sources = {} - context.sources[name] = factories.SourceFactory(name=name) - context.subject = ('sources', context.sources[name]) - - -@behave.given('{name} has a source config, {label}') -@behave.given('a source config, {label}, that harvests {interval}') -@behave.given('a source config, {label}, that harvests {interval} after {time}') -def make_source_config(context, label, name=None, interval=None, time=None): - kwargs = {'label': label} - - if name is None: - kwargs['source'] = factories.SourceFactory() - else: - kwargs['source'] = models.Source.objects.get(name=name) - - if interval is not None: - kwargs['harvest_interval'] = { - 'daily': '1 day', - 'weekly': '1 week', - 'fortnightly': '2 weeks', - 'yearly': '1 year', - 'monthly': '1 month', - }[interval] - - if time is not None: - kwargs['harvest_after'] = time - - factories.SourceConfigFactory(**kwargs) - - -@behave.given('{label} is updated to version {version}') -def update_harvester(context, label, version): - models.SourceConfig.objects.get(label=label).get_harvester_class().VERSION = int(version) - - -@behave.when('{label} is harvested') -@behave.when('{label} is harvested for {start} to {end}') -def start_harvest(context, label, start=None, end=None): - job = HarvestScheduler(models.SourceConfig.objects.get(label=label), claim_jobs=True).range( - pendulum.parse(start), - pendulum.parse(end), - )[0] - - tasks.harvest(job_id=job.id) - - -@behave.then('it\'s {field} will be {value}') -def assert_subject_value(context, field, value): - assert hasattr(context, 'subject'), 'No subject has been set, don\'t use pronouns!' 
- assert hasattr(context.subject, field), '{!r} has not attribute {!r}'.format(context.subject, field) - - if hasattr(context.subject, 'refresh_from_db'): - context.subject.refresh_from_db() - assert getattr(context.subject, field) == value, '{!r}.{} ({!r}) != {!r}'.format(context.subject, field, getattr(context.subject, field), value) - - -@behave.then('it will be completed {number} time') -@behave.then('it will be completed {number} times') -def assert_subject_completions(context, number): - assert_subject_value(context, 'completions', int(number)) - # assert hasattr(context.subject, 'completions') - # # context.subject.refresh_from_db() - # assert context.subject.completions == int(number), '{!r}.{} ({!r}) != {!r}'.format(context.subject, 'completions) diff --git a/tests/share/search/_util.py b/tests/share/search/_util.py new file mode 100644 index 000000000..eac9825c5 --- /dev/null +++ b/tests/share/search/_util.py @@ -0,0 +1,53 @@ +from tests import factories + +from primitive_metadata import primitive_rdf as rdf + +from trove import models as trove_db +from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT + + +def create_indexcard(focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: + _suid = factories.SourceUniqueIdentifierFactory() + _indexcard = trove_db.Indexcard.objects.create(source_record_suid=_suid) + update_indexcard_content(_indexcard, focus_iri, rdf_tripledict) + # an osfmap_json card is required for indexing, but not used in these tests + trove_db.DerivedIndexcard.objects.get_or_create( + upriver_indexcard=_indexcard, + deriver_identifier=trove_db.ResourceIdentifier.objects.get_or_create_for_iri(TROVE['derive/osfmap_json']), + ) + return _indexcard + + +def update_indexcard_content( + indexcard: trove_db.Indexcard, + focus_iri: str, + rdf_tripledict: rdf.RdfTripleDictionary, +) -> None: + _raw = factories.RawDatumFactory(suid=indexcard.source_record_suid) + trove_db.LatestIndexcardRdf.objects.update_or_create( + indexcard=indexcard, + defaults={ + 'from_raw_datum': _raw, + 'focus_iri': focus_iri, + 'rdf_as_turtle': rdf.turtle_from_tripledict(rdf_tripledict), + 'turtle_checksum_iri': 'foo', # not enforced + }, + ) + self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri + + +def create_supplement( + indexcard: trove_db.Indexcard, + focus_iri: str, + rdf_tripledict: rdf.RdfTripleDictionary, +) -> trove_db.SupplementaryIndexcardRdf: + _supp_suid = factories.SourceUniqueIdentifierFactory() + _supp_raw = factories.RawDatumFactory(suid=_supp_suid) + return trove_db.SupplementaryIndexcardRdf.objects.create( + from_raw_datum=_supp_raw, + indexcard=indexcard, + supplementary_suid=_supp_suid, + focus_iri=focus_iri, + rdf_as_turtle=rdf.turtle_from_tripledict(rdf_tripledict), + turtle_checksum_iri='sup', # not enforced + ) diff --git a/tests/share/tasks/__init__.py b/tests/share/tasks/__init__.py deleted file mode 100644 index 1ceaab124..000000000 --- a/tests/share/tasks/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -import threading - - -class SyncedThread(threading.Thread): - - def __init__(self, target, args=(), kwargs={}): - self._end = threading.Event() - self._start = threading.Event() - - def _target(*args, **kwargs): - with target(*args, **kwargs): - self._start.set() - self._end.wait(10) - - super().__init__(target=_target, args=args, kwargs=kwargs) - - def start(self): - super().start() - self._start.wait(10) - - def join(self, timeout=1): - self._end.set() - return super().join(timeout) diff --git 
a/tests/share/tasks/test_harvest.py b/tests/share/tasks/test_harvest.py deleted file mode 100644 index 9bb47f1b8..000000000 --- a/tests/share/tasks/test_harvest.py +++ /dev/null @@ -1,429 +0,0 @@ -from unittest import mock -import random -import uuid - -from faker import Factory - -import pytest - -import pendulum - -from django.conf import settings -from django.db import DatabaseError - -from share.harvest.base import FetchResult -from share.harvest.exceptions import HarvesterConcurrencyError -from share.models import Source -from share.models import HarvestJob -from share.models import RawDatum -from share import tasks -from share.harvest.scheduler import HarvestScheduler - -from tests import factories -from tests.share.tasks import SyncedThread - - -@pytest.mark.django_db -def test_sources_have_access_tokens(): - for source in Source.objects.exclude(user__username=settings.APPLICATION_USERNAME)[:10]: - assert source.user.authorization() - - -@pytest.mark.django_db -class TestHarvestTaskWithJob: - - def test_not_found(self): - with pytest.raises(HarvestJob.DoesNotExist): - tasks.harvest(job_id=12) - - # def test_disabled_source_config(self): - # with pytest.raises(HarvesterConcurrencyError): - # tasks.harvest(job_id=12) - - -@pytest.mark.usefixtures('nested_django_db') -class TestHarvestTask: - - @pytest.fixture(scope='class') - def source_config(self, class_scoped_django_db): - return factories.SourceConfigFactory() - - @pytest.fixture - def do_fetch_list(self, source_config): - with mock.patch.object(source_config.get_harvester_class(), '_do_fetch', factories.ListGenerator()) as _do_fetch_list: - yield _do_fetch_list - - @pytest.mark.parametrize('source_config_kwargs, task_kwargs, lock_config, exception', [ - ({}, {}, True, HarvesterConcurrencyError), - ]) - def test_failure_cases(self, source_config_kwargs, task_kwargs, lock_config, exception): - source_config = factories.SourceConfigFactory(**source_config_kwargs) - job = factories.HarvestJobFactory(source_config=source_config) - - if lock_config: - t = SyncedThread(source_config.acquire_lock) - t.start() - - try: - with pytest.raises(exception): - tasks.harvest(job_id=job.id, **task_kwargs) - finally: - if lock_config: - t.join() - - def test_harvest_fails(self, source_config, do_fetch_list): - do_fetch_list.side_effect = ValueError('In a test') - job = factories.HarvestJobFactory(source_config=source_config) - - with pytest.raises(ValueError) as e: - tasks.harvest(job_id=job.id) - - job.refresh_from_db() - - assert e.value.args == ('In a test', ) - assert job.status == HarvestJob.STATUS.failed - assert job.completions == 0 - assert 'ValueError: In a test' in job.error_context - - def test_harvest_database_error(self, source_config, monkeypatch): - job = factories.HarvestJobFactory(source_config=source_config) - - def _do_fetch(*args, **kwargs): - yield ('doc1', b'doc1data') - yield ('doc2', b'doc2data') - yield ('doc3', b'doc3data') - raise DatabaseError('In a test') - monkeypatch.setattr(source_config.get_harvester_class(), '_do_fetch', _do_fetch) - - with pytest.raises(DatabaseError) as e: - tasks.harvest(job_id=job.id) - - job.refresh_from_db() - - assert job.raw_data.count() == 3 - assert e.value.args == ('In a test', ) - assert job.status == HarvestJob.STATUS.failed - assert job.completions == 0 - assert 'DatabaseError: In a test' in job.error_context - - def test_partial_harvest_fails(self, source_config, monkeypatch): - job = factories.HarvestJobFactory(source_config=source_config) - - def _do_fetch(*args, **kwargs): - yield 
('doc1', b'doc1data') - yield ('doc2', b'doc2data') - yield ('doc3', b'doc3data') - raise ValueError('In a test') - monkeypatch.setattr(source_config.get_harvester_class(), '_do_fetch', _do_fetch) - - with pytest.raises(ValueError) as e: - tasks.harvest(job_id=job.id) - - job.refresh_from_db() - - assert job.raw_data.count() == 3 - assert e.value.args == ('In a test', ) - assert job.status == HarvestJob.STATUS.failed - assert job.completions == 0 - assert 'ValueError: In a test' in job.error_context - - def test_job_values(self, source_config, do_fetch_list): - task_id = uuid.uuid4() - job = factories.HarvestJobFactory(source_config=source_config) - - tasks.harvest.apply((), {'job_id': job.id}, task_id=str(task_id), throw=True) - - job.refresh_from_db() - - assert job.task_id == task_id - assert job.status == HarvestJob.STATUS.succeeded - assert job.error_context == '' - assert job.completions == 1 - assert job.source_config == source_config - assert job.share_version == settings.VERSION - assert job.harvester_version == source_config.get_harvester().VERSION - assert job.source_config_version == source_config.version - - @pytest.mark.parametrize('count, rediscovered, superfluous, limit, ingest', { - (count, int(rediscovered), False, int(limit) if limit is not None else None, True) - for count in (0, 1, 500, 501, 1010) - for limit in (None, 0, 1, count * .5, count, count * 2) - for rediscovered in (0, 1, count * .5, count) - if rediscovered <= count - } | { - (count, int(rediscovered), superfluous, None, ingest) - for count in (0, 150) - for ingest in (True, False) - for superfluous in (True, False) - for rediscovered in (0, count * .5, count) - if rediscovered <= count - }) - def test_data_flow(self, source_config, do_fetch_list, count, rediscovered, superfluous, limit, ingest, django_assert_num_queries): - assert rediscovered <= count, 'Y tho' - - fake = Factory.create() - - do_fetch_list.extend((fake.sentence(), str(i * 50)) for i in range(count)) - list(RawDatum.objects.store_chunk(source_config, ( - FetchResult(*tup) for tup in - random.sample(source_config.get_harvester()._do_fetch, rediscovered)) - )) - - job = factories.HarvestJobFactory(source_config=source_config) - - tasks.harvest(job_id=job.id, superfluous=superfluous, limit=limit, ingest=ingest) - - job.refresh_from_db() - - assert job.completions == 1 - assert job.status == HarvestJob.STATUS.succeeded - assert job.raw_data.count() == (count if limit is None or count < limit else limit) - - if limit is not None and rediscovered: - assert RawDatum.objects.filter().count() >= rediscovered - assert RawDatum.objects.filter().count() <= rediscovered + max(0, min(limit, count - rediscovered)) - else: - assert RawDatum.objects.filter().count() == (count if limit is None or count < limit else limit) - - def test_handles_duplicate_values(self, source_config, do_fetch_list): - fake = Factory.create() - job = factories.HarvestJobFactory(source_config=source_config) - - do_fetch_list.extend([(fake.sentence(), str(i * 50)) for i in range(100)] * 3) - - tasks.harvest(job_id=job.id, ingest=False) - - job.refresh_from_db() - - assert job.completions == 1 - assert job.status == HarvestJob.STATUS.succeeded - assert job.raw_data.count() == 100 - - def test_handles_duplicate_values_limit(self, source_config, do_fetch_list): - fake = Factory.create() - job = factories.HarvestJobFactory(source_config=source_config) - - padding = [] - for _ in range(20): - s = fake.sentence() - padding.append((s, s * 5)) - - for i in range(10): - s = fake.sentence() 
- do_fetch_list.extend([(s, s * 5)] * 5) - do_fetch_list.extend(padding) - - tasks.harvest(job_id=job.id, limit=60, ingest=False) - - job.refresh_from_db() - - assert job.completions == 1 - assert job.status == HarvestJob.STATUS.succeeded - assert job.raw_data.count() == 30 - - def test_duplicate_data_different_identifiers(self, source_config, do_fetch_list): - do_fetch_list.extend([ - ('identifier1', 'samedata'), - ('identifier2', 'samedata'), - ]) - - with pytest.raises(ValueError) as e: - list(source_config.get_harvester().harvest()) - - assert e.value.args == (' has already been seen or stored with identifier "identifier1". Perhaps your identifier extraction is incorrect?', ) - - def test_datestamps(self, source_config, do_fetch_list): - do_fetch_list.extend([ - ('identifier{}'.format(i), 'data{}'.format(i), pendulum.datetime(2017, 1, i)) - for i in range(1, 10) - ]) - - for i, raw in enumerate(source_config.get_harvester().harvest_date_range( - pendulum.parse('2017-01-01'), - pendulum.parse('2017-02-01'), - )): - assert raw.datestamp is not None - assert raw.datestamp.day == (i + 1) - assert raw.datestamp.year == 2017 - - def test_datestamps_out_of_range(self, source_config, do_fetch_list): - do_fetch_list.extend([ - ('identifier{}'.format(i), 'data{}'.format(i), pendulum.datetime(2016, 1, i)) - for i in range(1, 10) - ]) - - with pytest.raises(ValueError) as e: - list(source_config.get_harvester().harvest_date(pendulum.parse('2016-01-01'))) - - assert e.value.args == ('result.datestamp is outside of the requested date range. 2016-01-03T00:00:00+00:00 from identifier3 is not within [2015-12-31T00:00:00+00:00 - 2016-01-01T00:00:00+00:00]', ) - - def test_datestamps_within_24_hours(self, source_config, do_fetch_list): - do_fetch_list.extend([ - ('identifier{}'.format(timestamp), 'data{}'.format(timestamp), timestamp) - for timestamp in (pendulum.parse('2016-01-01') - pendulum.parse('2016-01-03')).range('hours') - ]) - - list(source_config.get_harvester().harvest_date(pendulum.parse('2016-01-02'))) - - @pytest.mark.parametrize('now, end_date, harvest_after, should_run', [ - ( - # Too early - pendulum.parse('2017-01-01T00:00'), - pendulum.parse('2017-01-01').date(), - pendulum.parse('01:00').time(), - False - ), - ( - # Just right - pendulum.parse('2017-01-01T02:00'), - pendulum.parse('2017-01-01').date(), - pendulum.parse('01:00').time(), - True - ), - ( - # Equal - pendulum.parse('2017-01-01T01:00'), - pendulum.parse('2017-01-01').date(), - pendulum.parse('01:00').time(), - True - ), - ( - # Way in the past - pendulum.parse('2017-01-01T01:00'), - pendulum.parse('2016-01-01').date(), - pendulum.parse('01:00').time(), - True - ), - ( - # In the future... ? 
- pendulum.parse('2017-01-01T01:00'), - pendulum.parse('2018-01-01').date(), - pendulum.parse('01:00').time(), - False - ), - ( - # Late harvester - pendulum.parse('2017-01-01T01:00'), - pendulum.parse('2017-01-01').date(), - pendulum.parse('20:00').time(), - False - ), - ]) - def test_harvest_after(self, monkeypatch, now, end_date, harvest_after, should_run, source_config): - monkeypatch.setattr('share.tasks.harvest.apply_async', mock.Mock()) - - source_config.harvest_after = harvest_after - source_config.save() - monkeypatch.setattr('django.utils.timezone.now', lambda: now) - with mock.patch.object(source_config.get_harvester_class(), '_do_fetch', return_value=[]) as _mock_do_fetch: - - HarvestScheduler(source_config).date(end_date.add(days=-1)) - - tasks.harvest() - - assert _mock_do_fetch.called == should_run - - def test_latest_date(self, source_config): - source_config.full_harvest = True - source_config.earliest_date = pendulum.parse('2017-01-01').date() - source_config.save() - - # We have a harvest job with start_date equal to earliest_date - # but a different source_config - factories.HarvestJobFactory( - start_date=pendulum.parse('2017-01-01').date(), - end_date=pendulum.parse('2017-01-02').date(), - ) - - assert len(HarvestScheduler(source_config).all(cutoff=pendulum.parse('2018-01-01').date())) == 365 - - def test_caught_up(self, source_config): - source_config.full_harvest = True - source_config.earliest_date = pendulum.parse('2017-01-01').date() - source_config.save() - - factories.HarvestJobFactory( - source_config=source_config, - start_date=pendulum.parse('2017-01-01').date(), - end_date=pendulum.parse('2017-01-02').date(), - ) - - factories.HarvestJobFactory( - source_config=source_config, - start_date=pendulum.parse('2018-01-01').date(), - end_date=pendulum.parse('2018-01-02').date(), - ) - - assert len(HarvestScheduler(source_config).all(cutoff=pendulum.parse('2018-01-01').date())) == 0 - - def test_latest_date_null(self, source_config): - source_config.full_harvest = True - source_config.earliest_date = pendulum.parse('2017-01-01').date() - source_config.save() - assert len(HarvestScheduler(source_config).all(cutoff=pendulum.parse('2018-01-01').date())) == 365 - - def test_obsolete(self, source_config): - _harvester_class = source_config.get_harvester_class() - - hlv1 = factories.HarvestJobFactory( - harvester_version=_harvester_class.VERSION, - source_config=source_config, - start_date=pendulum.parse('2017-01-01').date(), - ) - - old_version = _harvester_class.VERSION - _harvester_class.VERSION += 1 - new_version = _harvester_class.VERSION - - hlv2 = factories.HarvestJobFactory( - harvester_version=_harvester_class.VERSION, - source_config=source_config, - start_date=pendulum.parse('2017-01-01').date(), - ) - - tasks.harvest(job_id=hlv2.id) - tasks.harvest(job_id=hlv1.id) - - hlv1.refresh_from_db() - hlv2.refresh_from_db() - - assert hlv2.status == HarvestJob.STATUS.succeeded - assert hlv2.harvester_version == new_version - - assert hlv1.status == HarvestJob.STATUS.skipped - assert hlv1.harvester_version == old_version - assert hlv1.error_context == HarvestJob.SkipReasons.obsolete.value - - @pytest.mark.parametrize('completions, status, new_version, updated', [ - (0, HarvestJob.STATUS.created, 2, True), - (1, HarvestJob.STATUS.created, 2, False), - (88, HarvestJob.STATUS.created, 2, False), - (88, HarvestJob.STATUS.failed, 2, False), - (0, HarvestJob.STATUS.failed, 2, True), - (0, HarvestJob.STATUS.succeeded, 2, True), - ]) - def test_autoupdate(self, source_config, 
completions, status, new_version, updated, do_fetch_list): - _harvester_class = source_config.get_harvester_class() - _harvester_class.VERSION = 1 - - hl = factories.HarvestJobFactory( - status=status, - completions=completions, - harvester_version=_harvester_class.VERSION, - source_config=source_config, - start_date=pendulum.parse('2017-01-01').date(), - ) - - _harvester_class.VERSION = new_version - - tasks.harvest(job_id=hl.id) - - hl.refresh_from_db() - - if updated: - assert hl.status == HarvestJob.STATUS.succeeded - elif new_version > 1: - assert hl.status == HarvestJob.STATUS.skipped - assert hl.error_context == HarvestJob.SkipReasons.obsolete.value - - assert (hl.harvester_version == new_version) == updated diff --git a/tests/share/tasks/test_job_consumers.py b/tests/share/tasks/test_job_consumers.py deleted file mode 100644 index 478f7e985..000000000 --- a/tests/share/tasks/test_job_consumers.py +++ /dev/null @@ -1,85 +0,0 @@ -from unittest import mock -import uuid - -import pytest - -from share.tasks import harvest -from share.tasks.jobs import HarvestJobConsumer -from tests import factories - - -@pytest.mark.parametrize('task, Consumer', [ - (harvest, HarvestJobConsumer), -]) -@pytest.mark.parametrize('kwargs', [ - {}, - {'foo': 1}, - {'foo': 1, 'bar': 'baz'}, -]) -def test_task_calls_consumer(task, Consumer, kwargs, monkeypatch): - monkeypatch.setattr(Consumer, 'consume', mock.Mock()) - task.apply(kwargs=kwargs) - assert Consumer.consume.call_count == 1 - assert Consumer.consume.call_args == ((), kwargs) - - -@pytest.mark.usefixtures('nested_django_db') -@pytest.mark.parametrize('Consumer, JobFactory', [ - (HarvestJobConsumer, factories.HarvestJobFactory), -]) -class TestJobConsumer: - - @pytest.fixture - def consumer(self, Consumer, JobFactory, monkeypatch): - monkeypatch.setattr(Consumer, '_consume_job', mock.Mock()) - return Consumer(task=mock.Mock(**{'request.id': uuid.uuid4()})) - - def test_no_job(self, consumer): - consumer.consume() - assert not consumer._consume_job.called - - def test_job_not_found(self, consumer): - with pytest.raises(consumer.Job.DoesNotExist): - consumer.consume(job_id=17) - assert not consumer._consume_job.called - - @pytest.mark.skip(reason='consume() with no job_id is temporarily a noop') - def test_job_locked(self, consumer, JobFactory): - job = JobFactory() - with consumer.Job.objects.all().lock_first(consumer.lock_field): - consumer.consume() - assert not consumer._consume_job.called - job.refresh_from_db() - assert job.status == job.STATUS.created - - def test_skip_duplicated(self, consumer, JobFactory): - job = JobFactory(completions=1, status=consumer.Job.STATUS.succeeded) - consumer.consume(job_id=job.id) - job.refresh_from_db() - assert job.status == job.STATUS.skipped - assert job.task_id == consumer.task.request.id - assert not consumer._consume_job.called - - @pytest.mark.skip(reason='consume() with no job_id is temporarily a noop') - def test_obsolete(self, consumer, JobFactory, monkeypatch): - monkeypatch.setattr(consumer, '_update_versions', mock.Mock(return_value=False)) - job = JobFactory() - consumer.consume() - job.refresh_from_db() - assert job.status == job.STATUS.skipped - assert job.error_context == job.SkipReasons.obsolete.value - assert job.task_id == consumer.task.request.id - assert not consumer._consume_job.called - - @pytest.mark.skip(reason='consume() with no job_id is temporarily a noop') - @pytest.mark.parametrize('exhaust', [True, False]) - def test_consume(self, consumer, JobFactory, exhaust): - job = 
JobFactory() - consumer.consume(exhaust=exhaust) - if exhaust: - assert consumer.task.apply_async.call_count == 1 - assert consumer.task.apply_async.call_args == ((consumer.task.request.args, consumer.task.request.kwargs), {}) - else: - assert not consumer.task.apply_async.called - assert consumer._consume_job.call_count == 1 - assert consumer._consume_job.call_args == ((job,), {'superfluous': False}) diff --git a/trove/extract/__init__.py b/trove/extract/__init__.py index b31cda5d6..b2bde949e 100644 --- a/trove/extract/__init__.py +++ b/trove/extract/__init__.py @@ -1,7 +1,6 @@ from trove import exceptions as trove_exceptions from ._base import BaseRdfExtractor -from .legacy_sharev2 import LegacySharev2Extractor from .turtle import TurtleRdfExtractor @@ -9,8 +8,6 @@ def get_rdf_extractor_class(mediatype) -> type[BaseRdfExtractor]: - if mediatype is None: - return LegacySharev2Extractor if mediatype == 'text/turtle': return TurtleRdfExtractor raise trove_exceptions.CannotDigestMediatype(mediatype) diff --git a/trove/extract/legacy_sharev2.py b/trove/extract/legacy_sharev2.py deleted file mode 100644 index 896c62251..000000000 --- a/trove/extract/legacy_sharev2.py +++ /dev/null @@ -1,266 +0,0 @@ -import datetime -import typing - -from django.conf import settings -from primitive_metadata import primitive_rdf, gather - -from share.util.graph import MutableNode -from share.regulate import Regulator -from trove import exceptions as trove_exceptions -from trove.vocab.namespaces import OSFMAP, DCTERMS, FOAF, DCAT, SHAREv2, RDF -from trove.vocab.osfmap import OSFMAP_NORMS -from ._base import BaseRdfExtractor - - -class LegacySharev2Extractor(BaseRdfExtractor): - # side-effected by extract_rdf (to support back-compat shenanigans) - extracted_focus_iri: typing.Optional[str] = None - sharev2graph_centralnode: typing.Optional[MutableNode] = None - - def extract_sharev2_graph(self, input_document): - _transformer = self.source_config.get_transformer() - _sharev2graph = _transformer.transform(input_document) - if _sharev2graph: # in-place update - Regulator(source_config=self.source_config).regulate(_sharev2graph) - return _sharev2graph - - def extract_rdf(self, input_document): - _sharev2graph = self.extract_sharev2_graph(input_document) - return self.extract_rdf_from_sharev2graph(_sharev2graph) - - def extract_rdf_from_sharev2graph(self, sharev2graph): - _centralnode = sharev2graph.get_central_node(guess=True) - self.sharev2graph_centralnode = _centralnode - _central_focus = _focus_for_mnode(_centralnode) - _gathering = osfmap_from_normd.new_gathering({ - 'source_config': self.source_config, - 'mnode': None, # provided by focus - }) - _gathering.ask_all_about(_central_focus) - _tripledict = _gathering.leaf_a_record() - self.extracted_focus_iri = next( - _iri - for _iri in _central_focus.iris - if _iri in _tripledict - ) - return _tripledict - - -### -# gathering OSFMAP-ish RDF from SHAREv2 NormalizedData - -osfmap_from_normd = gather.GatheringOrganizer( - namestory=( - primitive_rdf.literal('sharev2-normd'), - ), - norms=OSFMAP_NORMS, - gatherer_params={ - 'mnode': SHAREv2.MutableNode, - 'source_config': SHAREv2.SourceConfig, - }, -) - - -# gatherers: - -@osfmap_from_normd.gatherer(focustype_iris={ - SHAREv2.CreativeWork, -}) -def _gather_work(focus, *, mnode, source_config): - for _iri in focus.iris: - yield (DCTERMS.identifier, primitive_rdf.literal(_iri)) - _language_tag = mnode['language'] - _language_iri = ( - primitive_rdf.IANA_LANGUAGE[_language_tag] - if _language_tag - else None - ) - yield 
(DCTERMS.title, primitive_rdf.literal(mnode['title'], datatype_iris={_language_iri})) - yield (DCTERMS.description, primitive_rdf.literal(mnode['description'], datatype_iris={_language_iri})) - yield (DCTERMS.created, _date_or_none(mnode['date_published'])) - yield (DCTERMS.modified, _date_or_none(mnode['date_updated'])) - yield (DCTERMS.date, _date_or_none(mnode['date_published'] or mnode['date_updated'])) - yield (DCTERMS.rights, primitive_rdf.literal(mnode['free_to_read_type'])) - yield (DCTERMS.available, primitive_rdf.literal(mnode['free_to_read_date'])) - yield (DCTERMS.rights, primitive_rdf.literal(mnode['rights'])) - yield (DCTERMS.language, primitive_rdf.literal(_language_tag)) - if mnode['registration_type']: - yield (DCTERMS.conformsTo, frozenset(( - (FOAF.name, primitive_rdf.literal(mnode['registration_type'])), - ))) - if mnode['withdrawn']: - yield (OSFMAP.dateWithdrawn, _date_or_none(mnode['date_updated'])) - yield (OSFMAP.withdrawalJustification, primitive_rdf.literal(mnode['justification'])) # TODO: not in OSFMAP - for _tag in mnode['tags']: - yield (OSFMAP.keyword, primitive_rdf.literal(_tag['name'])) - for _agent_relation in mnode['agent_relations']: - yield ( - _agentwork_relation_iri(_agent_relation), - _focus_for_mnode(_agent_relation['agent']), - ) - for _work_relation in mnode['outgoing_creative_work_relations']: - yield ( - _workwork_relation_iri(_work_relation), - _focus_for_mnode(_work_relation['related']), - ) - for _work_relation in mnode['incoming_creative_work_relations']: - yield ( - _focus_for_mnode(_work_relation['subject']), - _workwork_relation_iri(_work_relation), - focus, - ) - - -@osfmap_from_normd.gatherer(DCTERMS.subject, focustype_iris={ - SHAREv2.CreativeWork, -}) -def _gather_work_subjects(focus, *, mnode, source_config): - _source_name = source_config.source.long_title - for _thru_subject_mnode in mnode['subject_relations']: - _subject_mnode = _thru_subject_mnode['subject'] - if not (_thru_subject_mnode['is_deleted'] or _subject_mnode['is_deleted']): - yield (DCTERMS.subject, primitive_rdf.literal(_subject_mnode['name'])) - yield (DCTERMS.subject, primitive_rdf.literal(_serialize_subject(_subject_mnode, _source_name))) - _synonym_mnode = _subject_mnode['central_synonym'] - if _synonym_mnode and not _synonym_mnode['is_deleted']: - yield (DCTERMS.subject, primitive_rdf.literal(_synonym_mnode['name'])) - yield (DCTERMS.subject, primitive_rdf.literal(_serialize_subject(_synonym_mnode, _source_name))) - - -@osfmap_from_normd.gatherer(focustype_iris={ - SHAREv2.Agent, -}) -def _gather_agent(focus, *, mnode, source_config): - for _iri in focus.iris: - if not _iri.startswith('_:'): # HACK: non-blank blank node (stop that) - yield (DCTERMS.identifier, primitive_rdf.literal(_iri)) - if 'Person' in mnode.schema_type.type_lineage: - yield (RDF.type, FOAF.Person) - if 'Organization' in mnode.schema_type.type_lineage: - yield (RDF.type, FOAF.Organization) - yield (FOAF.name, primitive_rdf.literal(mnode['name'])) - for _agent_relation in mnode['outgoing_agent_relations']: - yield ( - OSFMAP.affiliation, - _focus_for_mnode(_agent_relation['related']), - ) - for _agent_relation in mnode['incoming_agent_relations']: - yield ( - _focus_for_mnode(_agent_relation['subject']), - OSFMAP.affiliation, - focus, - ) - - -# helpers: - -def _iris_for_mnode(mnode: MutableNode) -> typing.Iterable[str]: - _identifiers = set(mnode['identifiers']) - if _identifiers: - for _identifier in _identifiers: - yield _identifier['uri'] - else: - yield mnode.id - - -def _choose_iri(iris): 
- return sorted(iris, key=len)[0] - - -def _focus_for_mnode(mnode: MutableNode): - return gather.Focus.new( - frozenset(_iris_for_mnode(mnode)), - frozenset(_focustype_iris(mnode)), - {'mnode': mnode}, - ) - - -def _has_parent(mnode: MutableNode) -> bool: - return any( - relation_node.type == 'ispartof' - for relation_node in mnode['outgoing_creative_work_relations'] - ) - - -def _date_or_none(maybe_date) -> typing.Optional[datetime.date]: - if isinstance(maybe_date, str): - _datetime = datetime.datetime.fromisoformat(maybe_date) - return _datetime.date() - if isinstance(maybe_date, datetime.datetime): - return maybe_date.date() - if isinstance(maybe_date, datetime.date): - return maybe_date - if maybe_date is None: - return None - raise trove_exceptions.CannotDigestDateValue(maybe_date) - - -def _focustype_iris(mnode: MutableNode) -> typing.Iterable[str]: - _typenames = { - mnode.schema_type.name, - *mnode.schema_type.type_lineage, - } - for _typename in _typenames: - yield SHAREv2[_typename] - - -def _agentwork_relation_iri(agentwork_relation: MutableNode): - _sharev2_relation_types = set(agentwork_relation.schema_type.type_lineage) - if 'Creator' in _sharev2_relation_types: - return DCTERMS.creator - if 'Contributor' in _sharev2_relation_types: - return DCTERMS.contributor - if 'Funder' in _sharev2_relation_types: - return OSFMAP.funder # TODO: different kind of osfmap expression - if 'Publisher' in _sharev2_relation_types: - return DCTERMS.publisher - if 'Host' in _sharev2_relation_types: - return DCAT.accessService - # generic AgentWorkRelation - _sharev2_agent_types = set(agentwork_relation['agent'].schema_type.type_lineage) - if 'Organization' in _sharev2_agent_types: - return OSFMAP.affiliation - return DCTERMS.contributor - - -WORKWORK_RELATION_MAP = { - 'cites': DCTERMS.references, - 'compiles': DCTERMS.references, - 'corrects': DCTERMS.references, - 'discusses': DCTERMS.references, - 'disputes': DCTERMS.references, - 'documents': DCTERMS.references, - 'extends': DCTERMS.references, - 'isderivedfrom': DCTERMS.references, - 'ispartof': DCTERMS.isPartOf, - 'issupplementto': OSFMAP.supplements, - 'references': DCTERMS.references, - 'repliesto': DCTERMS.references, - 'retracts': DCTERMS.references, - 'reviews': DCTERMS.references, - 'usesdatafrom': DCTERMS.references, -} - - -def _workwork_relation_iri(workwork_relation: MutableNode): - try: - return WORKWORK_RELATION_MAP[workwork_relation.type] - except KeyError: - return DCTERMS.relation - - -def _serialize_subject(subject_node: MutableNode, source_name=None): - '''a specific serialization of a subject, for backcompat with questionable decisions - ''' - _subject_lineage = [subject_node['name']] - _next_subject = subject_node['parent'] - while _next_subject: - _subject_lineage.insert(0, _next_subject['name']) - _next_subject = _next_subject['parent'] - _taxonomy_name = ( - source_name - if source_name and subject_node['central_synonym'] - else settings.SUBJECTS_CENTRAL_TAXONOMY - ) - _subject_lineage.insert(0, _taxonomy_name) - return '|'.join(_subject_lineage) From fea31e756b9ec89c636320350df6411844373daf Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 6 Mar 2025 15:30:58 -0500 Subject: [PATCH 10/46] wip --- share/admin/__init__.py | 1 - share/migrations/0077_big_cleanup_2025.py | 135 ++++++++++++++++++ share/models/ingest.py | 1 + tests/factories/__init__.py | 1 + tests/share/search/_util.py | 18 +-- .../_common_trovesearch_tests.py | 30 +++- .../index_strategy/test_sharev2_elastic8.py | 29 ++-- 7 files changed, 187 
insertions(+), 28 deletions(-) create mode 100644 share/migrations/0077_big_cleanup_2025.py diff --git a/share/admin/__init__.py b/share/admin/__init__.py index 24af3c3ce..1f21210e4 100644 --- a/share/admin/__init__.py +++ b/share/admin/__init__.py @@ -58,7 +58,6 @@ class RawDatumAdmin(admin.ModelAdmin): list_display = ('id', 'identifier', 'source_config_label', 'datestamp', 'date_created', 'date_modified', ) readonly_fields = ('datum__pre', 'sha256') exclude = ('datum',) - raw_id_fields = ('jobs',) paginator = TimeLimitedPaginator def identifier(self, obj): diff --git a/share/migrations/0077_big_cleanup_2025.py b/share/migrations/0077_big_cleanup_2025.py new file mode 100644 index 000000000..bf94aed06 --- /dev/null +++ b/share/migrations/0077_big_cleanup_2025.py @@ -0,0 +1,135 @@ +# Generated by Django 3.2.25 on 2025-03-06 19:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('share', '0076_rawdatum_share_rawdatum_expiration_idx'), + ] + + operations = [ + migrations.AlterUniqueTogether( + name='formattedmetadatarecord', + unique_together=None, + ), + migrations.RemoveField( + model_name='formattedmetadatarecord', + name='suid', + ), + migrations.AlterUniqueTogether( + name='harvestjob', + unique_together=None, + ), + migrations.RemoveField( + model_name='harvestjob', + name='source_config', + ), + migrations.RemoveField( + model_name='normalizeddata', + name='raw', + ), + migrations.RemoveField( + model_name='normalizeddata', + name='source', + ), + migrations.RemoveField( + model_name='normalizeddata', + name='tasks', + ), + migrations.RemoveField( + model_name='providerregistration', + name='submitted_by', + ), + migrations.RemoveField( + model_name='rawdatumjob', + name='datum', + ), + migrations.RemoveField( + model_name='rawdatumjob', + name='job', + ), + migrations.RemoveField( + model_name='sourcestat', + name='config', + ), + migrations.DeleteModel( + name='PGLock', + ), + migrations.RemoveField( + model_name='rawdatum', + name='jobs', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='earliest_date', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='full_harvest', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='harvest_after', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='harvest_interval', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='harvester_key', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='harvester_kwargs', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='private_harvester_kwargs', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='private_transformer_kwargs', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='rate_limit_allowance', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='rate_limit_period', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='regulator_steps', + ), + migrations.RemoveField( + model_name='sourceconfig', + name='transformer_kwargs', + ), + migrations.AlterField( + model_name='rawdatum', + name='no_output', + field=models.BooleanField(help_text='Indicates that this RawDatum resulted in an empty graph when transformed. This allows the RawDataJanitor to find records that have not been processed. 
Records that result in an empty graph will not have an Indexcard associated with them, which would otherwise look like data that has not yet been processed.', null=True), + ), + migrations.DeleteModel( + name='FormattedMetadataRecord', + ), + migrations.DeleteModel( + name='HarvestJob', + ), + migrations.DeleteModel( + name='NormalizedData', + ), + migrations.DeleteModel( + name='ProviderRegistration', + ), + migrations.DeleteModel( + name='RawDatumJob', + ), + migrations.DeleteModel( + name='SourceStat', + ), + ] diff --git a/share/models/ingest.py b/share/models/ingest.py index bf93b6d60..12a27761a 100644 --- a/share/models/ingest.py +++ b/share/models/ingest.py @@ -132,6 +132,7 @@ class SourceConfig(models.Model): source = models.ForeignKey('Source', on_delete=models.CASCADE, related_name='source_configs') base_url = models.URLField(null=True) + transformer_key = models.TextField(null=True) disabled = models.BooleanField(default=False) diff --git a/tests/factories/__init__.py b/tests/factories/__init__.py index b5eaab2f2..9d71348a7 100644 --- a/tests/factories/__init__.py +++ b/tests/factories/__init__.py @@ -46,6 +46,7 @@ class SourceConfigFactory(DjangoModelFactory): label = factory.Faker('sentence') base_url = factory.Faker('url') source = factory.SubFactory(SourceFactory) + transformer_key = None class Meta: model = share_db.SourceConfig diff --git a/tests/share/search/_util.py b/tests/share/search/_util.py index eac9825c5..beda6dc10 100644 --- a/tests/share/search/_util.py +++ b/tests/share/search/_util.py @@ -1,20 +1,23 @@ +import typing + from tests import factories from primitive_metadata import primitive_rdf as rdf from trove import models as trove_db -from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT +from trove import digestive_tract -def create_indexcard(focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: +def create_indexcard( + focus_iri: str, + rdf_tripledict: rdf.RdfTripleDictionary, + deriver_iris: typing.Collection[str] = (), +) -> trove_db.Indexcard: _suid = factories.SourceUniqueIdentifierFactory() _indexcard = trove_db.Indexcard.objects.create(source_record_suid=_suid) update_indexcard_content(_indexcard, focus_iri, rdf_tripledict) - # an osfmap_json card is required for indexing, but not used in these tests - trove_db.DerivedIndexcard.objects.get_or_create( - upriver_indexcard=_indexcard, - deriver_identifier=trove_db.ResourceIdentifier.objects.get_or_create_for_iri(TROVE['derive/osfmap_json']), - ) + if deriver_iris: + digestive_tract.derive(_indexcard, deriver_iris) return _indexcard @@ -33,7 +36,6 @@ def update_indexcard_content( 'turtle_checksum_iri': 'foo', # not enforced }, ) - self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri def create_supplement( diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index c40509b80..9f886a346 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -6,12 +6,17 @@ from primitive_metadata import primitive_rdf as rdf -from tests import factories from share.search import messages from trove import models as trove_db from trove.trovesearch.search_params import CardsearchParams, ValuesearchParams from trove.trovesearch.search_handle import PropertypathUsage -from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT +from trove.vocab.namespaces import RDFS, RDF, 
DCTERMS, OWL, FOAF, DCAT, TROVE +from tests.share.search._util import ( + create_indexcard, + update_indexcard_content, + create_supplement, +) + from ._with_real_services import RealElasticTestCase @@ -214,7 +219,6 @@ def _assert_cardsearch_iris(self, queryparams: dict, expected_focus_iris: Iterab assert isinstance(_cardsearch_params, CardsearchParams) _cardsearch_handle = self.index_strategy.pls_handle_cardsearch(_cardsearch_params) # assumes all results fit on one page - breakpoint() # TODO: _indexcard_focus_by_uuid _actual_result_iris: set[str] | list[str] = [ self._indexcard_focus_by_uuid[_result.card_uuid] for _result in _cardsearch_handle.search_result_page @@ -300,7 +304,7 @@ def _fill_test_data_for_querying(self): FOAF.name: {rdf.literal('some one else')}, }, }) - self._create_supplement(_card_a, BLARG.a, { + create_supplement(_card_a, BLARG.a, { BLARG.a: { DCTERMS.replaces: {BLARG.a_past}, DCAT.servesDataset: { @@ -308,7 +312,7 @@ def _fill_test_data_for_querying(self): }, }, }) - self._create_supplement(_card_b, BLARG.b, { + create_supplement(_card_b, BLARG.b, { BLARG.b: { DCTERMS.replaces: {BLARG.b_past}, DCAT.servesDataset: { @@ -316,7 +320,7 @@ def _fill_test_data_for_querying(self): }, }, }) - self._create_supplement(_card_c, BLARG.c, { + create_supplement(_card_c, BLARG.c, { BLARG.c: { DCTERMS.replaces: {BLARG.c_past}, DCAT.servesDataset: { @@ -607,6 +611,20 @@ def valuesearch_sameas_cases(self): {BLARG.subj_ac, BLARG.subj_a, BLARG.subj_c, BLARG.subj_bc}, ) + def _create_indexcard(self, focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: + _indexcard = create_indexcard(focus_iri, rdf_tripledict, (TROVE['derive/osfmap_json'],)) + self._indexcard_focus_by_uuid[str(_indexcard.uuid)] = focus_iri + return _indexcard + + def _update_indexcard_content( + self, + indexcard: trove_db.Indexcard, + focus_iri: str, + rdf_tripledict: rdf.RdfTripleDictionary, + ) -> None: + update_indexcard_content(indexcard, focus_iri, rdf_tripledict) + self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri + def _index_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): _messages_chunk = messages.MessagesChunk( messages.MessageType.UPDATE_INDEXCARD, diff --git a/tests/share/search/index_strategy/test_sharev2_elastic8.py b/tests/share/search/index_strategy/test_sharev2_elastic8.py index 7b1c76845..f2afc4b42 100644 --- a/tests/share/search/index_strategy/test_sharev2_elastic8.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic8.py @@ -1,12 +1,15 @@ -import json +from primitive_metadata import primitive_rdf as rdf -from tests import factories from share.search import messages from share.search.index_strategy.sharev2_elastic8 import Sharev2Elastic8IndexStrategy -from share.util import IDObfuscator +from trove.vocab.namespaces import DCTERMS, SHAREv2 +from tests.share.search._util import create_indexcard from ._with_real_services import RealElasticTestCase +BLARG = rdf.IriNamespace('http://blarg.example/') + + class TestSharev2Elastic8(RealElasticTestCase): # for RealElasticTestCase def get_index_strategy(self): @@ -14,20 +17,20 @@ def get_index_strategy(self): def setUp(self): super().setUp() - self.__suid = factories.SourceUniqueIdentifierFactory() - self.__fmr = factories.FormattedMetadataRecordFactory( - suid=self.__suid, - record_format='sharev2_elastic', - formatted_metadata=json.dumps({ - 'id': IDObfuscator.encode(self.__suid), - 'title': 'hello', - }) + self.__indexcard = create_indexcard( + BLARG.hello, + { + BLARG.hello: { + DCTERMS.title: 
{rdf.Literal('hello', language='en')}, + }, + }, + deriver_iris=[SHAREv2.sharev2_elastic], ) def test_without_daemon(self): _messages_chunk = messages.MessagesChunk( messages.MessageType.INDEX_SUID, - [self.__suid.id], + [self.__indexcard.source_record_suid_id], ) self._assert_happypath_without_daemon( _messages_chunk, @@ -37,7 +40,7 @@ def test_without_daemon(self): def test_with_daemon(self): _messages_chunk = messages.MessagesChunk( messages.MessageType.INDEX_SUID, - [self.__suid.id], + [self.__indexcard.source_record_suid_id], ) self._assert_happypath_with_daemon( _messages_chunk, From 8f0fb3e49b2f8c1757c2a3a6768668345011efac Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 7 Mar 2025 08:54:13 -0500 Subject: [PATCH 11/46] get tests passing; remove SourceIcon --- api/sources/serializers.py | 40 +-- api/sources/views.py | 2 +- project/settings.py | 21 -- share/bin/__init__.py | 1 - share/bin/harvest.py | 162 ---------- share/migrations/0077_big_cleanup_2025.py | 7 + share/models/fields.py | 6 +- share/models/ingest.py | 41 +-- share/search/index_messenger.py | 1 - share/search/index_strategy/elastic8.py | 4 +- tests/api/test_sources_endpoint.py | 128 ++------ tests/factories/__init__.py | 9 - tests/share/bin/test_sharectl.py | 20 -- tests/share/models/test_rawdata.py | 64 ++-- tests/share/search/_util.py | 4 +- .../_common_trovesearch_tests.py | 5 +- .../index_strategy/test_sharev2_elastic5.py | 30 +- .../index_strategy/test_sharev2_elastic8.py | 8 +- tests/share/search/test_index_backfill.py | 2 +- tests/share/util/test_contact_extract.py | 48 --- tests/share/util/test_encrypted_field.py | 1 + tests/share/util/test_force_text.py | 32 -- tests/share/util/test_mutable_graph.py | 281 ------------------ tests/share/util/test_osf.py | 57 ---- tests/trove/derive/_inputs.py | 4 +- tests/trove/derive/test_osfmap_json.py | 12 +- tests/trove/digestive_tract/test_derive.py | 24 +- tests/trove/digestive_tract/test_expel.py | 11 +- tests/trove/digestive_tract/test_extract.py | 12 +- tests/trove/digestive_tract/test_swallow.py | 22 +- tests/trove/render/_inputs.py | 3 +- tests/trove/render/test_jsonapi_renderer.py | 26 +- tests/trove/render/test_jsonld_renderer.py | 40 +-- .../trove/render/test_simple_json_renderer.py | 13 +- tests/trove/render/test_turtle_renderer.py | 29 +- tests/validation/test_changeset.py | 264 ---------------- trove/derive/sharev2_elastic.py | 51 +++- trove/digestive_tract.py | 4 +- trove/render/jsonapi.py | 3 +- trove/vocab/namespaces.py | 12 +- 40 files changed, 263 insertions(+), 1241 deletions(-) delete mode 100644 share/bin/harvest.py delete mode 100644 tests/share/util/test_contact_extract.py delete mode 100644 tests/share/util/test_force_text.py delete mode 100644 tests/share/util/test_mutable_graph.py delete mode 100644 tests/share/util/test_osf.py delete mode 100644 tests/validation/test_changeset.py diff --git a/api/sources/serializers.py b/api/sources/serializers.py index 34e8d0a86..55ebdee72 100644 --- a/api/sources/serializers.py +++ b/api/sources/serializers.py @@ -1,15 +1,10 @@ import logging import re -import requests - from share import models -from django.core.files.base import ContentFile from django.db import transaction -from rest_framework_json_api import serializers - from api.base import ShareSerializer from api.base import exceptions from api.fields import ShareIdentityField @@ -30,7 +25,6 @@ class Meta: 'name', 'home_page', 'long_title', - 'icon', 'url', 'source_configs', ) @@ -48,39 +42,15 @@ class UpdateSourceSerializer(ShareSerializer): # 
link to self url = ShareIdentityField(view_name='api:source-detail') - # URL to fetch the source's icon - icon_url = serializers.URLField(write_only=True) - class Meta: model = models.Source - fields = ('name', 'home_page', 'long_title', 'canonical', 'icon', 'icon_url', 'user', 'url') - read_only_fields = ('icon', 'user', 'url') + fields = ('name', 'home_page', 'long_title', 'canonical', 'user', 'url') + read_only_fields = ('user', 'url') view_name = 'api:source-detail' class JSONAPIMeta: included_resources = ['user'] - def update(self, instance, validated_data): - # TODO: when long_title is changed, reindex works accordingly - icon_url = validated_data.pop('icon_url', None) - with transaction.atomic(): - instance = super().update(instance, validated_data) - if icon_url: - icon_file = self._fetch_icon_file(icon_url) - instance.icon.save(instance.name, content=icon_file) - return instance - - def _fetch_icon_file(self, icon_url): - try: - r = requests.get(icon_url, timeout=5) - header_type = r.headers['content-type'].split(';')[0].lower() - if header_type not in self.VALID_ICON_TYPES: - raise serializers.ValidationError('Invalid image type.') - return ContentFile(r.content) - except Exception as e: - logger.warning('Exception occured while downloading icon %s', e) - raise serializers.ValidationError('Could not download/process image.') - class CreateSourceSerializer(UpdateSourceSerializer): @@ -92,11 +62,8 @@ class Meta(UpdateSourceSerializer.Meta): } def create(self, validated_data): - icon_url = validated_data.pop('icon_url') long_title = validated_data.pop('long_title') - icon_file = self._fetch_icon_file(icon_url) - username = re.sub(r'[^\w.@+-]', '_', long_title).lower() name = validated_data.pop('name', username) @@ -114,8 +81,7 @@ def create(self, validated_data): user = self._create_trusted_user(username=username) source.user_id = user.id - source.icon.save(name, content=icon_file) - + source.save() return source def _create_trusted_user(self, username): diff --git a/api/sources/views.py b/api/sources/views.py index 001c4333d..3394e2552 100644 --- a/api/sources/views.py +++ b/api/sources/views.py @@ -39,7 +39,7 @@ def get_serializer_class(self): return UpdateSourceSerializer def get_queryset(self): - return Source.objects.exclude(icon='').exclude(is_deleted=True) + return Source.objects.exclude(is_deleted=True) def create(self, request, *args, **kwargs): try: diff --git a/project/settings.py b/project/settings.py index 06a8c4aab..7ccfc628a 100644 --- a/project/settings.py +++ b/project/settings.py @@ -344,31 +344,12 @@ def split(string, delim): CELERY_BEAT_SCHEDULER = 'django_celery_beat.schedulers:DatabaseScheduler' CELERY_BEAT_SCHEDULE = { - # Every 2 minutes - 'Harvest Task': { - 'task': 'share.tasks.harvest', - 'schedule': 120, - }, 'Expel expired data': { 'task': 'trove.digestive_tract.task__expel_expired_data', 'schedule': crontab(hour=0, minute=0), # every day at midnight UTC }, } -if not DEBUG: - CELERY_BEAT_SCHEDULE = { - **CELERY_BEAT_SCHEDULE, - 'Schedule Harvests': { - 'task': 'share.tasks.schedule_harvests', - 'schedule': crontab(minute=0) # hourly - }, - 'Source Stats': { - 'task': 'share.tasks.source_stats', - 'schedule': crontab(minute=0, hour='3,9,15,21'), # every 6 hours - 'args': (), - }, - } - CELERY_RESULT_EXPIRES = 60 * 60 * 24 * 3 # 4 days CELERY_RESULT_BACKEND = 'share.celery:CeleryDatabaseBackend' @@ -403,14 +384,12 @@ def route_urgent_task(name, args, kwargs, options, task=None, **kw): CELERY_TASK_ROUTES = [ route_urgent_task, { - 'share.tasks.harvest': 
{'queue': 'harvest'}, 'trove.digestive_tract.*': {'queue': 'digestive_tract'}, }, ] CELERY_TASK_QUEUES = { 'share_default': {}, 'elasticsearch': {}, - 'harvest': {}, 'digestive_tract': {}, 'digestive_tract.urgent': {}, } diff --git a/share/bin/__init__.py b/share/bin/__init__.py index e89ed5e32..5727bdb70 100644 --- a/share/bin/__init__.py +++ b/share/bin/__init__.py @@ -7,7 +7,6 @@ MODULES = ( - 'harvest', 'search', ) diff --git a/share/bin/harvest.py b/share/bin/harvest.py deleted file mode 100644 index c7268fcb5..000000000 --- a/share/bin/harvest.py +++ /dev/null @@ -1,162 +0,0 @@ -import re -import os - -import pendulum - -from share import tasks -from share.bin.util import command -from share.harvest.scheduler import HarvestScheduler -from share.models import SourceConfig - - -def get_sourceconfig(name): - try: - return SourceConfig.objects.get(label=name) - except SourceConfig.DoesNotExist: - print('SourceConfig "{}" not found.'.format(name)) - fuzzy = list(SourceConfig.objects.filter(label__icontains=name).values_list('label', flat=True)) - if fuzzy: - print('Did you mean?\n\t{}'.format('\n\t'.join(fuzzy))) - return None - - -@command('Fetch data to disk or stdout, using the specified SourceConfig') -def fetch(args, argv): - """ - Usage: - {0} fetch <sourceconfig> [<date> | --start=YYYY-MM-DD --end=YYYY-MM-DD] [--limit=LIMIT] [--print | --out=DIR] [--set-spec=SET] - {0} fetch <sourceconfig> --ids <ids>... [--print | --out=DIR] - - Options: - -l, --limit=NUMBER Limit the harvester to NUMBER of documents - -p, --print Print results to stdout rather than to a file - -o, --out=DIR The directory to store the fetched data in. Defaults to ./fetched/ - -s, --start=YYYY-MM-DD The date at which to start fetching data. - -e, --end=YYYY-MM-DD The date at which to stop fetching data. - --set-spec=SET The OAI setSpec to limit harvesting to. - --ids IDs of specific records to fetch. - """ - config = get_sourceconfig(args['<sourceconfig>']) - if not config: - return -1 - - harvester = config.get_harvester(pretty=True) - - ids = args['<ids>'] - if ids: - gen = (harvester.fetch_by_id(id) for id in ids) - else: - kwargs = {k: v for k, v in { - 'limit': int(args['--limit']) if args.get('--limit') else None, - 'set_spec': args.get('--set-spec'), - }.items() if v is not None} - - if not args['<date>'] and not (args['--start'] and args['--end']): - gen = harvester.fetch(**kwargs) - elif args['<date>']: - gen = harvester.fetch_date(pendulum.parse(args['<date>']), **kwargs) - else: - gen = harvester.fetch_date_range(pendulum.parse(args['--start']), pendulum.parse(args['--end']), **kwargs) - - if not args['--print']: - args['--out'] = args['--out'] or os.path.join(os.curdir, 'fetched', config.label) - os.makedirs(args['--out'], exist_ok=True) - - for result in gen: - if args['--print']: - print('Harvested data with identifier "{}"'.format(result.identifier)) - print(result.datum) - print('\n') - else: - suffix = '.xml' if result.datum.startswith('<') else '.json' - with open(os.path.join(args['--out'], re.sub(r'[:\\\/\?\*]', '', str(result.identifier))) + suffix, 'w') as fobj: - fobj.write(result.datum) - - -@command('Harvest data using the specified SourceConfig') -def harvest(args, argv): - """ - Usage: - {0} harvest <sourceconfig> [<date>] [options] - {0} harvest --all [<date>] [options] - {0} harvest <sourceconfig> (--start=YYYY-MM-DD --end=YYYY-MM-DD) [options] - - Options: - -l, --limit=NUMBER Limit the harvester to NUMBER of documents - -s, --start=YYYY-MM-DD The date at which to start fetching data. - -e, --end=YYYY-MM-DD The date at which to stop fetching data.
- -q, --quiet Do not print out the harvested records - --set-spec=SET The OAI setSpec to limit harvesting to. - """ - config = get_sourceconfig(args['<sourceconfig>']) - if not config: - return -1 - - kwargs = {k: v for k, v in { - 'limit': int(args['--limit']) if args.get('--limit') else None, - 'set_spec': args.get('--set-spec'), - }.items() if v is not None} - - if not args['<date>'] and not (args['--start'] and args['--end']): - gen = config.get_harvester().harvest(**kwargs) - elif args['<date>']: - gen = config.get_harvester().harvest_date(pendulum.parse(args['<date>']), **kwargs) - else: - gen = config.get_harvester().harvest_date_range(pendulum.parse(args['--start']), pendulum.parse(args['--end']), **kwargs) - - # "Spin" the generator but don't keep the documents in memory - for datum in gen: - if args['--quiet']: - continue - print(datum) - - -@command('Create HarvestJobs for the specified SourceConfig') -def schedule(args, argv): - """ - Usage: - {0} schedule <sourceconfig> [<date> | (--start=YYYY-MM-DD --end=YYYY-MM-DD) | --complete] [--tasks | --run] - {0} schedule [<date> | (--start=YYYY-MM-DD --end=YYYY-MM-DD) | --complete] [--tasks | --run] --all - - Options: - -t, --tasks Spawn harvest tasks for each created job. - -r, --run Run the harvest task for each created job. - -a, --all Schedule jobs for all enabled SourceConfigs. - -c, --complete Schedule all jobs between today and the SourceConfig's earliest date. - -s, --start=YYYY-MM-DD The date at which to start fetching data. - -e, --end=YYYY-MM-DD The date at which to stop fetching data. - -j, --no-ingest Do not process harvested data. - """ - if not args['--all']: - configs = [get_sourceconfig(args['<sourceconfig>'])] - if not configs[0]: - return -1 - else: - configs = SourceConfig.objects.exclude(disabled=True).exclude(source__is_deleted=True) - - kwargs = {k: v for k, v in { - 'ingest': not args.get('--no-ingest'), - }.items() if v is not None} - - claim_jobs = args['--run'] or args['--tasks'] - - jobs = [] - for config in configs: - scheduler = HarvestScheduler(config, claim_jobs=claim_jobs) - - if not (args['<date>'] or args['--start'] or args['--end']): - jobs.append(scheduler.today()) - elif args['<date>']: - jobs.append(scheduler.date(pendulum.parse(args['<date>']))) - else: - jobs.extend(scheduler.range(pendulum.parse(args['--start']), pendulum.parse(args['--end']))) - - if not claim_jobs: - return - - for job in jobs: - if args['--run']: - tasks.harvest.apply((), {'job_id': job.id, **kwargs}, retry=False, throw=True) - elif args['--tasks']: - tasks.harvest.apply_async((), {'job_id': job.id, **kwargs}) diff --git a/share/migrations/0077_big_cleanup_2025.py b/share/migrations/0077_big_cleanup_2025.py index bf94aed06..ef0865d7d 100644 --- a/share/migrations/0077_big_cleanup_2025.py +++ b/share/migrations/0077_big_cleanup_2025.py @@ -50,6 +50,10 @@ class Migration(migrations.Migration): model_name='rawdatumjob', name='job', ), + migrations.RemoveField( + model_name='source', + name='icon', + ), migrations.RemoveField( model_name='sourcestat', name='config', ), @@ -129,6 +133,9 @@ class Migration(migrations.Migration): migrations.DeleteModel( name='RawDatumJob', ), + migrations.DeleteModel( + name='SourceIcon', + ), migrations.DeleteModel( name='SourceStat', ), ] diff --git a/share/models/fields.py b/share/models/fields.py index d154f534d..27ff7ec38 100644 --- a/share/models/fields.py +++ b/share/models/fields.py @@ -58,11 +58,6 @@ def __init__(self, *args, encoder=None, decoder=None, **kwargs): ) -# stub left just for migrations -class TypedManyToManyField(models.ManyToManyField): - pass - - class 
ShareURLField(models.TextField): default_validators = [validators.URLValidator()] description = _("URL") @@ -87,6 +82,7 @@ def formfield(self, **kwargs): return super(ShareURLField, self).formfield(**defaults) +# TODO: remove after migrations have been fully squashed class EncryptedJSONField(models.BinaryField): """ This field transparently encrypts data in the database. It should probably only be used with PG unless diff --git a/share/models/ingest.py b/share/models/ingest.py index 12a27761a..3e2d6426a 100644 --- a/share/models/ingest.py +++ b/share/models/ingest.py @@ -3,13 +3,9 @@ import logging from django.core import validators -from django.core.files.base import ContentFile -from django.core.files.storage import Storage from django.db import connection from django.db import models from django.db.models.functions import Coalesce -from django.urls import reverse -from django.utils.deconstruct import deconstructible import sentry_sdk from share.models.core import ShareUser @@ -22,34 +18,12 @@ __all__ = ('Source', 'SourceConfig', 'RawDatum', ) -class SourceIcon(models.Model): - source_name = models.TextField(unique=True) - image = models.BinaryField() +def icon_name(): + ... # removed; stub for past migrations only -@deconstructible -class SourceIconStorage(Storage): - def _open(self, name, mode='rb'): - assert mode == 'rb' - icon = SourceIcon.objects.get(source_name=name) - return ContentFile(icon.image) - - def _save(self, name, content): - SourceIcon.objects.update_or_create(source_name=name, defaults={'image': content.read()}) - return name - - def delete(self, name): - SourceIcon.objects.get(source_name=name).delete() - - def get_available_name(self, name, max_length=None): - return name - - def url(self, name): - return reverse('source_icon', kwargs={'source_name': name}) - - -def icon_name(instance, filename): - return instance.name +def SourceIconStorage(): + ... 
# removed; stub for past migrations only class NaturalKeyManager(models.Manager): @@ -67,7 +41,6 @@ class Source(models.Model): name = models.TextField(unique=True) long_title = models.TextField(unique=True) home_page = models.URLField(null=True, blank=True) - icon = models.ImageField(upload_to=icon_name, storage=SourceIconStorage(), blank=True) is_deleted = models.BooleanField(default=False) # Whether or not this SourceConfig collects original content @@ -203,7 +176,7 @@ def store_datum_for_suid( # keep the latest datestamp if (not _raw.datestamp) or (datestamp > _raw.datestamp): _raw.datestamp = datestamp - _raw.save(update_fields=('mediatype', 'datestamp', 'expiration_date')) + _raw.save(update_fields=('mediatype', 'datestamp', 'expiration_date', 'date_modified')) return _raw def latest_by_suid_id(self, suid_id) -> models.QuerySet: @@ -258,10 +231,6 @@ class RawDatum(models.Model): objects = RawDatumManager() - @property - def created(self): - return self.date_modified == self.date_created - def is_latest(self): return ( RawDatum.objects diff --git a/share/search/index_messenger.py b/share/search/index_messenger.py index d7ee2655e..34cfb9e7d 100644 --- a/share/search/index_messenger.py +++ b/share/search/index_messenger.py @@ -10,7 +10,6 @@ import requests import sentry_sdk -from share.models import FeatureFlag from share.search.messages import MessagesChunk, MessageType from share.search import index_strategy diff --git a/share/search/index_strategy/elastic8.py b/share/search/index_strategy/elastic8.py index 751af06c0..c73af5989 100644 --- a/share/search/index_strategy/elastic8.py +++ b/share/search/index_strategy/elastic8.py @@ -285,10 +285,12 @@ def _elastic_actions_with_index( _docid = _elastic_action['_id'] for _indexname in _indexnames: action_tracker.add_action(_actionset.message_target_id, _indexname, _docid) - yield { + _elastic_action_with_index = { **_elastic_action, '_index': _indexname, } + logger.debug('%s: elastic action: %r', self, _elastic_action_with_index) + yield _elastic_action_with_index action_tracker.done_scheduling(_actionset.message_target_id) def _get_indexnames_for_action( diff --git a/tests/api/test_sources_endpoint.py b/tests/api/test_sources_endpoint.py index a6f2ce8b9..5c9cbce7c 100644 --- a/tests/api/test_sources_endpoint.py +++ b/tests/api/test_sources_endpoint.py @@ -1,13 +1,12 @@ import json import pytest -import httpretty - from django.contrib.auth.models import Permission from django.contrib.contenttypes.models import ContentType from share.models import Source, ShareUser from share.util import IDObfuscator +from tests.factories import SourceFactory PROPER_ICON_URL = 'https://staging-cdn.osf.io/preprints-assets/bitss/square_color_no_transparent.png' @@ -46,44 +45,12 @@ def source_add_change_user(): return user -@pytest.fixture -def mock_icon_urls(): - httpretty.enable() - httpretty.HTTPretty.allow_net_connect = False - - # smallest valid png, from https://github.com/mathiasbynens/small/blob/master/png-transparent.png - httpretty.register_uri( - httpretty.GET, - PROPER_ICON_URL, - body=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\nIDATx\x9cc\x00\x01\x00\x00\x05\x00\x01\r\n-\xb4\x00\x00\x00\x00IEND\xaeB`\x82', - content_type='image/png' - ) - httpretty.register_uri( - httpretty.GET, - IMPROPER_ICON_URL, - body=b'\n\n\n\n\n\n\n\n \n', - content_type='text/html' - ) - httpretty.register_uri( - httpretty.GET, - INVALID_URL - ) - httpretty.register_uri( - httpretty.GET, - 
TIMEOUT_URL, - body=exceptionCallback - ) - yield - httpretty.disable() - - -def get_post_body(icon=PROPER_ICON_URL, id=None, **kwargs): +def get_post_body(id=None, **kwargs): body = { 'data': { 'type': 'Source', 'attributes': { 'long_title': 'Test User', - 'icon_url': icon, **kwargs } } @@ -131,17 +98,25 @@ def fetch_all_pages(client, url, results=None): class TestSourcesGet: endpoint = '/api/v2/sources/' - def test_count(self, client): - sources_qs = Source.objects.exclude(icon='').exclude(is_deleted=True) + @pytest.fixture + def sources(self): + return [ + SourceFactory(), + SourceFactory(), + SourceFactory(), + ] + + def test_count(self, client, sources): + sources_qs = Source.objects.exclude(is_deleted=True) source_count = sources_qs.count() results = fetch_all_pages(client, self.endpoint) - assert source_count > 0 + assert source_count == len(sources) + 1 # auto-created "SHARE System" source assert len(results) == source_count - def test_is_deleted(self, client): - sources_qs = Source.objects.exclude(icon='').exclude(is_deleted=True) + def test_is_deleted(self, client, sources): + sources_qs = Source.objects.exclude(is_deleted=True) source_count = sources_qs.count() sources_before = fetch_all_pages(client, self.endpoint) @@ -149,7 +124,7 @@ def test_is_deleted(self, client): assert len(sources_before) == source_count - deleted_source = sources_qs.first() + deleted_source = sources_qs.last() deleted_source.is_deleted = True deleted_source.save() @@ -160,28 +135,8 @@ def test_is_deleted(self, client): missing_ids = source_ids_before - source_ids_after assert missing_ids == {IDObfuscator.encode(deleted_source)} - def test_no_icon(self, client): - sources_qs = Source.objects.exclude(icon='').exclude(is_deleted=True) - source_count = sources_qs.count() - - sources_before = fetch_all_pages(client, self.endpoint) - source_ids_before = {s['id'] for s in sources_before} - - assert len(sources_before) == source_count - - iconless_source = sources_qs.first() - iconless_source.icon = None - iconless_source.save() - - sources_after = fetch_all_pages(client, self.endpoint) - source_ids_after = {s['id'] for s in sources_after} - - assert len(sources_after) == len(sources_before) - 1 - missing_ids = source_ids_before - source_ids_after - assert missing_ids == {IDObfuscator.encode(iconless_source)} - - def test_by_id(self, client): - source = Source.objects.exclude(icon='').exclude(is_deleted=True).last() + def test_by_id(self, client, sources): + source = Source.objects.exclude(is_deleted=True).last() resp = client.get('{}{}/'.format(self.endpoint, IDObfuscator.encode(source))) assert resp.status_code == 200 @@ -189,7 +144,6 @@ def test_by_id(self, client): assert resp.json()['data']['type'] == 'Source' assert resp.json()['data']['attributes'] == { 'name': source.name, - 'icon': 'http://testserver{}'.format(source.icon.url), 'homePage': source.home_page, 'longTitle': source.long_title, } @@ -206,7 +160,7 @@ def test_unauthorized_post(self, client): content_type='application/vnd.api+json' ).status_code == 401 - def test_improper_scope_post(self, client, share_user, mock_icon_urls): + def test_improper_scope_post(self, client, share_user): assert client.post( self.endpoint, json.dumps(get_post_body()), @@ -214,7 +168,7 @@ def test_improper_scope_post(self, client, share_user, mock_icon_urls): HTTP_AUTHORIZATION=share_user.authorization(), ).status_code == 403 - def test_successful_post_no_home_page(self, client, source_add_user, mock_icon_urls): + def test_successful_post_no_home_page(self, client, 
source_add_user): test_data = get_post_body() resp = client.post( self.endpoint, @@ -235,7 +189,7 @@ def test_successful_post_no_home_page(self, client, source_add_user, mock_icon_u assert data['user']['token'] == created_user.oauth2_provider_accesstoken.first().token assert created_user.is_trusted is True - def test_successful_post_home_page(self, client, source_add_user, mock_icon_urls): + def test_successful_post_home_page(self, client, source_add_user): test_data = get_post_body(home_page='http://test.homepage.net') resp = client.post( self.endpoint, @@ -250,7 +204,7 @@ def test_successful_post_home_page(self, client, source_add_user, mock_icon_urls assert data['source']['homePage'] == test_data['data']['attributes']['home_page'] assert data['source']['canonical'] - def test_successful_repost_home_page(self, client, source_add_user, mock_icon_urls): + def test_successful_repost_home_page(self, client, source_add_user): test_data = get_post_body(home_page='http://test.homepage.net') resp_one = client.post( self.endpoint, @@ -275,7 +229,7 @@ def test_successful_repost_home_page(self, client, source_add_user, mock_icon_ur assert data_one == data_two - def test_successful_post_put_home_page(self, client, source_add_change_user, mock_icon_urls): + def test_successful_post_put_home_page(self, client, source_add_change_user): test_data = get_post_body(home_page='http://test.homepage.net') resp_one = client.post( self.endpoint, @@ -306,7 +260,7 @@ def test_successful_post_put_home_page(self, client, source_add_change_user, moc assert data_two['source']['homePage'] == new_home_page assert data_one != data_two - def test_successful_post_patch_home_page(self, client, source_add_change_user, mock_icon_urls): + def test_successful_post_patch_home_page(self, client, source_add_change_user): test_data = get_post_body(home_page='http://test.homepage.net') resp_one = client.post( self.endpoint, @@ -336,39 +290,7 @@ def test_successful_post_patch_home_page(self, client, source_add_change_user, m assert data_two['source']['homePage'] == new_home_page assert data_one != data_two - def test_bad_image_url(self, client, source_add_user, mock_icon_urls): - resp = client.post( - self.endpoint, - json.dumps(get_post_body(icon=IMPROPER_ICON_URL)), - content_type='application/vnd.api+json', - HTTP_AUTHORIZATION=source_add_user.authorization(), - ) - - assert resp.status_code == 400 - assert resp.data[0]['detail'] == 'Could not download/process image.' - - def test_invalid_url(self, client, source_add_user, mock_icon_urls): - resp = client.post( - self.endpoint, - json.dumps(get_post_body(icon=INVALID_URL)), - content_type='application/vnd.api+json', - HTTP_AUTHORIZATION=source_add_user.authorization(), - ) - - assert resp.status_code == 400 - assert resp.data[0]['detail'] == 'Enter a valid URL.' - - def test_timeout_url(self, client, source_add_user, mock_icon_urls): - resp = client.post( - self.endpoint, - json.dumps(get_post_body(icon=TIMEOUT_URL)), - content_type='application/vnd.api+json', - HTTP_AUTHORIZATION=source_add_user.authorization(), - ) - - assert resp.data[0]['detail'] == 'Could not download/process image.' 
- - def test_canonical_source(self, client, source_add_change_user, mock_icon_urls): + def test_canonical_source(self, client, source_add_change_user): # add a canonical source test_data = get_post_body(canonical=True) resp = client.post( diff --git a/tests/factories/__init__.py b/tests/factories/__init__.py index 9d71348a7..d23f4f316 100644 --- a/tests/factories/__init__.py +++ b/tests/factories/__init__.py @@ -26,7 +26,6 @@ class Meta: class SourceFactory(DjangoModelFactory): name = factory.Sequence(lambda x: '{}{}'.format(fake.name(), x)) long_title = factory.Sequence(lambda x: '{}{}'.format(fake.sentence(), x)) - icon = factory.SelfAttribute('name') user = factory.SubFactory(ShareUserFactory, source=None) @@ -34,14 +33,6 @@ class Meta: model = share_db.Source -class ListGenerator(list): - - def __call__(self, *args, **kwargs): - if hasattr(self, 'side_effect'): - raise self.side_effect - return (x for x in self) - - class SourceConfigFactory(DjangoModelFactory): label = factory.Faker('sentence') base_url = factory.Faker('url') diff --git a/tests/share/bin/test_sharectl.py b/tests/share/bin/test_sharectl.py index a8e5c6325..98b8e1684 100644 --- a/tests/share/bin/test_sharectl.py +++ b/tests/share/bin/test_sharectl.py @@ -71,23 +71,3 @@ def test_daemon(self, settings): with mock.patch('share.bin.search.IndexerDaemonControl') as mock_daemon_control: run_sharectl('search', 'daemon') mock_daemon_control.return_value.start_all_daemonthreads.assert_called_once() - - -# TODO unit tests, not just a smoke test -def test_fetch_runs(): - with mock.patch('share.bin.harvest.SourceConfig'): - run_sharectl('fetch', 'foo.sourceconfig', '2021-05-05', '--print') - - -# TODO unit tests, not just a smoke test -def test_harvest_runs(): - with mock.patch('share.bin.harvest.SourceConfig'): - run_sharectl('harvest', 'foo.sourceconfig') - - -# TODO unit tests, not just a smoke test -def test_schedule_runs(): - with mock.patch('share.bin.harvest.SourceConfig'): - with mock.patch('share.bin.harvest.HarvestScheduler'): - with mock.patch('share.bin.harvest.tasks'): - run_sharectl('schedule', 'foo.sourceconfig') diff --git a/tests/share/models/test_rawdata.py b/tests/share/models/test_rawdata.py index 1258ca0b1..4c046f89d 100644 --- a/tests/share/models/test_rawdata.py +++ b/tests/share/models/test_rawdata.py @@ -6,7 +6,10 @@ from django.db.utils import IntegrityError from share.models import RawDatum -from share.harvest.base import FetchResult + + +def get_now(): + return datetime.datetime.now(tz=datetime.timezone.utc) @pytest.mark.django_db @@ -35,37 +38,48 @@ def test_must_have_suid(self): assert 'null value in column "suid_id"' in e.value.args[0] - def test_store_data(self, source_config): - rd = RawDatum.objects.store_data(source_config, FetchResult('unique', 'mydatums')) + def test_store_data_by_suid(self, suid): + _now = get_now() + rd = RawDatum.objects.store_datum_for_suid( + suid=suid, + datum='mydatums', + mediatype='text/plain', + datestamp=_now, + ) assert rd.date_modified is not None assert rd.date_created is not None assert rd.datum == 'mydatums' - assert rd.suid.identifier == 'unique' - assert rd.suid.source_config == source_config + assert rd.datestamp == _now + assert rd.suid_id == suid.id assert rd.sha256 == hashlib.sha256(b'mydatums').hexdigest() - def test_store_data_dedups_simple(self, source_config): - rd1 = RawDatum.objects.store_data(source_config, FetchResult('unique', 'mydatums')) - rd2 = RawDatum.objects.store_data(source_config, FetchResult('unique', 'mydatums')) - - assert rd1.pk == 
rd2.pk - assert rd1.created is True - assert rd2.created is False - assert rd1.date_created == rd2.date_created - assert rd1.date_modified < rd2.date_modified - - def test_store_data_dedups_complex(self, source_config): - data = '{"providerUpdatedDateTime":"2016-08-25T11:37:40Z","uris":{"canonicalUri":"https://provider.domain/files/7d2792031","providerUris":["https://provider.domain/files/7d2792031"]},"contributors":[{"name":"Person1","email":"one@provider.domain"},{"name":"Person2","email":"two@provider.domain"},{"name":"Person3","email":"three@provider.domain"},{"name":"Person4","email":"dxm6@psu.edu"}],"title":"ReducingMorbiditiesinNeonatesUndergoingMRIScannig"}' - rd1 = RawDatum.objects.store_data(source_config, FetchResult('unique', data)) - rd2 = RawDatum.objects.store_data(source_config, FetchResult('unique', data)) - - assert rd1.pk == rd2.pk - assert rd1.created is True - assert rd2.created is False - assert rd1.date_modified < rd2.date_modified - assert rd1.date_created == rd2.date_created + def test_store_data_dedups_simple(self, suid): + rd1 = RawDatum.objects.store_datum_for_suid( + suid=suid, + datum='mydatums', + mediatype='text/plain', + datestamp=get_now(), + ) + rd2 = RawDatum.objects.store_datum_for_suid( + suid=suid, + datum='mydatums', + mediatype='text/plain', + datestamp=get_now(), + ) + rd3 = RawDatum.objects.store_datum_for_suid( + suid=suid, + datum='mydatums', + mediatype='text/plain', + datestamp=get_now(), + ) + + assert rd1.pk == rd2.pk == rd3.pk + assert rd1.sha256 == rd2.sha256 == rd3.sha256 + assert rd1.datestamp < rd2.datestamp < rd3.datestamp < get_now() + assert rd1.date_created == rd2.date_created == rd3.date_created + assert rd1.date_modified < rd2.date_modified < rd3.date_modified def test_is_expired(self): rd = RawDatum() diff --git a/tests/share/search/_util.py b/tests/share/search/_util.py index beda6dc10..475cdc80f 100644 --- a/tests/share/search/_util.py +++ b/tests/share/search/_util.py @@ -1,4 +1,4 @@ -import typing +from collections.abc import Collection from tests import factories @@ -11,7 +11,7 @@ def create_indexcard( focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary, - deriver_iris: typing.Collection[str] = (), + deriver_iris: Collection[str] = (), ) -> trove_db.Indexcard: _suid = factories.SourceUniqueIdentifierFactory() _indexcard = trove_db.Indexcard.objects.create(source_record_suid=_suid) diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index 9f886a346..5a4c7d349 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -10,7 +10,7 @@ from trove import models as trove_db from trove.trovesearch.search_params import CardsearchParams, ValuesearchParams from trove.trovesearch.search_handle import PropertypathUsage -from trove.vocab.namespaces import RDFS, RDF, DCTERMS, OWL, FOAF, DCAT, TROVE +from trove.vocab.namespaces import RDFS, RDF, DCTERMS, OWL, FOAF, DCAT, TROVE, BLARG from tests.share.search._util import ( create_indexcard, update_indexcard_content, @@ -20,9 +20,6 @@ from ._with_real_services import RealElasticTestCase -BLARG = rdf.IriNamespace('https://blarg.example/blarg/') - - class CommonTrovesearchTests(RealElasticTestCase): _indexcard_focus_by_uuid: dict[str, str] diff --git a/tests/share/search/index_strategy/test_sharev2_elastic5.py b/tests/share/search/index_strategy/test_sharev2_elastic5.py index 88e1d6b13..8c9e15a8c 100644 --- 
a/tests/share/search/index_strategy/test_sharev2_elastic5.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic5.py @@ -1,12 +1,12 @@ -import json import unittest from django.conf import settings +from primitive_metadata import primitive_rdf as rdf -from tests import factories from share.search import messages from share.search.index_strategy.sharev2_elastic5 import Sharev2Elastic5IndexStrategy -from share.util import IDObfuscator +from tests.share.search._util import create_indexcard +from trove.vocab.namespaces import DCTERMS, SHAREv2, RDF, BLARG from ._with_real_services import RealElasticTestCase @@ -19,6 +19,19 @@ def get_index_strategy(self): index_strategy.STATIC_INDEXNAME = f'test_{index_strategy.STATIC_INDEXNAME}' return index_strategy + def setUp(self): + super().setUp() + self.__indexcard = create_indexcard( + BLARG.hello, + { + BLARG.hello: { + RDF.type: {SHAREv2.CreativeWork}, + DCTERMS.title: {rdf.literal('hello', language='en')}, + }, + }, + deriver_iris=[SHAREv2.sharev2_elastic], + ) + def test_without_daemon(self): _formatted_record = self._get_formatted_record() _messages_chunk = messages.MessagesChunk( @@ -41,17 +54,6 @@ def test_with_daemon(self): expected_doc_count=1, ) - def _get_formatted_record(self): - suid = factories.SourceUniqueIdentifierFactory() - return factories.FormattedMetadataRecordFactory( - suid=suid, - record_format='sharev2_elastic', - formatted_metadata=json.dumps({ - 'id': IDObfuscator.encode(suid), - 'title': 'hello', - }) - ) - # override RealElasticTestCase to match hacks done with assumptions # (single index that will not be updated again before being deleted) def _assert_happypath_until_ingest(self): diff --git a/tests/share/search/index_strategy/test_sharev2_elastic8.py b/tests/share/search/index_strategy/test_sharev2_elastic8.py index f2afc4b42..5e39ed360 100644 --- a/tests/share/search/index_strategy/test_sharev2_elastic8.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic8.py @@ -2,14 +2,11 @@ from share.search import messages from share.search.index_strategy.sharev2_elastic8 import Sharev2Elastic8IndexStrategy -from trove.vocab.namespaces import DCTERMS, SHAREv2 +from trove.vocab.namespaces import DCTERMS, SHAREv2, RDF, BLARG from tests.share.search._util import create_indexcard from ._with_real_services import RealElasticTestCase -BLARG = rdf.IriNamespace('http://blarg.example/') - - class TestSharev2Elastic8(RealElasticTestCase): # for RealElasticTestCase def get_index_strategy(self): @@ -21,7 +18,8 @@ def setUp(self): BLARG.hello, { BLARG.hello: { - DCTERMS.title: {rdf.Literal('hello', language='en')}, + RDF.type: {SHAREv2.CreativeWork}, + DCTERMS.title: {rdf.literal('hello', language='en')}, }, }, deriver_iris=[SHAREv2.sharev2_elastic], diff --git a/tests/share/search/test_index_backfill.py b/tests/share/search/test_index_backfill.py index e3934de25..959214b94 100644 --- a/tests/share/search/test_index_backfill.py +++ b/tests/share/search/test_index_backfill.py @@ -23,7 +23,7 @@ def index_backfill(self, fake_strategy): def test_happypath(self, index_backfill: IndexBackfill, fake_strategy): assert index_backfill.backfill_status == IndexBackfill.INITIAL assert index_backfill.strategy_checksum == '' - with mock.patch('share.tasks.schedule_index_backfill') as mock_task: + with mock.patch('share.models.index_backfill.task__schedule_index_backfill') as mock_task: index_backfill.pls_start(fake_strategy) mock_task.apply_async.assert_called_once_with((index_backfill.pk,)) assert index_backfill.backfill_status == 
IndexBackfill.WAITING diff --git a/tests/share/util/test_contact_extract.py b/tests/share/util/test_contact_extract.py deleted file mode 100644 index f8939e9f0..000000000 --- a/tests/share/util/test_contact_extract.py +++ /dev/null @@ -1,48 +0,0 @@ -import pytest -from share.transform.chain.utils import contact_extract - - -@pytest.mark.parametrize('input_text, output_text', [ - ('Contact: Lisa Floyd-Hanna (Herbarium Curator); Becky McLemore (Collections Manager) (collections@naturalhistoryinstitute.org)', {'email': 'collections@naturalhistoryinstitute.org', 'name': 'Lisa Floyd-Hanna'}), - ('Contact: Cassandra Leah Quave (cassandra.leah.quave@emory.edu); Tharanga Samarakoon (tsamarakoon@emory.edu)', {'email': 'cassandra.leah.quave@emory.edu', 'name': 'Cassandra Leah Quave'}), - ('Contact: Harvey E. Ballard, Jr., Director of the Floyd Bartley Herbarium and Curator of Vascular Plants (ballardh@ohio.edu)', {'email': 'ballardh@ohio.edu', 'name': 'Harvey E. Ballard'}), - ('Contact: Dra. María Socorro Gonzalez Elizondo (herbario_ciidir@yahoo.com.mx)', {'email': 'herbario_ciidir@yahoo.com.mx', 'name': 'Dra. María Socorro Gonzalez Elizondo'}), - ('Contact: Patti J. Anderson, Ph.D., Botanist (Patti.Anderson@freshfromflorida.com)', {'email': 'patti.anderson@freshfromflorida.com', 'name': 'Patti J. Anderson'}), - ('Contact: Leslie R. Goertzen / Curtis J. Hansen (goertzen@auburn.edu / hansecj@auburn.edu)', {'email': 'goertzen@auburn.edu', 'name': 'Leslie R. Goertzen'}), - ('Contact: Teresa Mayfield (tmayfield@utep.edu) or Richard Worthington (rworthin@utep.edu) ()', {'email': 'tmayfield@utep.edu', 'name': 'Teresa Mayfield'}), - ('Contact: Taylor-Davenport, Robin M -FS (robintaylordavenport@fs.fed.us)', {'email': 'robintaylordavenport@fs.fed.us', 'name': 'Robin Taylor-Davenport'}), - ('Contact: Diana Hurlbut, Collection Manager, Vascular P (dhurlbut@mail.nysed.gov)', {'email': 'dhurlbut@mail.nysed.gov', 'name': 'Diana Hurlbut'}), - ('Contact: Michael D. Windham / Layne Huiet (mdw26 at duke.edu / rlh22 at duke.edu)', {'email': 'mdw26 at duke.edu', 'name': 'Michael D. Windham'}), - ('Contact: Melissa Islam, Head Curator (melissa.islam@botanicgardens.org)', {'email': 'melissa.islam@botanicgardens.org', 'name': 'Melissa Islam'}), - ('Contact: Dr. Joann M Chang, Science Division Chair (joann.chang@azwestern.edu)', {'email': 'joann.chang@azwestern.edu', 'name': 'Joann Chang'}), - ('Contact: Steffi Ickert-Bond, herbarium curator (smickertbond@alaska.edu)', {'email': 'smickertbond@alaska.edu', 'name': 'Steffi Ickert-Bond'}), - ('Contact: Andrew Miller, Ph.D., Mycologist and Director (amiller7@illinois.edu)', {'email': 'amiller7@illinois.edu', 'name': 'Andrew Miller'}), - ('Contact: Ellen Dean, Director; Tom Starbuck, Data (tjstarbuck@ucdavis.edu)', {'email': 'tjstarbuck@ucdavis.edu', 'name': 'Ellen Dean'}), - ('Contact: Christopher Tyrrell, Collection Manager (tyrrell@mpm.edu)', {'email': 'tyrrell@mpm.edu', 'name': 'Christopher Tyrrell'}), - ('Contact: Dr. Dayle Saar, Director and Curator (dsaar@murraystate.edu)', {'email': 'dsaar@murraystate.edu', 'name': 'Dayle Saar'}), - ('Contact: Jenn Yost, Director and Assistant Professor o (jyost@calpoly.edu)', {'email': 'jyost@calpoly.edu', 'name': 'Jenn Yost'}), - ('Contact: Dr. Zack E. Murrell, Curator (murrellze@appstate.edu)', {'email': 'murrellze@appstate.edu', 'name': 'Zack E. Murrell'}), - ('Contact: Gordon C. Tucker, Professor & Curator (gctucker@eiu.edu)', {'email': 'gctucker@eiu.edu', 'name': 'Gordon C. Tucker'}), - ('Contact: Dr. 
Travis Almquist (talmquist@benedictine.edu)', {'email': 'talmquist@benedictine.edu', 'name': 'Travis Almquist'}), - ('Contact: Tina Ayers (tina.ayers@nau.edu) (deaver.herbarium@nau.edu)', {'email': 'tina.ayers@nau.edu', 'name': 'Tina Ayers'}), - ('Contact: Jennifer S. Kluse, Collections Manager (jkluse@lsu.edu)', {'email': 'jkluse@lsu.edu', 'name': 'Jennifer S. Kluse'}), - ('Contact: Janelle Burke, Ph.D. (janelle.burke@howard.edu)', {'email': 'janelle.burke@howard.edu', 'name': 'Janelle Burke'}), - ('Contact: Erin Lonergan (erinrlonergan@fs.fed.us) and M ()', {'email': 'erinrlonergan@fs.fed.us', 'name': 'Erin Lonergan'}), - ('Contact: Mason Brock, Collections Manager (mbrock2@my.apsu.edu)', {'email': 'mbrock2@my.apsu.edu', 'name': 'Mason Brock'}), - ('Contact: C.F. Rick Williams, Curator (willcha2@isu.edu)', {'email': 'willcha2@isu.edu', 'name': 'C.F. Rick Williams'}), - ('Contact: Weckesser, Wendy (wendy_weckesser@nps.gov)', {'email': 'wendy_weckesser@nps.gov', 'name': 'Wendy Weckesser'}), - ('Contact: Dr. Johnnie L. Gentry Jr. (gentry@uark.edu)', {'email': 'gentry@uark.edu', 'name': 'Johnnie L. Gentry'}), - ('Contact: Deborah Lewis, Curator (dlewis@iastate.edu)', {'email': 'dlewis@iastate.edu', 'name': 'Deborah Lewis'}), - ('Contact: Robert Johnson / Leigh Johnson (bry.db@byu.edu)', {'email': 'bry.db@byu.edu', 'name': 'Robert Johnson'}), - ('Contact: Rebecca Dolan, Director (rdolan@butler.edu)', {'email': 'rdolan@butler.edu', 'name': 'Rebecca Dolan'}), - ('Contact: Chris Lay, Administrative Director (cml@ucsc.edu)', {'email': 'cml@ucsc.edu', 'name': 'Chris Lay'}), - ('Contact: Patricia Fall (fall@asu.edu)', {'email': 'fall@asu.edu', 'name': 'Patricia Fall'}), - ('Contact: (Seney@fws.gov)', {'email': 'seney@fws.gov'}), - ('Contact: Susan Hepler ()', {'name': 'Susan Hepler'}), - ('Contact: ()', {}), - ('Contact:', {}), - ('', {}) -]) -def test_contact_extract(input_text, output_text): - extracted = contact_extract(input_text) - assert extracted == output_text diff --git a/tests/share/util/test_encrypted_field.py b/tests/share/util/test_encrypted_field.py index a5c579e6f..543d85cc9 100644 --- a/tests/share/util/test_encrypted_field.py +++ b/tests/share/util/test_encrypted_field.py @@ -3,6 +3,7 @@ from share.models.fields import EncryptedJSONField +# TODO: remove after migrations have been fully squashed class TestEncryptedJsonField: @pytest.fixture diff --git a/tests/share/util/test_force_text.py b/tests/share/util/test_force_text.py deleted file mode 100644 index 2a9abe90c..000000000 --- a/tests/share/util/test_force_text.py +++ /dev/null @@ -1,32 +0,0 @@ -import pytest -from share.transform.chain.utils import force_text - - -@pytest.mark.parametrize('input_text, output_text', [ - (['architecture', 'art', 'mechanical'], ['architecture', 'art', 'mechanical']), - (['architecture', {'#text': 'art'}, 'mechanical'], ['architecture', 'art', 'mechanical']), - (['architecture', {'#text': 'art'}, None], ['architecture', 'art']), - ([None, {'#text': 'art'}, None], ['art']), - ({'#text': 'art'}, 'art'), - ('mechanical', 'mechanical'), - (None, ''), - ({}, ''), - ({'test': 'value'}, '') -]) -def test_force_text(input_text, output_text): - assert force_text(input_text) == output_text - - -@pytest.mark.parametrize('input_text, list_sep, output_text', [ - (['architecture', 'art', 'mechanical'], None, ['architecture', 'art', 'mechanical']), - (['architecture', 'art', 'mechanical'], '\n', 'architecture\nart\nmechanical'), - (['architecture', 'art', 'mechanical'], ' word ', 'architecture word art word 
mechanical'), - (['architecture', {'#text': 'art'}, 'mechanical'], '\n', 'architecture\nart\nmechanical'), - (['architecture', {'#text': 'art'}, None], ' f', 'architecture fart'), - ([None, {'#text': 'art'}, None], '|', 'art'), - ({'#text': 'art'}, ' word ', 'art'), - ('mechanical', 'foo', 'mechanical'), - (None, '\n', ''), -]) -def test_force_text_joined(input_text, list_sep, output_text): - assert force_text(input_text, list_sep) == output_text diff --git a/tests/share/util/test_mutable_graph.py b/tests/share/util/test_mutable_graph.py deleted file mode 100644 index 46f4eb4a3..000000000 --- a/tests/share/util/test_mutable_graph.py +++ /dev/null @@ -1,281 +0,0 @@ -import pytest - -from share.util.graph import MutableGraph, MutableGraphError - - -work_id = '_:6203fec461bb4b3fa956772acbd9c50d' -org_id = '_:d486fd737bea4fbe9566b7a2842651ef' -uni_id = '_:d486fd737beeffbe9566b7a2842651ef' -person_id = '_:f4cec0271c7d4085bac26dbb2b32a002' -creator_id = '_:a17f28109536459ca02d99bf777400ae' -identifier_id = '_:a27f2810e536459ca02d99bf707400be' - - -@pytest.fixture -def example_graph_nodes(): - return [ - {'@id': uni_id, '@type': 'Institution', 'name': 'University of Whales'}, - {'@id': org_id, '@type': 'Organization', 'name': 'Department of Physics'}, - - {'@id': '_:org-uni', '@type': 'IsMemberOf', 'subject': {'@id': org_id, '@type': 'Organization'}, 'related': {'@id': uni_id, '@type': 'Institution'}}, - - {'@id': '_:tag1', '@type': 'Tag', 'name': 'you\'re'}, - {'@id': '_:tag2', '@type': 'Tag', 'name': 'it'}, - {'@id': '_:ttag1', '@type': 'ThroughTags', 'tag': {'@id': '_:tag1', '@type': 'Tag'}, 'creative_work': {'@id': work_id, '@type': 'Article'}}, - {'@id': '_:ttag2', '@type': 'ThroughTags', 'tag': {'@id': '_:tag2', '@type': 'Tag'}, 'creative_work': {'@id': work_id, '@type': 'Article'}}, - - {'@id': '_:c4f10e02785a4b4d878f48d08ffc7fce', 'related': {'@type': 'Organization', '@id': org_id}, '@type': 'IsAffiliatedWith', 'subject': {'@type': 'Person', '@id': '_:7e742fa3377e4f119e36f8629144a0bc'}}, - {'@id': '_:7e742fa3377e4f119e36f8629144a0bc', 'agent_relations': [{'@type': 'IsAffiliatedWith', '@id': '_:c4f10e02785a4b4d878f48d08ffc7fce'}], '@type': 'Person', 'family_name': 'Prendergast', 'given_name': 'David'}, - {'@id': '_:687a4ba2cbd54ab7a2f2c3cd1777ea8a', '@type': 'Creator', 'creative_work': {'@type': 'Article', '@id': work_id}, 'agent': {'@type': 'Person', '@id': '_:7e742fa3377e4f119e36f8629144a0bc'}}, - - {'@id': '_:69e859cefed140bd9b717c5b610d300c', '@type': 'Organization', 'name': 'NMRC, University College, Cork, Ireland'}, - - {'@id': '_:2fd829eeda214adca2d4d34d02b10328', 'related': {'@type': 'Organization', '@id': '_:69e859cefed140bd9b717c5b610d300c'}, '@type': 'IsAffiliatedWith', 'subject': {'@type': 'Person', '@id': '_:ed3cc2a50f6d499db933a28d16bca5d6'}}, - {'@id': '_:ed3cc2a50f6d499db933a28d16bca5d6', 'agent_relations': [{'@type': 'IsAffiliatedWith', '@id': '_:2fd829eeda214adca2d4d34d02b10328'}], '@type': 'Person', 'family_name': 'Nolan', 'given_name': 'M.'}, - {'@id': '_:27961f3c7c644101a500772477aff304', '@type': 'Creator', 'creative_work': {'@type': 'Article', '@id': work_id}, 'agent': {'@type': 'Person', '@id': '_:ed3cc2a50f6d499db933a28d16bca5d6'}}, - - {'@id': '_:d4f10e02785a4b4d878f48d08ffc7fce', 'related': {'@type': 'Organization', '@id': org_id}, '@type': 'IsAffiliatedWith', 'subject': {'@type': 'Person', '@id': '_:9a1386475d314b9bb524931e24361aaa'}}, - {'@id': '_:9a1386475d314b9bb524931e24361aaa', 'agent_relations': [{'@type': 'IsAffiliatedWith', '@id': 
'_:d4f10e02785a4b4d878f48d08ffc7fce'}], '@type': 'Person', 'family_name': 'Filippi', 'given_name': 'Claudia'}, - {'@id': '_:bf7726af4542405888463c796e5b7686', '@type': 'Creator', 'creative_work': {'@type': 'Article', '@id': work_id}, 'agent': {'@type': 'Person', '@id': '_:9a1386475d314b9bb524931e24361aaa'}}, - - {'@id': '_:e4f10e02785a4b4d878f48d08ffc7fce', 'related': {'@type': 'Organization', '@id': org_id}, '@type': 'IsAffiliatedWith', 'subject': {'@type': 'Person', '@id': '_:78639db07e2e4ee88b422a8920d8a095'}}, - {'@id': '_:78639db07e2e4ee88b422a8920d8a095', 'agent_relations': [{'@type': 'IsAffiliatedWith', '@id': '_:e4f10e02785a4b4d878f48d08ffc7fce'}], '@type': 'Person', 'family_name': 'Fahy', 'given_name': 'Stephen'}, - {'@id': '_:18d151204d7c431388a7e516defab1bc', '@type': 'Creator', 'creative_work': {'@type': 'Article', '@id': work_id}, 'agent': {'@type': 'Person', '@id': '_:78639db07e2e4ee88b422a8920d8a095'}}, - - {'@id': '_:5fd829eeda214adca2d4d34d02b10328', 'related': {'@type': 'Organization', '@id': '_:69e859cefed140bd9b717c5b610d300c'}, '@type': 'IsAffiliatedWith', 'subject': {'@type': 'Person', '@id': person_id}}, - {'@id': person_id, 'agent_relations': [{'@type': 'IsAffiliatedWith', '@id': '_:5fd829eeda214adca2d4d34d02b10328'}], '@type': 'Person', 'family_name': 'Greer', 'given_name': 'J.'}, - {'@id': creator_id, '@type': 'Creator', 'creative_work': {'@type': 'Article', '@id': work_id}, 'agent': {'@type': 'Person', '@id': person_id}}, - {'@id': identifier_id, '@type': 'WorkIdentifier', 'creative_work': {'@type': 'Article', '@id': work_id}, 'uri': 'http://example.com/things'}, - {'@id': work_id, 'date_updated': '2016-10-20T00:00:00+00:00', 'identifiers': [{'@type': 'WorkIdentifier', '@id': identifier_id}], 'agent_relations': [{'@type': 'Creator', '@id': '_:687a4ba2cbd54ab7a2f2c3cd1777ea8a'}, {'@type': 'Creator', '@id': '_:27961f3c7c644101a500772477aff304'}, {'@type': 'Creator', '@id': '_:bf7726af4542405888463c796e5b7686'}, {'@type': 'Creator', '@id': '_:18d151204d7c431388a7e516defab1bc'}, {'@type': 'Creator', '@id': creator_id}], 'title': 'Impact of Electron-Electron Cusp on Configuration Interaction Energies', '@type': 'Article', 'description': ' The effect of the electron-electron cusp on the convergence of configuration\ninteraction (CI) wave functions is examined. By analogy with the\npseudopotential approach for electron-ion interactions, an effective\nelectron-electron interaction is developed which closely reproduces the\nscattering of the Coulomb interaction but is smooth and finite at zero\nelectron-electron separation. The exact many-electron wave function for this\nsmooth effective interaction has no cusp at zero electron-electron separation.\nWe perform CI and quantum Monte Carlo calculations for He and Be atoms, both\nwith the Coulomb electron-electron interaction and with the smooth effective\nelectron-electron interaction. We find that convergence of the CI expansion of\nthe wave function for the smooth electron-electron interaction is not\nsignificantly improved compared with that for the divergent Coulomb interaction\nfor energy differences on the order of 1 mHartree. 
This shows that, contrary to\npopular belief, description of the electron-electron cusp is not a limiting\nfactor, to within chemical accuracy, for CI calculations.\n'} # noqa - ] - - -@pytest.fixture -def example_graph(example_graph_nodes): - return MutableGraph.from_jsonld(example_graph_nodes) - - -class TestMutableGraph: - def test_graph(self, example_graph): - assert example_graph.number_of_nodes() == 25 - - @pytest.mark.parametrize('node_id', [work_id, org_id, person_id, creator_id]) - def test_get_node(self, example_graph, node_id): - assert example_graph.get_node(node_id).id == node_id - - def test_get_nonexistent_node(self, example_graph): - assert example_graph.get_node('not_an_id') is None - - def test_edge(self, example_graph): - creator_node = example_graph.get_node(creator_id) - assert creator_node['creative_work'] == example_graph.get_node(work_id) - assert creator_node['agent'] == example_graph.get_node(person_id) - - @pytest.mark.parametrize('node_id, key, value', [ - (work_id, 'title', 'title title'), - (work_id, 'description', 'woo'), - (identifier_id, 'creative_work', None), - (identifier_id, 'foo', 'bar'), - ]) - def test_set_attrs(self, example_graph, node_id, key, value): - n = example_graph.get_node(node_id) - n[key] = value - assert n[key] == value - - @pytest.mark.parametrize('set_none', [True, False]) - def test_del_attrs(self, example_graph, set_none): - work = example_graph.get_node(work_id) - assert work['title'] - if set_none: - work['title'] = None - else: - del work['title'] - assert work['title'] is None - assert 'title' not in work.attrs() - - identifier = example_graph.get_node(identifier_id) - assert identifier['creative_work'] == work - if set_none: - identifier['creative_work'] = None - else: - del identifier['creative_work'] - - @pytest.mark.parametrize('node_id, reverse_edge_name, count', [ - (work_id, 'agent_relations', 5), - (work_id, 'incoming_creative_work_relations', 0), - (work_id, 'identifiers', 1), - (org_id, 'incoming_agent_relations', 3), - (org_id, 'outgoing_agent_relations', 1), - ]) - def test_reverse_edge(self, example_graph, node_id, reverse_edge_name, count): - node = example_graph.get_node(node_id) - assert len(node[reverse_edge_name]) == count - - @pytest.mark.parametrize('node_id, m2m_name, count', [ - (work_id, 'related_agents', 5), - (work_id, 'related_works', 0), - (work_id, 'subjects', 0), - (work_id, 'tags', 2), - (org_id, 'related_works', 0), - (org_id, 'related_agents', 4), - ]) - def test_many_to_many(self, example_graph, node_id, m2m_name, count): - node = example_graph.get_node(node_id) - assert len(node[m2m_name]) == count - - @pytest.mark.parametrize('node_id, count', [ - (work_id, 16), - (org_id, 20), - (person_id, 22), - (creator_id, 24), - ]) - def test_remove_node_cascades(self, example_graph, node_id, count): - example_graph.remove_node(node_id) - assert example_graph.number_of_nodes() == count - - def test_add_node(self, example_graph): - identifier_id = '_:foo' - uri = 'mailto:person@example.com' - person = example_graph.get_node(person_id) - node_count = example_graph.number_of_nodes() - assert len(person['identifiers']) == 0 - - example_graph.add_node(identifier_id, 'AgentIdentifier', {'uri': uri, 'agent': person}) - - assert example_graph.number_of_nodes() == node_count + 1 - identifier_node = example_graph.get_node(identifier_id) - assert identifier_node['uri'] == uri - assert identifier_node['agent'] == person - - identifiers = person['identifiers'] - assert len(identifiers) == 1 - assert identifier_node == 
next(iter(identifiers)) - - @pytest.mark.parametrize('count, filter', [ - (5, lambda n, g: n.type == 'person'), - (0, lambda n, g: not g.degree(n.id)), - (1, lambda n, g: len(g.out_edges(n.id)) == 1), - ]) - def test_filter_nodes(self, example_graph, filter, count): - filtered = list(example_graph.filter_nodes(lambda n: filter(n, example_graph))) - assert len(filtered) == count - - def test_jsonld(self, example_graph_nodes, example_graph): - def clean_jsonld(value): - if isinstance(value, list): - return [clean_jsonld(n) for n in sorted(value, key=lambda n: n['@id'])] - if isinstance(value, dict): - return { - k: v.lower() if k == '@type' else clean_jsonld(v) - for k, v in value.items() if not isinstance(v, list) - } - return value - expected_nodes = clean_jsonld(example_graph_nodes) - actual = example_graph.to_jsonld(in_edges=False) - actual_nodes = clean_jsonld(actual['@graph']) - assert expected_nodes == actual_nodes - assert actual['central_node_id'] is None - - -class TestCentralWork: - def test_obvious_example(self, example_graph): - assert example_graph.central_node_id is None - assert example_graph.get_central_node() is None - - guessed = example_graph.get_central_node(guess=True) - assert guessed.id == work_id - # side-effect: now the graph knows its central node - assert example_graph.central_node_id == work_id - assert example_graph.get_central_node() == guessed - - def test_no_central_node(self): - graph = MutableGraph() - agent = graph.add_node(None, 'agent') - graph.add_node(None, 'agentidentifier', {'agent': agent}) - - assert graph.get_central_node() is None - assert graph.get_central_node(guess=True) is None - assert graph.central_node_id is None - - def test_explicit_central_node(self): - central_id = '_:center' - - graph = MutableGraph.from_jsonld({ - 'central_node_id': central_id, - '@graph': [ - {'@id': central_id, '@type': 'creativework'}, - {'@id': '_:other', '@type': 'creativework', 'title': 'looks like the center'}, - ], - }) - assert graph.central_node_id == central_id - assert graph.get_central_node().id == central_id - assert graph.get_central_node(guess=True).id == central_id - - def test_ambiguous_central_node(self): - graph = MutableGraph.from_jsonld({ - '@graph': [ - {'@id': '_:thing1', '@type': 'creativework', 'title': 'looks like the center'}, - {'@id': '_:thing2', '@type': 'creativework', 'title': 'also looks like the center'}, - ], - }) - with pytest.raises(MutableGraphError): - graph.get_central_node(guess=True) - - @pytest.mark.parametrize('graph_nodes, expected_guess', [ - ( - [ - {'@id': '_:thing1', '@type': 'creativework', 'title': 'only thing'}, - ], - '_:thing1', - ), - ( - [ - {'@id': '_:thing1', '@type': 'creativework', 'title': 'one thing'}, - {'@id': '_:thing2', '@type': 'creativework', 'title': 'another thing', 'description': 'tiebreaker'}, - ], - '_:thing2', - ), - ( - [ - {'@id': '_:thing1', '@type': 'creativework', 'title': 'looks like the center'}, - {'@id': '_:thing2', '@type': 'creativework', 'title': 'also looks like the center'}, - { - '@id': '_:tiebreaker', - '@type': 'workidentifier', - 'uri': 'http://example.com/woo', - 'creative_work': {'@id': '_:thing2', '@type': 'creativework'}, - }, - ], - '_:thing2', - ), - ( - [ - {'@id': '_:thing1', '@type': 'creativework', 'title': 'looks like the center', 'description': 'confounder'}, - {'@id': '_:thing2', '@type': 'creativework', 'title': 'also looks like the center'}, - { - '@id': '_:tiebreaker', - '@type': 'workidentifier', - 'uri': 'http://example.com/woo', - 'creative_work': {'@id': 
'_:thing2', '@type': 'creativework'}, - }, - ], - '_:thing2', - ), - ( - [ - {'@id': '_:thing1', '@type': 'creativework', 'title': 'parent'}, - {'@id': '_:thing2', '@type': 'creativework', 'title': 'child'}, - { - '@id': '_:choose-the-child', - '@type': 'ispartof', - 'subject': {'@id': '_:thing2', '@type': 'creativework'}, - 'related': {'@id': '_:thing1', '@type': 'creativework'}, - }, - ], - '_:thing2', - ), - ]) - def test_guessing(self, graph_nodes, expected_guess): - graph = MutableGraph.from_jsonld({'@graph': graph_nodes}) - guessed = graph.get_central_node(guess=True) - assert guessed.id == expected_guess diff --git a/tests/share/util/test_osf.py b/tests/share/util/test_osf.py deleted file mode 100644 index 61f74b999..000000000 --- a/tests/share/util/test_osf.py +++ /dev/null @@ -1,57 +0,0 @@ -import pytest - -from share.util.osf import guess_osf_guid, get_guid_from_uri - -from tests.share.normalize import factories as f - - -@pytest.mark.parametrize('uri, expected', [ - ('http://osf.io/mst3k/', 'mst3k'), - ('https://osf.io/mst3k', 'mst3k'), - ('http://staging.osf.io/mst3k', 'mst3k'), - ('https://staging.osf.io/mst3k/', 'mst3k'), - ('http://staging2.osf.io/mst3k', 'mst3k'), - ('https://test.osf.io/mst3k', 'mst3k'), - ('https://future-staging.osf.io/mst3k/', 'mst3k'), - ('http://osf.io/mst3k/files', None), - ('https://nope.staging.osf.io/mst3k', None), - ('https://example.com', None), - ('foo', None), - ('https://meow.osfdio/mst3k', None), - ('https://osflio/mst3k', None), - ('https://meowosf.io/mst3k', None), -]) -def test_get_guid_from_uri(uri, expected): - actual = get_guid_from_uri(uri) - assert actual == expected - - -@pytest.mark.parametrize('graph_input, expected', [ - (f.CreativeWork(), None), - (f.CreativeWork( - identifiers=[f.WorkIdentifier(uri='http://osf.io/mst3k')], - ), 'mst3k'), - (f.CreativeWork( - identifiers=[ - f.WorkIdentifier(uri='http://osf.io/mst3k'), - f.WorkIdentifier(uri='http://osf.io/ohnoe'), - ], - ), None), - ([ - f.CreativeWork( - sparse=True, - id='a', - identifiers=[f.WorkIdentifier(uri='http://osf.io/mst3k')], - ), - f.CreativeWork( - sparse=True, - id='b', - title='this one', - identifiers=[f.WorkIdentifier(uri='http://osf.io/other')], - ), - ], 'other'), - (f.Agent(), None), -]) -def test_guess_osf_guid(Graph, graph_input, expected): - actual = guess_osf_guid(Graph(graph_input)) - assert actual == expected diff --git a/tests/trove/derive/_inputs.py b/tests/trove/derive/_inputs.py index e2659c4ee..da03102c7 100644 --- a/tests/trove/derive/_inputs.py +++ b/tests/trove/derive/_inputs.py @@ -12,12 +12,10 @@ OWL, PROV, SHAREv2, + BLARG, ) -BLARG = rdf.IriNamespace('http://blarg.example/vocab/') - - @dataclasses.dataclass class DeriverTestDoc: focus_iri: str diff --git a/tests/trove/derive/test_osfmap_json.py b/tests/trove/derive/test_osfmap_json.py index b408e07e3..4ee90fbd0 100644 --- a/tests/trove/derive/test_osfmap_json.py +++ b/tests/trove/derive/test_osfmap_json.py @@ -12,31 +12,31 @@ def assert_outputs_equal(self, expected, actual): expected_outputs = { 'blarg-item': { - "@id": "http://blarg.example/vocab/my_item", - "resourceType": [{"@id": "http://blarg.example/vocab/Item"}], + "@id": "blarg:my_item", + "resourceType": [{"@id": "blarg:Item"}], "title": [{ "@value": "title", "@language": "en" }], "creator": [{ - "@id": "http://blarg.example/vocab/me", + "@id": "blarg:me", "resourceType": [{"@id": "Person"}], "name": [{"@value": "me me"}] }], "dateCreated": [{"@value": "2024-02-14"}], }, 'blarg-project': { - "@id": 
"http://blarg.example/vocab/my_project", + "@id": "blarg:my_project", "resourceType": [ {"@id": "Project"}, - {"@id": "http://blarg.example/vocab/Item"}, + {"@id": "blarg:Item"}, ], "title": [{ "@value": "title", "@language": "en", }], "creator": [{ - "@id": "http://blarg.example/vocab/me", + "@id": "blarg:me", "resourceType": [{"@id": "Person"}], "name": [{"@value": "me me"}] }], diff --git a/tests/trove/digestive_tract/test_derive.py b/tests/trove/digestive_tract/test_derive.py index 79fefe859..4b7aedd02 100644 --- a/tests/trove/digestive_tract/test_derive.py +++ b/tests/trove/digestive_tract/test_derive.py @@ -1,18 +1,14 @@ import json from django.test import TestCase -from primitive_metadata import primitive_rdf as rdf from tests import factories from trove import digestive_tract from trove import models as trove_db -from trove.vocab.namespaces import TROVE +from trove.vocab.namespaces import TROVE, BLARG as _BLARG from trove.util.iris import get_sufficiently_unique_iri -_BLARG = rdf.IriNamespace('https://blarg.example/') - - class TestDigestiveTractDerive(TestCase): @classmethod def setUpTestData(cls): @@ -25,7 +21,7 @@ def setUpTestData(cls): indexcard=cls.indexcard, from_raw_datum=_raw, focus_iri=cls.focus_iri, - rdf_as_turtle='''@prefix blarg: . + rdf_as_turtle='''@prefix blarg: . blarg:this a blarg:Thing ; blarg:like blarg:that . @@ -37,9 +33,9 @@ def test_derive(self): self.assertEqual(_derived.upriver_indexcard_id, self.indexcard.id) self.assertEqual(_derived.deriver_identifier.sufficiently_unique_iri, get_sufficiently_unique_iri(TROVE['derive/osfmap_json'])) self.assertEqual(json.loads(_derived.derived_text), { - '@id': _BLARG.this, - 'resourceType': [{'@id': _BLARG.Thing}], - _BLARG.like: [{'@id': _BLARG.that}], + '@id': 'blarg:this', + 'resourceType': [{'@id': 'blarg:Thing'}], + 'blarg:like': [{'@id': 'blarg:that'}], }) def test_derive_with_supplementary(self): @@ -51,7 +47,7 @@ def test_derive_with_supplementary(self): from_raw_datum=_supp_raw, supplementary_suid=_supp_raw.suid, focus_iri=self.focus_iri, - rdf_as_turtle='''@prefix blarg: . + rdf_as_turtle='''@prefix blarg: . blarg:this blarg:unlike blarg:nonthing . 
''',
         )
@@ -59,8 +55,8 @@ def test_derive_with_supplementary(self):
         self.assertEqual(_derived.upriver_indexcard_id, self.indexcard.id)
         self.assertEqual(_derived.deriver_identifier.sufficiently_unique_iri, get_sufficiently_unique_iri(TROVE['derive/osfmap_json']))
         self.assertEqual(json.loads(_derived.derived_text), {
-            '@id': _BLARG.this,
-            'resourceType': [{'@id': _BLARG.Thing}],
-            _BLARG.like: [{'@id': _BLARG.that}],
-            _BLARG.unlike: [{'@id': _BLARG.nonthing}],
+            '@id': 'blarg:this',
+            'resourceType': [{'@id': 'blarg:Thing'}],
+            'blarg:like': [{'@id': 'blarg:that'}],
+            'blarg:unlike': [{'@id': 'blarg:nonthing'}],
         })
diff --git a/tests/trove/digestive_tract/test_expel.py b/tests/trove/digestive_tract/test_expel.py
index 2517172ea..88a2d6f47 100644
--- a/tests/trove/digestive_tract/test_expel.py
+++ b/tests/trove/digestive_tract/test_expel.py
@@ -2,22 +2,19 @@
 from unittest import mock
 
 from django.test import TestCase
-from primitive_metadata import primitive_rdf as rdf
 
 from share import models as share_db
 from tests import factories
 from trove import digestive_tract
 from trove import models as trove_db
-
-
-_BLARG = rdf.IriNamespace('https://blarg.example/')
+from trove.vocab.namespaces import BLARG
 
 
 class TestDigestiveTractExpel(TestCase):
     @classmethod
     def setUpTestData(cls):
-        cls.focus_1 = _BLARG.this1
-        cls.focus_2 = _BLARG.this2
+        cls.focus_1 = BLARG.this1
+        cls.focus_2 = BLARG.this2
         cls.raw_1, cls.indexcard_1 = _setup_ingested(cls.focus_1)
         cls.raw_2, cls.indexcard_2 = _setup_ingested(cls.focus_2)
         cls.raw_supp = _setup_supplementary(cls.focus_1, cls.raw_1.suid, cls.indexcard_1)
@@ -179,7 +176,7 @@ def _setup_ingested(focus_iri: str):
         focus_iri=focus_iri,
         rdf_as_turtle=_latest_rdf.rdf_as_turtle,
     )
-    _deriver_iri = _BLARG.deriver
+    _deriver_iri = BLARG.deriver
     _deriver_ident = trove_db.ResourceIdentifier.objects.get_or_create_for_iri(_deriver_iri)
     trove_db.DerivedIndexcard.objects.create(
         upriver_indexcard=_indexcard,
diff --git a/tests/trove/digestive_tract/test_extract.py b/tests/trove/digestive_tract/test_extract.py
index 5a87e8099..57afd3ca0 100644
--- a/tests/trove/digestive_tract/test_extract.py
+++ b/tests/trove/digestive_tract/test_extract.py
@@ -6,9 +6,7 @@
 from trove import digestive_tract
 from trove import exceptions as trove_exceptions
 from trove import models as trove_db
-
-
-_BLARG = rdf.IriNamespace('https://blarg.example/')
+from trove.vocab.namespaces import BLARG as _BLARG
 
 
 class TestDigestiveTractExtract(TestCase):
@@ -17,7 +15,7 @@ def setUpTestData(cls):
         _focus_ident = trove_db.ResourceIdentifier.objects.get_or_create_for_iri(_BLARG.this)
         cls.raw = factories.RawDatumFactory(
             mediatype='text/turtle',
-            datum='''@prefix blarg: <https://blarg.example/> .
+            datum='''@prefix blarg: <http://blarg.example/vocab/> .
 
 blarg:this a blarg:Thing ;
     blarg:like blarg:that .
''',
         )
         cls.supplementary_raw = factories.RawDatumFactory(
             mediatype='text/turtle',
-            datum='''@prefix blarg: <https://blarg.example/> .
+            datum='''@prefix blarg: <http://blarg.example/vocab/> .
 
 blarg:this blarg:like blarg:another ;
     blarg:unlike blarg:nonthing .
''',
         )
@@ -49,11 +47,11 @@ def test_extract(self):
         _focus_idents = list(
             _indexcard.focus_identifier_set.values_list('sufficiently_unique_iri', flat=True),
         )
-        self.assertEqual(_focus_idents, ['://blarg.example/this'])
+        self.assertEqual(_focus_idents, ['://blarg.example/vocab/this'])
         _focustype_idents = list(
             _indexcard.focustype_identifier_set.values_list('sufficiently_unique_iri', flat=True),
         )
-        self.assertEqual(_focustype_idents, ['://blarg.example/Thing'])
+        self.assertEqual(_focustype_idents, ['://blarg.example/vocab/Thing'])
         self.assertEqual(list(_indexcard.supplementary_rdf_set.all()), [])
         _latest_rdf = _indexcard.latest_rdf
         self.assertEqual(_latest_rdf.from_raw_datum_id, self.raw.id)
diff --git a/tests/trove/digestive_tract/test_swallow.py b/tests/trove/digestive_tract/test_swallow.py
index 096f26ae2..968b8d668 100644
--- a/tests/trove/digestive_tract/test_swallow.py
+++ b/tests/trove/digestive_tract/test_swallow.py
@@ -12,7 +12,7 @@ class TestDigestiveTractSwallow(TestCase):
     def setUpTestData(cls):
         cls.user = factories.ShareUserFactory()
         cls.turtle = '''
-@prefix blarg: <https://blarg.example/> .
+@prefix blarg: <http://blarg.example/vocab/> .
 
 blarg:this a blarg:Thing ;
     blarg:like blarg:that .
@@ -28,14 +28,14 @@ def test_swallow(self):
             record=self.turtle,
             record_identifier='blarg',
             record_mediatype='text/turtle',
-            focus_iri='https://blarg.example/this',
+            focus_iri='http://blarg.example/vocab/this',
         )
         (_raw,) = share_db.RawDatum.objects.all()
         self.assertEqual(_raw.datum, self.turtle)
         self.assertEqual(_raw.mediatype, 'text/turtle')
         self.assertIsNone(_raw.expiration_date)
         self.assertEqual(_raw.suid.identifier, 'blarg')
-        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/this')
+        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/vocab/this')
         self.assertEqual(_raw.suid.source_config.source.user_id, self.user.id)
         self.assertFalse(_raw.suid.is_supplementary)
         _mock_task.delay.assert_called_once_with(_raw.id, urgent=False)
@@ -47,7 +47,7 @@ def test_swallow_urgent(self):
             record=self.turtle,
             record_identifier='blarg',
             record_mediatype='text/turtle',
-            focus_iri='https://blarg.example/this',
+            focus_iri='http://blarg.example/vocab/this',
             urgent=True
         )
         (_raw,) = share_db.RawDatum.objects.all()
         self.assertEqual(_raw.mediatype, 'text/turtle')
         self.assertIsNone(_raw.expiration_date)
         self.assertEqual(_raw.suid.identifier, 'blarg')
-        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/this')
+        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/vocab/this')
         self.assertEqual(_raw.suid.source_config.source.user_id, self.user.id)
         self.assertFalse(_raw.suid.is_supplementary)
         _mock_task.delay.assert_called_once_with(_raw.id, urgent=True)
@@ -67,7 +67,7 @@ def test_swallow_supplementary(self):
             record=self.turtle,
             record_identifier='blarg',
             record_mediatype='text/turtle',
-            focus_iri='https://blarg.example/this',
+            focus_iri='http://blarg.example/vocab/this',
             is_supplementary=True,
         )
         (_raw,) = share_db.RawDatum.objects.all()
         self.assertEqual(_raw.mediatype, 'text/turtle')
         self.assertIsNone(_raw.expiration_date)
         self.assertEqual(_raw.suid.identifier, 'blarg')
-        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/this')
+        self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/vocab/this')
         self.assertEqual(_raw.suid.source_config.source.user_id, 
self.user.id) self.assertTrue(_raw.suid.is_supplementary) _mock_task.delay.assert_called_once_with(_raw.id, urgent=False) @@ -87,7 +87,7 @@ def test_swallow_with_expiration(self): record=self.turtle, record_identifier='blarg', record_mediatype='text/turtle', - focus_iri='https://blarg.example/this', + focus_iri='http://blarg.example/vocab/this', expiration_date=datetime.date(2048, 1, 3), ) (_raw,) = share_db.RawDatum.objects.all() @@ -95,7 +95,7 @@ def test_swallow_with_expiration(self): self.assertEqual(_raw.mediatype, 'text/turtle') self.assertEqual(_raw.expiration_date, datetime.date(2048, 1, 3)) self.assertEqual(_raw.suid.identifier, 'blarg') - self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/this') + self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/vocab/this') self.assertEqual(_raw.suid.source_config.source.user_id, self.user.id) self.assertFalse(_raw.suid.is_supplementary) _mock_task.delay.assert_called_once_with(_raw.id, urgent=False) @@ -107,7 +107,7 @@ def test_swallow_supplementary_with_expiration(self): record=self.turtle, record_identifier='blarg', record_mediatype='text/turtle', - focus_iri='https://blarg.example/this', + focus_iri='http://blarg.example/vocab/this', is_supplementary=True, expiration_date=datetime.date(2047, 1, 3), ) @@ -116,7 +116,7 @@ def test_swallow_supplementary_with_expiration(self): self.assertEqual(_raw.mediatype, 'text/turtle') self.assertEqual(_raw.expiration_date, datetime.date(2047, 1, 3)) self.assertEqual(_raw.suid.identifier, 'blarg') - self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/this') + self.assertEqual(_raw.suid.focus_identifier.sufficiently_unique_iri, '://blarg.example/vocab/this') self.assertEqual(_raw.suid.source_config.source.user_id, self.user.id) self.assertTrue(_raw.suid.is_supplementary) _mock_task.delay.assert_called_once_with(_raw.id, urgent=False) diff --git a/tests/trove/render/_inputs.py b/tests/trove/render/_inputs.py index 0a97a22c6..29d6cb9ad 100644 --- a/tests/trove/render/_inputs.py +++ b/tests/trove/render/_inputs.py @@ -10,10 +10,9 @@ FOAF, RDF, TROVE, + BLARG, ) -BLARG = rdf.IriNamespace('http://blarg.example/vocab/') - @dataclasses.dataclass class RdfCase: diff --git a/tests/trove/render/test_jsonapi_renderer.py b/tests/trove/render/test_jsonapi_renderer.py index 5c175b931..a5e8bdc6d 100644 --- a/tests/trove/render/test_jsonapi_renderer.py +++ b/tests/trove/render/test_jsonapi_renderer.py @@ -3,7 +3,7 @@ from trove.render.jsonapi import RdfJsonapiRenderer from trove.render._rendering import SimpleRendering -from ._inputs import BLARG +from trove.vocab.namespaces import BLARG from . 
import _base @@ -47,11 +47,11 @@ class TestJsonapiRenderer(_BaseJsonapiRendererTest): } }, "links": { - "self": BLARG.aCard + "self": BLARG.aCard, }, "meta": { "foaf:primaryTopic": [ - BLARG.anItem + "blarg:anItem" ], "dcterms:issued": [ "2024-01-01" @@ -68,14 +68,14 @@ class TestJsonapiRenderer(_BaseJsonapiRendererTest): rendered_content=json.dumps({ "data": { "id": "aHR0cDovL2JsYXJnLmV4YW1wbGUvdm9jYWIvYVN1YmplY3Q=", - "type": BLARG.aType, + "type": "blarg:aType", "meta": { - BLARG.hasIri: [BLARG.anIri], - BLARG.hasRdfStringLiteral: ["an rdf:string literal"], - BLARG.hasRdfLangStringLiteral: ['a rdf:langString literal'], - BLARG.hasIntegerLiteral: [17], - BLARG.hasDateLiteral: ["2024-01-01"], - BLARG.hasStrangeLiteral: ['a literal of strange datatype'], + "blarg:hasIri": ["blarg:anIri"], + "blarg:hasRdfStringLiteral": ["an rdf:string literal"], + "blarg:hasRdfLangStringLiteral": ['a rdf:langString literal'], + "blarg:hasIntegerLiteral": [17], + "blarg:hasDateLiteral": ["2024-01-01"], + "blarg:hasStrangeLiteral": ['a literal of strange datatype'], }, "links": {"self": BLARG.aSubject}, } @@ -174,7 +174,7 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR "type": "index-card", "meta": { "foaf:primaryTopic": [ - BLARG.anItem + "blarg:anItem" ], "dcterms:issued": [ "2024-01-01" @@ -201,7 +201,7 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR "type": "index-card", "meta": { "foaf:primaryTopic": [ - BLARG.anItemmm + "blarg:anItemmm" ], "dcterms:issued": [ "2024-03-03" @@ -228,7 +228,7 @@ class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonR "type": "index-card", "meta": { "foaf:primaryTopic": [ - BLARG.anItemm + "blarg:anItemm" ], "dcterms:issued": [ "2024-02-02" diff --git a/tests/trove/render/test_jsonld_renderer.py b/tests/trove/render/test_jsonld_renderer.py index 8741d8aba..75b92f9ff 100644 --- a/tests/trove/render/test_jsonld_renderer.py +++ b/tests/trove/render/test_jsonld_renderer.py @@ -13,7 +13,7 @@ class TestJsonldRenderer(_base.TroveJsonRendererTests): 'simple_card': SimpleRendering( mediatype='application/ld+json', rendered_content=json.dumps({ - "@id": BLARG.aCard, + "@id": "blarg:aCard", "dcterms:issued": [ { "@type": "xsd:date", @@ -27,7 +27,7 @@ class TestJsonldRenderer(_base.TroveJsonRendererTests): } ], "foaf:primaryTopic": [ - BLARG.anItem + "blarg:anItem" ], "rdf:type": [ "trove:Indexcard", @@ -47,40 +47,40 @@ class TestJsonldRenderer(_base.TroveJsonRendererTests): 'various_types': SimpleRendering( mediatype='application/ld+json', rendered_content=json.dumps({ - "@id": BLARG.aSubject, - BLARG.hasDateLiteral: [ + "@id": "blarg:aSubject", + "blarg:hasDateLiteral": [ { "@type": "xsd:date", "@value": "2024-01-01" } ], - BLARG.hasIntegerLiteral: [ + "blarg:hasIntegerLiteral": [ { "@type": "xsd:integer", "@value": "17" } ], - BLARG.hasIri: [ - BLARG.anIri + "blarg:hasIri": [ + "blarg:anIri" ], - BLARG.hasRdfLangStringLiteral: [ + "blarg:hasRdfLangStringLiteral": [ { "@language": "en", "@value": "a rdf:langString literal" } ], - BLARG.hasRdfStringLiteral: [ + "blarg:hasRdfStringLiteral": [ { "@value": "an rdf:string literal" } ], - BLARG.hasStrangeLiteral: [ + "blarg:hasStrangeLiteral": [ { - "@type": BLARG.aStrangeDatatype, + "@type": "blarg:aStrangeDatatype", "@value": "a literal of strange datatype" } ], - "rdf:type": [BLARG.aType], + "rdf:type": ["blarg:aType"], }), ), } @@ -93,7 +93,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): 'no_results': 
SimpleRendering( mediatype='application/ld+json', rendered_content=json.dumps({ - "@id": BLARG.aSearch, + "@id": "blarg:aSearch", "rdf:type": [ "trove:Cardsearch" ], @@ -106,7 +106,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): 'few_results': SimpleRendering( mediatype='application/ld+json', rendered_content=json.dumps({ - "@id": BLARG.aSearchFew, + "@id": "blarg:aSearchFew", "rdf:type": [ "trove:Cardsearch" ], @@ -122,7 +122,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): "trove:SearchResult" ], "trove:indexCard": { - "@id": BLARG.aCard, + "@id": "blarg:aCard", "dcterms:issued": [ { "@type": "xsd:date", @@ -136,7 +136,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): } ], "foaf:primaryTopic": [ - BLARG.anItem + "blarg:anItem" ], "rdf:type": [ "trove:Indexcard", @@ -158,7 +158,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): "trove:SearchResult" ], "trove:indexCard": { - "@id": BLARG.aCardd, + "@id": "blarg:aCardd", "dcterms:issued": [ { "@type": "xsd:date", @@ -172,7 +172,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): } ], "foaf:primaryTopic": [ - BLARG.anItemm + "blarg:anItemm" ], "rdf:type": [ "trove:Indexcard", @@ -194,7 +194,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): "trove:SearchResult" ], "trove:indexCard": { - "@id": BLARG.aCarddd, + "@id": "blarg:aCarddd", "dcterms:issued": [ { "@type": "xsd:date", @@ -208,7 +208,7 @@ class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): } ], "foaf:primaryTopic": [ - BLARG.anItemmm + "blarg:anItemmm" ], "rdf:type": [ "trove:Indexcard", diff --git a/tests/trove/render/test_simple_json_renderer.py b/tests/trove/render/test_simple_json_renderer.py index 2d85d5b9e..d9481e183 100644 --- a/tests/trove/render/test_simple_json_renderer.py +++ b/tests/trove/render/test_simple_json_renderer.py @@ -2,6 +2,7 @@ from trove.render.simple_json import TrovesearchSimpleJsonRenderer from trove.render._rendering import SimpleRendering +from trove.vocab.namespaces import BLARG from . import _base @@ -25,29 +26,29 @@ class TestSimpleJsonRenderer(_base.TrovesearchJsonRendererTests): rendered_content=json.dumps({ "data": [ { - "@id": "http://blarg.example/vocab/anItem", + "@id": BLARG.anItem, "title": "an item, yes", "foaf:primaryTopicOf": [ { - "@id": "http://blarg.example/vocab/aCard" + "@id": BLARG.aCard } ] }, { - "@id": "http://blarg.example/vocab/anItemm", + "@id": BLARG.anItemm, "title": "an itemm, yes", "foaf:primaryTopicOf": [ { - "@id": "http://blarg.example/vocab/aCardd" + "@id": BLARG.aCardd } ] }, { - "@id": "http://blarg.example/vocab/anItemmm", + "@id": BLARG.anItemmm, "title": "an itemmm, yes", "foaf:primaryTopicOf": [ { - "@id": "http://blarg.example/vocab/aCarddd" + "@id": BLARG.aCarddd } ] }
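[editor's note, not part of the patch: the expected-output changes in these renderer tests all lean on the `blarg` prefix being registered in `trove.vocab.namespaces.NAMESPACES_SHORTHAND` (see the namespaces.py hunk later in this patch), so that full IRIs in rendered metadata compact to prefixed names. A minimal sketch of the assumed behavior:]

```python
# a sketch, assuming BLARG and NAMESPACES_SHORTHAND as defined in trove/vocab/namespaces.py:
# IriNamespace attribute access builds a full iri; compact_iri shortens it to a prefixed name
from trove.vocab.namespaces import BLARG, NAMESPACES_SHORTHAND

assert BLARG.anItem == 'http://blarg.example/vocab/anItem'
assert NAMESPACES_SHORTHAND.compact_iri(BLARG.anItem) == 'blarg:anItem'
```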
diff --git a/tests/trove/render/test_turtle_renderer.py b/tests/trove/render/test_turtle_renderer.py index c4e44f31e..32f949278 100644 --- a/tests/trove/render/test_turtle_renderer.py +++ b/tests/trove/render/test_turtle_renderer.py @@ -17,6 +17,7 @@ class TestTurtleRenderer(_BaseTurtleRendererTest): 'simple_card': SimpleRendering( mediatype='text/turtle', rendered_content=''' +@prefix blarg: <http://blarg.example/vocab/> . @prefix dcat: <http://www.w3.org/ns/dcat#> . @prefix dcterms: <http://purl.org/dc/terms/> . @prefix foaf: <http://xmlns.com/foaf/0.1/> . @@ -24,10 +25,10 @@ class TestTurtleRenderer(_BaseTurtleRendererTest): @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . -<http://blarg.example/vocab/aCard> a dcat:CatalogRecord, trove:Indexcard ; +blarg:aCard a dcat:CatalogRecord, trove:Indexcard ; dcterms:issued "2024-01-01"^^xsd:date ; dcterms:modified "2024-01-01"^^xsd:date ; - foaf:primaryTopic <http://blarg.example/vocab/anItem> ; + foaf:primaryTopic blarg:anItem ; trove:focusIdentifier "http://blarg.example/vocab/anItem"^^rdf:string ; trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": \\"an item, yes\\"}"^^rdf:JSON . ''', @@ -56,16 +57,18 @@ class TestTurtleTrovesearchRenderer(_BaseTurtleRendererTest, _base.TrovesearchRe 'no_results': SimpleRendering( mediatype='text/turtle', rendered_content=''' +@prefix blarg: <http://blarg.example/vocab/> . @prefix trove: <https://share.osf.io/vocab/2023/trove/> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . -<http://blarg.example/vocab/aSearch> a trove:Cardsearch ; +blarg:aSearch a trove:Cardsearch ; trove:totalResultCount 0 . ''', ), 'few_results': SimpleRendering( mediatype='text/turtle', rendered_content=''' +@prefix blarg: <http://blarg.example/vocab/> . @prefix dcat: <http://www.w3.org/ns/dcat#> . @prefix dcterms: <http://purl.org/dc/terms/> . @prefix foaf: <http://xmlns.com/foaf/0.1/> . @@ -73,42 +76,42 @@ class TestTurtleTrovesearchRenderer(_BaseTurtleRendererTest, _base.TrovesearchRe @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . @prefix xsd: <http://www.w3.org/2001/XMLSchema#> . -<http://blarg.example/vocab/aSearchFew> a trove:Cardsearch ; +blarg:aSearchFew a trove:Cardsearch ; trove:searchResultPage [ a rdf:Seq ; rdf:_1 [ a trove:SearchResult ; - trove:indexCard <http://blarg.example/vocab/aCard> + trove:indexCard blarg:aCard ] ; rdf:_2 [ a trove:SearchResult ; - trove:indexCard <http://blarg.example/vocab/aCardd> + trove:indexCard blarg:aCardd ] ; rdf:_3 [ a trove:SearchResult ; - trove:indexCard <http://blarg.example/vocab/aCarddd> + trove:indexCard blarg:aCarddd ] ] ; trove:totalResultCount 3 . -<http://blarg.example/vocab/aCard> a dcat:CatalogRecord, trove:Indexcard ; +blarg:aCard a dcat:CatalogRecord, trove:Indexcard ; dcterms:issued "2024-01-01"^^xsd:date ; dcterms:modified "2024-01-01"^^xsd:date ; - foaf:primaryTopic <http://blarg.example/vocab/anItem> ; + foaf:primaryTopic blarg:anItem ; trove:focusIdentifier "http://blarg.example/vocab/anItem"^^rdf:string ; trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": \\"an item, yes\\"}"^^rdf:JSON . -<http://blarg.example/vocab/aCardd> a dcat:CatalogRecord, trove:Indexcard ; +blarg:aCardd a dcat:CatalogRecord, trove:Indexcard ; dcterms:issued "2024-02-02"^^xsd:date ; dcterms:modified "2024-02-02"^^xsd:date ; - foaf:primaryTopic <http://blarg.example/vocab/anItemm> ; + foaf:primaryTopic blarg:anItemm ; trove:focusIdentifier "http://blarg.example/vocab/anItemm"^^rdf:string ; trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemm\\", \\"title\\": \\"an itemm, yes\\"}"^^rdf:JSON . -<http://blarg.example/vocab/aCarddd> a dcat:CatalogRecord, trove:Indexcard ; +blarg:aCarddd a dcat:CatalogRecord, trove:Indexcard ; dcterms:issued "2024-03-03"^^xsd:date ; dcterms:modified "2024-03-03"^^xsd:date ; - foaf:primaryTopic <http://blarg.example/vocab/anItemmm> ; + foaf:primaryTopic blarg:anItemmm ; trove:focusIdentifier "http://blarg.example/vocab/anItemmm"^^rdf:string ; trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemmm\\", \\"title\\": \\"an itemmm, yes\\"}"^^rdf:JSON . 
''', diff --git a/tests/validation/test_changeset.py b/tests/validation/test_changeset.py deleted file mode 100644 index f5414f15b..000000000 --- a/tests/validation/test_changeset.py +++ /dev/null @@ -1,264 +0,0 @@ -import re -import pytest - -from django.core.exceptions import ValidationError - -from share.models.validators import JSONLDValidator - - -class TestJSONLDValidator: - CASES = [{ - 'out': "'@graph' is a required property at /", - 'in': {}, - }, { - 'out': "Additional properties are not allowed ('foo' was unexpected) at /", - 'in': {'foo': 'bar', '@graph': []} - }, { - 'out': "{} is not of type 'array' at /@graph", - 'in': { - '@graph': {} - } - }, { - 'out': "1 is not of type 'array' at /@graph", - 'in': { - '@graph': 1 - } - }, { - 'out': "1.0 is not of type 'array' at /@graph", - 'in': { - '@graph': 1.0 - } - }, { - 'out': "None is not of type 'array' at /@graph", - 'in': { - '@graph': None - } - }, { - 'out': "'foo' is not of type 'array' at /@graph", - 'in': { - '@graph': 'foo' - } - }, { - 'out': "@graph may not be empty", - 'in': { - '@graph': [] - } - }, { - 'out': "'@id' is a required property at /@graph/0", - 'in': { - '@graph': [{'@type': ''}] - } - }, { - 'out': "1 is not of type 'object' at /@graph/0", - 'in': { - '@graph': [1] - } - }, { - 'out': "None is not of type 'object' at /@graph/1", - 'in': { - '@graph': [{'@id': '', '@type': ''}, None] - } - }, { - 'out': "'@type' is a required property at /@graph/0", - 'in': { - '@graph': [{'@id': ''}] - } - }, { - 'out': "'Dinosaurs' is not a valid type", - 'in': { - '@graph': [{'@id': '', '@type': 'Dinosaurs'}] - } - }, { - 'out': re.compile(r"'Tag' is not one of \[('\w+', )+'\w+'\] at /@graph/0"), - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'throughtags', - 'tag': {'@id': '_:789', '@type': 'Tag'}, - 'creative_work': {'@id': '_:456', '@type': 'Tag'}, - }] - } - }, { - 'out': 'Unresolved references [{"@id": "_:456", "@type": "preprint"}, {"@id": "_:789", "@type": "tag"}]', - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'throughtags', - 'tag': {'@id': '_:789', '@type': 'Tag'}, - 'creative_work': {'@id': '_:456', '@type': 'Preprint'}, - }] - } - }, { - 'out': "'creative_work' is a required property at /@graph/0", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'throughtags', - 'tag': {'@id': '_:789', '@type': 'Tag'}, - }] - } - }, { - 'out': "Additional properties are not allowed ('shouldnt' was unexpected) at /@graph/0", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'throughtags', - 'shouldnt': 'behere', - 'tag': {'@id': 'id', '@type': 'tag'}, - 'creative_work': {'@id': 'id', '@type': 'creativework'}, - }] - } - }, { - 'out': re.compile(r"^Additional properties are not allowed \('(shouldnt|pls)', '(shouldnt|pls)' were unexpected\) at /@graph/0$"), - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'throughtags', - 'pls': 'toleeb', - 'shouldnt': 'behere', - 'tag': {'@id': 'id', '@type': 'tag'}, - 'creative_work': {'@id': 'id', '@type': 'creativework'}, - }] - } - }, { - 'out': re.compile("{.+} is not valid under any of the given schemas at /@graph/0/tag$"), - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'throughtags', - 'creative_work': {'@id': '_:123', '@type': 'foo'}, - 'tag': {'@id': '_:123', '@type': 'foo', 'il': 'legal'} - }] - } - }, { - 'out': "'extra should be a dict' is not of type 'object' at /@graph/0/extra", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'Tag', - 'name': 'A Tag', - 'extra': 'extra should be a dict' - }] - } - }, { - 'out': None, - 'in': { - 
'@graph': [{ - '@id': '_:123', - '@type': 'Tag', - 'name': 'A Tag', - 'extra': { - 'with some': 'extra data' - } - }] - } - }, { - 'out': "1 is not of type 'string' at /@graph/0/name", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'Tag', - 'name': 1 - }] - } - }, { - 'out': None, - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'CreativeWork', - 'title': 'Some title', - 'description': 'description', - 'tags': [{ - '@id': '_:456', - '@type': 'throughtags' - }] - }, { - '@id': '_:456', - '@type': 'throughtags', - 'tag': {'@id': '_:789', '@type': 'tag'}, - 'creative_work': {'@id': '_:123', '@type': 'creativework'}, - }, { - '@id': '_:789', - '@type': 'tag', - 'name': 'New Tag', - }] - } - }, { - 'out': "'throughtugs' is not one of ['THROUGHTAGS', 'ThroughTags', 'throughtags'] at /@graph/0/tags/0/@type", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'CreativeWork', - 'title': 'Some title', - 'description': 'description', - 'tags': [{ - '@id': '_:456', - '@type': 'throughtugs' - }] - }, { - '@id': '_:456', - '@type': 'throughtags', - 'tag': {'@id': '_:789', '@type': 'tag'}, - 'creative_work': {'@id': '_:123', '@type': 'creativework'}, - }, { - '@id': '_:789', - '@type': 'tag', - 'name': 'New Tag', - }] - } - }, { - 'out': "'giraffe' is not a 'uri' at /@graph/0/uri", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'WorkIdentifier', - 'uri': 'giraffe', - 'creative_work': {'@id': '_:234', '@type': 'creativework'} - }, { - '@id': '_:234', - '@type': 'creativework', - 'title': 'Giraffes are tall' - }] - } - }, { - 'out': "'creative_work' is a required property at /@graph/0", - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'workidentifier', - 'uri': 'https://share.osf.io/foo', - }] - } - }, { - 'out': None, - 'in': { - '@graph': [{ - '@id': '_:123', - '@type': 'WorkIdentifier', - 'uri': 'https://share.osf.io/foo', - 'creative_work': {'@id': '_:234', '@type': 'creativework'} - }, { - '@id': '_:234', - '@type': 'creativework', - 'title': 'Giraffes are tall' - }] - } - }] - - @pytest.mark.parametrize('data, message', [(case['in'], case['out']) for case in CASES]) - def test_validator(self, data, message): - try: - JSONLDValidator()(data) - except ValidationError as e: - assert message is not None, 'Raised "{}"'.format(e.args[0]) - if isinstance(message, str): - assert message == e.args[0] - else: - assert message.match(e.args[0]) is not None - else: - assert message is None, 'No exception was raised. 
Expecting {}'.format(message) - - # @pytest.mark.parametrize('data, message', [(case['in'], case['out']) for case in CASES]) - # def test_benchmark_validator(self, benchmark, data, message): - # benchmark(self.test_validator, data, message) diff --git a/trove/derive/sharev2_elastic.py b/trove/derive/sharev2_elastic.py index 2b29376e8..c00f45925 100644 --- a/trove/derive/sharev2_elastic.py +++ b/trove/derive/sharev2_elastic.py @@ -18,10 +18,16 @@ from ._base import IndexcardDeriver - -# the sharev2 type hierarchy is limited but nested +# sharev2 types, grouped from most- to least-specific +# (for back-compat, forced to choose only one type) _SHAREv2_TYPES_BY_SPECIFICITY = ( { # most specific + # AgentWorkRelation subtypes + SHAREv2.PrincipalInvestigator, + SHAREv2.PrincipalInvestigatorContact, + }, + { + # CreativeWork subtypes SHAREv2.Article, SHAREv2.Book, SHAREv2.ConferencePaper, @@ -32,8 +38,15 @@ SHAREv2.Report, SHAREv2.Thesis, SHAREv2.WorkingPaper, + # Agent subtypes + SHAREv2.Consortium, + SHAREv2.Department, + SHAREv2.Institution, + # AgentWorkRelation subtypes + SHAREv2.Creator, }, - { # middling specific + { + # CreativeWork subtypes SHAREv2.DataSet, SHAREv2.Patent, SHAREv2.Poster, @@ -42,9 +55,41 @@ SHAREv2.Repository, SHAREv2.Retraction, SHAREv2.Software, + # Agent subtypes + SHAREv2.Organization, + SHAREv2.Person, + # AgentWorkRelation subtypes + SHAREv2.Contributor, + SHAREv2.Funder, + SHAREv2.Host, + SHAREv2.Publisher, + # AgentRelation subtypes + SHAREv2.IsAffiliatedWith, + SHAREv2.IsEmployedBy, + SHAREv2.IsMemberOf, + # WorkRelation subtypes + SHAREv2.Cites, + SHAREv2.Compiles, + SHAREv2.Corrects, + SHAREv2.Discusses, + SHAREv2.Disputes, + SHAREv2.Documents, + SHAREv2.Extends, + SHAREv2.IsDerivedFrom, + SHAREv2.IsPartOf, + SHAREv2.IsSupplementTo, + SHAREv2.References, + SHAREv2.RepliesTo, + SHAREv2.Retracts, + SHAREv2.Reviews, + SHAREv2.UsesDataFrom, }, { # least specific SHAREv2.CreativeWork, + SHAREv2.Agent, + SHAREv2.AgentWorkRelation, + SHAREv2.AgentRelation, + SHAREv2.WorkRelation, }, ) diff --git a/trove/digestive_tract.py b/trove/digestive_tract.py index 3099a585d..e409eceb8 100644 --- a/trove/digestive_tract.py +++ b/trove/digestive_tract.py @@ -39,10 +39,10 @@ def swallow( from_user: share_db.ShareUser, record: str, record_identifier: str, - record_mediatype: str | None, # passing None indicates sharev2 backcompat + record_mediatype: str, focus_iri: str, datestamp: datetime.datetime | None = None, # default "now" - expiration_date: datetime.date | None = None, + expiration_date: datetime.date | None = None, # default "never" urgent: bool = False, is_supplementary: bool = False, ): diff --git a/trove/render/jsonapi.py b/trove/render/jsonapi.py index 1f60e0851..8e9fc2bcb 100644 --- a/trove/render/jsonapi.py +++ b/trove/render/jsonapi.py @@ -24,6 +24,7 @@ RDF, TROVE, XSD, + NAMESPACES_SHORTHAND, ) from trove.vocab.trove import ( trove_indexcard_namespace, @@ -301,7 +302,7 @@ def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict | try: # maybe it's a jsonapi resource return self.render_identifier_object(rdfobject) except Exception: - return rdfobject + return NAMESPACES_SHORTHAND.compact_iri(rdfobject) elif isinstance(rdfobject, (float, int)): return rdfobject elif isinstance(rdfobject, datetime.date): diff --git a/trove/vocab/namespaces.py b/trove/vocab/namespaces.py index 73c7438b2..f61f176c4 100644 --- a/trove/vocab/namespaces.py +++ b/trove/vocab/namespaces.py @@ -49,12 +49,18 @@ # for identifying jsonapi concepts with linked anchors on the 
jsonapi spec (probably fine) JSONAPI = rdf.IriNamespace('https://jsonapi.org/format/1.1/#') - -NAMESPACES_SHORTHAND = DEFAULT_SHORTHAND.with_update({ +_NAMESPACES_BY_PREFIX = { 'trove': TROVE, 'sharev2': SHAREv2, 'osf': OSFMAP, 'jsonapi': JSONAPI, 'oai': OAI, 'oai_dc': OAI_DC, -}) +} + +if __debug__: # blarg: a nothing namespace for examples and testing + BLARG = rdf.IriNamespace('http://blarg.example/vocab/') + _NAMESPACES_BY_PREFIX['blarg'] = BLARG + + +NAMESPACES_SHORTHAND = DEFAULT_SHORTHAND.with_update(_NAMESPACES_BY_PREFIX) From 6092fa2c9baf0e75a36f70b508ce26949aec605f Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 7 Mar 2025 11:23:46 -0500 Subject: [PATCH 12/46] tests.trove.factories --- tests/share/search/index_strategy/_common_trovesearch_tests.py | 2 +- tests/share/search/index_strategy/test_sharev2_elastic5.py | 2 +- tests/share/search/index_strategy/test_sharev2_elastic8.py | 2 +- tests/{share/search/_util.py => trove/factories.py} | 0 4 files changed, 3 insertions(+), 3 deletions(-) rename tests/{share/search/_util.py => trove/factories.py} (100%) diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index 5a4c7d349..6d6eab52b 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -11,7 +11,7 @@ from trove.trovesearch.search_params import CardsearchParams, ValuesearchParams from trove.trovesearch.search_handle import PropertypathUsage from trove.vocab.namespaces import RDFS, RDF, DCTERMS, OWL, FOAF, DCAT, TROVE, BLARG -from tests.share.search._util import ( +from tests.trove.factories import ( create_indexcard, update_indexcard_content, create_supplement, diff --git a/tests/share/search/index_strategy/test_sharev2_elastic5.py b/tests/share/search/index_strategy/test_sharev2_elastic5.py index 8c9e15a8c..016330c84 100644 --- a/tests/share/search/index_strategy/test_sharev2_elastic5.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic5.py @@ -5,7 +5,7 @@ from share.search import messages from share.search.index_strategy.sharev2_elastic5 import Sharev2Elastic5IndexStrategy -from tests.share.search._util import create_indexcard +from tests.trove.factories import create_indexcard from trove.vocab.namespaces import DCTERMS, SHAREv2, RDF, BLARG from ._with_real_services import RealElasticTestCase diff --git a/tests/share/search/index_strategy/test_sharev2_elastic8.py b/tests/share/search/index_strategy/test_sharev2_elastic8.py index 5e39ed360..fb3a1a5c9 100644 --- a/tests/share/search/index_strategy/test_sharev2_elastic8.py +++ b/tests/share/search/index_strategy/test_sharev2_elastic8.py @@ -3,7 +3,7 @@ from share.search import messages from share.search.index_strategy.sharev2_elastic8 import Sharev2Elastic8IndexStrategy from trove.vocab.namespaces import DCTERMS, SHAREv2, RDF, BLARG -from tests.share.search._util import create_indexcard +from tests.trove.factories import create_indexcard from ._with_real_services import RealElasticTestCase diff --git a/tests/share/search/_util.py b/tests/trove/factories.py similarity index 100% rename from tests/share/search/_util.py rename to tests/trove/factories.py From 7dfe2b76198d64ea48f5b0b9de9c130a2c0bfaaf Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 7 Mar 2025 13:25:29 -0500 Subject: [PATCH 13/46] fix: randomly failing test --- tests/share/search/test_daemon.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/share/search/test_daemon.py b/tests/share/search/test_daemon.py index 6bca002af..ba0842a3d 100644 --- a/tests/share/search/test_daemon.py +++ b/tests/share/search/test_daemon.py @@ -252,7 +252,7 @@ def pls_handle_messages_chunk(self, messages_chunk): assert MINIMUM_BACKOFF_FACTOR <= _backoff_factor <= MAXIMUM_BACKOFF_FACTOR # but now the 429 errors stop index_strategy._pls_429 = False - assert index_strategy.finished_chunk.wait(timeout=10), ( + assert index_strategy.finished_chunk.wait(timeout=_backoff_timeouts[-1]), ( 'should have finished a chunk by now' ) for message in message_list: From 3a016603d3a32d22be46cf2a1af33a46a95e0213 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 7 Mar 2025 11:31:13 -0500 Subject: [PATCH 14/46] update github actions workflow - bump actions/setup-python to v5 - bump actions/checkout to v4 - update postgres version(s) - remove pin on coverallsapp/github-action, update to latest v2 - work around coveralls error; build and specify xml explicitly - omit `tests/` from coverage calculation --- .github/workflows/run_tests.yml | 14 ++++++++------ pyproject.toml | 3 +++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index baa4c896c..e0810b7ba 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -14,7 +14,7 @@ jobs: fail-fast: false matrix: python-version: ['3.10'] # TODO: 3.11, 3.12 - postgres-version: ['10', '15'] + postgres-version: ['15', '17'] runs-on: ubuntu-latest services: postgres: @@ -49,13 +49,13 @@ jobs: - 5672:5672 - 15672:15672 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: install non-py dependencies run: sudo apt-get update && sudo apt-get install -y libxml2-dev libxslt1-dev libpq-dev git gcc - name: set up python${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: pip @@ -74,14 +74,16 @@ jobs: - name: run tests run: | - coverage run -m pytest -x --create-db + coverage run -m pytest --create-db coverage run --append -m behave + coverage xml -o _shtrove_coverage.xml env: DATABASE_PASSWORD: postgres ELASTICSEARCH8_URL: http://localhost:9208/ # ELASTICSEARCH5_URL: http://localhost:9205/ - name: coveralls - uses: coverallsapp/github-action@v2.3.0 + uses: coverallsapp/github-action@v2 with: - coverage-reporter-version: v0.6.9 # avoid broken v0.6.10 -- see https://github.com/coverallsapp/github-action/issues/205 + file: _shtrove_coverage.xml + fail-on-error: false diff --git a/pyproject.toml b/pyproject.toml index 638dd9c54..35074d936 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,6 @@ [build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" + +[tool.coverage.run] +omit = ["tests/*"] From 58199b8776c444d2abfdab7790d90a6e9fbcad66 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 7 Mar 2025 13:47:08 -0500 Subject: [PATCH 15/46] remove 'behave' run --- .github/workflows/run_tests.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index e0810b7ba..7c4b7d429 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -75,7 +75,6 @@ jobs: - name: run tests run: | coverage run -m pytest --create-db - coverage run --append -m behave coverage xml -o _shtrove_coverage.xml env: DATABASE_PASSWORD: postgres From 78925f396c321ee70457c02d9c5ece946885ff39 Mon Sep 17 00:00:00 
2001 From: abram axel booth Date: Tue, 11 Mar 2025 15:35:06 -0400 Subject: [PATCH 16/46] chore: remove ember-share handling [ENG-7392] --- .docker-compose.env | 1 - api/views/feeds.py | 10 ++++++++-- docker-compose.yml | 12 ------------ project/settings.py | 2 -- project/urls.py | 6 ------ 5 files changed, 8 insertions(+), 23 deletions(-) diff --git a/.docker-compose.env b/.docker-compose.env index a08b2f39f..4154eca9f 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -2,7 +2,6 @@ CELERY_BROKER_URL=amqp://guest:guest@rabbitmq:5672 DATABASE_HOST=postgres ELASTICSEARCH8_URL=https://elastic8:9200/ # ELASTICSEARCH5_URL=http://elasticsearch:9200/ -EMBER_SHARE_URL=http://frontend:4200 LOGIN_REDIRECT_URL=http://localhost:8003/ OSF_API_URL=http://localhost:8000 RABBITMQ_HOST=rabbitmq diff --git a/api/views/feeds.py b/api/views/feeds.py index f2a74ecd6..5a751e501 100644 --- a/api/views/feeds.py +++ b/api/views/feeds.py @@ -85,8 +85,14 @@ def item_description(self, item): return prepare_string(item.get('description', 'No description provided.')) def item_link(self, item): - # Link to SHARE curate page - return '{}{}/{}'.format(settings.SHARE_WEB_URL, item.get('type').replace(' ', ''), item.get('id')) + def _sort_key(identifier: str): + # choose the shortest by length, breaking ties with unicode sort order + return (len(identifier), identifier) + return min( + item.get('identifiers', ()), + default='', + key=_sort_key, + ) def item_author_name(self, item): contributor_list = item.get('lists', []).get('contributors', []) diff --git a/docker-compose.yml b/docker-compose.yml index 413866c6d..469fe2aaf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -171,17 +171,6 @@ services: networks: - share_network - frontend: - image: quay.io/centerforopenscience/share-web:develop-local - command: yarn start - ports: - - 4204:4200 - volumes: - - share_dist_vol:/code/dist - stdin_open: true - networks: - - share_network - worker: image: quay.io/centerforopenscience/share:develop command: @@ -214,7 +203,6 @@ services: - postgres - rabbitmq - elastic8 - - frontend volumes: - ./:/code:cached - share_requirements_vol:/usr/local/lib/python3.10 diff --git a/project/settings.py b/project/settings.py index 7ccfc628a..17fc1b985 100644 --- a/project/settings.py +++ b/project/settings.py @@ -447,8 +447,6 @@ def route_urgent_task(name, args, kwargs, options, task=None, **kw): SITE_ID = 1 PUBLIC_SENTRY_DSN = os.environ.get('PUBLIC_SENTRY_DSN') -EMBER_SHARE_PREFIX = os.environ.get('EMBER_SHARE_PREFIX', 'share' if DEBUG else '') -EMBER_SHARE_URL = os.environ.get('EMBER_SHARE_URL', 'http://localhost:4200').rstrip('/') + '/' SHARE_WEB_URL = os.environ.get('SHARE_WEB_URL', 'http://localhost:8003').rstrip('/') + '/' SHARE_USER_AGENT = os.environ.get('SHARE_USER_AGENT', 'SHAREbot/{} (+{})'.format(VERSION, SHARE_WEB_URL)) diff --git a/project/urls.py b/project/urls.py index 54aa68120..3a4a48aa0 100644 --- a/project/urls.py +++ b/project/urls.py @@ -4,7 +4,6 @@ from django.conf import settings from django.views.generic.base import RedirectView from django.contrib.staticfiles.storage import staticfiles_storage -from revproxy.views import ProxyView from osf_oauth2_adapter import views as osf_oauth2_adapter_views @@ -28,7 +27,6 @@ url(r'^accounts/social/login/cancelled/', osf_oauth2_adapter_views.login_errored_cancelled), url(r'^accounts/social/login/error/', osf_oauth2_adapter_views.login_errored_cancelled), url(r'^accounts/', include('allauth.urls')), - url(r'^$', 
RedirectView.as_view(url='{}/'.format(settings.EMBER_SHARE_PREFIX))), url(r'^favicon.ico$', RedirectView.as_view( url=staticfiles_storage.url('favicon.ico'), permanent=False @@ -37,10 +35,6 @@ ] if settings.DEBUG: - urlpatterns += [ - url(r'^(?P{}/.*)$'.format(settings.EMBER_SHARE_PREFIX), ProxyView.as_view(upstream=settings.EMBER_SHARE_URL)), - ] - if 'debug_toolbar' in settings.INSTALLED_APPS: import debug_toolbar urlpatterns += [ From 0edfb795b395b82b784b5d46160d49622a52edcd Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 11 Mar 2025 16:15:51 -0400 Subject: [PATCH 17/46] trim requirements --- api/views/feeds.py | 28 ++--------- project/settings.py | 1 - requirements.txt | 31 ++++-------- share/admin/jobs.py | 60 ------------------------ share/models/banner.py | 16 +++---- share/models/fields.py | 15 +++--- share/oaipmh/indexcard_repository.py | 10 ++-- share/oaipmh/util.py | 4 +- tests/share/test_oaipmh_trove.py | 15 +++--- tests/share/util/test_encrypted_field.py | 31 ------------ 10 files changed, 42 insertions(+), 169 deletions(-) delete mode 100644 share/admin/jobs.py delete mode 100644 tests/share/util/test_encrypted_field.py diff --git a/api/views/feeds.py b/api/views/feeds.py index 5a751e501..f0adb53ad 100644 --- a/api/views/feeds.py +++ b/api/views/feeds.py @@ -1,13 +1,11 @@ +import datetime from xml.sax.saxutils import unescape import json import logging from django.contrib.syndication.views import Feed -from django.http import HttpResponseGone from django.utils.feedgenerator import Atom1Feed from django.conf import settings -from furl import furl -import pendulum import sentry_sdk from share.search import index_strategy @@ -110,10 +108,10 @@ def item_author_name(self, item): return prepare_string('{}{}'.format(author_name, ' et al.' 
if len(authors) > 1 else '')) def item_pubdate(self, item): - return pendulum.parse(item.get('date_published') or item.get('date_created')) + return datetime.date.fromisoformat(item.get('date_published') or item.get('date_created')) def item_updateddate(self, item): - return pendulum.parse(item.get(self._order)) + return datetime.date.fromisoformat(item.get(self._order)) def item_categories(self, item): categories = item.get('subjects', []) @@ -125,23 +123,3 @@ class MetadataRecordsAtom(MetadataRecordsRSS): feed_type = Atom1Feed subtitle = MetadataRecordsRSS.description link = '{}api/v2/feeds/atom/'.format(settings.SHARE_WEB_URL) - - -class LegacyCreativeWorksRSS(MetadataRecordsRSS): - link = '{}api/v2/rss/'.format(settings.SHARE_WEB_URL) - - def __call__(self, request, *args, **kwargs): - correct_url = furl(MetadataRecordsRSS.link).set(query_params=request.GET) - return HttpResponseGone( - f'This feed has been removed -- please update to use {correct_url}' - ) - - -class LegacyCreativeWorksAtom(MetadataRecordsAtom): - link = '{}api/v2/atom/'.format(settings.SHARE_WEB_URL) - - def __call__(self, request, *args, **kwargs): - correct_url = furl(MetadataRecordsAtom.link).set(query_params=request.GET) - return HttpResponseGone( - f'This feed has been removed -- please update to use {correct_url}' - ) diff --git a/project/settings.py b/project/settings.py index 17fc1b985..bad3ba1f5 100644 --- a/project/settings.py +++ b/project/settings.py @@ -81,7 +81,6 @@ def split(string, delim): 'rest_framework', 'corsheaders', 'revproxy', - 'prettyjson', 'allauth', 'allauth.account', diff --git a/requirements.txt b/requirements.txt index bee50f6de..45cb24e95 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,46 +1,33 @@ bcrypt==3.2.0 # Apache 2.0 -beautifulsoup4==4.9.3 # MIT celery==5.4.0 # BSD 3 Clause -colorlog==5.0.1 # MIT django-allauth==0.63.6 # MIT django-celery-beat==2.6.0 # BSD 3 Clause django-cors-headers==3.7.0 # MIT django-extensions==3.1.3 # MIT django-filter==2.4.0 # BSD -django-model-utils==4.1.1 # BSD django-oauth-toolkit==1.7.1 # BSD -django-prettyjson==0.4.1 # BSD 3 Clause -django-revproxy==0.10.0 # MPL 2.0 django==3.2.25 # BSD 3 Clause djangorestframework==3.12.4 # BSD -docopt==0.6.2 # MIT -elasticsearch5==5.5.6 # Apache 2.0 elasticsearch8==8.5.2 # Apache 2.0 -furl==2.1.2 # None -gevent==22.10.2 # MIT -jsonschema==3.2.0 # MIT lxml==4.9.1 # BSD kombu==5.3.7 # BSD 3 Clause markdown2==2.4.10 # MIT -nameparser==1.0.6 # LGPL -networkx==2.5.1 # BSD newrelic==8.4.0 # newrelic APM agent, Custom License -pendulum==2.1.2 # MIT -pillow==9.3.0 # PIL Software License -psycogreen==1.0.2 # BSD psycopg2==2.9.5 # LGPL with exceptions or ZPL -python-dateutil==2.9.0 # Apache 2.0 -PyJWE==1.0.0 # Apache 2.0 rdflib==7.0.0 -pyyaml==6.0 # MIT requests==2.25.1 # Apache 2.0 +primitive_metadata==0.2025.0 + +# relevant only in deployment: sentry-sdk[django]==1.22.2 # MIT -stevedore==3.3.0 # Apache 2.0 -xmltodict==0.12.0 # MIT +gevent==22.10.2 # MIT +psycogreen==1.0.2 # BSD +# to be removed in future work: +docopt==0.6.2 # MIT +elasticsearch5==5.5.6 # Apache 2.0 +PyJWE==1.0.0 # Apache 2.0 # djangorestframework-jsonapi==4.2.1 # MIT # Allows custom-rendered IDs, hiding null values, and including data in error responses git+https://github.com/cos-forks/django-rest-framework-json-api.git@v4.2.1+cos0 - -git+https://github.com/aaxelb/primitive_metadata.git@0.2024.14 diff --git a/share/admin/jobs.py b/share/admin/jobs.py deleted file mode 100644 index f4e5d4431..000000000 --- a/share/admin/jobs.py +++ /dev/null @@ 
-1,60 +0,0 @@ -from furl import furl - -from django.contrib import admin -from django.urls import reverse -from django.utils.html import format_html - -from share.admin.util import TimeLimitedPaginator, linked_fk, SourceConfigFilter -from share.models.jobs import AbstractBaseJob - - -STATUS_COLORS = { - AbstractBaseJob.STATUS.created: 'blue', - AbstractBaseJob.STATUS.started: 'cyan', - AbstractBaseJob.STATUS.failed: 'red', - AbstractBaseJob.STATUS.succeeded: 'green', - AbstractBaseJob.STATUS.rescheduled: 'goldenrod', - AbstractBaseJob.STATUS.forced: 'maroon', - AbstractBaseJob.STATUS.skipped: 'orange', - AbstractBaseJob.STATUS.retried: 'darkseagreen', - AbstractBaseJob.STATUS.cancelled: 'grey', -} - - -@linked_fk('source_config') -class BaseJobAdmin(admin.ModelAdmin): - list_filter = ('status', SourceConfigFilter, ) - list_select_related = ('source_config', ) - actions = ('restart_tasks', ) - readonly_fields = ('task_id', 'error_type', 'error_message', 'error_context', 'completions', 'date_started', ) - show_full_result_count = False - paginator = TimeLimitedPaginator - - def status_(self, obj): - return format_html( - '{}', - STATUS_COLORS[obj.status], - AbstractBaseJob.STATUS[obj.status].title(), - ) - - def source_config_(self, obj): - return obj.source_config.label - - -class HarvestJobAdmin(BaseJobAdmin): - list_display = ('id', 'source_config_', 'status_', 'start_date_', 'end_date_', 'error_type', 'share_version', 'harvest_job_actions', ) - readonly_fields = BaseJobAdmin.readonly_fields + ('start_date', 'end_date', 'harvest_job_actions',) - - def start_date_(self, obj): - return obj.start_date.isoformat() - - def end_date_(self, obj): - return obj.end_date.isoformat() - - def harvest_job_actions(self, obj): - url = furl(reverse('admin:source-config-harvest', args=[obj.source_config_id])) - url.args['start'] = self.start_date_(obj) - url.args['end'] = self.end_date_(obj) - url.args['superfluous'] = True - return format_html('Restart', url.url) - harvest_job_actions.short_description = 'Actions' diff --git a/share/models/banner.py b/share/models/banner.py index ce537fa2a..7b33babf3 100644 --- a/share/models/banner.py +++ b/share/models/banner.py @@ -1,5 +1,3 @@ -from model_utils import Choices - from django.conf import settings from django.db import models from django.utils.translation import gettext as _ @@ -8,12 +6,12 @@ class SiteBanner(models.Model): - COLOR = Choices( - (0, 'success', _('success')), - (1, 'info', _('info')), - (2, 'warning', _('warning')), - (3, 'danger', _('danger')) - ) + COLOR = [ + (0, _('success')), + (1, _('info')), + (2, _('warning')), + (3, _('danger')) + ] class JSONAPIMeta(util.BaseJSONAPIMeta): pass @@ -22,7 +20,7 @@ class JSONAPIMeta(util.BaseJSONAPIMeta): title = models.CharField(max_length=300) description = models.TextField(blank=True) - color = models.IntegerField(choices=COLOR, default=COLOR.info) + color = models.IntegerField(choices=COLOR, default=1) icon = models.CharField(blank=True, max_length=31, default='exclamation') created_at = models.DateTimeField(auto_now_add=True) diff --git a/share/models/fields.py b/share/models/fields.py index 27ff7ec38..fa85c9f9b 100644 --- a/share/models/fields.py +++ b/share/models/fields.py @@ -1,8 +1,7 @@ -import datetime as dt +import datetime import json from decimal import Decimal -from dateutil import parser import jwe from django import forms @@ -15,11 +14,11 @@ class DateTimeAwareJSONEncoder(DjangoJSONEncoder): def default(self, o): - if isinstance(o, dt.datetime): + if isinstance(o, datetime.datetime): 
return dict(type='encoded_datetime', value=o.isoformat()) - elif isinstance(o, dt.date): + elif isinstance(o, datetime.date): return dict(type='encoded_date', value=o.isoformat()) - elif isinstance(o, dt.time): + elif isinstance(o, datetime.time): return dict(type='encoded_time', value=o.isoformat()) elif isinstance(o, Decimal): return dict(type='encoded_decimal', value=str(o)) @@ -29,11 +28,11 @@ def default(self, o): def decode_datetime_object(json_object): if set(json_object.keys()) == {'type', 'value'}: if json_object['type'] == 'encoded_datetime': - return parser.parse(json_object['value']) + return datetime.datetime.fromisoformat(json_object['value']) if json_object['type'] == 'encoded_date': - return parser.parse(json_object['value']).date() + return datetime.datetime.fromisoformat(json_object['value']).date() if json_object['type'] == 'encoded_time': - return parser.parse(json_object['value']).time() + return datetime.datetime.fromisoformat(json_object['value']).time() if json_object['type'] == 'encoded_decimal': return Decimal(json_object['value']) return json_object diff --git a/share/oaipmh/indexcard_repository.py b/share/oaipmh/indexcard_repository.py index cf378bc7f..cd760dd47 100644 --- a/share/oaipmh/indexcard_repository.py +++ b/share/oaipmh/indexcard_repository.py @@ -1,6 +1,6 @@ +import datetime import uuid -import dateutil from django.core.exceptions import ValidationError as DjangoValidationError from django.db.models import OuterRef, Subquery, F @@ -205,7 +205,7 @@ def _get_indexcard_page_queryset(self, kwargs, catch=True, last_id=None): ) if 'from' in kwargs: try: - _from = dateutil.parser.parse(kwargs['from']) + _from = datetime.datetime.fromisoformat(kwargs['from']) except ValueError: if not catch: raise @@ -216,7 +216,7 @@ def _get_indexcard_page_queryset(self, kwargs, catch=True, last_id=None): ) if 'until' in kwargs: try: - _until = dateutil.parser.parse(kwargs['until']) + _until = datetime.datetime.fromisoformat(kwargs['until']) except ValueError: if not catch: raise @@ -290,12 +290,12 @@ def _get_resumption_token(self, kwargs, last_id): _until = None if 'from' in kwargs: try: - _from = dateutil.parser.parse(kwargs['from']) + _from = datetime.datetime.fromisoformat(kwargs['from']) except ValueError: self.errors.append(oai_errors.BadArgument('Invalid value for', 'from')) if 'until' in kwargs: try: - _until = dateutil.parser.parse(kwargs['until']) + _until = datetime.datetime.fromisoformat(kwargs['until']) except ValueError: self.errors.append(oai_errors.BadArgument('Invalid value for', 'until')) _set_spec = kwargs.get('set', '') diff --git a/share/oaipmh/util.py b/share/oaipmh/util.py index 6e13d9fbb..ea0248e8c 100644 --- a/share/oaipmh/util.py +++ b/share/oaipmh/util.py @@ -1,4 +1,4 @@ -from dateutil import parser +import datetime from lxml import etree from primitive_metadata import primitive_rdf @@ -12,7 +12,7 @@ def format_datetime(dt): if isinstance(dt, primitive_rdf.Literal): dt = dt.unicode_value if isinstance(dt, str): - dt = parser.isoparse(dt) + dt = datetime.datetime.fromisoformat(dt) return dt.strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/tests/share/test_oaipmh_trove.py b/tests/share/test_oaipmh_trove.py index 0f7c266f6..b8bed7421 100644 --- a/tests/share/test_oaipmh_trove.py +++ b/tests/share/test_oaipmh_trove.py @@ -1,10 +1,10 @@ +import datetime import math import random import uuid from django.test.client import Client from lxml import etree -import pendulum import pytest from share import models as share_db @@ -192,12 +192,15 @@ def 
test_lists(self, oai_indexcards, monkeypatch): self._test_filter_set(oai_indexcards, request_method, verb, page_size) def _test_filter_date(self, oai_indexcards, request_method, verb, page_size): + _today = datetime.datetime.now() + _yesterday = _today - datetime.timedelta(days=1) + _tomorrow = _today + datetime.timedelta(days=1) for from_date, to_date, expected_count in [ - (pendulum.now().subtract(days=1), None, 17), - (None, pendulum.now().subtract(days=1), 0), - (pendulum.now().add(days=1), None, 0), - (None, pendulum.now().add(days=1), 17), - (pendulum.now().subtract(days=1), pendulum.now().add(days=1), 17), + (_yesterday, None, 17), + (None, _yesterday, 0), + (_tomorrow, None, 0), + (None, _tomorrow, 17), + (_yesterday, _tomorrow, 17), ]: params = {} if from_date: diff --git a/tests/share/util/test_encrypted_field.py b/tests/share/util/test_encrypted_field.py deleted file mode 100644 index 543d85cc9..000000000 --- a/tests/share/util/test_encrypted_field.py +++ /dev/null @@ -1,31 +0,0 @@ -import pytest - -from share.models.fields import EncryptedJSONField - - -# TODO: remove after migrations have been fully squashed -class TestEncryptedJsonField: - - @pytest.fixture - def field(self): - return EncryptedJSONField(null=True, blank=True) - - @pytest.mark.parametrize('input_text, output_text, isempty', [ - (['atom', {'elements': ['hydrogen', 'oxygen', 1.0, 2]}], ['atom', {'elements': ['hydrogen', 'oxygen', 1.0, 2]}], False), - ({'msg': u'hello'}, {'msg': u'hello'}, False), - ({"model": u'찦차КЛМНО💁◕‿◕。)╱i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤  ǝɹol', "type": 'XE'}, {"model": u'찦차КЛМНО💁◕‿◕。)╱i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤  ǝɹol', "type": 'XE'}, False), - ({}, None, True), - ('', None, True), - ([], None, True), - (set(), None, True) - ]) - def test_encrypt_and_decrypt(self, field, input_text, output_text, isempty): - my_value_encrypted = field.get_db_prep_value(input_text) - - if isempty: - assert my_value_encrypted is None - else: - assert isinstance(my_value_encrypted, bytes) - - my_value_decrypted = field.from_db_value(my_value_encrypted, None, None) - assert my_value_decrypted == output_text From 0ab0a9a173a876996e12f21cfd09f94e3ba246e7 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 12 Mar 2025 10:46:59 -0400 Subject: [PATCH 18/46] wip --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 45cb24e95..6884af9c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ bcrypt==3.2.0 # Apache 2.0 celery==5.4.0 # BSD 3 Clause +colorlog==5.0.1 # MIT django-allauth==0.63.6 # MIT django-celery-beat==2.6.0 # BSD 3 Clause django-cors-headers==3.7.0 # MIT From c5e985b8a545ca5d57f0f3861b07d8dfbd425aff Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Wed, 12 Mar 2025 14:56:32 +0200 Subject: [PATCH 19/46] removed share.bin and sharectl and moved to django management commands --- CHANGELOG.md | 9 - bin/share | 7 - docker-compose.yml | 21 --- how-to/run-locally.md | 10 +- requirements.txt | 1 - setup.py | 3 - share/bin/__init__.py | 26 --- share/bin/__main__.py | 10 -- share/bin/harvest.py | 162 ------------------ share/bin/search.py | 68 -------- share/bin/util.py | 85 --------- .../commands/shtrove_indexer_run.py | 16 ++ .../commands/shtrove_search_setup.py | 25 +++ .../commands/shtrove_search_teardown.py | 13 ++ tests/share/bin/test_sharectl.py | 92 +++------- 15 files changed, 82 insertions(+), 466 deletions(-) delete mode 100755 bin/share delete mode 100644 share/bin/__init__.py delete mode 100644 share/bin/__main__.py 
delete mode 100644 share/bin/harvest.py delete mode 100644 share/bin/search.py delete mode 100644 share/bin/util.py create mode 100644 share/management/commands/shtrove_indexer_run.py create mode 100644 share/management/commands/shtrove_search_setup.py create mode 100644 share/management/commands/shtrove_search_teardown.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a7b6b36a..0c851c990 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -298,12 +298,6 @@ # [21.0.0] - 2021-03-09 - new model: `FormattedMetadataRecord` -- new sharectl commands: - - `sharectl search purge` - - `sharectl search setup ` - - `sharectl search setup --initial` - - `sharectl search set_primary ` - - `sharectl search reindex_all_suids ` - new management commands: - `format_metadata_records` - `populate_osf_suids` @@ -450,13 +444,11 @@ * Fix bug in indexer daemon, stop all threads when one dies # [2.16.2] - 2018-04-30 -* Fix typo in `sharectl ingest` that prevented bulk reingestion # [2.16.1] - 2018-04-30 * Fix date range filtering in com.figshare.v2 harvester # [2.16.0] - 2018-04-26 -* Bulk reingestion with `IngestScheduler.bulk_reingest()` and `sharectl ingest` * Admin interface updates * More stable and reliable indexer daemon * "Urgent" queues for ingestion and indexing, allowing pushed data to jump @@ -608,7 +600,6 @@ # [2.9.0] - 2017-06-15 ## Added -* sharectl command line tool * Profiling middleware for local development * Janitor tasks to find and process unprocessed data * Timestamp field to RawData diff --git a/bin/share b/bin/share deleted file mode 100755 index 34473a2e4..000000000 --- a/bin/share +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env python -import os -import sys -from share.bin import main - -if __name__ == '__main__': - main(sys.argv) diff --git a/docker-compose.yml b/docker-compose.yml index 413866c6d..b3c4b9c04 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -227,24 +227,3 @@ services: stdin_open: true networks: - share_network - - indexer: - image: quay.io/centerforopenscience/share:develop - command: sharectl search daemon - depends_on: - - postgres - - rabbitmq - - elastic8 - volumes: - - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.10 - - elastic8_cert_vol:/elastic8_certs - env_file: - - .docker-compose.env - environment: - ELASTICSEARCH8_SECRET: ${ELASTIC_PASSWORD:-secretsecret} - ELASTICSEARCH8_CERT_PATH: /elastic8_certs/ca/ca.crt - restart: unless-stopped - stdin_open: true - networks: - - share_network diff --git a/how-to/run-locally.md b/how-to/run-locally.md index 7930d74bf..490eccf95 100644 --- a/how-to/run-locally.md +++ b/how-to/run-locally.md @@ -46,17 +46,13 @@ a shell running within SHARE's environment in docker: docker-compose run --rm --no-deps worker bash ``` this will open a bash prompt within a temporary `worker` container -- from here we can -run commands within SHARE's environment, including django's `manage.py` and SHARE's own -`sharectl` utility (defined in `share/bin/`) +run commands within SHARE's environment, including django's `manage.py` from within that worker shell, use django's `migrate` command to set up tables in postgres: ``` python manage.py migrate ``` -...and use `sharectl` to set up indexes in elasticsearch: -``` -sharectl search setup --initial -``` + ### 3. 
start 'em up all other services can now be started from the host machine (upping `worker` ups all) @@ -93,7 +89,7 @@ docker-compose up -d worker ### start a shell in a container there are several ways to open a shell with SHARE's environment (which has -django's `manage.py` and SHARE's own `sharectl` utility, defined in `share/bin/`) +django's `manage.py` and management commands defined in `management/commands/` if `worker` is already up, can open a shell within that container: ``` diff --git a/requirements.txt b/requirements.txt index bee50f6de..a0eb391a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,6 @@ django-prettyjson==0.4.1 # BSD 3 Clause django-revproxy==0.10.0 # MPL 2.0 django==3.2.25 # BSD 3 Clause djangorestframework==3.12.4 # BSD -docopt==0.6.2 # MIT elasticsearch5==5.5.6 # Apache 2.0 elasticsearch8==8.5.2 # Apache 2.0 furl==2.1.2 # None diff --git a/setup.py b/setup.py index 49d90d9d7..97414fdd2 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,6 @@ 'share.harvesters' ], entry_points={ - 'console_scripts': [ - 'sharectl = share.bin.__main__:main', - ], 'share.transformers': [ 'ca.lwbin = share.transformers.ca_lwbin:LWBINTransformer', 'com.biomedcentral = share.transformers.com_biomedcentral:BioMedCentralTransformer', diff --git a/share/bin/__init__.py b/share/bin/__init__.py deleted file mode 100644 index e89ed5e32..000000000 --- a/share/bin/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -import os -import sys - -import django - -from share.bin.util import execute_cmd - - -MODULES = ( - 'harvest', - 'search', -) - - -def main(argv): - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'project.settings') # noqa - django.setup() - - for name in MODULES: - __import__('share.bin.{}'.format(name)) - - execute_cmd(argv[1:]) - - -if __name__ == '__main__': - main(sys.argv) diff --git a/share/bin/__main__.py b/share/bin/__main__.py deleted file mode 100644 index 1470fc220..000000000 --- a/share/bin/__main__.py +++ /dev/null @@ -1,10 +0,0 @@ -import sys -from share.bin import main as _main - - -def main(): - _main(sys.argv) - - -if __name__ == '__main__': - main() diff --git a/share/bin/harvest.py b/share/bin/harvest.py deleted file mode 100644 index c7268fcb5..000000000 --- a/share/bin/harvest.py +++ /dev/null @@ -1,162 +0,0 @@ -import re -import os - -import pendulum - -from share import tasks -from share.bin.util import command -from share.harvest.scheduler import HarvestScheduler -from share.models import SourceConfig - - -def get_sourceconfig(name): - try: - return SourceConfig.objects.get(label=name) - except SourceConfig.DoesNotExist: - print('SourceConfig "{}" not found.'.format(name)) - fuzzy = list(SourceConfig.objects.filter(label__icontains=name).values_list('label', flat=True)) - if fuzzy: - print('Did you mean?\n\t{}'.format('\n\t'.join(fuzzy))) - return None - - -@command('Fetch data to disk or stdout, using the specified SourceConfig') -def fetch(args, argv): - """ - Usage: - {0} fetch [ | --start=YYYY-MM-DD --end=YYYY-MM-DD] [--limit=LIMIT] [--print | --out=DIR] [--set-spec=SET] - {0} fetch --ids ... [--print | --out=DIR] - - Options: - -l, --limit=NUMBER Limit the harvester to NUMBER of documents - -p, --print Print results to stdout rather than to a file - -o, --out=DIR The directory to store the fetched data in. Defaults to ./fetched/ - -s, --start=YYYY-MM-DD The date at which to start fetching data. - -e, --end=YYYY-MM-DD The date at which to stop fetching data. - --set-spec=SET The OAI setSpec to limit harvesting to. 
- --ids IDs of specific records to fetch. - """ - config = get_sourceconfig(args['']) - if not config: - return -1 - - harvester = config.get_harvester(pretty=True) - - ids = args[''] - if ids: - gen = (harvester.fetch_by_id(id) for id in ids) - else: - kwargs = {k: v for k, v in { - 'limit': int(args['--limit']) if args.get('--limit') else None, - 'set_spec': args.get('--set-spec'), - }.items() if v is not None} - - if not args[''] and not (args['--start'] and args['--end']): - gen = harvester.fetch(**kwargs) - elif args['']: - gen = harvester.fetch_date(pendulum.parse(args['']), **kwargs) - else: - gen = harvester.fetch_date_range(pendulum.parse(args['--start']), pendulum.parse(args['--end']), **kwargs) - - if not args['--print']: - args['--out'] = args['--out'] or os.path.join(os.curdir, 'fetched', config.label) - os.makedirs(args['--out'], exist_ok=True) - - for result in gen: - if args['--print']: - print('Harvested data with identifier "{}"'.format(result.identifier)) - print(result.datum) - print('\n') - else: - suffix = '.xml' if result.datum.startswith('<') else '.json' - with open(os.path.join(args['--out'], re.sub(r'[:\\\/\?\*]', '', str(result.identifier))) + suffix, 'w') as fobj: - fobj.write(result.datum) - - -@command('Harvest data using the specified SourceConfig') -def harvest(args, argv): - """ - Usage: - {0} harvest [] [options] - {0} harvest [] [options] - {0} harvest --all [] [options] - {0} harvest (--start=YYYY-MM-DD --end=YYYY-MM-DD) [options] - - Options: - -l, --limit=NUMBER Limit the harvester to NUMBER of documents - -s, --start=YYYY-MM-DD The date at which to start fetching data. - -e, --end=YYYY-MM-DD The date at which to stop fetching data. - -q, --quiet Do not print out the harvested records - --set-spec=SET The OAI setSpec to limit harvesting to. - """ - config = get_sourceconfig(args['']) - if not config: - return -1 - - kwargs = {k: v for k, v in { - 'limit': int(args['--limit']) if args.get('--limit') else None, - 'set_spec': args.get('--set-spec'), - }.items() if v is not None} - - if not args[''] and not (args['--start'] and args['--end']): - gen = config.get_harvester().harvest(**kwargs) - elif args['']: - gen = config.get_harvester().harvest_date(pendulum.parse(args['']), **kwargs) - else: - gen = config.get_harvester().harvest_date_range(pendulum.parse(args['--start']), pendulum.parse(args['--end']), **kwargs) - - # "Spin" the generator but don't keep the documents in memory - for datum in gen: - if args['--quiet']: - continue - print(datum) - - -@command('Create HarvestJobs for the specified SourceConfig') -def schedule(args, argv): - """ - Usage: - {0} schedule [ | (--start=YYYY-MM-DD --end=YYYY-MM-DD) | --complete] [--tasks | --run] - {0} schedule [ | (--start=YYYY-MM-DD --end=YYYY-MM-DD) | --complete] [--tasks | --run] --all - - Options: - -t, --tasks Spawn harvest tasks for each created job. - -r, --run Run the harvest task for each created job. - -a, --all Schedule jobs for all enabled SourceConfigs. - -c, --complete Schedule all jobs between today and the SourceConfig's earliest date. - -s, --start=YYYY-MM-DD The date at which to start fetching data. - -e, --end=YYYY-MM-DD The date at which to stop fetching data. - -j, --no-ingest Do not process harvested data. 
- """ - if not args['--all']: - configs = [get_sourceconfig(args[''])] - if not configs[0]: - return -1 - else: - configs = SourceConfig.objects.exclude(disabled=True).exclude(source__is_deleted=True) - - kwargs = {k: v for k, v in { - 'ingest': not args.get('--no-ingest'), - }.items() if v is not None} - - claim_jobs = args['--run'] or args['--tasks'] - - jobs = [] - for config in configs: - scheduler = HarvestScheduler(config, claim_jobs=claim_jobs) - - if not (args[''] or args['--start'] or args['--end']): - jobs.append(scheduler.today()) - elif args['']: - jobs.append(scheduler.date(pendulum.parse(args['']))) - else: - jobs.extend(scheduler.range(pendulum.parse(args['--start']), pendulum.parse(args['--end']))) - - if not claim_jobs: - return - - for job in jobs: - if args['--run']: - tasks.harvest.apply((), {'job_id': job.id, **kwargs}, retry=False, throw=True) - elif args['--tasks']: - tasks.harvest.apply_async((), {'job_id': job.id, **kwargs}) diff --git a/share/bin/search.py b/share/bin/search.py deleted file mode 100644 index 8ccb65c4d..000000000 --- a/share/bin/search.py +++ /dev/null @@ -1,68 +0,0 @@ -from project.celery import app as celery_app - -from share.bin.util import command -from share.search import index_strategy -from share.search.exceptions import IndexStrategyError -from share.search.daemon import IndexerDaemonControl - - -@command('Manage Elasticsearch') -def search(args, argv): - """ - Usage: - {0} search [...] - - Options: - -h, --help Show this screen. - - Commands: - {1.subcommand_list} - - See '{0} search --help' for more information on a specific command. - """ - pass - - -@search.subcommand('Drop the Elasticsearch index') -def purge(args, argv): - """ - Usage: {0} search purge ... - """ - for _strategy_name in args['']: - _strategy = index_strategy.parse_strategy_name(_strategy_name) - _strategy.pls_teardown() - - -@search.subcommand('Create indicies and apply mappings') -def setup(args, argv): - """ - Usage: {0} search setup - {0} search setup --initial - """ - _is_initial = args.get('--initial') - if _is_initial: - for _index_strategy in index_strategy.each_strategy(): - _index_strategy.pls_setup() - else: - _index_or_strategy_name = args[''] - try: - _strategy = index_strategy.get_strategy(_index_or_strategy_name) - except IndexStrategyError: - raise IndexStrategyError(f'unrecognized index or strategy name "{_index_or_strategy_name}"') - else: - _strategy.pls_setup() - - -@search.subcommand('Start the search indexing daemon') -def daemon(args, argv): - """ - Usage: {0} search daemon - """ - _daemon_control = IndexerDaemonControl(celery_app) - _daemon_control.start_all_daemonthreads() - try: - _daemon_control.stop_event.wait() - except KeyboardInterrupt: - pass # no error here; let the finally block stop all threads - finally: - _daemon_control.stop_daemonthreads(wait=True) diff --git a/share/bin/util.py b/share/bin/util.py deleted file mode 100644 index 617fc4378..000000000 --- a/share/bin/util.py +++ /dev/null @@ -1,85 +0,0 @@ -import os -import sys - -from docopt import docopt - -from django.conf import settings - - -class Command: - - @property - def subcommand_list(self): - indent = (4 + max([len(k) for k in self.subcommands], default=0)) - return '\n'.join( - self.subcommands[k].teaser(indent) - for k in sorted(self.subcommands) - ) - - def __init__(self, func, description, parsed=True): - self.bin = os.path.basename(sys.argv[0]) - self.description = description - self.func = func - self.parsed = parsed - self.subcommands = {} - self.docstring = 
'\n'.join(x[4:] for x in (func.__doc__ or '').split('\n')) - self.name = func.__name__ - - def teaser(self, indent): - return ' {{0.name:{}}}{{0.description}}'.format(indent).format(self) - - def subcommand(self, description, parsed=True): - def _inner(func): - return self.register(func, description, parsed) - return _inner - - def register(self, func, description, parsed=True): - cmd = type(self)(func, description, parsed) - if cmd.name in self.subcommands: - raise ValueError('{} already defined'.format(cmd.name)) - self.subcommands[cmd.name] = cmd - return cmd - - def __call__(self, argv): - if not self.parsed: - args = {} - else: - try: - options_first = self is execute_cmd or (argv[argv.index(self.name) + 1] in self.subcommands) - except IndexError: - options_first = False - - args = docopt( - self.docstring.format(self.bin, self), - argv=argv, - version=settings.VERSION, - options_first=options_first, - ) - - if args.get('') and self.subcommands: - if not args[''] in self.subcommands: - print('Invalid command "{}"'.format(**args)) - return sys.exit(1) - return self.subcommands[args['']](argv) - return self.func(args, argv) - - -def _execute_cmd(args, argv): - """ - Usage: - {0} [...] - {0} (--version | --help) - - Options: - -h, --help Show this screen. - -v, --version Show version. - - Commands: - {1.subcommand_list} - - See '{0} --help' for more information on a specific command.""" - return 0 - - -execute_cmd = Command(_execute_cmd, '') -command = execute_cmd.subcommand diff --git a/share/management/commands/shtrove_indexer_run.py b/share/management/commands/shtrove_indexer_run.py new file mode 100644 index 000000000..b53a8336a --- /dev/null +++ b/share/management/commands/shtrove_indexer_run.py @@ -0,0 +1,16 @@ +from django.core.management.base import BaseCommand +from share.search.daemon import IndexerDaemonControl +from project.celery import app as celery_app + +class Command(BaseCommand): + help = "Start the search indexing daemon" + + def handle(self, *args, **options): + daemon_control = IndexerDaemonControl(celery_app) + daemon_control.start_all_daemonthreads() + try: + daemon_control.stop_event.wait() + except KeyboardInterrupt: + pass + finally: + daemon_control.stop_daemonthreads(wait=True) diff --git a/share/management/commands/shtrove_search_setup.py b/share/management/commands/shtrove_search_setup.py new file mode 100644 index 000000000..2d67fdfe7 --- /dev/null +++ b/share/management/commands/shtrove_search_setup.py @@ -0,0 +1,25 @@ +from django.core.management.base import BaseCommand +from share.search import index_strategy +from share.search.exceptions import IndexStrategyError + +class Command(BaseCommand): + help = "Create Elasticsearch indices and apply mappings" + + def add_arguments(self, parser): + parser.add_argument("index_or_strategy_name", nargs="?", help="Name of index or strategy") + parser.add_argument("--initial", action="store_true", help="Set up all indices") + + def handle(self, *args, **options): + if options["initial"]: + for strategy in index_strategy.each_strategy(): + strategy.pls_setup() + else: + index_or_strategy_name = options["index_or_strategy_name"] + if not index_or_strategy_name: + self.stderr.write("Error: Missing index or strategy name") + return + try: + strategy = index_strategy.get_strategy(index_or_strategy_name) + strategy.pls_setup() + except IndexStrategyError: + raise IndexStrategyError(f'Unrecognized index or strategy name "{index_or_strategy_name}"') diff --git a/share/management/commands/shtrove_search_teardown.py 
b/share/management/commands/shtrove_search_teardown.py new file mode 100644 index 000000000..4a18a0010 --- /dev/null +++ b/share/management/commands/shtrove_search_teardown.py @@ -0,0 +1,13 @@ +from django.core.management.base import BaseCommand +from share.search import index_strategy + +class Command(BaseCommand): + help = "Drop Elasticsearch indices" + + def add_arguments(self, parser): + parser.add_argument("strategy_names", nargs="+", help="List of strategy names to drop") + + def handle(self, *args, **options): + for strategy_name in options["strategy_names"]: + strategy = index_strategy.parse_strategy_name(strategy_name) + strategy.pls_teardown() diff --git a/tests/share/bin/test_sharectl.py b/tests/share/bin/test_sharectl.py index a8e5c6325..575ee18c8 100644 --- a/tests/share/bin/test_sharectl.py +++ b/tests/share/bin/test_sharectl.py @@ -1,93 +1,51 @@ import io -from contextlib import redirect_stdout - from unittest import mock import pytest +from django.core.management import call_command -from share.bin.util import execute_cmd -import share.version - -from tests.share.search import patch_index_strategies - - -def run_sharectl(*args): - """run sharectl, assert that it returned as expected, and return its stdout - """ +def run_command(*args): + """Run a Django management command, assert that it returned as expected, and return its stdout""" fake_stdout = io.StringIO() try: - with redirect_stdout(fake_stdout): - execute_cmd(args) + call_command(*args, stdout=fake_stdout) except SystemExit: pass # success! return fake_stdout.getvalue() - -def test_sharectl_version(): - assert run_sharectl('-v').strip() == share.version.__version__ - - -class TestSharectlSearch: - @pytest.mark.parametrize('strategynames', [ +class TestCommandSearch: + @pytest.mark.parametrize('strategy_names', [ ['one'], ['another', 'makes', 'two'], ]) - def test_purge(self, strategynames): - mock_strategies = { - strategyname: mock.Mock() - for strategyname in strategynames - } + def test_purge(self, strategy_names): + mock_strategies = {name: mock.Mock() for name in strategy_names} - def _fake_parse_strategy_name(strategyname): - return mock_strategies[strategyname] + def fake_parse_strategy_name(name): + return mock_strategies[name] - with mock.patch('share.bin.search.index_strategy.parse_strategy_name', wraps=_fake_parse_strategy_name) as mock_get_strategy: - run_sharectl('search', 'purge', *strategynames) - assert mock_get_strategy.mock_calls == [ - mock.call(strategyname) - for strategyname in mock_strategies.keys() - ] + with mock.patch('share.search.index_strategy.parse_strategy_name', side_effect=fake_parse_strategy_name) as mock_get_strategy: + run_command('shtrove_search_teardown', *strategy_names) + + mock_get_strategy.assert_has_calls([mock.call(name) for name in strategy_names]) for mock_strategy in mock_strategies.values(): mock_strategy.pls_teardown.assert_called_once_with() - def test_setup_initial(self, settings): + def test_setup_initial(self): _expected_indexes = ['baz', 'bar', 'foo'] - _mock_index_strategys = [ - mock.Mock(strategy_name=_name) - for _name in _expected_indexes - ] - with patch_index_strategies(_mock_index_strategys): - run_sharectl('search', 'setup', '--initial') + _mock_index_strategys = [mock.Mock(strategy_name=_name) for _name in _expected_indexes] + with mock.patch('share.search.index_strategy.each_strategy', return_value=mock_strategies): + run_command('shtrove_search_setup', '--initial') for mock_index_strategy in _mock_index_strategys: - assert 
mock_index_strategy.pls_setup.mock_calls == [mock.call()] + mock_index_strategy.pls_setup.assert_called_once_with() def test_setup_index(self): mock_index_strategy = mock.Mock() - with mock.patch('share.bin.search.index_strategy.get_strategy', return_value=mock_index_strategy): - run_sharectl('search', 'setup', 'foo') - assert mock_index_strategy.pls_setup.mock_calls == [mock.call()] + with mock.patch('share.search.index_strategy.get_strategy', return_value=mock_strategy): + run_command('shtrove_search_setup', 'foo') + mock_index_strategy.pls_setup.assert_called_once_with() - def test_daemon(self, settings): - with mock.patch('share.bin.search.IndexerDaemonControl') as mock_daemon_control: - run_sharectl('search', 'daemon') + def test_daemon(self): + with mock.patch('share.search.daemon.IndexerDaemonControl') as mock_daemon_control: + run_command('shtrove_indexer_run') mock_daemon_control.return_value.start_all_daemonthreads.assert_called_once() - - -# TODO unit tests, not just a smoke test -def test_fetch_runs(): - with mock.patch('share.bin.harvest.SourceConfig'): - run_sharectl('fetch', 'foo.sourceconfig', '2021-05-05', '--print') - - -# TODO unit tests, not just a smoke test -def test_harvest_runs(): - with mock.patch('share.bin.harvest.SourceConfig'): - run_sharectl('harvest', 'foo.sourceconfig') - - -# TODO unit tests, not just a smoke test -def test_schedule_runs(): - with mock.patch('share.bin.harvest.SourceConfig'): - with mock.patch('share.bin.harvest.HarvestScheduler'): - with mock.patch('share.bin.harvest.tasks'): - run_sharectl('schedule', 'foo.sourceconfig') From b0e8bbd05af0dab34c616008eefa1b2a0111bce8 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 12 Mar 2025 13:59:49 -0400 Subject: [PATCH 20/46] wip --- api/urls.py | 4 ---- api/views/feeds.py | 6 +++--- docker-compose.yml | 18 +++++++++--------- project/settings.py | 1 - requirements.txt | 4 ++-- share/models/banner.py | 5 +++-- share/oaipmh/indexcard_repository.py | 11 ++++++----- share/oaipmh/util.py | 5 ++--- share/util/fromisoformat.py | 10 ++++++++++ tests/api/test_feeds.py | 4 +--- tests/trove/render/_base.py | 6 ++++-- trove/trovesearch/trovesearch_gathering.py | 5 ++--- 12 files changed, 42 insertions(+), 37 deletions(-) create mode 100644 share/util/fromisoformat.py diff --git a/api/urls.py b/api/urls.py index de3dcdea0..ef02ffa73 100644 --- a/api/urls.py +++ b/api/urls.py @@ -16,11 +16,7 @@ url('^', include('api.users.urls')), url('^search/', include('api.search.urls'), name='search'), - url(r'^status/?', views.ServerStatusView.as_view(), name='status'), - url(r'^rss/?', views.LegacyCreativeWorksRSS(), name='rss'), - url(r'^atom/?', views.LegacyCreativeWorksAtom(), name='atom'), - url(r'^feeds/rss/?', views.MetadataRecordsRSS(), name='feeds.rss'), url(r'^feeds/atom/?', views.MetadataRecordsAtom(), name='feeds.atom'), ] diff --git a/api/views/feeds.py b/api/views/feeds.py index f0adb53ad..85925591f 100644 --- a/api/views/feeds.py +++ b/api/views/feeds.py @@ -1,4 +1,3 @@ -import datetime from xml.sax.saxutils import unescape import json import logging @@ -11,6 +10,7 @@ from share.search import index_strategy from share.search.exceptions import IndexStrategyError from share.util.xml import strip_illegal_xml_chars +from share.util.fromisoformat import fromisoformat logger = logging.getLogger(__name__) @@ -108,10 +108,10 @@ def item_author_name(self, item): return prepare_string('{}{}'.format(author_name, ' et al.' 
if len(authors) > 1 else '')) def item_pubdate(self, item): - return datetime.date.fromisoformat(item.get('date_published') or item.get('date_created')) + return fromisoformat(item.get('date_published') or item.get('date_created')) def item_updateddate(self, item): - return datetime.date.fromisoformat(item.get(self._order)) + return fromisoformat(item.get(self._order)) def item_categories(self, item): categories = item.get('subjects', []) diff --git a/docker-compose.yml b/docker-compose.yml index 469fe2aaf..c7f02e903 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -37,8 +37,8 @@ services: - elastic8_cert_vol:/usr/share/elasticsearch/config/certs user: "0" environment: - ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-secretsecret} - KIBANA_PASSWORD: ${KIBANA_PASSWORD:-kibanakibana} + - ELASTIC_PASSWORD=secretsecret + - KIBANA_PASSWORD=${KIBANA_PASSWORD:-kibanakibana} command: > bash -c ' if [ x$${ELASTIC_PASSWORD} == x ]; then @@ -96,7 +96,7 @@ services: - elastic8_data_vol:/usr/share/elasticsearch/data - elastic8_cert_vol:/usr/share/elasticsearch/config/certs/ environment: - - ELASTIC_PASSWORD=${ELASTIC_PASSWORD:-secretsecret} + - ELASTIC_PASSWORD=secretsecret - node.name=singlenode - discovery.type=single-node - ES_JAVA_OPTS=-Xms512m -Xmx512m # raise memory limits a lil bit @@ -188,8 +188,8 @@ services: env_file: - .docker-compose.env environment: - ELASTICSEARCH8_SECRET: ${ELASTIC_PASSWORD:-secretsecret} - ELASTICSEARCH8_CERT_PATH: /elastic_certs/ca/ca.crt + - ELASTICSEARCH8_SECRET=secretsecret + - ELASTICSEARCH8_CERT_PATH=/elastic_certs/ca/ca.crt stdin_open: true networks: - share_network @@ -210,8 +210,8 @@ services: env_file: - .docker-compose.env environment: - ELASTICSEARCH8_SECRET: ${ELASTIC_PASSWORD:-secretsecret} - ELASTICSEARCH8_CERT_PATH: /elastic8_certs/ca/ca.crt + - ELASTICSEARCH8_SECRET=secretsecret + - ELASTICSEARCH8_CERT_PATH=/elastic8_certs/ca/ca.crt stdin_open: true networks: - share_network @@ -230,8 +230,8 @@ services: env_file: - .docker-compose.env environment: - ELASTICSEARCH8_SECRET: ${ELASTIC_PASSWORD:-secretsecret} - ELASTICSEARCH8_CERT_PATH: /elastic8_certs/ca/ca.crt + - ELASTICSEARCH8_SECRET=secretsecret + - ELASTICSEARCH8_CERT_PATH=/elastic8_certs/ca/ca.crt restart: unless-stopped stdin_open: true networks: diff --git a/project/settings.py b/project/settings.py index bad3ba1f5..adb6ec1a6 100644 --- a/project/settings.py +++ b/project/settings.py @@ -80,7 +80,6 @@ def split(string, delim): 'oauth2_provider', 'rest_framework', 'corsheaders', - 'revproxy', 'allauth', 'allauth.account', diff --git a/requirements.txt b/requirements.txt index 6884af9c7..772c86475 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,9 +15,9 @@ kombu==5.3.7 # BSD 3 Clause markdown2==2.4.10 # MIT newrelic==8.4.0 # newrelic APM agent, Custom License psycopg2==2.9.5 # LGPL with exceptions or ZPL -rdflib==7.0.0 +rdflib==7.0.0 # BSD 3 Clause requests==2.25.1 # Apache 2.0 -primitive_metadata==0.2025.0 +primitive_metadata==0.2025.1 # MIT # relevant only in deployment: sentry-sdk[django]==1.22.2 # MIT diff --git a/share/models/banner.py b/share/models/banner.py index 7b33babf3..eb0ea6ae9 100644 --- a/share/models/banner.py +++ b/share/models/banner.py @@ -6,12 +6,13 @@ class SiteBanner(models.Model): - COLOR = [ + COLOR_CHOICES = [ (0, _('success')), (1, _('info')), (2, _('warning')), (3, _('danger')) ] + COLOR = dict(COLOR_CHOICES) class JSONAPIMeta(util.BaseJSONAPIMeta): pass @@ -20,7 +21,7 @@ class JSONAPIMeta(util.BaseJSONAPIMeta): title = models.CharField(max_length=300) 
description = models.TextField(blank=True) - color = models.IntegerField(choices=COLOR, default=1) + color = models.IntegerField(choices=COLOR_CHOICES, default=1) icon = models.CharField(blank=True, max_length=31, default='exclamation') created_at = models.DateTimeField(auto_now_add=True) diff --git a/share/oaipmh/indexcard_repository.py b/share/oaipmh/indexcard_repository.py index cd760dd47..76de6255e 100644 --- a/share/oaipmh/indexcard_repository.py +++ b/share/oaipmh/indexcard_repository.py @@ -1,4 +1,3 @@ -import datetime import uuid from django.core.exceptions import ValidationError as DjangoValidationError @@ -8,6 +7,7 @@ from share.oaipmh.verbs import OAIVerb from share.oaipmh.response_renderer import OAIRenderer from share.oaipmh.util import format_datetime +from share.util.fromisoformat import fromisoformat from share import models as share_db from trove import models as trove_db from trove.vocab.namespaces import OAI_DC @@ -162,6 +162,7 @@ def _do_getrecord(self, kwargs, renderer): ) if self.errors: return + assert _indexcard is not None if _indexcard.oai_metadata is None or _indexcard.oai_datestamp is None: self.errors.append(oai_errors.BadFormatForRecord(kwargs['metadataPrefix'])) if self.errors: @@ -205,7 +206,7 @@ def _get_indexcard_page_queryset(self, kwargs, catch=True, last_id=None): ) if 'from' in kwargs: try: - _from = datetime.datetime.fromisoformat(kwargs['from']) + _from = fromisoformat(kwargs['from']) except ValueError: if not catch: raise @@ -216,7 +217,7 @@ ) if 'until' in kwargs: try: - _until = datetime.datetime.fromisoformat(kwargs['until']) + _until = fromisoformat(kwargs['until']) except ValueError: if not catch: raise @@ -290,12 +291,12 @@ def _get_resumption_token(self, kwargs, last_id): _until = None if 'from' in kwargs: try: - _from = datetime.datetime.fromisoformat(kwargs['from']) + _from = fromisoformat(kwargs['from']) except ValueError: self.errors.append(oai_errors.BadArgument('Invalid value for', 'from')) if 'until' in kwargs: try: - _until = datetime.datetime.fromisoformat(kwargs['until']) + _until = fromisoformat(kwargs['until']) except ValueError: self.errors.append(oai_errors.BadArgument('Invalid value for', 'until')) _set_spec = kwargs.get('set', '') diff --git a/share/oaipmh/util.py b/share/oaipmh/util.py index ea0248e8c..3a033227a 100644 --- a/share/oaipmh/util.py +++ b/share/oaipmh/util.py @@ -1,8 +1,7 @@ -import datetime - from lxml import etree from primitive_metadata import primitive_rdf +from share.util.fromisoformat import fromisoformat from trove.vocab.namespaces import OAI, OAI_DC @@ -12,7 +11,7 @@ def format_datetime(dt): if isinstance(dt, primitive_rdf.Literal): dt = dt.unicode_value if isinstance(dt, str): - dt = datetime.datetime.fromisoformat(dt) + dt = fromisoformat(dt) return dt.strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/share/util/fromisoformat.py b/share/util/fromisoformat.py new file mode 100644 index 000000000..92ac3d4a8 --- /dev/null +++ b/share/util/fromisoformat.py @@ -0,0 +1,10 @@ +import datetime +import re + + +def fromisoformat(date_str: str) -> datetime.datetime: + # wrapper around `datetime.datetime.fromisoformat` that supports "Z" UTC suffix + # (may be removed in python 3.11+, when `fromisoformat` handles more iso-8601 formats) + return datetime.datetime.fromisoformat( + re.sub('Z$', '+00:00', date_str), # replace "Z" shorthand with explicit timezone offset + ) diff --git a/tests/api/test_feeds.py b/tests/api/test_feeds.py index 
6a39480e2..d64695847 100644 --- a/tests/api/test_feeds.py +++ b/tests/api/test_feeds.py @@ -45,10 +45,8 @@ def test_atom(self, client, fake_items): assert actual_titles == expected_titles assert len(actual_titles) == 11 - def test_gone(self, client, fake_items): + def test_status(self, client, fake_items): for feed_url, expected_status in ( - ('/api/v2/atom/', 410), - ('/api/v2/rss/', 410), ('/api/v2/feeds/atom/', 200), ('/api/v2/feeds/rss/', 200), ): diff --git a/tests/trove/render/_base.py b/tests/trove/render/_base.py index 626de4b85..94b8f94a8 100644 --- a/tests/trove/render/_base.py +++ b/tests/trove/render/_base.py @@ -13,7 +13,7 @@ from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase -class FakeGatherCache(gather.GatherCache): +class FakeGatherCache(gather._GatherCache): def already_gathered(self, *args, **kwargs): return True # prevent gathering @@ -27,13 +27,15 @@ def ask_exhaustively(self, *args, **kwargs): def _make_fake_gathering(tripledict, renderer_type): _organizer = trovesearch_by_indexstrategy + _fakecache = FakeGatherCache() + _fakecache.gathered = rdf.RdfGraph(tripledict) return FakeGathering( norms=_organizer.norms, organizer=_organizer, gatherer_kwargs={ 'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI, }, - cache=FakeGatherCache(gathered=rdf.RdfGraph(tripledict)) + cache=_fakecache, ) diff --git a/trove/trovesearch/trovesearch_gathering.py b/trove/trovesearch/trovesearch_gathering.py index 11b1092b8..13bdda941 100644 --- a/trove/trovesearch/trovesearch_gathering.py +++ b/trove/trovesearch/trovesearch_gathering.py @@ -58,6 +58,7 @@ TROVE.Cardsearch, TROVE.Valuesearch, }, + param_iris={TROVE.deriverIRI}, thesaurus=TROVE_API_THESAURUS, ) @@ -67,9 +68,7 @@ literal('trove search', language='en'), ), norms=TROVE_GATHERING_NORMS, - gatherer_params={ - 'deriver_iri': TROVE.deriverIRI, - }, + gatherer_params={'deriver_iri': TROVE.deriverIRI}, ) From a4ffaf726ca84f8ddc5c334ba45e93d43975d5e6 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 13 Mar 2025 14:43:05 +0200 Subject: [PATCH 21/46] reverted changes to changelog, updated docker-compose and moved tests --- CHANGELOG.md | 9 ++++++++ docker-compose.yml | 21 +++++++++++++++++++ how-to/run-locally.md | 5 ++++- .../test_shtrove_commands.py} | 1 - 4 files changed, 34 insertions(+), 2 deletions(-) rename tests/share/{bin/test_sharectl.py => management_commands/test_shtrove_commands.py} (99%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c851c990..7a7b6b36a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -298,6 +298,12 @@ # [21.0.0] - 2021-03-09 - new model: `FormattedMetadataRecord` +- new sharectl commands: + - `sharectl search purge <index_names>...` + - `sharectl search setup <index_name>` + - `sharectl search setup --initial` + - `sharectl search set_primary <index_name>` + - `sharectl search reindex_all_suids <index_name>` - new management commands: - `format_metadata_records` - `populate_osf_suids` @@ -444,11 +450,13 @@ * Fix bug in indexer daemon, stop all threads when one dies # [2.16.2] - 2018-04-30 +* Fix typo in `sharectl ingest` that prevented bulk reingestion # [2.16.1] - 2018-04-30 * Fix date range filtering in com.figshare.v2 harvester # [2.16.0] - 2018-04-26 +* Bulk reingestion with `IngestScheduler.bulk_reingest()` and `sharectl ingest` * Admin interface updates * More stable and reliable indexer daemon * "Urgent" queues for ingestion and indexing, allowing pushed data to jump @@ -600,6 +608,7 @@ # [2.9.0] - 2017-06-15 ## Added +* sharectl command line tool * Profiling middleware for local development * Janitor tasks to find and process 
unprocessed data * Timestamp field to RawData diff --git a/docker-compose.yml b/docker-compose.yml index b3c4b9c04..7370b8775 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -227,3 +227,24 @@ services: stdin_open: true networks: - share_network + + indexer: + image: quay.io/centerforopenscience/share:develop + command: python manage.py shtrove_indexer_run + depends_on: + - postgres + - rabbitmq + - elastic8 + volumes: + - ./:/code:cached + - share_requirements_vol:/usr/local/lib/python3.10 + - elastic8_cert_vol:/elastic8_certs + env_file: + - .docker-compose.env + environment: + ELASTICSEARCH8_SECRET: ${ELASTIC_PASSWORD:-secretsecret} + ELASTICSEARCH8_CERT_PATH: /elastic8_certs/ca/ca.crt + restart: unless-stopped + stdin_open: true + networks: + - share_network diff --git a/how-to/run-locally.md b/how-to/run-locally.md index 490eccf95..99e4a523d 100644 --- a/how-to/run-locally.md +++ b/how-to/run-locally.md @@ -52,7 +52,10 @@ from within that worker shell, use django's `migrate` command to set up tables in postgres: ``` python manage.py migrate ``` - +...and use the `shtrove_search_setup` management command to set up indexes in elasticsearch: +``` +python manage.py shtrove_search_setup --initial +``` ### 3. start 'em up all other services can now be started from the host machine (upping `worker` ups all) diff --git a/tests/share/bin/test_sharectl.py b/tests/share/management_commands/test_shtrove_commands.py similarity index 99% rename from tests/share/bin/test_sharectl.py rename to tests/share/management_commands/test_shtrove_commands.py index 575ee18c8..9c617f977 100644 --- a/tests/share/bin/test_sharectl.py +++ b/tests/share/management_commands/test_shtrove_commands.py @@ -1,4 +1,3 @@ - import io from unittest import mock import pytest From 4ddca1f3170a358d3940ec774b658fba40d56d96 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 13 Mar 2025 17:15:46 +0200 Subject: [PATCH 22/46] fixed test_shtrove_commands, fix flake8 --- share/management/commands/shtrove_indexer_run.py | 1 + share/management/commands/shtrove_search_setup.py | 1 + share/management/commands/shtrove_search_teardown.py | 1 + .../management_commands/test_shtrove_commands.py | 12 +++++++----- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/share/management/commands/shtrove_indexer_run.py b/share/management/commands/shtrove_indexer_run.py index b53a8336a..6728c6027 100644 --- a/share/management/commands/shtrove_indexer_run.py +++ b/share/management/commands/shtrove_indexer_run.py @@ -2,6 +2,7 @@ from share.search.daemon import IndexerDaemonControl from project.celery import app as celery_app + class Command(BaseCommand): help = "Start the search indexing daemon" diff --git a/share/management/commands/shtrove_search_setup.py b/share/management/commands/shtrove_search_setup.py index 2d67fdfe7..fb21d1fad 100644 --- a/share/management/commands/shtrove_search_setup.py +++ b/share/management/commands/shtrove_search_setup.py @@ -2,6 +2,7 @@ from share.search import index_strategy from share.search.exceptions import IndexStrategyError + class Command(BaseCommand): help = "Create Elasticsearch indices and apply mappings" diff --git a/share/management/commands/shtrove_search_teardown.py b/share/management/commands/shtrove_search_teardown.py index 4a18a0010..85a202191 100644 --- a/share/management/commands/shtrove_search_teardown.py +++ b/share/management/commands/shtrove_search_teardown.py @@ -1,6 +1,7 @@ from django.core.management.base import BaseCommand from share.search import index_strategy + class Command(BaseCommand): help = "Drop 
Elasticsearch indices" diff --git a/tests/share/management_commands/test_shtrove_commands.py b/tests/share/management_commands/test_shtrove_commands.py index 9c617f977..e8470b8a5 100644 --- a/tests/share/management_commands/test_shtrove_commands.py +++ b/tests/share/management_commands/test_shtrove_commands.py @@ -3,6 +3,7 @@ import pytest from django.core.management import call_command + def run_command(*args): """Run a Django management command, assert that it returned as expected, and return its stdout""" fake_stdout = io.StringIO() @@ -12,6 +13,7 @@ def run_command(*args): pass # success! return fake_stdout.getvalue() + class TestCommandSearch: @pytest.mark.parametrize('strategy_names', [ ['one'], @@ -31,16 +33,16 @@ def fake_parse_strategy_name(name): mock_strategy.pls_teardown.assert_called_once_with() def test_setup_initial(self): - _expected_indexes = ['baz', 'bar', 'foo'] - _mock_index_strategys = [mock.Mock(strategy_name=_name) for _name in _expected_indexes] - with mock.patch('share.search.index_strategy.each_strategy', return_value=mock_strategies): + expected_indexes = ['baz', 'bar', 'foo'] + mock_index_strategys = [mock.Mock(strategy_name=_name) for _name in expected_indexes] + with mock.patch('share.search.index_strategy.each_strategy', return_value=mock_index_strategys): run_command('shtrove_search_setup', '--initial') - for mock_index_strategy in _mock_index_strategys: + for mock_index_strategy in mock_index_strategys: mock_index_strategy.pls_setup.assert_called_once_with() def test_setup_index(self): mock_index_strategy = mock.Mock() - with mock.patch('share.search.index_strategy.get_strategy', return_value=mock_strategy): + with mock.patch('share.search.index_strategy.get_strategy', return_value=mock_index_strategy): run_command('shtrove_search_setup', 'foo') mock_index_strategy.pls_setup.assert_called_once_with() From 947ba8df73aa0110fb995d9e24a5bbda6c95de3d Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Mon, 7 Apr 2025 17:50:42 +0300 Subject: [PATCH 23/46] updated python version, updated django, updated containers, images, requirements --- Dockerfile | 4 +- docker-compose.yml | 42 ++++++++-------- project/settings.py | 2 - requirements.txt | 49 +++++++++---------- tests/api/test_sitebanners.py | 12 +---- tests/api/test_sources_endpoint.py | 18 ++++--- ...archivedindexcardrdf_indexcard_and_more.py | 34 +++++++++++++ 7 files changed, 92 insertions(+), 69 deletions(-) create mode 100644 trove/migrations/0007_alter_archivedindexcardrdf_indexcard_and_more.py diff --git a/Dockerfile b/Dockerfile index 1d79b5beb..4d2445e6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim-bullseye as app +FROM python:3.13-slim-bullseye as app RUN apt-get update \ && apt-get install -y \ @@ -26,7 +26,7 @@ RUN mkdir -p /code WORKDIR /code RUN pip install -U pip -RUN pip install uwsgi==2.0.21 +RUN pip install uwsgi==2.0.27 COPY ./requirements.txt /code/requirements.txt COPY ./constraints.txt /code/constraints.txt diff --git a/docker-compose.yml b/docker-compose.yml index 62091170d..5df60c1ed 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,7 +32,7 @@ services: #################### elastic8_setup: - image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0 + image: docker.elastic.co/elasticsearch/elasticsearch:8.17.0 volumes: - elastic8_cert_vol:/usr/share/elasticsearch/config/certs user: "0" @@ -89,7 +89,7 @@ services: depends_on: elastic8_setup: condition: service_healthy - image: docker.elastic.co/elasticsearch/elasticsearch:8.7.0 + image: 
docker.elastic.co/elasticsearch/elasticsearch:8.17.0 ports: - 9208:9200 volumes: @@ -126,13 +126,7 @@ services: - share_network postgres: - image: postgres:10 - command: - - /bin/bash - - -c - - echo "$$POSTGRES_INITDB" > /docker-entrypoint-initdb.d/commands.sh && - chmod +x /docker-entrypoint-initdb.d/commands.sh && - /docker-entrypoint.sh postgres + image: postgres:15.4 ports: - 5433:5432 environment: @@ -152,27 +146,31 @@ services: ################## requirements: - image: quay.io/centerforopenscience/share:develop + build: + context: . + dockerfile: Dockerfile command: - /bin/bash - -c - apt-get update && apt-get install -y gcc && pip install -r requirements.txt -r dev-requirements.txt && - (python3 -m compileall /usr/local/lib/python3.10 || true) && - rm -Rf /python3.10/* && + (python3 -m compileall /usr/local/lib/python3.13 || true) && + rm -Rf /python3.13/* && apt-get remove -y gcc && - cp -Rf -p /usr/local/lib/python3.10 / && + cp -Rf -p /usr/local/lib/python3.13 / && python3 setup.py develop restart: 'no' volumes: - ./:/code:cached - - share_requirements_vol:/python3.10 + - share_requirements_vol:/python3.13 networks: - share_network worker: - image: quay.io/centerforopenscience/share:develop + build: + context: . + dockerfile: Dockerfile command: /bin/bash -c 'cp -r /elastic8_certs /elastic_certs && chown -R daemon:daemon /elastic_certs/ && /usr/local/bin/celery --app project worker --uid daemon -l INFO' depends_on: @@ -183,7 +181,7 @@ services: - indexer volumes: - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.10 + - share_requirements_vol:/usr/local/lib/python3.13 - elastic8_cert_vol:/elastic8_certs env_file: - .docker-compose.env @@ -195,7 +193,9 @@ services: - share_network web: - image: quay.io/centerforopenscience/share:develop + build: + context: . + dockerfile: Dockerfile command: python manage.py runserver 0.0.0.0:8000 ports: - 8003:8000 @@ -205,7 +205,7 @@ services: - elastic8 volumes: - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.10 + - share_requirements_vol:/usr/local/lib/python3.13 - elastic8_cert_vol:/elastic8_certs env_file: - .docker-compose.env @@ -217,7 +217,9 @@ services: - share_network indexer: - image: quay.io/centerforopenscience/share:develop + build: + context: . 
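+ # note: these services are now built from the local Dockerfile rather than pulling the prebuilt quay.io/centerforopenscience/share:develop image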
+ dockerfile: Dockerfile command: python manage.py shtrove_indexer_run depends_on: - postgres @@ -225,7 +227,7 @@ services: - elastic8 volumes: - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.10 + - share_requirements_vol:/usr/local/lib/python3.13 - elastic8_cert_vol:/elastic8_certs env_file: - .docker-compose.env diff --git a/project/settings.py b/project/settings.py index adb6ec1a6..1cc1052b8 100644 --- a/project/settings.py +++ b/project/settings.py @@ -276,9 +276,7 @@ def split(string, delim): 'django.contrib.auth.hashers.BCryptPasswordHasher', 'django.contrib.auth.hashers.PBKDF2PasswordHasher', 'django.contrib.auth.hashers.PBKDF2SHA1PasswordHasher', - 'django.contrib.auth.hashers.SHA1PasswordHasher', 'django.contrib.auth.hashers.MD5PasswordHasher', - 'django.contrib.auth.hashers.CryptPasswordHasher', ] diff --git a/requirements.txt b/requirements.txt index d42988d19..8faede7dc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,33 +1,30 @@ -bcrypt==3.2.0 # Apache 2.0 +bcrypt==4.3.0 # Apache 2.0 celery==5.4.0 # BSD 3 Clause -colorlog==5.0.1 # MIT -django-allauth==0.63.6 # MIT -django-celery-beat==2.6.0 # BSD 3 Clause -django-cors-headers==3.7.0 # MIT -django-extensions==3.1.3 # MIT -django-filter==2.4.0 # BSD -django-oauth-toolkit==1.7.1 # BSD -django==3.2.25 # BSD 3 Clause -djangorestframework==3.12.4 # BSD -elasticsearch8==8.5.2 # Apache 2.0 -lxml==4.9.1 # BSD -kombu==5.3.7 # BSD 3 Clause -markdown2==2.4.10 # MIT -newrelic==8.4.0 # newrelic APM agent, Custom License -psycopg2==2.9.5 # LGPL with exceptions or ZPL -rdflib==7.0.0 # BSD 3 Clause -requests==2.25.1 # Apache 2.0 -primitive_metadata==0.2025.1 # MIT +colorlog==6.9.0 # MIT +django-allauth==65.5.0 # MIT +django-celery-beat==2.7.0 # BSD 3 Clause +django-cors-headers~=4.6.0 +django-extensions~=3.2.3 +django-filter==25.1 # BSD +django-oauth-toolkit==3.0.1 # BSD +django~=5.1.3 +djangorestframework~=3.15.2 +djangorestframework-jsonapi==7.1.0 +elasticsearch8==8.17.0 # Apache 2.0 +lxml~=5.3.0 +kombu==5.5.0 # BSD 3 Clause +markdown2==2.5.3 # MIT +newrelic==10.7.0 # newrelic APM agent, Custom License +psycopg2==2.9.10 # LGPL with exceptions or ZPL +rdflib==7.1.3 # BSD 3 Clause +requests~=2.32.3 +primitive_metadata~=0.2025.1 # relevant only in deployment: -sentry-sdk[django]==1.22.2 # MIT -gevent==22.10.2 # MIT +sentry-sdk[django]==2.24.1 # MIT +gevent==24.11.1 # MIT psycogreen==1.0.2 # BSD # to be removed in future work: elasticsearch5==5.5.6 # Apache 2.0 -PyJWE==1.0.0 # Apache 2.0 - -# djangorestframework-jsonapi==4.2.1 # MIT -# Allows custom-rendered IDs, hiding null values, and including data in error responses -git+https://github.com/cos-forks/django-rest-framework-json-api.git@v4.2.1+cos0 +PyJWE==1.0.0 \ No newline at end of file diff --git a/tests/api/test_sitebanners.py b/tests/api/test_sitebanners.py index 95bbea6fa..a905403c5 100644 --- a/tests/api/test_sitebanners.py +++ b/tests/api/test_sitebanners.py @@ -1,7 +1,6 @@ import pytest from share.models import SiteBanner -from share.util import IDObfuscator from tests.factories import ShareUserFactory @@ -31,7 +30,7 @@ def test_list_with_items(self, client): assert resp.status_code == 200 assert resp.json() == { 'data': [{ - 'id': IDObfuscator.encode(banner), + 'id': str(banner.id), 'type': 'SiteBanner', 'attributes': { 'color': 'info', @@ -45,12 +44,3 @@ def test_list_with_items(self, client): 'prev': None, }, } - - # def test_get_item(self, client): - # resp = client.get('/api/v2/site_banners/') - # assert resp.status_code == 200 - # assert resp.json() 
== { - # 'data': [], - # 'meta': { - # } - # } diff --git a/tests/api/test_sources_endpoint.py b/tests/api/test_sources_endpoint.py index 5c9cbce7c..2a0b09d46 100644 --- a/tests/api/test_sources_endpoint.py +++ b/tests/api/test_sources_endpoint.py @@ -132,15 +132,15 @@ def test_is_deleted(self, client, sources): source_ids_after = {s['id'] for s in sources_after} assert len(sources_after) == len(sources_before) - 1 - missing_ids = source_ids_before - source_ids_after - assert missing_ids == {IDObfuscator.encode(deleted_source)} + missing_ids = {int(i) for i in source_ids_before - source_ids_after} + assert missing_ids == {deleted_source.id} def test_by_id(self, client, sources): source = Source.objects.exclude(is_deleted=True).last() resp = client.get('{}{}/'.format(self.endpoint, IDObfuscator.encode(source))) assert resp.status_code == 200 - assert IDObfuscator.load(resp.json()['data']['id']) == source + assert int(resp.json()['data']['id']) == source.id assert resp.json()['data']['type'] == 'Source' assert resp.json()['data']['attributes'] == { 'name': source.name, @@ -180,8 +180,7 @@ def test_successful_post_no_home_page(self, client, source_add_user): data = flatten_write_response(resp) created_label = data['source']['longTitle'].replace(' ', '_').lower() - created_user = ShareUser.objects.get(pk=IDObfuscator.decode_id(data['user']['id'])) - + created_user = ShareUser.objects.get(pk=data['user']['id']) assert data['source']['longTitle'] == test_data['data']['attributes']['long_title'] assert data['source']['name'] == created_label assert data['source']['homePage'] is None @@ -225,9 +224,12 @@ def test_successful_repost_home_page(self, client, source_add_user): ) assert resp_two.status_code == 409 - data_two = flatten_write_response(resp_two) - - assert data_one == data_two + resp_two_json = resp_two.json() + if "data" in resp_two_json: + data_two = flatten_write_response(resp_two) + assert data_one == data_two + else: + assert "errors" in resp_two_json def test_successful_post_put_home_page(self, client, source_add_change_user): test_data = get_post_body(home_page='http://test.homepage.net') diff --git a/trove/migrations/0007_alter_archivedindexcardrdf_indexcard_and_more.py b/trove/migrations/0007_alter_archivedindexcardrdf_indexcard_and_more.py new file mode 100644 index 000000000..f8be4ec0a --- /dev/null +++ b/trove/migrations/0007_alter_archivedindexcardrdf_indexcard_and_more.py @@ -0,0 +1,34 @@ +# Generated by Django 5.1.7 on 2025-03-28 14:15 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('trove', '0006_supplementary_indexcard_rdf'), + ] + + operations = [ + migrations.AlterField( + model_name='archivedindexcardrdf', + name='indexcard', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_set', to='trove.indexcard'), + ), + migrations.AlterField( + model_name='indexcard', + name='focustype_identifier_set', + field=models.ManyToManyField(related_name='+', to='trove.resourceidentifier'), + ), + migrations.AlterField( + model_name='latestindexcardrdf', + name='indexcard', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_set', to='trove.indexcard'), + ), + migrations.AlterField( + model_name='supplementaryindexcardrdf', + name='indexcard', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_set', to='trove.indexcard'), 
+ ), + ] From 017a46582601c540c9326fe9d347f34dd775ca49 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 8 Apr 2025 13:08:39 +0300 Subject: [PATCH 24/46] fix tests, updated dockerfile --- docker-compose.yml | 3 --- tests/api/test_sources_endpoint.py | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 5df60c1ed..bdf1a1abe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -132,9 +132,6 @@ services: environment: POSTGRES_DB: share POSTGRES_HOST_AUTH_METHOD: trust - POSTGRES_INITDB: | - sed -i -e 's/max_connections.*/max_connections = 5000/' /var/lib/postgresql/data/postgresql.conf - sed -i -e 's/#log_min_duration_statement = .*/log_min_duration_statement = 0/' /var/lib/postgresql/data/postgresql.conf volumes: - "${POSTGRES_DATA_VOL:-postgres_data_vol}:/var/lib/postgresql/data/" stdin_open: true diff --git a/tests/api/test_sources_endpoint.py b/tests/api/test_sources_endpoint.py index 2a0b09d46..61f78a2c5 100644 --- a/tests/api/test_sources_endpoint.py +++ b/tests/api/test_sources_endpoint.py @@ -229,7 +229,8 @@ def test_successful_repost_home_page(self, client, source_add_user): data_two = flatten_write_response(resp_two) assert data_one == data_two else: - assert "errors" in resp_two_json + if "errors" in resp_two_json: + assert resp_two_json['errors']['errors'][0]['detail'] == 'That resource already exists.' def test_successful_post_put_home_page(self, client, source_add_change_user): test_data = get_post_body(home_page='http://test.homepage.net') From 07780e250b2d435752508364b5035b21669cb50f Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 11 Apr 2025 09:38:44 -0400 Subject: [PATCH 25/46] fix: ignore deleted-but-still-indexed items --- trove/trovesearch/trovesearch_gathering.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/trove/trovesearch/trovesearch_gathering.py b/trove/trovesearch/trovesearch_gathering.py index 13bdda941..7e027623c 100644 --- a/trove/trovesearch/trovesearch_gathering.py +++ b/trove/trovesearch/trovesearch_gathering.py @@ -160,6 +160,9 @@ def gather_cardsearch_page(focus: CardsearchFocus, *, deriver_iri, **kwargs): deriver_iri=deriver_iri, ) for _result in _current_handle.search_result_page or (): + _card_focus = _card_foci.get(_result.card_iri) + if _card_focus is None: + continue # skip (deleted card still indexed?) 
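# note: for each surviving result, the twoples below assemble its trove:matchEvidence (TextMatchEvidence blank nodes describing where the search text matched)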
_text_evidence_twoples = ( (TROVE.matchEvidence, frozenset(( (RDF.type, TROVE.TextMatchEvidence), @@ -177,7 +180,6 @@ def gather_cardsearch_page(focus: CardsearchFocus, *, deriver_iri, **kwargs): # hack around (current) limitations of primitive_metadata.gather # (what with all these intermediate blank nodes and sequences): # yield trove:resourceMetadata here (instead of another gatherer) - _card_focus = _card_foci[_result.card_iri] _card_twoples = _minimal_indexcard_twoples( focus_identifiers=[ _identifier.as_iri() From 0ff7439d11e4c9ace62bd2b822d42552356100bb Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Fri, 11 Apr 2025 10:18:07 -0400 Subject: [PATCH 26/46] fix: configure sentry_sdk --- project/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/settings.py b/project/settings.py index 1cc1052b8..31fea7016 100644 --- a/project/settings.py +++ b/project/settings.py @@ -245,7 +245,7 @@ def split(string, delim): else VERSION ), send_default_pii=False, - request_bodies='never', + max_request_body_size='never', debug=DEBUG, integrations=[ DjangoIntegration( From c20fff93426c11635e0fcd2dd9aa70c9e78d8f28 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 15 Apr 2025 10:20:56 +0300 Subject: [PATCH 27/46] updated dev requirements --- dev-requirements.txt | 20 ++++++++++---------- share/search/index_strategy/elastic8.py | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 070ac2960..b4d23c729 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,13 +1,13 @@ -r requirements.txt behave==1.2.6 -coveralls==3.1.0 -django-debug-toolbar==3.2.1 -factory-boy==3.2.0 -faker==8.10.0 -flake8==5.0.4 -httpretty==1.1.3 -pytest-benchmark==3.4.1 -pytest==6.2.4 -pytest-django==4.4.0 -jedi==0.18.0 +coveralls==3.3.1 +django-debug-toolbar==5.1.0 +factory-boy==3.3.3 +faker==37.1.0 +flake8==7.2.0 +httpretty==1.1.4 +pytest-benchmark==5.1.0 +pytest==8.3.5 +pytest-django==4.11.1 +jedi==0.19.2 diff --git a/share/search/index_strategy/elastic8.py b/share/search/index_strategy/elastic8.py index c73af5989..b76963271 100644 --- a/share/search/index_strategy/elastic8.py +++ b/share/search/index_strategy/elastic8.py @@ -125,7 +125,7 @@ def _get_elastic8_client(cls) -> elasticsearch8.Elasticsearch: should_sniff = settings.ELASTICSEARCH['SNIFF'] timeout = settings.ELASTICSEARCH['TIMEOUT'] return elasticsearch8.Elasticsearch( - settings.ELASTICSEARCH8_URL, + hosts=settings.ELASTICSEARCH8_URL, # security: ca_certs=settings.ELASTICSEARCH8_CERT_PATH, basic_auth=( From 8465f08cd9aa102cbff02275d853efd776ba9abb Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 26 Mar 2025 08:47:46 -0400 Subject: [PATCH 28/46] wip: poetry --- .github/workflows/run_tests.yml | 21 +++--- Dockerfile | 23 ++++--- dev-requirements.txt | 13 ---- docker-compose.yml | 6 +- project/settings.py | 35 ---------- pyproject.toml | 63 +++++++++++++++++- requirements.txt | 30 --------- setup.cfg | 3 - setup.py | 110 -------------------------------- 9 files changed, 87 insertions(+), 217 deletions(-) delete mode 100644 dev-requirements.txt delete mode 100644 requirements.txt delete mode 100644 setup.py diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 7c4b7d429..be406bb81 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -13,8 +13,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.10'] # TODO: 3.11, 3.12 - postgres-version: ['15', '17'] + 
python-version: ['3.10', '3.13'] + postgres-version: ['17'] runs-on: ubuntu-latest services: postgres: @@ -54,20 +54,17 @@ jobs: - name: install non-py dependencies run: sudo apt-get update && sudo apt-get install -y libxml2-dev libxslt1-dev libpq-dev git gcc - - name: set up python${{ matrix.python-version }} + - name: Install poetry + run: pipx install poetry + + - name: setup python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: pip - cache-dependency-path: | - requirements.txt - dev-requirements.txt - - - name: install py dependencies - run: pip install -r dev-requirements.txt + cache: 'poetry' - - name: install share - run: python setup.py develop + - name: install dependencies + run: poetry install --with dev - name: flake it run: flake8 . diff --git a/Dockerfile b/Dockerfile index 4d2445e6c..612a8c20d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,20 +25,25 @@ RUN update-ca-certificates RUN mkdir -p /code WORKDIR /code -RUN pip install -U pip -RUN pip install uwsgi==2.0.27 +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_OPTIONS_ALWAYS_COPY=1 \ + POETRY_VIRTUALENVS_CREATE=0 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_CACHE_DIR=/tmp/poetry-cache \ + POETRY_HOME=/tmp/poetry -COPY ./requirements.txt /code/requirements.txt -COPY ./constraints.txt /code/constraints.txt +RUN python -m venv $POETRY_HOME -RUN pip install --no-cache-dir -c /code/constraints.txt -r /code/requirements.txt +RUN $POETRY_HOME/bin/pip install poetry==2.1.1 + +COPY ./ /code/ + +RUN $POETRY_HOME/bin/poetry install --no-root --compile RUN apt-get remove -y \ gcc \ zlib1g-dev -COPY ./ /code/ - RUN python manage.py collectstatic --noinput ARG GIT_TAG= @@ -53,7 +58,9 @@ CMD ["python", "manage.py", "--help"] ### Dist FROM app AS dist +RUN $POETRY_HOME/bin/poetry install --no-root --compile --only dist + ### Dev FROM app AS dev -RUN pip install --no-cache-dir -c /code/constraints.txt -r /code/dev-requirements.txt +RUN $POETRY_HOME/bin/poetry install --no-root --compile --only dev diff --git a/dev-requirements.txt b/dev-requirements.txt deleted file mode 100644 index b4d23c729..000000000 --- a/dev-requirements.txt +++ /dev/null @@ -1,13 +0,0 @@ --r requirements.txt - -behave==1.2.6 -coveralls==3.3.1 -django-debug-toolbar==5.1.0 -factory-boy==3.3.3 -faker==37.1.0 -flake8==7.2.0 -httpretty==1.1.4 -pytest-benchmark==5.1.0 -pytest==8.3.5 -pytest-django==4.11.1 -jedi==0.19.2 diff --git a/docker-compose.yml b/docker-compose.yml index bdf1a1abe..6a7468b61 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -151,12 +151,10 @@ services: - -c - apt-get update && apt-get install -y gcc && - pip install -r requirements.txt -r dev-requirements.txt && - (python3 -m compileall /usr/local/lib/python3.13 || true) && + $POETRY_HOME/bin/poetry install --no-root --compile --with dev && rm -Rf /python3.13/* && apt-get remove -y gcc && - cp -Rf -p /usr/local/lib/python3.13 / && - python3 setup.py develop + cp -Rf -p /usr/local/lib/python3.13 / restart: 'no' volumes: - ./:/code:cached diff --git a/project/settings.py b/project/settings.py index 31fea7016..ae09107f9 100644 --- a/project/settings.py +++ b/project/settings.py @@ -463,41 +463,6 @@ def route_urgent_task(name, args, kwargs, options, task=None, **kw): HIDE_DEPRECATED_VIEWS = strtobool(os.environ.get('HIDE_DEPRECATED_VIEWS', 'False')) -# Regulator pipeline, names of setuptools entry points -SHARE_REGULATOR_CONFIG = { - 'NODE_STEPS': [ - 'tokenize_tags', - 'whitespace', - 'normalize_agent_names', - 'cited_as', - ('normalize_iris', 
{ - 'node_types': ['workidentifier'], - 'blocked_schemes': ['mailto'], - 'blocked_authorities': ['issn', 'orcid.org'], - }), - ('normalize_iris', { - 'node_types': ['agentidentifier'], - 'blocked_schemes': ['mailto'], - 'blocked_authorities': ['secure.gravatar.com'], - }), - ('trim_cycles', { - 'node_types': ['abstractworkrelation', 'abstractagentrelation'], - 'relation_fields': ['subject', 'related'], - }), - ('trim_cycles', { - 'node_types': ['subject'], - 'relation_fields': ['central_synonym'], - 'delete_node': False, - }), - ], - 'GRAPH_STEPS': [ - 'deduplicate', - ], - 'VALIDATE_STEPS': [ - 'jsonld_validator', - ], -} - # API KEYS DATAVERSE_API_KEY = os.environ.get('DATAVERSE_API_KEY') PLOS_API_KEY = os.environ.get('PLOS_API_KEY') diff --git a/pyproject.toml b/pyproject.toml index 35074d936..84a44831d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,65 @@ +[project] +name = "SHARE/trove" +version = "25.1.1" # TODO: get version from share/version.py +description = "" +authors = [ + {name = "Abram Axel Booth", email = "abram@cos.io"} +] +readme = "README.md" +requires-python = "=3.13" +dependencies = [ + "bcrypt==4.3.0", # Apache 2.0 + "celery==5.4.0", # BSD 3 Clause + "colorlog==6.9.0", # MIT + "django-allauth==65.5.0", # MIT + "django-celery-beat==2.7.0", # BSD 3 Clause + "django-cors-headers==4.7.0", # MIT + "django-extensions==3.2.3", # MIT + "django-filter==25.1", # BSD + "django-oauth-toolkit==3.0.1", # BSD + "django==5.2", # BSD 3 Clause + "elasticsearch8==8.17.2", # Apache 2.0 + "lxml==5.3.0", # BSD + "kombu==5.5.0", # BSD 3 Clause + "markdown2==2.5.3", # MIT + "psycopg2==2.9.10", # LGPL with exceptions or ZPL + "rdflib==7.1.3", # BSD 3 Clause + "requests==2.32.3", # Apache 2.0 + "primitive_metadata==0.2025.1", # MIT + + # relevant only in deployment: + "sentry-sdk[django]==2.24.1", # MIT + "gevent==24.11.2", # MIT + "psycogreen==1.0.2", # BSD + + # to be removed in future work: + "djangorestframework==3.16.0", # BSD + "djangorestframework-jsonapi==7.1.0", # BSD + "elasticsearch5==5.5.6", # Apache 2.0 + "PyJWE==1.0.0", # Apache 2.0 +] + +[tool.poetry.group.dist] +optional = true +[tool.poetry.group.dist.dependencies] +uwsgi = "2.0.28" +newrelic = "10.7.0" # newrelic APM agent, Custom License + +[tool.poetry.group.dev] +optional = true +[tool.poetry.group.dev.dependencies] +coveralls = "3.3.1" +django-debug-toolbar = "5.1.0" +factory-boy = "3.3.3" +faker = "37.1.0" +flake8 = "7.2.0" +pytest-benchmark = "5.1.0" +pytest = "8.3.5" +pytest-django = "4.11.1" + [build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" +requires = ["poetry-core>=2.0.0,<3.0.0"] +build-backend = "poetry.core.masonry.api" [tool.coverage.run] omit = ["tests/*"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8faede7dc..000000000 --- a/requirements.txt +++ /dev/null @@ -1,30 +0,0 @@ -bcrypt==4.3.0 # Apache 2.0 -celery==5.4.0 # BSD 3 Clause -colorlog==6.9.0 # MIT -django-allauth==65.5.0 # MIT -django-celery-beat==2.7.0 # BSD 3 Clause -django-cors-headers~=4.6.0 -django-extensions~=3.2.3 -django-filter==25.1 # BSD -django-oauth-toolkit==3.0.1 # BSD -django~=5.1.3 -djangorestframework~=3.15.2 -djangorestframework-jsonapi==7.1.0 -elasticsearch8==8.17.0 # Apache 2.0 -lxml~=5.3.0 -kombu==5.5.0 # BSD 3 Clause -markdown2==2.5.3 # MIT -newrelic==10.7.0 # newrelic APM agent, Custom License -psycopg2==2.9.10 # LGPL with exceptions or ZPL -rdflib==7.1.3 # BSD 3 Clause -requests~=2.32.3 -primitive_metadata~=0.2025.1 - -# relevant only in 
deployment: -sentry-sdk[django]==2.24.1 # MIT -gevent==24.11.1 # MIT -psycogreen==1.0.2 # BSD - -# to be removed in future work: -elasticsearch5==5.5.6 # Apache 2.0 -PyJWE==1.0.0 \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 168a1e8cd..05d23205a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,9 +7,6 @@ max-line-length = 250 ignore = E501,W503,F403,E266,F405 exclude = ./scratch/* -[behave] -paths=./tests/features - [tool:pytest] DJANGO_SETTINGS_MODULE=project.settings addopts = --benchmark-skip diff --git a/setup.py b/setup.py deleted file mode 100644 index 97414fdd2..000000000 --- a/setup.py +++ /dev/null @@ -1,110 +0,0 @@ -from setuptools import setup, find_packages -from share import __version__ - -setup( - name='share', - version=__version__, - packages=find_packages(exclude=('tests*')), - provides=[ - 'share.transformers', - 'share.harvesters' - ], - entry_points={ - 'share.transformers': [ - 'ca.lwbin = share.transformers.ca_lwbin:LWBINTransformer', - 'com.biomedcentral = share.transformers.com_biomedcentral:BioMedCentralTransformer', - 'com.dailyssrn = share.transformers.com_dailyssrn:DailySSRNTransformer', - 'com.figshare = share.transformers.com_figshare:FigshareTransformer', - 'com.figshare.v2 = share.transformers.com_figshare_v2:FigshareV2Transformer', - 'com.mendeley.data = share.transformers.com_mendeley_data:MendeleyTransformer', - 'com.peerj = share.transformers.com_peerj:PeerJTransformer', - 'com.peerj.xml = share.transformers.com_peerj_xml:PeerJXMLTransformer', - 'com.researchregistry = share.transformers.com_researchregistry:RRTransformer', - 'com.springer = share.transformers.com_springer:SpringerTransformer', - 'edu.ageconsearch = share.transformers.edu_ageconsearch:AgeconTransformer', - 'edu.gwu = share.transformers.edu_gwu:GWScholarSpaceTransformer', - 'edu.harvarddataverse = share.transformers.edu_harvarddataverse:HarvardTransformer', - 'gov.clinicaltrials = share.transformers.gov_clinicaltrials:ClinicalTrialsTransformer', - 'gov.nih = share.transformers.gov_nih:NIHTransformer', - 'gov.nsfawards = share.transformers.gov_nsfawards:NSFTransformer', - 'gov.pubmedcentral.pmc = share.transformers.gov_pubmedcentral_pmc:PMCTransformer', - 'gov.scitech = share.transformers.gov_scitech:ScitechTransformer', - 'gov.usgs = share.transformers.gov_usgs:USGSTransformer', - 'io.osf = share.transformers.io_osf:OSFTransformer', - 'io.osf.preprints = share.transformers.io_osf_preprints:PreprintTransformer', - 'io.osf.registrations = share.transformers.io_osf_registrations:OSFRegistrationsTransformer', - 'mods = share.transformers.mods:MODSTransformer', - 'oai_dc = share.transformers.oai:OAITransformer', - 'org.arxiv = share.transformers.org_arxiv:ArxivTransformer', - 'org.biorxiv = share.transformers.org_biorxiv:BiorxivTransformer', - 'org.biorxiv.rss = share.transformers.org_biorxiv_rss:BiorxivRSSTransformer', - 'org.biorxiv.html = share.transformers.org_biorxiv_html:BiorxivHTMLTransformer', - 'org.crossref = share.transformers.org_crossref:CrossrefTransformer', - 'org.datacite = share.transformers.org_datacite:DataciteTransformer', - 'org.dataone = share.transformers.org_dataone:DataoneTransformer', - 'org.elife = share.transformers.org_elife:ElifeTransformer', - 'org.engrxiv = share.transformers.org_engrxiv:EngrxivTransformer', - 'org.ncar = share.transformers.org_ncar:NCARTransformer', - 'org.neurovault = share.transformers.org_neurovault:NeurovaultTransformer', - 'org.plos = share.transformers.org_plos:PLoSTransformer', - 'org.psyarxiv = 
share.transformers.org_psyarxiv:PsyarxivTransformer', - 'org.socialscienceregistry = share.transformers.org_socialscienceregistry:SCTransformer', - 'org.socarxiv = share.transformers.org_socarxiv:SocarxivTransformer', - 'org.swbiodiversity = share.transformers.org_swbiodiversity:SWTransformer', - 'v1_push = share.transformers.v1_push:V1Transformer', - 'v2_push = share.transformers.v2_push:V2PushTransformer', - ], - 'share.harvesters': [ - 'ca.lwbin = share.harvesters.ca_lwbin:LWBINHarvester', - 'com.biomedcentral = share.harvesters.com_biomedcentral:BiomedCentralHarvester', - 'com.figshare = share.harvesters.com_figshare:FigshareHarvester', - 'com.figshare.v2 = share.harvesters.com_figshare_v2:FigshareHarvester', - 'com.mendeley.data = share.harvesters.com_mendeley_data:MendeleyHarvester', - 'com.peerj = share.harvesters.com_peerj:PeerJHarvester', - 'com.researchregistry = share.harvesters.com_researchregistry:ResearchRegistryHarvester', - 'com.springer = share.harvesters.com_springer:SpringerHarvester', - 'edu.ageconsearch = share.harvesters.edu_ageconsearch:AgEconHarvester', - 'edu.gwu = share.harvesters.edu_gwu:GWScholarSpaceHarvester', - 'edu.harvarddataverse = share.harvesters.edu_harvarddataverse:HarvardDataverseHarvester', - 'gov.clinicaltrials = share.harvesters.gov_clinicaltrials:ClinicalTrialsHarvester', - 'gov.doepages = share.harvesters.gov_doepages:DoepagesHarvester', - 'gov.nih = share.harvesters.gov_nih:NIHHarvester', - 'gov.nsfawards = share.harvesters.gov_nsfawards:NSFAwardsHarvester', - 'gov.scitech = share.harvesters.gov_scitech:SciTechHarvester', - 'gov.usgs = share.harvesters.gov_usgs:USGSHarvester', - 'io.osf = share.harvesters.io_osf:OSFHarvester', - 'oai = share.harvesters.oai:OAIHarvester', - 'org.arxiv = share.harvesters.org_arxiv:ArxivHarvester', - 'org.biorxiv = share.harvesters.org_biorxiv:BiorxivHarvester', - 'org.biorxiv.rss = share.harvesters.org_biorxiv_rss:BiorxivHarvester', - 'org.biorxiv.html = share.harvesters.org_biorxiv_html:BiorxivHarvester', - 'org.crossref = share.harvesters.org_crossref:CrossRefHarvester', - 'org.dataone = share.harvesters.org_dataone:DataOneHarvester', - 'org.elife = share.harvesters.org_elife:ELifeHarvester', - 'org.ncar = share.harvesters.org_ncar:NCARHarvester', - 'org.neurovault = share.harvesters.org_neurovault:NeuroVaultHarvester', - 'org.plos = share.harvesters.org_plos:PLOSHarvester', - 'org.socialscienceregistry = share.harvesters.org_socialscienceregistry:SCHarvester', - 'org.swbiodiversity = share.harvesters.org_swbiodiversity:SWHarvester', - ], - 'share.regulate.steps.node': [ - 'cited_as = share.regulate.steps.cited_as:CitedAs', - 'trim_cycles = share.regulate.steps.trim_cycles:TrimCycles', - 'block_extra_values = share.regulate.steps.block_extra_values:BlockExtraValues', - 'normalize_agent_names = share.regulate.steps.normalize_agent_names:NormalizeAgentNames', - 'normalize_iris = share.regulate.steps.normalize_iris:NormalizeIRIs', - 'tokenize_tags = share.regulate.steps.tokenize_tags:TokenizeTags', - 'whitespace = share.regulate.steps.whitespace:StripWhitespace', - ], - 'share.regulate.steps.graph': [ - 'deduplicate = share.regulate.steps.deduplicate:Deduplicate', - ], - 'share.regulate.steps.validate': [ - 'jsonld_validator = share.regulate.steps.validate:JSONLDValidatorStep', - ], - 'share.metadata_formats': [ - 'sharev2_elastic = share.metadata_formats.sharev2_elastic:ShareV2ElasticFormatter', - 'oai_dc = share.metadata_formats.oai_dc:OaiDcFormatter', - ], - } -) From 8a1ebac4864394587b259cec419cc358d3075664 Mon 
Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 26 Mar 2025 09:00:32 -0400 Subject: [PATCH 29/46] wip --- Dockerfile | 5 ++++- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 612a8c20d..eb6ec3374 100644 --- a/Dockerfile +++ b/Dockerfile @@ -36,7 +36,8 @@ RUN python -m venv $POETRY_HOME RUN $POETRY_HOME/bin/pip install poetry==2.1.1 -COPY ./ /code/ +COPY pyproject.toml . +COPY poetry.lock . RUN $POETRY_HOME/bin/poetry install --no-root --compile @@ -44,6 +45,8 @@ RUN apt-get remove -y \ gcc \ zlib1g-dev +COPY ./ /code/ + RUN python manage.py collectstatic --noinput ARG GIT_TAG= diff --git a/pyproject.toml b/pyproject.toml index 84a44831d..e5d8389ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "SHARE/trove" +name = "shtrove" version = "25.1.1" # TODO: get version from share/version.py description = "" authors = [ From 7b92a4fef8fd3884d892f2b47e99bef88b2d57ed Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 26 Mar 2025 15:35:16 -0400 Subject: [PATCH 30/46] wip --- Dockerfile | 2 - poetry.lock | 1865 ++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 12 +- 3 files changed, 1873 insertions(+), 6 deletions(-) create mode 100644 poetry.lock diff --git a/Dockerfile b/Dockerfile index eb6ec3374..c893097c8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,8 +54,6 @@ ARG GIT_COMMIT= ENV VERSION ${GIT_TAG} ENV GIT_COMMIT ${GIT_COMMIT} -RUN python setup.py develop - CMD ["python", "manage.py", "--help"] ### Dist diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 000000000..2310efec8 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,1865 @@ +# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. + +[[package]] +name = "amqp" +version = "5.3.1" +description = "Low-level AMQP client for Python (fork of amqplib)." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2"}, + {file = "amqp-5.3.1.tar.gz", hash = "sha256:cddc00c725449522023bad949f70fff7b48f0b1ade74d170a6f10ab044739432"}, +] + +[package.dependencies] +vine = ">=5.0.0,<6.0.0" + +[[package]] +name = "asgiref" +version = "3.8.1" +description = "ASGI specs, helper code, and adapters" +optional = false +python-versions = ">=3.8" +groups = ["main", "dev"] +files = [ + {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"}, + {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} + +[package.extras] +tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] + +[[package]] +name = "atomicwrites" +version = "1.4.1" +description = "Atomic file writes." 
From 7b92a4fef8fd3884d892f2b47e99bef88b2d57ed Mon Sep 17 00:00:00 2001
From: abram axel booth
Date: Wed, 26 Mar 2025 15:35:16 -0400
Subject: [PATCH 30/46] wip

---
 Dockerfile     |    2 -
 poetry.lock    | 1865 ++++++++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml |   12 +-
 3 files changed, 1873 insertions(+), 6 deletions(-)
 create mode 100644 poetry.lock

diff --git a/Dockerfile b/Dockerfile
index eb6ec3374..c893097c8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -54,8 +54,6 @@ ARG GIT_COMMIT=
 ENV VERSION ${GIT_TAG}
 ENV GIT_COMMIT ${GIT_COMMIT}
 
-RUN python setup.py develop
-
 CMD ["python", "manage.py", "--help"]
 
 ### Dist
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 000000000..2310efec8
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,1865 @@
+# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand.
+
+[[package]]
+name = "amqp"
+version = "5.3.1"
+description = "Low-level AMQP client for Python (fork of amqplib)."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "amqp-5.3.1-py3-none-any.whl", hash = "sha256:43b3319e1b4e7d1251833a93d672b4af1e40f3d632d479b98661a95f117880a2"},
+    {file = "amqp-5.3.1.tar.gz", hash = "sha256:cddc00c725449522023bad949f70fff7b48f0b1ade74d170a6f10ab044739432"},
+]
+
+[package.dependencies]
+vine = ">=5.0.0,<6.0.0"
+
+[[package]]
+name = "asgiref"
+version = "3.8.1"
+description = "ASGI specs, helper code, and adapters"
+optional = false
+python-versions = ">=3.8"
+groups = ["main", "dev"]
+files = [
+    {file = "asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47"},
+    {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""}
+
+[package.extras]
+tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"]
+
+[[package]]
+name = "atomicwrites"
+version = "1.4.1"
+description = "Atomic file writes."
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
+groups = ["dev"]
+markers = "sys_platform == \"win32\""
+files = [
+    {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"},
+]
+
+[[package]]
+name = "attrs"
+version = "25.3.0"
+description = "Classes Without Boilerplate"
+optional = false
+python-versions = ">=3.8"
+groups = ["dev"]
+files = [
+    {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"},
+    {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"},
+]
+
+[package.extras]
+benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"]
+tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""]
+
+[[package]]
+name = "bcrypt"
+version = "3.2.0"
+description = "Modern password hashing for your software and your servers"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:b589229207630484aefe5899122fb938a5b017b0f4349f769b8c13e78d99a8fd"},
+    {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c95d4cbebffafcdd28bd28bb4e25b31c50f6da605c81ffd9ad8a3d1b2ab7b1b6"},
+    {file = "bcrypt-3.2.0-cp36-abi3-manylinux1_x86_64.whl", hash = "sha256:63d4e3ff96188e5898779b6057878fecf3f11cfe6ec3b313ea09955d587ec7a7"},
+    {file = "bcrypt-3.2.0-cp36-abi3-manylinux2010_x86_64.whl", hash = "sha256:cd1ea2ff3038509ea95f687256c46b79f5fc382ad0aa3664d200047546d511d1"},
+    {file = "bcrypt-3.2.0-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:cdcdcb3972027f83fe24a48b1e90ea4b584d35f1cc279d76de6fc4b13376239d"},
+    {file = "bcrypt-3.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a0584a92329210fcd75eb8a3250c5a941633f8bfaf2a18f81009b097732839b7"},
+    {file = "bcrypt-3.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:56e5da069a76470679f312a7d3d23deb3ac4519991a0361abc11da837087b61d"},
+    {file = "bcrypt-3.2.0-cp36-abi3-win32.whl", hash = "sha256:a67fb841b35c28a59cebed05fbd3e80eea26e6d75851f0574a9273c80f3e9b55"},
+    {file = "bcrypt-3.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:81fec756feff5b6818ea7ab031205e1d323d8943d237303baca2c5f9c7846f34"},
+    {file = "bcrypt-3.2.0.tar.gz", hash = "sha256:5b93c1726e50a93a033c36e5ca7fdcd29a5c7395af50a6892f5d9e7c6cfbfb29"},
+]
+
+[package.dependencies]
+cffi = ">=1.1"
+six = ">=1.4.1"
+
+[package.extras]
+tests = ["pytest (>=3.2.1,!=3.3.0)"]
+typecheck = ["mypy"]
+
+[[package]]
+name = "billiard"
+version = "4.2.1"
+description = "Python multiprocessing fork with improvements and bugfixes"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "billiard-4.2.1-py3-none-any.whl", hash = "sha256:40b59a4ac8806ba2c2369ea98d876bc6108b051c227baffd928c644d15d8f3cb"},
+    {file = "billiard-4.2.1.tar.gz", hash = "sha256:12b641b0c539073fc8d3f5b8b7be998956665c4233c7c1fcd66a7e677c4fb36f"},
+]
+
+[[package]]
+name = "celery"
+version = "5.4.0"
+description = "Distributed Task Queue."
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "celery-5.4.0-py3-none-any.whl", hash = "sha256:369631eb580cf8c51a82721ec538684994f8277637edde2dfc0dacd73ed97f64"},
+    {file = "celery-5.4.0.tar.gz", hash = "sha256:504a19140e8d3029d5acad88330c541d4c3f64c789d85f94756762d8bca7e706"},
+]
+
+[package.dependencies]
+billiard = ">=4.2.0,<5.0"
+click = ">=8.1.2,<9.0"
+click-didyoumean = ">=0.3.0"
+click-plugins = ">=1.1.1"
+click-repl = ">=0.2.0"
+kombu = ">=5.3.4,<6.0"
+python-dateutil = ">=2.8.2"
+tzdata = ">=2022.7"
+vine = ">=5.1.0,<6.0"
+
+[package.extras]
+arangodb = ["pyArango (>=2.0.2)"]
+auth = ["cryptography (==42.0.5)"]
+azureblockblob = ["azure-storage-blob (>=12.15.0)"]
+brotli = ["brotli (>=1.0.0) ; platform_python_implementation == \"CPython\"", "brotlipy (>=0.7.0) ; platform_python_implementation == \"PyPy\""]
+cassandra = ["cassandra-driver (>=3.25.0,<4)"]
+consul = ["python-consul2 (==0.1.5)"]
+cosmosdbsql = ["pydocumentdb (==2.3.5)"]
+couchbase = ["couchbase (>=3.0.0) ; platform_python_implementation != \"PyPy\" and (platform_system != \"Windows\" or python_version < \"3.10\")"]
+couchdb = ["pycouchdb (==1.14.2)"]
+django = ["Django (>=2.2.28)"]
+dynamodb = ["boto3 (>=1.26.143)"]
+elasticsearch = ["elastic-transport (<=8.13.0)", "elasticsearch (<=8.13.0)"]
+eventlet = ["eventlet (>=0.32.0) ; python_version < \"3.10\""]
+gcs = ["google-cloud-storage (>=2.10.0)"]
+gevent = ["gevent (>=1.5.0)"]
+librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""]
+memcache = ["pylibmc (==1.6.3) ; platform_system != \"Windows\""]
+mongodb = ["pymongo[srv] (>=4.0.2)"]
+msgpack = ["msgpack (==1.0.8)"]
+pymemcache = ["python-memcached (>=1.61)"]
+pyro = ["pyro4 (==4.82) ; python_version < \"3.11\""]
+pytest = ["pytest-celery[all] (>=1.0.0)"]
+redis = ["redis (>=4.5.2,!=4.5.5,<6.0.0)"]
+s3 = ["boto3 (>=1.26.143)"]
+slmq = ["softlayer-messaging (>=1.0.3)"]
+solar = ["ephem (==4.1.5) ; platform_python_implementation != \"PyPy\""]
+sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"]
+sqs = ["boto3 (>=1.26.143)", "kombu[sqs] (>=5.3.4)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"]
+tblib = ["tblib (>=1.3.0) ; python_version < \"3.8.0\"", "tblib (>=1.5.0) ; python_version >= \"3.8.0\""]
+yaml = ["PyYAML (>=3.10)"]
+zookeeper = ["kazoo (>=1.3.1)"]
+zstd = ["zstandard (==0.22.0)"]
+
+[[package]]
+name = "certifi"
+version = "2025.1.31"
+description = "Python package for providing Mozilla's CA Bundle."
+optional = false
+python-versions = ">=3.6"
+groups = ["main", "dev"]
+files = [
+    {file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
+    {file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
+]
+
+[[package]]
+name = "cffi"
+version = "1.17.1"
+description = "Foreign Function Interface for Python calling C code."
+optional = false
+python-versions = ">=3.8"
+groups = ["main", "dist"]
+files = [
+    {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"},
+    {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6"},
+    {file = "cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e"},
+    {file = "cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be"},
+    {file = "cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c"},
+    {file = "cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401"},
+    {file = "cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6"},
+    {file = "cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f"},
+    {file = "cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b"},
+    {file = "cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655"},
+    {file = "cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4"},
+    {file = "cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99"},
+    {file = "cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3"},
+    {file = "cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8"},
+    {file = "cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65"},
+    {file = "cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e"},
+    {file = "cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4"},
+    {file = "cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed"},
+    {file = "cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9"},
+    {file = "cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d"},
+    {file = "cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a"},
+    {file = "cffi-1.17.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c"},
+    {file = "cffi-1.17.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1"},
+    {file = "cffi-1.17.1-cp38-cp38-win32.whl", hash = "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8"},
+    {file = "cffi-1.17.1-cp38-cp38-win_amd64.whl", hash = "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16"},
+    {file = "cffi-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0"},
+    {file = "cffi-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a"},
+    {file = "cffi-1.17.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e"},
+    {file = "cffi-1.17.1-cp39-cp39-win32.whl", hash = "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7"},
+    {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"},
+    {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"},
+]
+markers = {dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
+
+[package.dependencies]
+pycparser = "*"
+
+[[package]]
+name = "chardet"
+version = "4.0.0"
+description = "Universal encoding detector for Python 2 and 3"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+groups = ["main", "dev"]
+files = [
+    {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
+    {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
+]
+
+[[package]]
+name = "click"
+version = "8.1.8"
+description = "Composable command line interface toolkit"
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"},
+    {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "platform_system == \"Windows\""}
+
+[[package]]
+name = "click-didyoumean"
+version = "0.3.1"
+description = "Enables git-like *did-you-mean* feature in click"
+optional = false
+python-versions = ">=3.6.2"
+groups = ["main"]
+files = [
+    {file = "click_didyoumean-0.3.1-py3-none-any.whl", hash = "sha256:5c4bb6007cfea5f2fd6583a2fb6701a22a41eb98957e63d0fac41c10e7c3117c"},
+    {file = "click_didyoumean-0.3.1.tar.gz", hash = "sha256:4f82fdff0dbe64ef8ab2279bd6aa3f6a99c3b28c05aa09cbfc07c9d7fbb5a463"},
+]
+
+[package.dependencies]
+click = ">=7"
+
+[[package]]
+name = "click-plugins"
+version = "1.1.1"
+description = "An extension module for click to enable registering CLI commands via setuptools entry-points."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "click-plugins-1.1.1.tar.gz", hash = "sha256:46ab999744a9d831159c3411bb0c79346d94a444df9a3a3742e9ed63645f264b"},
+    {file = "click_plugins-1.1.1-py2.py3-none-any.whl", hash = "sha256:5d262006d3222f5057fd81e1623d4443e41dcda5dc815c06b442aa3c02889fc8"},
+]
+
+[package.dependencies]
+click = ">=4.0"
+
+[package.extras]
+dev = ["coveralls", "pytest (>=3.6)", "pytest-cov", "wheel"]
+
+[[package]]
+name = "click-repl"
+version = "0.3.0"
+description = "REPL plugin for Click"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "click-repl-0.3.0.tar.gz", hash = "sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9"},
+    {file = "click_repl-0.3.0-py3-none-any.whl", hash = "sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812"},
+]
+
+[package.dependencies]
+click = ">=7.0"
+prompt-toolkit = ">=3.0.36"
+
+[package.extras]
+testing = ["pytest (>=7.2.1)", "pytest-cov (>=4.0.0)", "tox (>=4.4.3)"]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+description = "Cross-platform colored terminal text."
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+groups = ["main", "dev"]
+files = [
+    {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
+    {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
+]
+markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", dev = "sys_platform == \"win32\""}
+
+[[package]]
+name = "colorlog"
+version = "5.0.1"
+description = "Add colours to the output of Python's logging module."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "colorlog-5.0.1-py2.py3-none-any.whl", hash = "sha256:4e6be13d9169254e2ded6526a6a4a1abb8ac564f2fa65b310a98e4ca5bea2c04"},
+    {file = "colorlog-5.0.1.tar.gz", hash = "sha256:f17c013a06962b02f4449ee07cfdbe6b287df29efc2c9a1515b4a376f4e588ea"},
+]
+
+[package.dependencies]
+colorama = {version = "*", markers = "sys_platform == \"win32\""}
+
+[[package]]
+name = "coverage"
+version = "5.5"
+description = "Code coverage measurement for Python"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4"
+groups = ["dev"]
+files = [
+    {file = "coverage-5.5-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:b6d534e4b2ab35c9f93f46229363e17f63c53ad01330df9f2d6bd1187e5eaacf"},
+    {file = "coverage-5.5-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:b7895207b4c843c76a25ab8c1e866261bcfe27bfaa20c192de5190121770672b"},
+    {file = "coverage-5.5-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:c2723d347ab06e7ddad1a58b2a821218239249a9e4365eaff6649d31180c1669"},
+    {file = "coverage-5.5-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:900fbf7759501bc7807fd6638c947d7a831fc9fdf742dc10f02956ff7220fa90"},
+    {file = "coverage-5.5-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:004d1880bed2d97151facef49f08e255a20ceb6f9432df75f4eef018fdd5a78c"},
+    {file = "coverage-5.5-cp27-cp27m-win32.whl", hash = "sha256:06191eb60f8d8a5bc046f3799f8a07a2d7aefb9504b0209aff0b47298333302a"},
+    {file = "coverage-5.5-cp27-cp27m-win_amd64.whl", hash = "sha256:7501140f755b725495941b43347ba8a2777407fc7f250d4f5a7d2a1050ba8e82"},
+    {file = "coverage-5.5-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:372da284cfd642d8e08ef606917846fa2ee350f64994bebfbd3afb0040436905"},
+    {file = "coverage-5.5-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:8963a499849a1fc54b35b1c9f162f4108017b2e6db2c46c1bed93a72262ed083"},
+    {file = "coverage-5.5-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:869a64f53488f40fa5b5b9dcb9e9b2962a66a87dab37790f3fcfb5144b996ef5"},
+    {file = "coverage-5.5-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4a7697d8cb0f27399b0e393c0b90f0f1e40c82023ea4d45d22bce7032a5d7b81"},
+    {file = "coverage-5.5-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8d0a0725ad7c1a0bcd8d1b437e191107d457e2ec1084b9f190630a4fb1af78e6"},
+    {file = "coverage-5.5-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:51cb9476a3987c8967ebab3f0fe144819781fca264f57f89760037a2ea191cb0"},
+    {file = "coverage-5.5-cp310-cp310-win_amd64.whl", hash = "sha256:c0891a6a97b09c1f3e073a890514d5012eb256845c451bd48f7968ef939bf4ae"},
+    {file = "coverage-5.5-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:3487286bc29a5aa4b93a072e9592f22254291ce96a9fbc5251f566b6b7343cdb"},
+    {file = "coverage-5.5-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:deee1077aae10d8fa88cb02c845cfba9b62c55e1183f52f6ae6a2df6a2187160"},
+    {file = "coverage-5.5-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:f11642dddbb0253cc8853254301b51390ba0081750a8ac03f20ea8103f0c56b6"},
+    {file = "coverage-5.5-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:6c90e11318f0d3c436a42409f2749ee1a115cd8b067d7f14c148f1ce5574d701"},
+    {file = "coverage-5.5-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:30c77c1dc9f253283e34c27935fded5015f7d1abe83bc7821680ac444eaf7793"},
+    {file = "coverage-5.5-cp35-cp35m-win32.whl", hash = "sha256:9a1ef3b66e38ef8618ce5fdc7bea3d9f45f3624e2a66295eea5e57966c85909e"},
+    {file = "coverage-5.5-cp35-cp35m-win_amd64.whl", hash = "sha256:972c85d205b51e30e59525694670de6a8a89691186012535f9d7dbaa230e42c3"},
+    {file = "coverage-5.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:af0e781009aaf59e25c5a678122391cb0f345ac0ec272c7961dc5455e1c40066"},
+    {file = "coverage-5.5-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:74d881fc777ebb11c63736622b60cb9e4aee5cace591ce274fb69e582a12a61a"},
+    {file = "coverage-5.5-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:92b017ce34b68a7d67bd6d117e6d443a9bf63a2ecf8567bb3d8c6c7bc5014465"},
+    {file = "coverage-5.5-cp36-cp36m-manylinux2010_i686.whl", hash = "sha256:d636598c8305e1f90b439dbf4f66437de4a5e3c31fdf47ad29542478c8508bbb"},
+    {file = "coverage-5.5-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:41179b8a845742d1eb60449bdb2992196e211341818565abded11cfa90efb821"},
+    {file = "coverage-5.5-cp36-cp36m-win32.whl", hash = "sha256:040af6c32813fa3eae5305d53f18875bedd079960822ef8ec067a66dd8afcd45"},
+    {file = "coverage-5.5-cp36-cp36m-win_amd64.whl", hash = "sha256:5fec2d43a2cc6965edc0bb9e83e1e4b557f76f843a77a2496cbe719583ce8184"},
+    {file = "coverage-5.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:18ba8bbede96a2c3dde7b868de9dcbd55670690af0988713f0603f037848418a"},
+    {file = "coverage-5.5-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:2910f4d36a6a9b4214bb7038d537f015346f413a975d57ca6b43bf23d6563b53"},
+    {file = "coverage-5.5-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:f0b278ce10936db1a37e6954e15a3730bea96a0997c26d7fee88e6c396c2086d"},
+    {file = "coverage-5.5-cp37-cp37m-manylinux2010_i686.whl", hash = "sha256:796c9c3c79747146ebd278dbe1e5c5c05dd6b10cc3bcb8389dfdf844f3ead638"},
+    {file = "coverage-5.5-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:53194af30d5bad77fcba80e23a1441c71abfb3e01192034f8246e0d8f99528f3"},
+    {file = "coverage-5.5-cp37-cp37m-win32.whl", hash = "sha256:184a47bbe0aa6400ed2d41d8e9ed868b8205046518c52464fde713ea06e3a74a"},
+    {file = "coverage-5.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2949cad1c5208b8298d5686d5a85b66aae46d73eec2c3e08c817dd3513e5848a"},
+    {file = "coverage-5.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:217658ec7187497e3f3ebd901afdca1af062b42cfe3e0dafea4cced3983739f6"},
+    {file = "coverage-5.5-cp38-cp38-manylinux1_i686.whl", hash = "sha256:1aa846f56c3d49205c952d8318e76ccc2ae23303351d9270ab220004c580cfe2"},
+    {file = "coverage-5.5-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:24d4a7de75446be83244eabbff746d66b9240ae020ced65d060815fac3423759"},
+    {file = "coverage-5.5-cp38-cp38-manylinux2010_i686.whl", hash = "sha256:d1f8bf7b90ba55699b3a5e44930e93ff0189aa27186e96071fac7dd0d06a1873"},
+    {file = "coverage-5.5-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:970284a88b99673ccb2e4e334cfb38a10aab7cd44f7457564d11898a74b62d0a"},
+    {file = "coverage-5.5-cp38-cp38-win32.whl", hash = "sha256:01d84219b5cdbfc8122223b39a954820929497a1cb1422824bb86b07b74594b6"},
+    {file = "coverage-5.5-cp38-cp38-win_amd64.whl", hash = "sha256:2e0d881ad471768bf6e6c2bf905d183543f10098e3b3640fc029509530091502"},
+    {file = "coverage-5.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d1f9ce122f83b2305592c11d64f181b87153fc2c2bbd3bb4a3dde8303cfb1a6b"},
+    {file = "coverage-5.5-cp39-cp39-manylinux1_i686.whl", hash = "sha256:13c4ee887eca0f4c5a247b75398d4114c37882658300e153113dafb1d76de529"},
+    {file = "coverage-5.5-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:52596d3d0e8bdf3af43db3e9ba8dcdaac724ba7b5ca3f6358529d56f7a166f8b"},
+    {file = "coverage-5.5-cp39-cp39-manylinux2010_i686.whl", hash = "sha256:2cafbbb3af0733db200c9b5f798d18953b1a304d3f86a938367de1567f4b5bff"},
+    {file = "coverage-5.5-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:44d654437b8ddd9eee7d1eaee28b7219bec228520ff809af170488fd2fed3e2b"},
+    {file = "coverage-5.5-cp39-cp39-win32.whl", hash = "sha256:d314ed732c25d29775e84a960c3c60808b682c08d86602ec2c3008e1202e3bb6"},
+    {file = "coverage-5.5-cp39-cp39-win_amd64.whl", hash = "sha256:13034c4409db851670bc9acd836243aeee299949bd5673e11844befcb0149f03"},
+    {file = "coverage-5.5-pp36-none-any.whl", hash = "sha256:f030f8873312a16414c0d8e1a1ddff2d3235655a2174e3648b4fa66b3f2f1079"},
+    {file = "coverage-5.5-pp37-none-any.whl", hash = "sha256:2a3859cb82dcbda1cfd3e6f71c27081d18aa251d20a17d87d26d4cd216fb0af4"},
+    {file = "coverage-5.5.tar.gz", hash = "sha256:ebe78fe9a0e874362175b02371bdfbee64d8edc42a044253ddf4ee7d3c15212c"},
+]
+
+[package.extras]
+toml = ["toml"]
+
+[[package]]
+name = "coveralls"
+version = "3.1.0"
+description = "Show coverage stats online via coveralls.io"
+optional = false
+python-versions = ">= 3.5"
+groups = ["dev"]
+files = [
+    {file = "coveralls-3.1.0-py2.py3-none-any.whl", hash = "sha256:172fb79c5f61c6ede60554f2cac46deff6d64ee735991fb2124fb414e188bdb4"},
+    {file = "coveralls-3.1.0.tar.gz", hash = "sha256:9b3236e086627340bf2c95f89f757d093cbed43d17179d3f4fb568c347e7d29a"},
+]
+
+[package.dependencies]
+coverage = ">=4.1,<6.0"
+docopt = ">=0.6.1"
+requests = ">=1.0.0"
+
+[package.extras]
+yaml = ["PyYAML (>=3.10)"]
+
+[[package]]
+name = "cron-descriptor"
+version = "1.4.5"
+description = "A Python library that converts cron expressions into human readable strings."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "cron_descriptor-1.4.5-py3-none-any.whl", hash = "sha256:736b3ae9d1a99bc3dbfc5b55b5e6e7c12031e7ba5de716625772f8b02dcd6013"},
+    {file = "cron_descriptor-1.4.5.tar.gz", hash = "sha256:f51ce4ffc1d1f2816939add8524f206c376a42c87a5fca3091ce26725b3b1bca"},
+]
+
+[package.extras]
+dev = ["polib"]
+
+[[package]]
+name = "cryptography"
+version = "44.0.2"
+description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
+optional = false
+python-versions = "!=3.9.0,!=3.9.1,>=3.7"
+groups = ["main"]
+files = [
+    {file = "cryptography-44.0.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:efcfe97d1b3c79e486554efddeb8f6f53a4cdd4cf6086642784fa31fc384e1d7"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29ecec49f3ba3f3849362854b7253a9f59799e3763b0c9d0826259a88efa02f1"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc821e161ae88bfe8088d11bb39caf2916562e0a2dc7b6d56714a48b784ef0bb"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3c00b6b757b32ce0f62c574b78b939afab9eecaf597c4d624caca4f9e71e7843"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7bdcd82189759aba3816d1f729ce42ffded1ac304c151d0a8e89b9996ab863d5"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:4973da6ca3db4405c54cd0b26d328be54c7747e89e284fcff166132eb7bccc9c"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4e389622b6927d8133f314949a9812972711a111d577a5d1f4bee5e58736b80a"},
+    {file = "cryptography-44.0.2-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:f514ef4cd14bb6fb484b4a60203e912cfcb64f2ab139e88c2274511514bf7308"},
+    {file = "cryptography-44.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1bc312dfb7a6e5d66082c87c34c8a62176e684b6fe3d90fcfe1568de675e6688"},
+    {file = "cryptography-44.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b721b8b4d948b218c88cb8c45a01793483821e709afe5f622861fc6182b20a7"},
+    {file = "cryptography-44.0.2-cp37-abi3-win32.whl", hash = "sha256:51e4de3af4ec3899d6d178a8c005226491c27c4ba84101bfb59c901e10ca9f79"},
+    {file = "cryptography-44.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:c505d61b6176aaf982c5717ce04e87da5abc9a36a5b39ac03905c4aafe8de7aa"},
+    {file = "cryptography-44.0.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8e0ddd63e6bf1161800592c71ac794d3fb8001f2caebe0966e77c5234fa9efc3"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81276f0ea79a208d961c433a947029e1a15948966658cf6710bbabb60fcc2639"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a1e657c0f4ea2a23304ee3f964db058c9e9e635cc7019c4aa21c330755ef6fd"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6210c05941994290f3f7f175a4a57dbbb2afd9273657614c506d5976db061181"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d1c3572526997b36f245a96a2b1713bf79ce99b271bbcf084beb6b9b075f29ea"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:b042d2a275c8cee83a4b7ae30c45a15e6a4baa65a179a0ec2d78ebb90e4f6699"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d03806036b4f89e3b13b6218fefea8d5312e450935b1a2d55f0524e2ed7c59d9"},
+    {file = "cryptography-44.0.2-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c7362add18b416b69d58c910caa217f980c5ef39b23a38a0880dfd87bdf8cd23"},
+    {file = "cryptography-44.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8cadc6e3b5a1f144a039ea08a0bdb03a2a92e19c46be3285123d32029f40a922"},
+    {file = "cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4"},
+    {file = "cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5"},
+    {file = "cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6"},
+    {file = "cryptography-44.0.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:af4ff3e388f2fa7bff9f7f2b31b87d5651c45731d3e8cfa0944be43dff5cfbdb"},
+    {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0529b1d5a0105dd3731fa65680b45ce49da4d8115ea76e9da77a875396727b41"},
+    {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7ca25849404be2f8e4b3c59483d9d3c51298a22c1c61a0e84415104dacaf5562"},
+    {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:268e4e9b177c76d569e8a145a6939eca9a5fec658c932348598818acf31ae9a5"},
+    {file = "cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:9eb9d22b0a5d8fd9925a7764a054dca914000607dff201a24c791ff5c799e1fa"},
+    {file = "cryptography-44.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2bf7bf75f7df9715f810d1b038870309342bff3069c5bd8c6b96128cb158668d"},
+    {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d"},
+    {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471"},
+    {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615"},
+    {file = "cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:04abd71114848aa25edb28e225ab5f268096f44cf0127f3d36975bdf1bdf3390"},
+    {file = "cryptography-44.0.2.tar.gz", hash = "sha256:c63454aa261a0cf0c5b4718349629793e9e634993538db841165b3df74f37ec0"},
+]
+
+[package.dependencies]
+cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""}
+
+[package.extras]
+docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0) ; python_version >= \"3.8\""]
+docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"]
+nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2) ; python_version >= \"3.8\""]
+pep8test = ["check-sdist ; python_version >= \"3.8\"", "click (>=8.0.1)", "mypy (>=1.4)", "ruff (>=0.3.6)"]
+sdist = ["build (>=1.0.0)"]
+ssh = ["bcrypt (>=3.1.5)"]
+test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"]
+test-randomorder = ["pytest-randomly"]
+
+[[package]]
+name = "django"
+version = "3.2.25"
+description = "A high-level Python Web framework that encourages rapid development and clean, pragmatic design."
+optional = false
+python-versions = ">=3.6"
+groups = ["main", "dev"]
+files = [
+    {file = "Django-3.2.25-py3-none-any.whl", hash = "sha256:a52ea7fcf280b16f7b739cec38fa6d3f8953a5456986944c3ca97e79882b4e38"},
+    {file = "Django-3.2.25.tar.gz", hash = "sha256:7ca38a78654aee72378594d63e51636c04b8e28574f5505dff630895b5472777"},
+]
+
+[package.dependencies]
+asgiref = ">=3.3.2,<4"
+pytz = "*"
+sqlparse = ">=0.2.2"
+
+[package.extras]
+argon2 = ["argon2-cffi (>=19.1.0)"]
+bcrypt = ["bcrypt"]
+
+[[package]]
+name = "django-allauth"
+version = "0.63.6"
+description = "Integrated set of Django applications addressing authentication, registration, account management as well as 3rd party (social) account authentication."
+optional = false
+python-versions = ">=3.7"
+groups = ["main"]
+files = [
+    {file = "django_allauth-0.63.6.tar.gz", hash = "sha256:f15f49defb09e0604dad5214e53a69a1f723cb03176bb115c8930bcd19b91749"},
+]
+
+[package.dependencies]
+Django = ">=3.2"
+
+[package.extras]
+mfa = ["qrcode (>=7.0.0)"]
+openid = ["python3-openid (>=3.0.8)"]
+saml = ["python3-saml (>=1.15.0,<2.0.0)"]
+socialaccount = ["pyjwt[crypto] (>=1.7)", "requests (>=2.0.0)", "requests-oauthlib (>=0.3.0)"]
+steam = ["python3-openid (>=3.0.8)"]
+
+[[package]]
+name = "django-celery-beat"
+version = "2.6.0"
+description = "Database-backed Periodic Tasks."
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "django-celery-beat-2.6.0.tar.gz", hash = "sha256:f75b2d129731f1214be8383e18fae6bfeacdb55dffb2116ce849222c0106f9ad"},
+]
+
+[package.dependencies]
+celery = ">=5.2.3,<6.0"
+cron-descriptor = ">=1.2.32"
+Django = ">=2.2,<5.1"
+django-timezone-field = ">=5.0"
+python-crontab = ">=2.3.4"
+tzdata = "*"
+
+[[package]]
+name = "django-cors-headers"
+version = "3.7.0"
+description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "django-cors-headers-3.7.0.tar.gz", hash = "sha256:96069c4aaacace786a34ee7894ff680780ec2644e4268b31181044410fecd12e"},
+    {file = "django_cors_headers-3.7.0-py3-none-any.whl", hash = "sha256:1ac2b1213de75a251e2ba04448da15f99bcfcbe164288ae6b5ff929dc49b372f"},
+]
+
+[package.dependencies]
+Django = ">=2.2"
+
+[[package]]
+name = "django-debug-toolbar"
+version = "3.2.1"
+description = "A configurable set of panels that display various debug information about the current request/response."
+optional = false
+python-versions = ">=3.6"
+groups = ["dev"]
+files = [
+    {file = "django-debug-toolbar-3.2.1.tar.gz", hash = "sha256:a5ff2a54f24bf88286f9872836081078f4baa843dc3735ee88524e89f8821e33"},
+    {file = "django_debug_toolbar-3.2.1-py3-none-any.whl", hash = "sha256:e759e63e3fe2d3110e0e519639c166816368701eab4a47fed75d7de7018467b9"},
+]
+
+[package.dependencies]
+Django = ">=2.2"
+sqlparse = ">=0.2.0"
+
+[[package]]
+name = "django-extensions"
+version = "3.1.3"
+description = "Extensions for Django"
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = [
+    {file = "django-extensions-3.1.3.tar.gz", hash = "sha256:5f0fea7bf131ca303090352577a9e7f8bfbf5489bd9d9c8aea9401db28db34a0"},
+    {file = "django_extensions-3.1.3-py3-none-any.whl", hash = "sha256:50de8977794a66a91575dd40f87d5053608f679561731845edbd325ceeb387e3"},
+]
+
+[package.dependencies]
+Django = ">=2.2"
+
+[[package]]
+name = "django-filter"
+version = "2.4.0"
+description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically."
+optional = false
+python-versions = ">=3.5"
+groups = ["main"]
+files = [
+    {file = "django-filter-2.4.0.tar.gz", hash = "sha256:84e9d5bb93f237e451db814ed422a3a625751cbc9968b484ecc74964a8696b06"},
+    {file = "django_filter-2.4.0-py3-none-any.whl", hash = "sha256:e00d32cebdb3d54273c48f4f878f898dced8d5dfaad009438fe61ebdf535ace1"},
+]
+
+[package.dependencies]
+Django = ">=2.2"
+
+[[package]]
+name = "django-oauth-toolkit"
+version = "1.7.1"
+description = "OAuth2 Provider for Django"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "django-oauth-toolkit-1.7.1.tar.gz", hash = "sha256:37b690fa53f340c7391bdbc0fdbb32fd9ef8a7c012e72ee8754c331a2d7b4adb"},
+    {file = "django_oauth_toolkit-1.7.1-py3-none-any.whl", hash = "sha256:756e44421d0993f27705736b6c33a3d89018393859a31ac926296950f76e4433"},
+]
+
+[package.dependencies]
+django = ">=2.2,<4.0.0 || >4.0.0"
+jwcrypto = ">=0.8.0"
+oauthlib = ">=3.1.0"
+requests = ">=2.13.0"
+
+[[package]]
+name = "django-timezone-field"
+version = "7.1"
+description = "A Django app providing DB, form, and REST framework fields for zoneinfo and pytz timezone objects."
+optional = false
+python-versions = "<4.0,>=3.8"
+groups = ["main"]
+files = [
+    {file = "django_timezone_field-7.1-py3-none-any.whl", hash = "sha256:93914713ed882f5bccda080eda388f7006349f25930b6122e9b07bf8db49c4b4"},
+    {file = "django_timezone_field-7.1.tar.gz", hash = "sha256:b3ef409d88a2718b566fabe10ea996f2838bc72b22d3a2900c0aa905c761380c"},
+]
+
+[package.dependencies]
+Django = ">=3.2,<6.0"
+
+[[package]]
+name = "djangorestframework"
+version = "3.12.4"
+description = "Web APIs for Django, made easy."
+optional = false
+python-versions = ">=3.5"
+groups = ["main"]
+files = [
+    {file = "djangorestframework-3.12.4-py3-none-any.whl", hash = "sha256:6d1d59f623a5ad0509fe0d6bfe93cbdfe17b8116ebc8eda86d45f6e16e819aaf"},
+    {file = "djangorestframework-3.12.4.tar.gz", hash = "sha256:f747949a8ddac876e879190df194b925c177cdeb725a099db1460872f7c0a7f2"},
+]
+
+[package.dependencies]
+django = ">=2.2"
+
+[[package]]
+name = "djangorestframework-jsonapi"
+version = "4.2.1+cos0"
+description = "A Django REST framework API adapter for the JSON API spec."
+optional = false
+python-versions = ">=3.6"
+groups = ["main"]
+files = []
+develop = false
+
+[package.dependencies]
+django = ">=2.2,<3.3"
+djangorestframework = ">=3.12,<3.13"
+inflection = ">=0.3.0"
+
+[package.extras]
+django-filter = ["django-filter (>=2.0)"]
+django-polymorphic = ["django-polymorphic (>=2.0)"]
+openapi = ["pyyaml (>=5.3)", "uritemplate (>=3.0.1)"]
+
+[package.source]
+type = "git"
+url = "https://github.com/cos-forks/django-rest-framework-json-api.git"
+reference = "9858f712e07d5cdbd79b8cc29b6aac90df9be4e9"
+resolved_reference = "9858f712e07d5cdbd79b8cc29b6aac90df9be4e9"
+
+[[package]]
+name = "docopt"
+version = "0.6.2"
+description = "Pythonic argument parser, that will make you smile"
+optional = false
+python-versions = "*"
+groups = ["dev"]
+files = [
+    {file = "docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491"},
+]
+
+[[package]]
+name = "elastic-transport"
+version = "8.17.1"
+description = "Transport classes and utilities shared among Python Elastic client libraries"
+optional = false
+python-versions = ">=3.8"
+groups = ["main"]
+files = [
+    {file = "elastic_transport-8.17.1-py3-none-any.whl", hash = "sha256:192718f498f1d10c5e9aa8b9cf32aed405e469a7f0e9d6a8923431dbb2c59fb8"},
+    {file = "elastic_transport-8.17.1.tar.gz", hash = "sha256:5edef32ac864dca8e2f0a613ef63491ee8d6b8cfb52881fa7313ba9290cac6d2"},
+]
+
+[package.dependencies]
+certifi = "*"
+urllib3 = ">=1.26.2,<3"
+
+[package.extras]
+develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"]
+
+[[package]]
+name = "elasticsearch5"
+version = "5.5.6"
+description = "Python client for Elasticsearch"
+optional = false
+python-versions = "*"
+groups = ["main"]
+files = [
+    {file = "elasticsearch5-5.5.6-py2.py3-none-any.whl", hash = "sha256:3d95aef3317b1e28288ab8dd2ee38e2a6aae96df14ffcd4ecbea4f681dc4891d"},
+    {file = "elasticsearch5-5.5.6.tar.gz", hash = "sha256:331ce226182c75cfdf6b823f9f30b5a555fa91b85f1d05ac9958758150e2e8c7"},
+]
+
+[package.dependencies]
+urllib3 = ">=1.21.1"
+
+[package.extras]
+develop = ["coverage", "mock", "nose", "nosexcover", "pyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-rtd-theme"]
+
+[[package]]
+name = "elasticsearch8"
+version = "8.5.2"
+description = "Python client for Elasticsearch"
+optional = false
+python-versions = ">=3.6, <4"
+groups = ["main"]
+files = [
+    {file = "elasticsearch8-8.5.2-py3-none-any.whl", hash = "sha256:9646a65b2c0ca9094000319028be38918d3592b782df45f647b971faa88d61b7"},
+    {file = "elasticsearch8-8.5.2.tar.gz", hash = "sha256:be99ca2c48f3671b2cbfd3675a67dfbdb804f213f254c0e03b3d511ca29b1e5d"},
+]
+
+[package.dependencies]
+elastic-transport = ">=8,<9"
+
+[package.extras]
+async = ["aiohttp (>=3,<4)"]
+requests = ["requests (>=2.4.0,<3.0.0)"]
+
+[[package]]
+name = "factory-boy"
+version = "3.2.0"
+description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby."
+optional = false
+python-versions = ">=3.6"
+groups = ["dev"]
+files = [
+    {file = "factory_boy-3.2.0-py2.py3-none-any.whl", hash = "sha256:1d3db4b44b8c8c54cdd8b83ae4bdb9aeb121e464400035f1f03ae0e1eade56a4"},
+    {file = "factory_boy-3.2.0.tar.gz", hash = "sha256:401cc00ff339a022f84d64a4339503d1689e8263a4478d876e58a3295b155c5b"},
+]
+
+[package.dependencies]
+Faker = ">=0.7.0"
+
+[package.extras]
+dev = ["Django", "Pillow", "SQLAlchemy", "coverage", "flake8", "isort", "mongoengine", "tox", "wheel (>=0.32.0)", "zest.releaser[recommended]"]
+doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"]
+
+[[package]]
+name = "faker"
+version = "8.10.0"
+description = "Faker is a Python package that generates fake data for you."
+optional = false
+python-versions = ">=3.6"
+groups = ["dev"]
+files = [
+    {file = "Faker-8.10.0-py3-none-any.whl", hash = "sha256:7df5697bc712bdd2f98051246ffd7bbac10104602727053b736e90d8adcaa5ad"},
+    {file = "Faker-8.10.0.tar.gz", hash = "sha256:198684f146590986cde75307f12f378c899379ef9f2bc962bd25ddd005b4e7c3"},
+]
+
+[package.dependencies]
+python-dateutil = ">=2.4"
+text-unidecode = "1.3"
+
+[[package]]
+name = "flake8"
+version = "5.0.4"
+description = "the modular source code checker: pep8 pyflakes and co"
+optional = false
+python-versions = ">=3.6.1"
+groups = ["dev"]
+files = [
+    {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
+    {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
+]
+
+[package.dependencies]
+mccabe = ">=0.7.0,<0.8.0"
+pycodestyle = ">=2.9.0,<2.10.0"
+pyflakes = ">=2.5.0,<2.6.0"
+
+[[package]]
+name = "gevent"
+version = "22.10.2"
+description = "Coroutine-based network library"
+optional = false
+python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5"
+groups = ["dist"]
+files = [
+    {file = "gevent-22.10.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:97cd42382421779f5d82ec5007199e8a84aa288114975429e4fd0a98f2290f10"},
+    {file = "gevent-22.10.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:1e1286a76f15b5e15f1e898731d50529e249529095a032453f2c101af3fde71c"},
+    {file = "gevent-22.10.2-cp27-cp27m-win32.whl", hash = "sha256:59b47e81b399d49a5622f0f503c59f1ce57b7705306ea0196818951dfc2f36c8"},
+    {file = "gevent-22.10.2-cp27-cp27m-win_amd64.whl", hash = "sha256:1d543c9407a1e4bca11a8932916988cfb16de00366de5bf7bc9e7a3f61e60b18"},
+    {file = "gevent-22.10.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4e2f008c82dc54ec94f4de12ca6feea60e419babb48ec145456907ae61625aa4"},
+    {file = "gevent-22.10.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:990d7069f14dc40674e0d5cb43c68fd3bad8337048613b9bb94a0c4180ffc176"},
+    {file = "gevent-22.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f23d0997149a816a2a9045af29c66f67f405a221745b34cefeac5769ed451db8"},
+    {file = "gevent-22.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b43d500d7d3c0e03070dee813335bb5315215aa1cf6a04c61093dfdd718640b3"},
+    {file = "gevent-22.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b68f4c9e20e47ad49fe797f37f91d5bbeace8765ce2707f979a8d4ec197e4d"},
+    {file = "gevent-22.10.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1f001cac0ba8da76abfeb392a3057f81fab3d67cc916c7df8ea977a44a2cc989"},
+    {file = "gevent-22.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:3b7eae8a0653ba95a224faaddf629a913ace408edb67384d3117acf42d7dcf89"},
+    {file = "gevent-22.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8f2477e7b0a903a01485c55bacf2089110e5f767014967ba4b287ff390ae2638"},
+    {file = "gevent-22.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ddaa3e310a8f1a45b5c42cf50b54c31003a3028e7d4e085059090ea0e7a5fddd"},
+    {file = "gevent-22.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98bc510e80f45486ef5b806a1c305e0e89f0430688c14984b0dbdec03331f48b"},
+    {file = "gevent-22.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:877abdb3a669576b1d51ce6a49b7260b2a96f6b2424eb93287e779a3219d20ba"},
+    {file = "gevent-22.10.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d21ad79cca234cdbfa249e727500b0ddcbc7adfff6614a96e6eaa49faca3e4f2"},
+    {file = "gevent-22.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e955238f59b2947631c9782a713280dd75884e40e455313b5b6bbc20b92ff73"},
+    {file = "gevent-22.10.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5aa99e4882a9e909b4756ee799c6fa0f79eb0542779fad4cc60efa23ec1b2aa8"},
+    {file = "gevent-22.10.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:d82081656a5b9a94d37c718c8646c757e1617e389cdc533ea5e6a6f0b8b78545"},
+    {file = "gevent-22.10.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54f4bfd74c178351a4a05c5c7df6f8a0a279ff6f392b57608ce0e83c768207f9"},
+    {file = "gevent-22.10.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ff3796692dff50fec2f381b9152438b221335f557c4f9b811f7ded51b7a25a1"},
+    {file = "gevent-22.10.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f01c9adbcb605364694b11dcd0542ec468a29ac7aba2fb5665dc6caf17ba4d7e"},
+    {file = "gevent-22.10.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:9d85574eb729f981fea9a78998725a06292d90a3ed50ddca74530c3148c0be41"},
+    {file = "gevent-22.10.2-cp36-cp36m-win32.whl", hash = "sha256:8c192d2073e558e241f0b592c1e2b34127a4481a5be240cad4796533b88b1a98"},
+    {file = "gevent-22.10.2-cp36-cp36m-win_amd64.whl", hash = "sha256:a2237451c721a0f874ef89dbb4af4fdc172b76a964befaa69deb15b8fff10f49"},
+    {file = "gevent-22.10.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:53ee7f170ed42c7561fe8aff5d381dc9a4124694e70580d0c02fba6aafc0ea37"},
+    {file = "gevent-22.10.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:96c56c280e3c43cfd075efd10b250350ed5ffd3c1514ec99a080b1b92d7c8374"},
+    {file = "gevent-22.10.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6c144e08dfad4106effc043a026e5d0c0eff6ad031904c70bf5090c63f3a6a7"},
+    {file = "gevent-22.10.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:018f93de7d5318d2fb440f846839a4464738468c3476d5c9cf7da45bb71c18bd"},
+    {file = "gevent-22.10.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7ed2346eb9dc4344f9cb0d7963ce5b74fe16fdd031a2809bb6c2b6eba7ebcd5"},
+    {file = "gevent-22.10.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:84c517e33ed604fa06b7d756dc0171169cc12f7fdd68eb7b17708a62eebf4516"},
+    {file = "gevent-22.10.2-cp37-cp37m-win32.whl", hash = "sha256:4114f0f439f0b547bb6f1d474fee99ddb46736944ad2207cef3771828f6aa358"},
+    {file = "gevent-22.10.2-cp37-cp37m-win_amd64.whl", hash = "sha256:0d581f22a5be6281b11ad6309b38b18f0638cf896931223cbaa5adb904826ef6"},
+    {file = "gevent-22.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2929377c8ebfb6f4d868d161cd8de2ea6b9f6c7a5fcd4f78bcd537319c16190b"},
+    {file = "gevent-22.10.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:efc003b6c1481165af61f0aeac248e0a9ac8d880bb3acbe469b448674b2d5281"},
+    {file = "gevent-22.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db562a8519838bddad0c439a2b12246bab539dd50e299ea7ff3644274a33b6a5"},
+    {file = "gevent-22.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1472012493ca1fac103f700d309cb6ef7964dcdb9c788d1768266e77712f5e49"},
+    {file = "gevent-22.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c04ee32c11e9fcee47c1b431834878dc987a7a2cc4fe126ddcae3bad723ce89"},
+    {file = "gevent-22.10.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8729129edef2637a8084258cb9ec4e4d5ca45d97ac77aa7a6ff19ccb530ab731"},
+    {file = "gevent-22.10.2-cp38-cp38-win32.whl", hash = "sha256:ae90226074a6089371a95f20288431cd4b3f6b0b096856afd862e4ac9510cddd"},
+    {file = "gevent-22.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:494c7f29e94df9a1c3157d67bb7edfa32a46eed786e04d9ee68d39f375e30001"},
+    {file = "gevent-22.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:58898dbabb5b11e4d0192aae165ad286dc6742c543e1be9d30dc82753547c508"},
+    {file = "gevent-22.10.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:4197d423e198265eef39a0dea286ef389da9148e070310f34455ecee8172c391"},
+    {file = "gevent-22.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da4183f0b9d9a1e25e1758099220d32c51cc2c6340ee0dea3fd236b2b37598e4"},
+    {file = "gevent-22.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5488eba6a568b4d23c072113da4fc0feb1b5f5ede7381656dc913e0d82204e2"},
+    {file = "gevent-22.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:319d8b1699b7b8134de66d656cd739b308ab9c45ace14d60ae44de7775b456c9"},
+    {file = "gevent-22.10.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f3329bedbba4d3146ae58c667e0f9ac1e6f1e1e6340c7593976cdc60aa7d1a47"},
+    {file = "gevent-22.10.2-cp39-cp39-win32.whl", hash = "sha256:172caa66273315f283e90a315921902cb6549762bdcb0587fd60cb712a9d6263"},
+    {file = "gevent-22.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:323b207b281ba0405fea042067fa1a61662e5ac0d574ede4ebbda03efd20c350"},
+    {file = "gevent-22.10.2-pp27-pypy_73-win_amd64.whl", hash = "sha256:ed7f16613eebf892a6a744d7a4a8f345bc6f066a0ff3b413e2479f9c0a180193"},
+    {file = "gevent-22.10.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a47a4e77e2bc668856aad92a0b8de7ee10768258d93cd03968e6c7ba2e832f76"},
+    {file = "gevent-22.10.2.tar.gz", hash = "sha256:1ca01da176ee37b3527a2702f7d40dbc9ffb8cfc7be5a03bfa4f9eec45e55c46"},
+]
+
+[package.dependencies]
+cffi = {version = ">=1.12.2", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
+greenlet = {version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\""}
+setuptools = "*"
+"zope.event" = "*"
+"zope.interface" = "*"
+
+[package.extras]
+dnspython = ["dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\""]
+docs = ["repoze.sphinx.autointerface", "sphinxcontrib-programoutput", "zope.schema"]
+monitor = ["psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\""]
+recommended = ["backports.socketpair ; python_version == \"2.7\" and sys_platform == \"win32\"", "cffi (>=1.12.2) ; platform_python_implementation == \"CPython\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\"", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\"", "selectors2 ; python_version == \"2.7\""]
+test = ["backports.socketpair ; python_version == \"2.7\" and sys_platform == \"win32\"", "cffi (>=1.12.2) ; platform_python_implementation == \"CPython\"", "contextvars (==2.4) ; python_version > \"3.0\" and python_version < \"3.7\"", "coverage (>=5.0) ; sys_platform != \"win32\"", "coveralls (>=1.7.0) ; sys_platform != \"win32\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "futures ; python_version == \"2.7\"", "idna ; python_version < \"3.10\"", "mock ; python_version == \"2.7\"", "objgraph", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\"", "requests", "selectors2 ; python_version == \"2.7\""]
+
+[[package]]
+name = "greenlet"
+version = "3.1.1"
+description = "Lightweight in-process concurrent programming"
+optional = false
+python-versions = ">=3.7"
+groups = ["dist"]
+markers = "platform_python_implementation == \"CPython\""
+files = [
+    {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"},
+    {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"},
+    {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0"},
+    {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120"},
+    {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc"},
+    {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617"},
+    {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7"},
+    {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6"},
+    {file = "greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80"},
+    {file = "greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70"},
+    {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159"},
+    {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e"},
+    {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1"},
+    {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383"},
+    {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a"},
+    {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511"},
+    {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395"},
+    {file = "greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39"},
+    {file = "greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d"},
+    {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79"},
+    {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa"},
+    {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441"},
+    {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36"},
+    {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9"},
+    {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0"},
+    {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942"},
+    {file = "greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01"},
+    {file = "greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1"},
+    {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff"},
+    {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a"},
+    {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e"},
+    {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4"},
+    {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e"},
+    {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1"},
+    {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c"},
+    {file = "greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761"},
+    {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash =
"sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47da355d8687fd65240c364c90a31569a133b7b60de111c255ef5b606f2ae291"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98884ecf2ffb7d7fe6bd517e8eb99d31ff7855a840fa6d0d63cd07c037f6a981"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1d4aeb8891338e60d1ab6127af1fe45def5259def8094b9c7e34690c8858803"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db32b5348615a04b82240cc67983cb315309e88d444a288934ee6ceaebcad6cc"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc62f31eae24de7f8dce72134c8651c58000d3b1868e01392baea7c32c247de"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1d3755bcb2e02de341c55b4fca7a745a24a9e7212ac953f6b3a48d117d7257aa"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8da394b34370874b4572676f36acabac172602abf054cbc4ac910219f3340af"}, + {file = "greenlet-3.1.1-cp37-cp37m-win32.whl", hash = "sha256:a0dfc6c143b519113354e780a50381508139b07d2177cb6ad6a08278ec655798"}, + {file = "greenlet-3.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:54558ea205654b50c438029505def3834e80f0869a70fb15b871c29b4575ddef"}, + {file = "greenlet-3.1.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:346bed03fe47414091be4ad44786d1bd8bef0c3fcad6ed3dee074a032ab408a9"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfc59d69fc48664bc693842bd57acfdd490acafda1ab52c7836e3fc75c90a111"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21e10da6ec19b457b82636209cbe2331ff4306b54d06fa04b7c138ba18c8a81"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37b9de5a96111fc15418819ab4c4432e4f3c2ede61e660b1e33971eba26ef9ba"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef9ea3f137e5711f0dbe5f9263e8c009b7069d8a1acea822bd5e9dae0ae49c8"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85f3ff71e2e60bd4b4932a043fbbe0f499e263c628390b285cb599154a3b03b1"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:95ffcf719966dd7c453f908e208e14cde192e09fde6c7186c8f1896ef778d8cd"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:03a088b9de532cbfe2ba2034b2b85e82df37874681e8c470d6fb2f8c04d7e4b7"}, + {file = "greenlet-3.1.1-cp38-cp38-win32.whl", hash = "sha256:8b8b36671f10ba80e159378df9c4f15c14098c4fd73a36b9ad715f057272fbef"}, + {file = "greenlet-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:7017b2be767b9d43cc31416aba48aab0d2309ee31b4dbf10a1d38fb7972bdf9d"}, + {file = "greenlet-3.1.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:396979749bd95f018296af156201d6211240e7a23090f50a8d5d18c370084dc3"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca9d0ff5ad43e785350894d97e13633a66e2b50000e8a183a50a88d834752d42"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94ebba31df2aa506d7b14866fed00ac141a867e63143fe5bca82a8e503b36437"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aaad12ac0ff500f62cebed98d8789198ea0e6f233421059fa68a5aa7220145"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63e4844797b975b9af3a3fb8f7866ff08775f5426925e1e0bbcfe7932059a12c"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7939aa3ca7d2a1593596e7ac6d59391ff30281ef280d8632fa03d81f7c5f955e"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d0028e725ee18175c6e422797c407874da24381ce0690d6b9396c204c7f7276e"}, + {file = "greenlet-3.1.1-cp39-cp39-win32.whl", hash = "sha256:5e06afd14cbaf9e00899fae69b24a32f2196c19de08fcb9f4779dd4f004e5e7c"}, + {file = "greenlet-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:3319aa75e0e0639bc15ff54ca327e8dc7a6fe404003496e3c6925cd3142e0e22"}, + {file = "greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil"] + +[[package]] +name = "idna" +version = "2.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["main", "dev"] +files = [ + {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, + {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, +] + +[[package]] +name = "inflection" +version = "0.5.1" +description = "A port of Ruby on Rails inflector to Python" +optional = false +python-versions = ">=3.5" +groups = ["main"] +files = [ + {file = "inflection-0.5.1-py2.py3-none-any.whl", hash = "sha256:f38b2b640938a4f35ade69ac3d053042959b62a0f1076a5bbaa1b9526605a8a2"}, + {file = "inflection-0.5.1.tar.gz", hash = "sha256:1a29730d366e996aaacffb2f1f1cb9593dc38e2ddd30c91250c6dde09ea9b417"}, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = 
"sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + +[[package]] +name = "isodate" +version = "0.6.1" +description = "An ISO 8601 date/time/duration parser and formatter" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, + {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, +] + +[package.dependencies] +six = "*" + +[[package]] +name = "jwcrypto" +version = "1.5.6" +description = "Implementation of JOSE Web standards" +optional = false +python-versions = ">= 3.8" +groups = ["main"] +files = [ + {file = "jwcrypto-1.5.6-py3-none-any.whl", hash = "sha256:150d2b0ebbdb8f40b77f543fb44ffd2baeff48788be71f67f03566692fd55789"}, + {file = "jwcrypto-1.5.6.tar.gz", hash = "sha256:771a87762a0c081ae6166958a954f80848820b2ab066937dc8b8379d65b1b039"}, +] + +[package.dependencies] +cryptography = ">=3.4" +typing-extensions = ">=4.5.0" + +[[package]] +name = "kombu" +version = "5.3.7" +description = "Messaging library for Python." +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "kombu-5.3.7-py3-none-any.whl", hash = "sha256:5634c511926309c7f9789f1433e9ed402616b56836ef9878f01bd59267b4c7a9"}, + {file = "kombu-5.3.7.tar.gz", hash = "sha256:011c4cd9a355c14a1de8d35d257314a1d2456d52b7140388561acac3cf1a97bf"}, +] + +[package.dependencies] +amqp = ">=5.1.1,<6.0.0" +vine = "*" + +[package.extras] +azureservicebus = ["azure-servicebus (>=7.10.0)"] +azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] +confluentkafka = ["confluent-kafka (>=2.2.0)"] +consul = ["python-consul2"] +librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] +mongodb = ["pymongo (>=4.1.1)"] +msgpack = ["msgpack"] +pyro = ["pyro4"] +qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] +redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2)"] +slmq = ["softlayer-messaging (>=1.0.3)"] +sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] +sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] +yaml = ["PyYAML (>=3.10)"] +zookeeper = ["kazoo (>=2.8.0)"] + +[[package]] +name = "lxml" +version = "4.9.1" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +groups = ["main"] +files = [ + {file = "lxml-4.9.1-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc"}, + {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc"}, + {file = "lxml-4.9.1-cp27-cp27m-win32.whl", hash = "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3"}, + {file = "lxml-4.9.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627"}, + {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84"}, + {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5"}, + {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8"}, + {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d"}, + {file = "lxml-4.9.1-cp310-cp310-win32.whl", hash = "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7"}, + {file = "lxml-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d"}, + {file = "lxml-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3"}, + {file = "lxml-4.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d"}, + {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318"}, + {file = "lxml-4.9.1-cp35-cp35m-win32.whl", hash = "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7"}, + {file = "lxml-4.9.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4"}, + {file = "lxml-4.9.1-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb"}, + {file = 
"lxml-4.9.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b"}, + {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3"}, + {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391"}, + {file = "lxml-4.9.1-cp36-cp36m-win32.whl", hash = "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e"}, + {file = "lxml-4.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7"}, + {file = "lxml-4.9.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3"}, + {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785"}, + {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785"}, + {file = "lxml-4.9.1-cp37-cp37m-win32.whl", hash = "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a"}, + {file = "lxml-4.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e"}, + {file = "lxml-4.9.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21"}, + {file = 
"lxml-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130"}, + {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036"}, + {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387"}, + {file = "lxml-4.9.1-cp38-cp38-win32.whl", hash = "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94"}, + {file = "lxml-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345"}, + {file = "lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91"}, + {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25"}, + {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd"}, + {file = "lxml-4.9.1-cp39-cp39-win32.whl", hash = "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb"}, + {file = "lxml-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b"}, + {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b"}, + {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2"}, + {file = 
"lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c"}, + {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9"}, + {file = "lxml-4.9.1.tar.gz", hash = "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=0.29.7)"] + +[[package]] +name = "markdown2" +version = "2.4.10" +description = "A fast and complete Python implementation of Markdown" +optional = false +python-versions = ">=3.5, <4" +groups = ["main"] +files = [ + {file = "markdown2-2.4.10-py2.py3-none-any.whl", hash = "sha256:e6105800483783831f5dc54f827aa5b44eb137ecef5a70293d8ecfbb4109ecc6"}, + {file = "markdown2-2.4.10.tar.gz", hash = "sha256:cdba126d90dc3aef6f4070ac342f974d63f415678959329cc7909f96cc235d72"}, +] + +[package.extras] +all = ["pygments (>=2.7.3)", "wavedrom ; python_version >= \"3.7\""] +code-syntax-highlighting = ["pygments (>=2.7.3)"] +wavedrom = ["wavedrom ; python_version >= \"3.7\""] + +[[package]] +name = "mccabe" +version = "0.7.0" +description = "McCabe checker, plugin for flake8" +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, + {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, +] + +[[package]] +name = "newrelic" +version = "8.4.0" +description = "New Relic Python Agent" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +groups = ["main"] +files = [ + {file = "newrelic-8.4.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:ede2b43cf395fef31c3a43bd8d6db6cb56496516c54123b534f0027d582446ef"}, + {file = "newrelic-8.4.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:dc095f45496c03681bcf01692c85817243ccb500a5281410cdf7194be9308444"}, + {file = "newrelic-8.4.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:88063cd39672a93d2201f9f6568e9b996b1a1b9ca052f83eac9a4ca0a36fa7a9"}, + {file = "newrelic-8.4.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:492a06949af21b82072095c1055051985cddadc9458f1752a2c040228918fabc"}, + {file = "newrelic-8.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ec531aa73f1ae12724031a965805a535585fd06c617acb68b8b1a4534912e31"}, + {file = "newrelic-8.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c831a8f084b8168afdc4e9770ac062d6fdac16f41aa7a122ce87e5e448bbe9"}, + {file = "newrelic-8.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:385829c819bccde7edc002ea73b784194312824c3b3d9511c1015194498f2658"}, + {file = "newrelic-8.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c465d7366cb081498bb6918f4b91157b54e70b446639d1a3ec86fc72062e6c3"}, + {file = 
"newrelic-8.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbedd2040a003e2f712184fab672e2987cf34da59f64f548d121c1a02ea1f4c"}, + {file = "newrelic-8.4.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0b9fcf84b3735b7d699df9e27822ab5a62a1ed12c39540140e2d40be3be8e9"}, + {file = "newrelic-8.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97f8f288dc131b3260220aef3716ae5e78aeb08edcd2e43e947a675a0d08d21b"}, + {file = "newrelic-8.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:592b622945e4ff4f14cca0e01dd08553a1f1e29cea25826b9e30101133296993"}, + {file = "newrelic-8.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01b37112a6f48d2da8e98268dfc273a33981297a2b43ac7674d5993dee31885d"}, + {file = "newrelic-8.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ea8850cc5c05edba753cd85800bb6d3411bcd7425614c59e5550f35c4f2cc6"}, + {file = "newrelic-8.4.0.tar.gz", hash = "sha256:4abf147b5148ac1d284aba46582c92840a521bf5086a29af36494bd53778136f"}, +] + +[package.extras] +infinite-tracing = ["grpcio", "protobuf"] + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "packaging" +version = "24.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, +] + +[[package]] +name = "pluggy" +version = "0.13.1" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev"] +files = [ + {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, + {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] + +[[package]] +name = "primitive-metadata" +version = "0.2025.1" +description = "a (simple?) 
toolset for authoring and gathering metadata as rdf" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "primitive_metadata-0.2025.1-py3-none-any.whl", hash = "sha256:feaddf223c16e06f982ce3fcaf9a13674b0d82bc0fe31a23060c6f0d867c5110"}, + {file = "primitive_metadata-0.2025.1.tar.gz", hash = "sha256:ca89a1df9338254a15549e9834394a52a545522b83a925f162e0880bd80b8d47"}, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.50" +description = "Library for building powerful interactive command lines in Python" +optional = false +python-versions = ">=3.8.0" +groups = ["main"] +files = [ + {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, + {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, +] + +[package.dependencies] +wcwidth = "*" + +[[package]] +name = "psycogreen" +version = "1.0.2" +description = "psycopg2 integration with coroutine libraries" +optional = false +python-versions = "*" +groups = ["dist"] +files = [ + {file = "psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d"}, +] + +[[package]] +name = "psycopg2" +version = "2.9.5" +description = "psycopg2 - Python-PostgreSQL Database Adapter" +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "psycopg2-2.9.5-cp310-cp310-win32.whl", hash = "sha256:d3ef67e630b0de0779c42912fe2cbae3805ebaba30cda27fea2a3de650a9414f"}, + {file = "psycopg2-2.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:4cb9936316d88bfab614666eb9e32995e794ed0f8f6b3b718666c22819c1d7ee"}, + {file = "psycopg2-2.9.5-cp311-cp311-win32.whl", hash = "sha256:093e3894d2d3c592ab0945d9eba9d139c139664dcf83a1c440b8a7aa9bb21955"}, + {file = "psycopg2-2.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:920bf418000dd17669d2904472efeab2b20546efd0548139618f8fa305d1d7ad"}, + {file = "psycopg2-2.9.5-cp36-cp36m-win32.whl", hash = "sha256:b9ac1b0d8ecc49e05e4e182694f418d27f3aedcfca854ebd6c05bb1cffa10d6d"}, + {file = "psycopg2-2.9.5-cp36-cp36m-win_amd64.whl", hash = "sha256:fc04dd5189b90d825509caa510f20d1d504761e78b8dfb95a0ede180f71d50e5"}, + {file = "psycopg2-2.9.5-cp37-cp37m-win32.whl", hash = "sha256:922cc5f0b98a5f2b1ff481f5551b95cd04580fd6f0c72d9b22e6c0145a4840e0"}, + {file = "psycopg2-2.9.5-cp37-cp37m-win_amd64.whl", hash = "sha256:1e5a38aa85bd660c53947bd28aeaafb6a97d70423606f1ccb044a03a1203fe4a"}, + {file = "psycopg2-2.9.5-cp38-cp38-win32.whl", hash = "sha256:f5b6320dbc3cf6cfb9f25308286f9f7ab464e65cfb105b64cc9c52831748ced2"}, + {file = "psycopg2-2.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:1a5c7d7d577e0eabfcf15eb87d1e19314c8c4f0e722a301f98e0e3a65e238b4e"}, + {file = "psycopg2-2.9.5-cp39-cp39-win32.whl", hash = "sha256:322fd5fca0b1113677089d4ebd5222c964b1760e361f151cbb2706c4912112c5"}, + {file = "psycopg2-2.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:190d51e8c1b25a47484e52a79638a8182451d6f6dff99f26ad9bd81e5359a0fa"}, + {file = "psycopg2-2.9.5.tar.gz", hash = "sha256:a5246d2e683a972e2187a8714b5c2cf8156c064629f9a9b1a873c1730d9e245a"}, +] + +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["dev"] +files = [ + {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = 
"py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] + +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +description = "Get CPU info with pure Python" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"}, + {file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"}, +] + +[[package]] +name = "pycodestyle" +version = "2.9.1" +description = "Python style guide checker" +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, + {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, +] + +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +groups = ["main", "dist"] +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] +markers = {dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} + +[[package]] +name = "pyflakes" +version = "2.5.0" +description = "passive checker of Python programs" +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, + {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, +] + +[[package]] +name = "pyjwe" +version = "1.0.0" +description = "JSON Web Encryption implementation in Python" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "PyJWE-1.0.0.tar.gz", hash = "sha256:e3b3de7be4fcc260e5f1a47ead9c9a9211d8ce98f9e1d88d9a7225198aa9ce47"}, +] + +[package.dependencies] +cryptography = ">=0.9.3" + +[[package]] +name = "pyparsing" +version = "3.2.3" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"}, + {file = "pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "pytest" +version = "6.2.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.6" +groups = ["dev"] +files = [ + {file = "pytest-6.2.4-py3-none-any.whl", hash = "sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890"}, + {file = "pytest-6.2.4.tar.gz", hash = "sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b"}, +] + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<1.0.0a1" +py = ">=1.8.2" +toml = "*" + +[package.extras] +testing = 
["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +name = "pytest-benchmark" +version = "3.4.1" +description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["dev"] +files = [ + {file = "pytest-benchmark-3.4.1.tar.gz", hash = "sha256:40e263f912de5a81d891619032983557d62a3d85843f9a9f30b98baea0cd7b47"}, + {file = "pytest_benchmark-3.4.1-py2.py3-none-any.whl", hash = "sha256:36d2b08c4882f6f997fd3126a3d6dfd70f3249cde178ed8bbc0b73db7c20f809"}, +] + +[package.dependencies] +py-cpuinfo = "*" +pytest = ">=3.8" + +[package.extras] +aspect = ["aspectlib"] +elasticsearch = ["elasticsearch"] +histogram = ["pygal", "pygaljs"] + +[[package]] +name = "pytest-django" +version = "4.4.0" +description = "A Django plugin for pytest." +optional = false +python-versions = ">=3.5" +groups = ["dev"] +files = [ + {file = "pytest-django-4.4.0.tar.gz", hash = "sha256:b5171e3798bf7e3fc5ea7072fe87324db67a4dd9f1192b037fed4cc3c1b7f455"}, + {file = "pytest_django-4.4.0-py3-none-any.whl", hash = "sha256:65783e78382456528bd9d79a35843adde9e6a47347b20464eb2c885cb0f1f606"}, +] + +[package.dependencies] +pytest = ">=5.4.0" + +[package.extras] +docs = ["sphinx", "sphinx-rtd-theme"] +testing = ["Django", "django-configurations (>=2.0)"] + +[[package]] +name = "python-crontab" +version = "3.2.0" +description = "Python Crontab API" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "python_crontab-3.2.0-py3-none-any.whl", hash = "sha256:82cb9b6a312d41ff66fd3caf3eed7115c28c195bfb50711bc2b4b9592feb9fe5"}, + {file = "python_crontab-3.2.0.tar.gz", hash = "sha256:40067d1dd39ade3460b2ad8557c7651514cd3851deffff61c5c60e1227c5c36b"}, +] + +[package.dependencies] +python-dateutil = "*" + +[package.extras] +cron-description = ["cron-descriptor"] +cron-schedule = ["croniter"] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev"] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "pytz" +version = "2025.2" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +groups = ["main", "dev"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + +[[package]] +name = "rdflib" +version = "7.0.0" +description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information." 
+optional = false +python-versions = ">=3.8.1,<4.0.0" +groups = ["main"] +files = [ + {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"}, + {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"}, +] + +[package.dependencies] +isodate = ">=0.6.0,<0.7.0" +pyparsing = ">=2.1.0,<4" + +[package.extras] +berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"] +html = ["html5lib (>=1.0,<2.0)"] +lxml = ["lxml (>=4.3.0,<5.0.0)"] +networkx = ["networkx (>=2.0.0,<3.0.0)"] + +[[package]] +name = "requests" +version = "2.25.1" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["main", "dev"] +files = [ + {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"}, + {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +chardet = ">=3.0.2,<5" +idna = ">=2.5,<3" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +security = ["cryptography (>=1.3.4)", "pyOpenSSL (>=0.14)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton ; sys_platform == \"win32\" and python_version == \"2.7\""] + +[[package]] +name = "sentry-sdk" +version = "1.22.2" +description = "Python client for Sentry (https://sentry.io)" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "sentry-sdk-1.22.2.tar.gz", hash = "sha256:5932c092c6e6035584eb74d77064e4bce3b7935dfc4a331349719a40db265840"}, + {file = "sentry_sdk-1.22.2-py2.py3-none-any.whl", hash = "sha256:cf89a5063ef84278d186aceaed6fb595bfe67d099298e537634a323664265669"}, +] + +[package.dependencies] +certifi = "*" +django = {version = ">=1.8", optional = true, markers = "extra == \"django\""} +urllib3 = {version = ">=1.26.11,<2.0.0", markers = "python_version >= \"3.6\""} + +[package.extras] +aiohttp = ["aiohttp (>=3.5)"] +arq = ["arq (>=0.23)"] +beam = ["apache-beam (>=2.12)"] +bottle = ["bottle (>=0.12.13)"] +celery = ["celery (>=3)"] +chalice = ["chalice (>=1.16.0)"] +django = ["django (>=1.8)"] +falcon = ["falcon (>=1.4)"] +fastapi = ["fastapi (>=0.79.0)"] +flask = ["blinker (>=1.1)", "flask (>=0.11)"] +grpcio = ["grpcio (>=1.21.1)"] +httpx = ["httpx (>=0.16.0)"] +huey = ["huey (>=2)"] +opentelemetry = ["opentelemetry-distro (>=0.35b0)"] +pure-eval = ["asttokens", "executing", "pure-eval"] +pymongo = ["pymongo (>=3.1)"] +pyspark = ["pyspark (>=2.4.4)"] +quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] +rq = ["rq (>=0.6)"] +sanic = ["sanic (>=0.8)"] +sqlalchemy = ["sqlalchemy (>=1.2)"] +starlette = ["starlette (>=0.19.1)"] +starlite = ["starlite (>=1.48)"] +tornado = ["tornado (>=5)"] + +[[package]] +name = "setuptools" +version = "78.1.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.9" +groups = ["dist"] +files = [ + {file = "setuptools-78.1.0-py3-none-any.whl", hash = "sha256:3e386e96793c8702ae83d17b853fb93d3e09ef82ec62722e61da5cd22376dcd8"}, + {file = "setuptools-78.1.0.tar.gz", hash = "sha256:18fd474d4a82a5f83dac888df697af65afa82dec7323d09c3e37d1f14288da54"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.8.0) ; sys_platform != \"cygwin\""] +core = ["importlib_metadata (>=6) ; python_version < 
\"3.10\"", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.7.2)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.14.*)", "pytest-mypy"] + +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "sqlparse" +version = "0.5.3" +description = "A non-validating SQL parser." 
+optional = false +python-versions = ">=3.8" +groups = ["main", "dev"] +files = [ + {file = "sqlparse-0.5.3-py3-none-any.whl", hash = "sha256:cf2196ed3418f3ba5de6af7e82c694a9fbdbfecccdfc72e281548517081f16ca"}, + {file = "sqlparse-0.5.3.tar.gz", hash = "sha256:09f67787f56a0b16ecdbde1bfc7f5d9c3371ca683cfeaa8e6ff60b4807ec9272"}, +] + +[package.extras] +dev = ["build", "hatch"] +doc = ["sphinx"] + +[[package]] +name = "text-unidecode" +version = "1.3" +description = "The most basic Text::Unidecode port" +optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, + {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, +] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["dev"] +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.13.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +groups = ["main", "dev"] +files = [ + {file = "typing_extensions-4.13.0-py3-none-any.whl", hash = "sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5"}, + {file = "typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b"}, +] +markers = {dev = "python_version < \"3.11\""} + +[[package]] +name = "tzdata" +version = "2025.2" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +groups = ["main"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + +[[package]] +name = "urllib3" +version = "1.26.20" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "dev"] +files = [ + {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, + {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, +] + +[package.extras] +brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version == \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "uwsgi" +version = "2.0.28" +description = "The uWSGI server" +optional = false +python-versions = "*" +groups = ["dist"] +files = [ + {file = "uwsgi-2.0.28.tar.gz", hash = "sha256:79ca1891ef2df14508ab0471ee8c0eb94bd2d51d03f32f90c4bbe557ab1e99d0"}, +] + +[[package]] +name = "vine" +version = "5.1.0" +description = "Python promises." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "vine-5.1.0-py3-none-any.whl", hash = "sha256:40fdf3c48b2cfe1c38a49e9ae2da6fda88e4794c810050a728bd7413811fb1dc"}, + {file = "vine-5.1.0.tar.gz", hash = "sha256:8b62e981d35c41049211cf62a0a1242d8c1ee9bd15bb196ce38aefd6799e61e0"}, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +description = "Measures the displayed width of unicode strings in a terminal" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, + {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, +] + +[[package]] +name = "zope-event" +version = "5.0" +description = "Very basic event publishing system" +optional = false +python-versions = ">=3.7" +groups = ["dist"] +files = [ + {file = "zope.event-5.0-py3-none-any.whl", hash = "sha256:2832e95014f4db26c47a13fdaef84cef2f4df37e66b59d8f1f4a8f319a632c26"}, + {file = "zope.event-5.0.tar.gz", hash = "sha256:bac440d8d9891b4068e2b5a2c5e2c9765a9df762944bda6955f96bb9b91e67cd"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +docs = ["Sphinx"] +test = ["zope.testrunner"] + +[[package]] +name = "zope-interface" +version = "7.2" +description = "Interfaces for Python" +optional = false +python-versions = ">=3.8" +groups = ["dist"] +files = [ + {file = "zope.interface-7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ce290e62229964715f1011c3dbeab7a4a1e4971fd6f31324c4519464473ef9f2"}, + {file = "zope.interface-7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05b910a5afe03256b58ab2ba6288960a2892dfeef01336dc4be6f1b9ed02ab0a"}, + {file = "zope.interface-7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:550f1c6588ecc368c9ce13c44a49b8d6b6f3ca7588873c679bd8fd88a1b557b6"}, + {file = "zope.interface-7.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ef9e2f865721553c6f22a9ff97da0f0216c074bd02b25cf0d3af60ea4d6931d"}, + {file = 
"zope.interface-7.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27f926f0dcb058211a3bb3e0e501c69759613b17a553788b2caeb991bed3b61d"}, + {file = "zope.interface-7.2-cp310-cp310-win_amd64.whl", hash = "sha256:144964649eba4c5e4410bb0ee290d338e78f179cdbfd15813de1a664e7649b3b"}, + {file = "zope.interface-7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1909f52a00c8c3dcab6c4fad5d13de2285a4b3c7be063b239b8dc15ddfb73bd2"}, + {file = "zope.interface-7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:80ecf2451596f19fd607bb09953f426588fc1e79e93f5968ecf3367550396b22"}, + {file = "zope.interface-7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:033b3923b63474800b04cba480b70f6e6243a62208071fc148354f3f89cc01b7"}, + {file = "zope.interface-7.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a102424e28c6b47c67923a1f337ede4a4c2bba3965b01cf707978a801fc7442c"}, + {file = "zope.interface-7.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25e6a61dcb184453bb00eafa733169ab6d903e46f5c2ace4ad275386f9ab327a"}, + {file = "zope.interface-7.2-cp311-cp311-win_amd64.whl", hash = "sha256:3f6771d1647b1fc543d37640b45c06b34832a943c80d1db214a37c31161a93f1"}, + {file = "zope.interface-7.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:086ee2f51eaef1e4a52bd7d3111a0404081dadae87f84c0ad4ce2649d4f708b7"}, + {file = "zope.interface-7.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:21328fcc9d5b80768bf051faa35ab98fb979080c18e6f84ab3f27ce703bce465"}, + {file = "zope.interface-7.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f6dd02ec01f4468da0f234da9d9c8545c5412fef80bc590cc51d8dd084138a89"}, + {file = "zope.interface-7.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e7da17f53e25d1a3bde5da4601e026adc9e8071f9f6f936d0fe3fe84ace6d54"}, + {file = "zope.interface-7.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cab15ff4832580aa440dc9790b8a6128abd0b88b7ee4dd56abacbc52f212209d"}, + {file = "zope.interface-7.2-cp312-cp312-win_amd64.whl", hash = "sha256:29caad142a2355ce7cfea48725aa8bcf0067e2b5cc63fcf5cd9f97ad12d6afb5"}, + {file = "zope.interface-7.2-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:3e0350b51e88658d5ad126c6a57502b19d5f559f6cb0a628e3dc90442b53dd98"}, + {file = "zope.interface-7.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:15398c000c094b8855d7d74f4fdc9e73aa02d4d0d5c775acdef98cdb1119768d"}, + {file = "zope.interface-7.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:802176a9f99bd8cc276dcd3b8512808716492f6f557c11196d42e26c01a69a4c"}, + {file = "zope.interface-7.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb23f58a446a7f09db85eda09521a498e109f137b85fb278edb2e34841055398"}, + {file = "zope.interface-7.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a71a5b541078d0ebe373a81a3b7e71432c61d12e660f1d67896ca62d9628045b"}, + {file = "zope.interface-7.2-cp313-cp313-win_amd64.whl", hash = "sha256:4893395d5dd2ba655c38ceb13014fd65667740f09fa5bb01caa1e6284e48c0cd"}, + {file = "zope.interface-7.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:d3a8ffec2a50d8ec470143ea3d15c0c52d73df882eef92de7537e8ce13475e8a"}, + {file = "zope.interface-7.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:31d06db13a30303c08d61d5fb32154be51dfcbdb8438d2374ae27b4e069aac40"}, + {file = "zope.interface-7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e204937f67b28d2dca73ca936d3039a144a081fc47a07598d44854ea2a106239"}, + {file = "zope.interface-7.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:224b7b0314f919e751f2bca17d15aad00ddbb1eadf1cb0190fa8175edb7ede62"}, + {file = "zope.interface-7.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf95683cde5bc7d0e12d8e7588a3eb754d7c4fa714548adcd96bdf90169f021"}, + {file = "zope.interface-7.2-cp38-cp38-win_amd64.whl", hash = "sha256:7dc5016e0133c1a1ec212fc87a4f7e7e562054549a99c73c8896fa3a9e80cbc7"}, + {file = "zope.interface-7.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7bd449c306ba006c65799ea7912adbbfed071089461a19091a228998b82b1fdb"}, + {file = "zope.interface-7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a19a6cc9c6ce4b1e7e3d319a473cf0ee989cbbe2b39201d7c19e214d2dfb80c7"}, + {file = "zope.interface-7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72cd1790b48c16db85d51fbbd12d20949d7339ad84fd971427cf00d990c1f137"}, + {file = "zope.interface-7.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52e446f9955195440e787596dccd1411f543743c359eeb26e9b2c02b077b0519"}, + {file = "zope.interface-7.2-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ad9913fd858274db8dd867012ebe544ef18d218f6f7d1e3c3e6d98000f14b75"}, + {file = "zope.interface-7.2-cp39-cp39-win_amd64.whl", hash = "sha256:1090c60116b3da3bfdd0c03406e2f14a1ff53e5771aebe33fec1edc0a350175d"}, + {file = "zope.interface-7.2.tar.gz", hash = "sha256:8b49f1a3d1ee4cdaf5b32d2e738362c7f5e40ac8b46dd7d1a65e82a4872728fe"}, +] + +[package.dependencies] +setuptools = "*" + +[package.extras] +docs = ["Sphinx", "furo", "repoze.sphinx.autointerface"] +test = ["coverage[toml]", "zope.event", "zope.testing"] +testing = ["coverage[toml]", "zope.event", "zope.testing"] + +[metadata] +lock-version = "2.1" +python-versions = ">=3.10,<3.12" +content-hash = "9c0c9c439701e05cce8574c8ad4068312219dbfadb8ce55fe9cd99d502793583" diff --git a/pyproject.toml b/pyproject.toml index e5d8389ba..071cb3abe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,11 +26,7 @@ dependencies = [ "rdflib==7.1.3", # BSD 3 Clause "requests==2.32.3", # Apache 2.0 "primitive_metadata==0.2025.1", # MIT - - # relevant only in deployment: "sentry-sdk[django]==2.24.1", # MIT - "gevent==24.11.2", # MIT - "psycogreen==1.0.2", # BSD # to be removed in future work: "djangorestframework==3.16.0", # BSD @@ -39,12 +35,17 @@ dependencies = [ "PyJWE==1.0.0", # Apache 2.0 ] +# "dist" dependency group relevant only in deployment: [tool.poetry.group.dist] optional = true [tool.poetry.group.dist.dependencies] uwsgi = "2.0.28" newrelic = "10.7.0" # newrelic APM agent, Custom License +gevent = "24.11.2" # MIT +psycogreen = "1.0.2" # BSD + +# "dev" dependency group relevant only for local dev: [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] @@ -57,6 +58,9 @@ pytest-benchmark = "5.1.0" pytest = "8.3.5" pytest-django = "4.11.1" +### +# other stuff + [build-system] requires = 
["poetry-core>=2.0.0,<3.0.0"] build-backend = "poetry.core.masonry.api" From 2b137f6250451441b4d0dedef537b813389c0eb5 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 26 Mar 2025 15:43:39 -0400 Subject: [PATCH 31/46] shtrove not installable (...yet) --- .github/workflows/run_tests.yml | 4 ++-- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index be406bb81..635def972 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -63,8 +63,8 @@ jobs: python-version: ${{ matrix.python-version }} cache: 'poetry' - - name: install depondencies - run: poetry install --with dev + - name: install despondencies + run: poetry install --no-root --with dev - name: flake it run: flake8 . diff --git a/pyproject.toml b/pyproject.toml index 071cb3abe..92d1feacf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [project] name = "shtrove" version = "25.1.1" # TODO: get version from share/version.py +package-mode = false description = "" authors = [ {name = "Abram Axel Booth", email = "abram@cos.io"} From 68bf437d78251a8b825850d9b0b792e7c883a381 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 26 Mar 2025 15:52:48 -0400 Subject: [PATCH 32/46] wip (poetry run) --- .github/workflows/run_tests.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 635def972..5351a34ac 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -67,12 +67,12 @@ jobs: run: poetry install --no-root --with dev - name: flake it - run: flake8 . + run: poetry run flake8 . - name: run tests run: | - coverage run -m pytest --create-db - coverage xml -o _shtrove_coverage.xml + poetry run coverage run -m pytest --create-db + poetry run coverage xml -o _shtrove_coverage.xml env: DATABASE_PASSWORD: postgres ELASTICSEARCH8_URL: http://localhost:9208/ From 512f89dd67b607db55bf49866358002f78bd51bd Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 27 Mar 2025 14:18:31 -0400 Subject: [PATCH 33/46] try specifying source for coverage --- .dockerignore | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index 1b6f1a0ec..740472717 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,6 +7,7 @@ docker-compose.yml .gitignore celerybeat.pid /static/ +.venv/ /au.*/ /be.*/ diff --git a/pyproject.toml b/pyproject.toml index 92d1feacf..90d4beff0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,4 +67,5 @@ requires = ["poetry-core>=2.0.0,<3.0.0"] build-backend = "poetry.core.masonry.api" [tool.coverage.run] +source = ["."] omit = ["tests/*"] From bb24a8ee2355b0058737dce86851132ed376b4b9 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 11:06:13 -0400 Subject: [PATCH 34/46] wip (python version range) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 90d4beff0..39f8658ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ authors = [ {name = "Abram Axel Booth", email = "abram@cos.io"} ] readme = "README.md" -requires-python = "=3.13" +requires-python = ">=3.13,<3.14" dependencies = [ "bcrypt==4.3.0", # Apache 2.0 "celery==5.4.0", # BSD 3 Clause From 2514ad6a02ca53a93d8d42cb072455d16bb5514d Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 11:07:23 -0400 Subject: [PATCH 35/46] remove py3.10 tests 
--- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 5351a34ac..cad4b7da5 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -13,7 +13,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.10', '3.13'] + python-version: ['3.13'] postgres-version: ['17'] runs-on: ubuntu-latest services: From 636de981721489ecbeb6f26d434c8b7930afa3d3 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 11:57:56 -0400 Subject: [PATCH 36/46] bump deps; fix build after py-upgrade merge --- poetry.lock | 865 ++++++++++++++++++++++++++++++------------------- pyproject.toml | 4 +- 2 files changed, 528 insertions(+), 341 deletions(-) diff --git a/poetry.lock b/poetry.lock index 2310efec8..a64b8416f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -27,9 +27,6 @@ files = [ {file = "asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590"}, ] -[package.dependencies] -typing-extensions = {version = ">=4", markers = "python_version < \"3.11\""} - [package.extras] tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] @@ -67,28 +64,65 @@ tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" a [[package]] name = "bcrypt" -version = "3.2.0" +version = "4.3.0" description = "Modern password hashing for your software and your servers" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:b589229207630484aefe5899122fb938a5b017b0f4349f769b8c13e78d99a8fd"}, - {file = "bcrypt-3.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c95d4cbebffafcdd28bd28bb4e25b31c50f6da605c81ffd9ad8a3d1b2ab7b1b6"}, - {file = "bcrypt-3.2.0-cp36-abi3-manylinux1_x86_64.whl", hash = "sha256:63d4e3ff96188e5898779b6057878fecf3f11cfe6ec3b313ea09955d587ec7a7"}, - {file = "bcrypt-3.2.0-cp36-abi3-manylinux2010_x86_64.whl", hash = "sha256:cd1ea2ff3038509ea95f687256c46b79f5fc382ad0aa3664d200047546d511d1"}, - {file = "bcrypt-3.2.0-cp36-abi3-manylinux2014_aarch64.whl", hash = "sha256:cdcdcb3972027f83fe24a48b1e90ea4b584d35f1cc279d76de6fc4b13376239d"}, - {file = "bcrypt-3.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a0584a92329210fcd75eb8a3250c5a941633f8bfaf2a18f81009b097732839b7"}, - {file = "bcrypt-3.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:56e5da069a76470679f312a7d3d23deb3ac4519991a0361abc11da837087b61d"}, - {file = "bcrypt-3.2.0-cp36-abi3-win32.whl", hash = "sha256:a67fb841b35c28a59cebed05fbd3e80eea26e6d75851f0574a9273c80f3e9b55"}, - {file = "bcrypt-3.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:81fec756feff5b6818ea7ab031205e1d323d8943d237303baca2c5f9c7846f34"}, - {file = "bcrypt-3.2.0.tar.gz", hash = "sha256:5b93c1726e50a93a033c36e5ca7fdcd29a5c7395af50a6892f5d9e7c6cfbfb29"}, + {file = "bcrypt-4.3.0-cp313-cp313t-macosx_10_12_universal2.whl", hash = "sha256:f01e060f14b6b57bbb72fc5b4a83ac21c443c9a2ee708e04a10e9192f90a6281"}, + {file = "bcrypt-4.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5eeac541cefd0bb887a371ef73c62c3cd78535e4887b310626036a7c0a817bb"}, + {file = "bcrypt-4.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59e1aa0e2cd871b08ca146ed08445038f42ff75968c7ae50d2fdd7860ade2180"}, + {file = 
"bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0042b2e342e9ae3d2ed22727c1262f76cc4f345683b5c1715f0250cf4277294f"}, + {file = "bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74a8d21a09f5e025a9a23e7c0fd2c7fe8e7503e4d356c0a2c1486ba010619f09"}, + {file = "bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:0142b2cb84a009f8452c8c5a33ace5e3dfec4159e7735f5afe9a4d50a8ea722d"}, + {file = "bcrypt-4.3.0-cp313-cp313t-manylinux_2_34_aarch64.whl", hash = "sha256:12fa6ce40cde3f0b899729dbd7d5e8811cb892d31b6f7d0334a1f37748b789fd"}, + {file = "bcrypt-4.3.0-cp313-cp313t-manylinux_2_34_x86_64.whl", hash = "sha256:5bd3cca1f2aa5dbcf39e2aa13dd094ea181f48959e1071265de49cc2b82525af"}, + {file = "bcrypt-4.3.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:335a420cfd63fc5bc27308e929bee231c15c85cc4c496610ffb17923abf7f231"}, + {file = "bcrypt-4.3.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:0e30e5e67aed0187a1764911af023043b4542e70a7461ad20e837e94d23e1d6c"}, + {file = "bcrypt-4.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b8d62290ebefd49ee0b3ce7500f5dbdcf13b81402c05f6dafab9a1e1b27212f"}, + {file = "bcrypt-4.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2ef6630e0ec01376f59a006dc72918b1bf436c3b571b80fa1968d775fa02fe7d"}, + {file = "bcrypt-4.3.0-cp313-cp313t-win32.whl", hash = "sha256:7a4be4cbf241afee43f1c3969b9103a41b40bcb3a3f467ab19f891d9bc4642e4"}, + {file = "bcrypt-4.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c1949bf259a388863ced887c7861da1df681cb2388645766c89fdfd9004c669"}, + {file = "bcrypt-4.3.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:f81b0ed2639568bf14749112298f9e4e2b28853dab50a8b357e31798686a036d"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:864f8f19adbe13b7de11ba15d85d4a428c7e2f344bac110f667676a0ff84924b"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e36506d001e93bffe59754397572f21bb5dc7c83f54454c990c74a468cd589e"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:842d08d75d9fe9fb94b18b071090220697f9f184d4547179b60734846461ed59"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7c03296b85cb87db865d91da79bf63d5609284fc0cab9472fdd8367bbd830753"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:62f26585e8b219cdc909b6a0069efc5e4267e25d4a3770a364ac58024f62a761"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:beeefe437218a65322fbd0069eb437e7c98137e08f22c4660ac2dc795c31f8bb"}, + {file = "bcrypt-4.3.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:97eea7408db3a5bcce4a55d13245ab3fa566e23b4c67cd227062bb49e26c585d"}, + {file = "bcrypt-4.3.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:191354ebfe305e84f344c5964c7cd5f924a3bfc5d405c75ad07f232b6dffb49f"}, + {file = "bcrypt-4.3.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:41261d64150858eeb5ff43c753c4b216991e0ae16614a308a15d909503617732"}, + {file = "bcrypt-4.3.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:33752b1ba962ee793fa2b6321404bf20011fe45b9afd2a842139de3011898fef"}, + {file = "bcrypt-4.3.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:50e6e80a4bfd23a25f5c05b90167c19030cf9f87930f7cb2eacb99f45d1c3304"}, + {file = "bcrypt-4.3.0-cp38-abi3-win32.whl", hash = "sha256:67a561c4d9fb9465ec866177e7aebcad08fe23aaf6fbd692a6fab69088abfc51"}, + 
{file = "bcrypt-4.3.0-cp38-abi3-win_amd64.whl", hash = "sha256:584027857bc2843772114717a7490a37f68da563b3620f78a849bcb54dc11e62"}, + {file = "bcrypt-4.3.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0d3efb1157edebfd9128e4e46e2ac1a64e0c1fe46fb023158a407c7892b0f8c3"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08bacc884fd302b611226c01014eca277d48f0a05187666bca23aac0dad6fe24"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6746e6fec103fcd509b96bacdfdaa2fbde9a553245dbada284435173a6f1aef"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:afe327968aaf13fc143a56a3360cb27d4ad0345e34da12c7290f1b00b8fe9a8b"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d9af79d322e735b1fc33404b5765108ae0ff232d4b54666d46730f8ac1a43676"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f1e3ffa1365e8702dc48c8b360fef8d7afeca482809c5e45e653af82ccd088c1"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:3004df1b323d10021fda07a813fd33e0fd57bef0e9a480bb143877f6cba996fe"}, + {file = "bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:531457e5c839d8caea9b589a1bcfe3756b0547d7814e9ce3d437f17da75c32b0"}, + {file = "bcrypt-4.3.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:17a854d9a7a476a89dcef6c8bd119ad23e0f82557afbd2c442777a16408e614f"}, + {file = "bcrypt-4.3.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6fb1fd3ab08c0cbc6826a2e0447610c6f09e983a281b919ed721ad32236b8b23"}, + {file = "bcrypt-4.3.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e965a9c1e9a393b8005031ff52583cedc15b7884fce7deb8b0346388837d6cfe"}, + {file = "bcrypt-4.3.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:79e70b8342a33b52b55d93b3a59223a844962bef479f6a0ea318ebbcadf71505"}, + {file = "bcrypt-4.3.0-cp39-abi3-win32.whl", hash = "sha256:b4d4e57f0a63fd0b358eb765063ff661328f69a04494427265950c71b992a39a"}, + {file = "bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b"}, + {file = "bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c950d682f0952bafcceaf709761da0a32a942272fad381081b51096ffa46cea1"}, + {file = "bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:107d53b5c67e0bbc3f03ebf5b030e0403d24dda980f8e244795335ba7b4a027d"}, + {file = "bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:b693dbb82b3c27a1604a3dff5bfc5418a7e6a781bb795288141e5f80cf3a3492"}, + {file = "bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:b6354d3760fcd31994a14c89659dee887f1351a06e5dac3c1142307172a79f90"}, + {file = "bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a839320bf27d474e52ef8cb16449bb2ce0ba03ca9f44daba6d93fa1d8828e48a"}, + {file = "bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:bdc6a24e754a555d7316fa4774e64c6c3997d27ed2d1964d55920c7c227bc4ce"}, + {file = "bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:55a935b8e9a1d2def0626c4269db3fcd26728cbff1e84f0341465c31c4ee56d8"}, + {file = "bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:57967b7a28d855313a963aaea51bf6df89f833db4320da458e5b3c5ab6d4c938"}, + {file = "bcrypt-4.3.0.tar.gz", hash = "sha256:3a3fd2204178b6d2adcf09cb4f6426ffef54762577a7c9b54c159008cb288c18"}, ] 
-[package.dependencies] -cffi = ">=1.1" -six = ">=1.4.1" - [package.extras] tests = ["pytest (>=3.2.1,!=3.3.0)"] typecheck = ["mypy"] @@ -250,21 +284,111 @@ files = [ {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] -markers = {dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} +markers = {main = "platform_python_implementation != \"PyPy\"", dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} [package.dependencies] pycparser = "*" [[package]] -name = "chardet" -version = "4.0.0" -description = "Universal encoding detector for Python 2 and 3" +name = "charset-normalizer" +version = "3.4.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.7" groups = ["main", "dev"] files = [ - {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, - {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, + 
{file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"}, + 
{file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = 
"sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, + {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, + {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, ] [[package]] @@ -349,19 +473,22 @@ markers = {main = "platform_system == \"Windows\" or sys_platform == \"win32\"", [[package]] name = "colorlog" -version = "5.0.1" +version = "6.9.0" description = "Add colours to the output of Python's logging module." 
optional = false -python-versions = "*" +python-versions = ">=3.6" groups = ["main"] files = [ - {file = "colorlog-5.0.1-py2.py3-none-any.whl", hash = "sha256:4e6be13d9169254e2ded6526a6a4a1abb8ac564f2fa65b310a98e4ca5bea2c04"}, - {file = "colorlog-5.0.1.tar.gz", hash = "sha256:f17c013a06962b02f4449ee07cfdbe6b287df29efc2c9a1515b4a376f4e588ea"}, + {file = "colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff"}, + {file = "colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} +[package.extras] +development = ["black", "flake8", "mypy", "pytest", "types-colorama"] + [[package]] name = "coverage" version = "5.5" @@ -522,20 +649,20 @@ test-randomorder = ["pytest-randomly"] [[package]] name = "django" -version = "3.2.25" -description = "A high-level Python Web framework that encourages rapid development and clean, pragmatic design." +version = "5.1.8" +description = "A high-level Python web framework that encourages rapid development and clean, pragmatic design." optional = false -python-versions = ">=3.6" +python-versions = ">=3.10" groups = ["main", "dev"] files = [ - {file = "Django-3.2.25-py3-none-any.whl", hash = "sha256:a52ea7fcf280b16f7b739cec38fa6d3f8953a5456986944c3ca97e79882b4e38"}, - {file = "Django-3.2.25.tar.gz", hash = "sha256:7ca38a78654aee72378594d63e51636c04b8e28574f5505dff630895b5472777"}, + {file = "Django-5.1.8-py3-none-any.whl", hash = "sha256:11b28fa4b00e59d0def004e9ee012fefbb1065a5beb39ee838983fd24493ad4f"}, + {file = "Django-5.1.8.tar.gz", hash = "sha256:42e92a1dd2810072bcc40a39a212b693f94406d0ba0749e68eb642f31dc770b4"}, ] [package.dependencies] -asgiref = ">=3.3.2,<4" -pytz = "*" -sqlparse = ">=0.2.2" +asgiref = ">=3.8.1,<4" +sqlparse = ">=0.3.1" +tzdata = {version = "*", markers = "sys_platform == \"win32\""} [package.extras] argon2 = ["argon2-cffi (>=19.1.0)"] @@ -543,20 +670,21 @@ bcrypt = ["bcrypt"] [[package]] name = "django-allauth" -version = "0.63.6" +version = "65.5.0" description = "Integrated set of Django applications addressing authentication, registration, account management as well as 3rd party (social) account authentication." optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "django_allauth-0.63.6.tar.gz", hash = "sha256:f15f49defb09e0604dad5214e53a69a1f723cb03176bb115c8930bcd19b91749"}, + {file = "django_allauth-65.5.0.tar.gz", hash = "sha256:1a564fd2f5413054559078c2b7146796b517c1e7a38c6312e9de7c9bb708325d"}, ] [package.dependencies] -Django = ">=3.2" +asgiref = ">=3.8.1" +Django = ">=4.2.16" [package.extras] -mfa = ["qrcode (>=7.0.0)"] +mfa = ["fido2 (>=1.1.2)", "qrcode (>=7.0.0)"] openid = ["python3-openid (>=3.0.8)"] saml = ["python3-saml (>=1.15.0,<2.0.0)"] socialaccount = ["pyjwt[crypto] (>=1.7)", "requests (>=2.0.0)", "requests-oauthlib (>=0.3.0)"] @@ -564,37 +692,39 @@ steam = ["python3-openid (>=3.0.8)"] [[package]] name = "django-celery-beat" -version = "2.6.0" +version = "2.7.0" description = "Database-backed Periodic Tasks." 
optional = false -python-versions = "*" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "django-celery-beat-2.6.0.tar.gz", hash = "sha256:f75b2d129731f1214be8383e18fae6bfeacdb55dffb2116ce849222c0106f9ad"}, + {file = "django_celery_beat-2.7.0-py3-none-any.whl", hash = "sha256:851c680d8fbf608ca5fecd5836622beea89fa017bc2b3f94a5b8c648c32d84b1"}, + {file = "django_celery_beat-2.7.0.tar.gz", hash = "sha256:8482034925e09b698c05ad61c36ed2a8dbc436724a3fe119215193a4ca6dc967"}, ] [package.dependencies] celery = ">=5.2.3,<6.0" cron-descriptor = ">=1.2.32" -Django = ">=2.2,<5.1" +Django = ">=2.2,<5.2" django-timezone-field = ">=5.0" python-crontab = ">=2.3.4" tzdata = "*" [[package]] name = "django-cors-headers" -version = "3.7.0" +version = "4.7.0" description = "django-cors-headers is a Django application for handling the server headers required for Cross-Origin Resource Sharing (CORS)." optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "django-cors-headers-3.7.0.tar.gz", hash = "sha256:96069c4aaacace786a34ee7894ff680780ec2644e4268b31181044410fecd12e"}, - {file = "django_cors_headers-3.7.0-py3-none-any.whl", hash = "sha256:1ac2b1213de75a251e2ba04448da15f99bcfcbe164288ae6b5ff929dc49b372f"}, + {file = "django_cors_headers-4.7.0-py3-none-any.whl", hash = "sha256:f1c125dcd58479fe7a67fe2499c16ee38b81b397463cf025f0e2c42937421070"}, + {file = "django_cors_headers-4.7.0.tar.gz", hash = "sha256:6fdf31bf9c6d6448ba09ef57157db2268d515d94fc5c89a0a1028e1fc03ee52b"}, ] [package.dependencies] -Django = ">=2.2" +asgiref = ">=3.6" +django = ">=4.2" [[package]] name = "django-debug-toolbar" @@ -614,50 +744,50 @@ sqlparse = ">=0.2.0" [[package]] name = "django-extensions" -version = "3.1.3" +version = "3.2.3" description = "Extensions for Django" optional = false python-versions = ">=3.6" groups = ["main"] files = [ - {file = "django-extensions-3.1.3.tar.gz", hash = "sha256:5f0fea7bf131ca303090352577a9e7f8bfbf5489bd9d9c8aea9401db28db34a0"}, - {file = "django_extensions-3.1.3-py3-none-any.whl", hash = "sha256:50de8977794a66a91575dd40f87d5053608f679561731845edbd325ceeb387e3"}, + {file = "django-extensions-3.2.3.tar.gz", hash = "sha256:44d27919d04e23b3f40231c4ab7af4e61ce832ef46d610cc650d53e68328410a"}, + {file = "django_extensions-3.2.3-py3-none-any.whl", hash = "sha256:9600b7562f79a92cbf1fde6403c04fee314608fefbb595502e34383ae8203401"}, ] [package.dependencies] -Django = ">=2.2" +Django = ">=3.2" [[package]] name = "django-filter" -version = "2.4.0" +version = "25.1" description = "Django-filter is a reusable Django application for allowing users to filter querysets dynamically." 
optional = false -python-versions = ">=3.5" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "django-filter-2.4.0.tar.gz", hash = "sha256:84e9d5bb93f237e451db814ed422a3a625751cbc9968b484ecc74964a8696b06"}, - {file = "django_filter-2.4.0-py3-none-any.whl", hash = "sha256:e00d32cebdb3d54273c48f4f878f898dced8d5dfaad009438fe61ebdf535ace1"}, + {file = "django_filter-25.1-py3-none-any.whl", hash = "sha256:4fa48677cf5857b9b1347fed23e355ea792464e0fe07244d1fdfb8a806215b80"}, + {file = "django_filter-25.1.tar.gz", hash = "sha256:1ec9eef48fa8da1c0ac9b411744b16c3f4c31176c867886e4c48da369c407153"}, ] [package.dependencies] -Django = ">=2.2" +Django = ">=4.2" [[package]] name = "django-oauth-toolkit" -version = "1.7.1" +version = "3.0.1" description = "OAuth2 Provider for Django" optional = false -python-versions = "*" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "django-oauth-toolkit-1.7.1.tar.gz", hash = "sha256:37b690fa53f340c7391bdbc0fdbb32fd9ef8a7c012e72ee8754c331a2d7b4adb"}, - {file = "django_oauth_toolkit-1.7.1-py3-none-any.whl", hash = "sha256:756e44421d0993f27705736b6c33a3d89018393859a31ac926296950f76e4433"}, + {file = "django_oauth_toolkit-3.0.1-py3-none-any.whl", hash = "sha256:3ef00b062a284f2031b0732b32dc899e3bbf0eac221bbb1cffcb50b8932e55ed"}, + {file = "django_oauth_toolkit-3.0.1.tar.gz", hash = "sha256:7200e4a9fb229b145a6d808cbf0423b6d69a87f68557437733eec3c0cf71db02"}, ] [package.dependencies] -django = ">=2.2,<4.0.0 || >4.0.0" -jwcrypto = ">=0.8.0" -oauthlib = ">=3.1.0" +django = ">=4.2" +jwcrypto = ">=1.5.0" +oauthlib = ">=3.2.2" requests = ">=2.13.0" [[package]] @@ -677,44 +807,40 @@ Django = ">=3.2,<6.0" [[package]] name = "djangorestframework" -version = "3.12.4" +version = "3.16.0" description = "Web APIs for Django, made easy." optional = false -python-versions = ">=3.5" +python-versions = ">=3.9" groups = ["main"] files = [ - {file = "djangorestframework-3.12.4-py3-none-any.whl", hash = "sha256:6d1d59f623a5ad0509fe0d6bfe93cbdfe17b8116ebc8eda86d45f6e16e819aaf"}, - {file = "djangorestframework-3.12.4.tar.gz", hash = "sha256:f747949a8ddac876e879190df194b925c177cdeb725a099db1460872f7c0a7f2"}, + {file = "djangorestframework-3.16.0-py3-none-any.whl", hash = "sha256:bea7e9f6b96a8584c5224bfb2e4348dfb3f8b5e34edbecb98da258e892089361"}, + {file = "djangorestframework-3.16.0.tar.gz", hash = "sha256:f022ff46613584de994c0c6a4aebbace5fd700555fbe9d33b865ebf173eba6c9"}, ] [package.dependencies] -django = ">=2.2" +django = ">=4.2" [[package]] name = "djangorestframework-jsonapi" -version = "4.2.1+cos0" -description = "A Django REST framework API adapter for the JSON API spec." +version = "7.1.0" +description = "A Django REST framework API adapter for the JSON:API spec." 
optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["main"] -files = [] -develop = false +files = [ + {file = "djangorestframework_jsonapi-7.1.0-py2.py3-none-any.whl", hash = "sha256:506d7179b6c2d29c4b7879afb4cadf8b0bb468accd40787491875ad680b4993a"}, + {file = "djangorestframework_jsonapi-7.1.0.tar.gz", hash = "sha256:487e4fe5a0590e655cb3af947c8cee7b54be8ced14a81aac53056f2c43a92343"}, +] [package.dependencies] -django = ">=2.2,<3.3" -djangorestframework = ">=3.12,<3.13" -inflection = ">=0.3.0" +django = ">=4.2" +djangorestframework = ">=3.14" +inflection = ">=0.5.0" [package.extras] -django-filter = ["django-filter (>=2.0)"] -django-polymorphic = ["django-polymorphic (>=2.0)"] -openapi = ["pyyaml (>=5.3)", "uritemplate (>=3.0.1)"] - -[package.source] -type = "git" -url = "https://github.com/cos-forks/django-rest-framework-json-api.git" -reference = "9858f712e07d5cdbd79b8cc29b6aac90df9be4e9" -resolved_reference = "9858f712e07d5cdbd79b8cc29b6aac90df9be4e9" +django-filter = ["django-filter (>=2.4)"] +django-polymorphic = ["django-polymorphic (>=3.0)"] +openapi = ["pyyaml (>=5.4)", "uritemplate (>=3.0.1)"] [[package]] name = "docopt" @@ -766,22 +892,27 @@ develop = ["coverage", "mock", "nose", "nosexcover", "pyaml", "requests (>=2.0.0 [[package]] name = "elasticsearch8" -version = "8.5.2" +version = "8.17.2" description = "Python client for Elasticsearch" optional = false -python-versions = ">=3.6, <4" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "elasticsearch8-8.5.2-py3-none-any.whl", hash = "sha256:9646a65b2c0ca9094000319028be38918d3592b782df45f647b971faa88d61b7"}, - {file = "elasticsearch8-8.5.2.tar.gz", hash = "sha256:be99ca2c48f3671b2cbfd3675a67dfbdb804f213f254c0e03b3d511ca29b1e5d"}, + {file = "elasticsearch8-8.17.2-py3-none-any.whl", hash = "sha256:91fcc455a59ebf347ccff8d110534030fd0f219585f3e1ecd09d206b0de61445"}, + {file = "elasticsearch8-8.17.2.tar.gz", hash = "sha256:8fa15a4163c527c92aa13c2320fc8c0dc399060f2638ed016ca0859f81120803"}, ] [package.dependencies] -elastic-transport = ">=8,<9" +elastic-transport = ">=8.15.1,<9" [package.extras] async = ["aiohttp (>=3,<4)"] -requests = ["requests (>=2.4.0,<3.0.0)"] +dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "nox", "numpy", "orjson", "pandas", "pyarrow", "pytest", "pytest-asyncio", "pytest-cov", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "twine", "unasync"] +docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"] +orjson = ["orjson (>=3)"] +pyarrow = ["pyarrow (>=1)"] +requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"] +vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"] [[package]] name = "factory-boy" @@ -837,79 +968,64 @@ pyflakes = ">=2.5.0,<2.6.0" [[package]] name = "gevent" -version = "22.10.2" +version = "24.11.1" description = "Coroutine-based network library" optional = false -python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5" +python-versions = ">=3.9" groups = ["dist"] files = [ - {file = "gevent-22.10.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:97cd42382421779f5d82ec5007199e8a84aa288114975429e4fd0a98f2290f10"}, - {file = "gevent-22.10.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:1e1286a76f15b5e15f1e898731d50529e249529095a032453f2c101af3fde71c"}, - {file = "gevent-22.10.2-cp27-cp27m-win32.whl", hash = "sha256:59b47e81b399d49a5622f0f503c59f1ce57b7705306ea0196818951dfc2f36c8"}, - {file = "gevent-22.10.2-cp27-cp27m-win_amd64.whl", hash = 
"sha256:1d543c9407a1e4bca11a8932916988cfb16de00366de5bf7bc9e7a3f61e60b18"}, - {file = "gevent-22.10.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:4e2f008c82dc54ec94f4de12ca6feea60e419babb48ec145456907ae61625aa4"}, - {file = "gevent-22.10.2-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:990d7069f14dc40674e0d5cb43c68fd3bad8337048613b9bb94a0c4180ffc176"}, - {file = "gevent-22.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f23d0997149a816a2a9045af29c66f67f405a221745b34cefeac5769ed451db8"}, - {file = "gevent-22.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b43d500d7d3c0e03070dee813335bb5315215aa1cf6a04c61093dfdd718640b3"}, - {file = "gevent-22.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b68f4c9e20e47ad49fe797f37f91d5bbeace8765ce2707f979a8d4ec197e4d"}, - {file = "gevent-22.10.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1f001cac0ba8da76abfeb392a3057f81fab3d67cc916c7df8ea977a44a2cc989"}, - {file = "gevent-22.10.2-cp310-cp310-win_amd64.whl", hash = "sha256:3b7eae8a0653ba95a224faaddf629a913ace408edb67384d3117acf42d7dcf89"}, - {file = "gevent-22.10.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8f2477e7b0a903a01485c55bacf2089110e5f767014967ba4b287ff390ae2638"}, - {file = "gevent-22.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ddaa3e310a8f1a45b5c42cf50b54c31003a3028e7d4e085059090ea0e7a5fddd"}, - {file = "gevent-22.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98bc510e80f45486ef5b806a1c305e0e89f0430688c14984b0dbdec03331f48b"}, - {file = "gevent-22.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:877abdb3a669576b1d51ce6a49b7260b2a96f6b2424eb93287e779a3219d20ba"}, - {file = "gevent-22.10.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d21ad79cca234cdbfa249e727500b0ddcbc7adfff6614a96e6eaa49faca3e4f2"}, - {file = "gevent-22.10.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e955238f59b2947631c9782a713280dd75884e40e455313b5b6bbc20b92ff73"}, - {file = "gevent-22.10.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5aa99e4882a9e909b4756ee799c6fa0f79eb0542779fad4cc60efa23ec1b2aa8"}, - {file = "gevent-22.10.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:d82081656a5b9a94d37c718c8646c757e1617e389cdc533ea5e6a6f0b8b78545"}, - {file = "gevent-22.10.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54f4bfd74c178351a4a05c5c7df6f8a0a279ff6f392b57608ce0e83c768207f9"}, - {file = "gevent-22.10.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ff3796692dff50fec2f381b9152438b221335f557c4f9b811f7ded51b7a25a1"}, - {file = "gevent-22.10.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f01c9adbcb605364694b11dcd0542ec468a29ac7aba2fb5665dc6caf17ba4d7e"}, - {file = "gevent-22.10.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:9d85574eb729f981fea9a78998725a06292d90a3ed50ddca74530c3148c0be41"}, - {file = "gevent-22.10.2-cp36-cp36m-win32.whl", hash = "sha256:8c192d2073e558e241f0b592c1e2b34127a4481a5be240cad4796533b88b1a98"}, - {file = "gevent-22.10.2-cp36-cp36m-win_amd64.whl", hash = "sha256:a2237451c721a0f874ef89dbb4af4fdc172b76a964befaa69deb15b8fff10f49"}, - {file = "gevent-22.10.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:53ee7f170ed42c7561fe8aff5d381dc9a4124694e70580d0c02fba6aafc0ea37"}, - {file = "gevent-22.10.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = 
"sha256:96c56c280e3c43cfd075efd10b250350ed5ffd3c1514ec99a080b1b92d7c8374"}, - {file = "gevent-22.10.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b6c144e08dfad4106effc043a026e5d0c0eff6ad031904c70bf5090c63f3a6a7"}, - {file = "gevent-22.10.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:018f93de7d5318d2fb440f846839a4464738468c3476d5c9cf7da45bb71c18bd"}, - {file = "gevent-22.10.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7ed2346eb9dc4344f9cb0d7963ce5b74fe16fdd031a2809bb6c2b6eba7ebcd5"}, - {file = "gevent-22.10.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:84c517e33ed604fa06b7d756dc0171169cc12f7fdd68eb7b17708a62eebf4516"}, - {file = "gevent-22.10.2-cp37-cp37m-win32.whl", hash = "sha256:4114f0f439f0b547bb6f1d474fee99ddb46736944ad2207cef3771828f6aa358"}, - {file = "gevent-22.10.2-cp37-cp37m-win_amd64.whl", hash = "sha256:0d581f22a5be6281b11ad6309b38b18f0638cf896931223cbaa5adb904826ef6"}, - {file = "gevent-22.10.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2929377c8ebfb6f4d868d161cd8de2ea6b9f6c7a5fcd4f78bcd537319c16190b"}, - {file = "gevent-22.10.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:efc003b6c1481165af61f0aeac248e0a9ac8d880bb3acbe469b448674b2d5281"}, - {file = "gevent-22.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db562a8519838bddad0c439a2b12246bab539dd50e299ea7ff3644274a33b6a5"}, - {file = "gevent-22.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1472012493ca1fac103f700d309cb6ef7964dcdb9c788d1768266e77712f5e49"}, - {file = "gevent-22.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c04ee32c11e9fcee47c1b431834878dc987a7a2cc4fe126ddcae3bad723ce89"}, - {file = "gevent-22.10.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8729129edef2637a8084258cb9ec4e4d5ca45d97ac77aa7a6ff19ccb530ab731"}, - {file = "gevent-22.10.2-cp38-cp38-win32.whl", hash = "sha256:ae90226074a6089371a95f20288431cd4b3f6b0b096856afd862e4ac9510cddd"}, - {file = "gevent-22.10.2-cp38-cp38-win_amd64.whl", hash = "sha256:494c7f29e94df9a1c3157d67bb7edfa32a46eed786e04d9ee68d39f375e30001"}, - {file = "gevent-22.10.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:58898dbabb5b11e4d0192aae165ad286dc6742c543e1be9d30dc82753547c508"}, - {file = "gevent-22.10.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:4197d423e198265eef39a0dea286ef389da9148e070310f34455ecee8172c391"}, - {file = "gevent-22.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da4183f0b9d9a1e25e1758099220d32c51cc2c6340ee0dea3fd236b2b37598e4"}, - {file = "gevent-22.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a5488eba6a568b4d23c072113da4fc0feb1b5f5ede7381656dc913e0d82204e2"}, - {file = "gevent-22.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:319d8b1699b7b8134de66d656cd739b308ab9c45ace14d60ae44de7775b456c9"}, - {file = "gevent-22.10.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f3329bedbba4d3146ae58c667e0f9ac1e6f1e1e6340c7593976cdc60aa7d1a47"}, - {file = "gevent-22.10.2-cp39-cp39-win32.whl", hash = "sha256:172caa66273315f283e90a315921902cb6549762bdcb0587fd60cb712a9d6263"}, - {file = "gevent-22.10.2-cp39-cp39-win_amd64.whl", hash = "sha256:323b207b281ba0405fea042067fa1a61662e5ac0d574ede4ebbda03efd20c350"}, - {file = "gevent-22.10.2-pp27-pypy_73-win_amd64.whl", hash = 
"sha256:ed7f16613eebf892a6a744d7a4a8f345bc6f066a0ff3b413e2479f9c0a180193"}, - {file = "gevent-22.10.2-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:a47a4e77e2bc668856aad92a0b8de7ee10768258d93cd03968e6c7ba2e832f76"}, - {file = "gevent-22.10.2.tar.gz", hash = "sha256:1ca01da176ee37b3527a2702f7d40dbc9ffb8cfc7be5a03bfa4f9eec45e55c46"}, + {file = "gevent-24.11.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:92fe5dfee4e671c74ffaa431fd7ffd0ebb4b339363d24d0d944de532409b935e"}, + {file = "gevent-24.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7bfcfe08d038e1fa6de458891bca65c1ada6d145474274285822896a858c870"}, + {file = "gevent-24.11.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7398c629d43b1b6fd785db8ebd46c0a353880a6fab03d1cf9b6788e7240ee32e"}, + {file = "gevent-24.11.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7886b63ebfb865178ab28784accd32f287d5349b3ed71094c86e4d3ca738af5"}, + {file = "gevent-24.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9ca80711e6553880974898d99357fb649e062f9058418a92120ca06c18c3c59"}, + {file = "gevent-24.11.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e24181d172f50097ac8fc272c8c5b030149b630df02d1c639ee9f878a470ba2b"}, + {file = "gevent-24.11.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1d4fadc319b13ef0a3c44d2792f7918cf1bca27cacd4d41431c22e6b46668026"}, + {file = "gevent-24.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:3d882faa24f347f761f934786dde6c73aa6c9187ee710189f12dcc3a63ed4a50"}, + {file = "gevent-24.11.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:351d1c0e4ef2b618ace74c91b9b28b3eaa0dd45141878a964e03c7873af09f62"}, + {file = "gevent-24.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5efe72e99b7243e222ba0c2c2ce9618d7d36644c166d63373af239da1036bab"}, + {file = "gevent-24.11.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d3b249e4e1f40c598ab8393fc01ae6a3b4d51fc1adae56d9ba5b315f6b2d758"}, + {file = "gevent-24.11.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81d918e952954675f93fb39001da02113ec4d5f4921bf5a0cc29719af6824e5d"}, + {file = "gevent-24.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9c935b83d40c748b6421625465b7308d87c7b3717275acd587eef2bd1c39546"}, + {file = "gevent-24.11.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff96c5739834c9a594db0e12bf59cb3fa0e5102fc7b893972118a3166733d61c"}, + {file = "gevent-24.11.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d6c0a065e31ef04658f799215dddae8752d636de2bed61365c358f9c91e7af61"}, + {file = "gevent-24.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:97e2f3999a5c0656f42065d02939d64fffaf55861f7d62b0107a08f52c984897"}, + {file = "gevent-24.11.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:a3d75fa387b69c751a3d7c5c3ce7092a171555126e136c1d21ecd8b50c7a6e46"}, + {file = "gevent-24.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:beede1d1cff0c6fafae3ab58a0c470d7526196ef4cd6cc18e7769f207f2ea4eb"}, + {file = "gevent-24.11.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:85329d556aaedced90a993226d7d1186a539c843100d393f2349b28c55131c85"}, + {file = "gevent-24.11.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:816b3883fa6842c1cf9d2786722014a0fd31b6312cca1f749890b9803000bad6"}, + {file = 
"gevent-24.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b24d800328c39456534e3bc3e1684a28747729082684634789c2f5a8febe7671"}, + {file = "gevent-24.11.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:a5f1701ce0f7832f333dd2faf624484cbac99e60656bfbb72504decd42970f0f"}, + {file = "gevent-24.11.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:d740206e69dfdfdcd34510c20adcb9777ce2cc18973b3441ab9767cd8948ca8a"}, + {file = "gevent-24.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:68bee86b6e1c041a187347ef84cf03a792f0b6c7238378bf6ba4118af11feaae"}, + {file = "gevent-24.11.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:d618e118fdb7af1d6c1a96597a5cd6ac84a9f3732b5be8515c6a66e098d498b6"}, + {file = "gevent-24.11.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2142704c2adce9cd92f6600f371afb2860a446bfd0be5bd86cca5b3e12130766"}, + {file = "gevent-24.11.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92e0d7759de2450a501effd99374256b26359e801b2d8bf3eedd3751973e87f5"}, + {file = "gevent-24.11.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca845138965c8c56d1550499d6b923eb1a2331acfa9e13b817ad8305dde83d11"}, + {file = "gevent-24.11.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:356b73d52a227d3313f8f828025b665deada57a43d02b1cf54e5d39028dbcf8d"}, + {file = "gevent-24.11.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:58851f23c4bdb70390f10fc020c973ffcf409eb1664086792c8b1e20f25eef43"}, + {file = "gevent-24.11.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:1ea50009ecb7f1327347c37e9eb6561bdbc7de290769ee1404107b9a9cba7cf1"}, + {file = "gevent-24.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:ec68e270543ecd532c4c1d70fca020f90aa5486ad49c4f3b8b2e64a66f5c9274"}, + {file = "gevent-24.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9347690f4e53de2c4af74e62d6fabc940b6d4a6cad555b5a379f61e7d3f2a8e"}, + {file = "gevent-24.11.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8619d5c888cb7aebf9aec6703e410620ef5ad48cdc2d813dd606f8aa7ace675f"}, + {file = "gevent-24.11.1-cp39-cp39-win32.whl", hash = "sha256:c6b775381f805ff5faf250e3a07c0819529571d19bb2a9d474bee8c3f90d66af"}, + {file = "gevent-24.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:1c3443b0ed23dcb7c36a748d42587168672953d368f2956b17fad36d43b58836"}, + {file = "gevent-24.11.1-pp310-pypy310_pp73-macosx_11_0_universal2.whl", hash = "sha256:f43f47e702d0c8e1b8b997c00f1601486f9f976f84ab704f8f11536e3fa144c9"}, + {file = "gevent-24.11.1.tar.gz", hash = "sha256:8bd1419114e9e4a3ed33a5bad766afff9a3cf765cb440a582a1b3a9bc80c1aca"}, ] [package.dependencies] -cffi = {version = ">=1.12.2", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} -greenlet = {version = ">=2.0.0", markers = "platform_python_implementation == \"CPython\""} -setuptools = "*" +cffi = {version = ">=1.17.1", markers = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} +greenlet = {version = ">=3.1.1", markers = "platform_python_implementation == \"CPython\""} "zope.event" = "*" "zope.interface" = "*" [package.extras] dnspython = ["dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\""] -docs = ["repoze.sphinx.autointerface", "sphinxcontrib-programoutput", "zope.schema"] +docs = ["furo", "repoze.sphinx.autointerface", "sphinx", "sphinxcontrib-programoutput", "zope.schema"] monitor = 
["psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\""] -recommended = ["backports.socketpair ; python_version == \"2.7\" and sys_platform == \"win32\"", "cffi (>=1.12.2) ; platform_python_implementation == \"CPython\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\"", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\"", "selectors2 ; python_version == \"2.7\""] -test = ["backports.socketpair ; python_version == \"2.7\" and sys_platform == \"win32\"", "cffi (>=1.12.2) ; platform_python_implementation == \"CPython\"", "contextvars (==2.4) ; python_version > \"3.0\" and python_version < \"3.7\"", "coverage (>=5.0) ; sys_platform != \"win32\"", "coveralls (>=1.7.0) ; sys_platform != \"win32\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "futures ; python_version == \"2.7\"", "idna ; python_version < \"3.10\"", "mock ; python_version == \"2.7\"", "objgraph", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\"", "requests", "selectors2 ; python_version == \"2.7\""] +recommended = ["cffi (>=1.17.1) ; platform_python_implementation == \"CPython\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\"", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\""] +test = ["cffi (>=1.17.1) ; platform_python_implementation == \"CPython\"", "coverage (>=5.0) ; sys_platform != \"win32\"", "dnspython (>=1.16.0,<2.0) ; python_version < \"3.10\"", "idna ; python_version < \"3.10\"", "objgraph", "psutil (>=5.7.0) ; sys_platform != \"win32\" or platform_python_implementation == \"CPython\"", "requests"] [[package]] name = "greenlet" @@ -1035,21 +1151,6 @@ files = [ {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, ] -[[package]] -name = "isodate" -version = "0.6.1" -description = "An ISO 8601 date/time/duration parser and formatter" -optional = false -python-versions = "*" -groups = ["main"] -files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, -] - -[package.dependencies] -six = "*" - [[package]] name = "jwcrypto" version = "1.5.6" @@ -1068,139 +1169,211 @@ typing-extensions = ">=4.5.0" [[package]] name = "kombu" -version = "5.3.7" +version = "5.5.0" description = "Messaging library for Python." 
optional = false python-versions = ">=3.8" groups = ["main"] files = [ - {file = "kombu-5.3.7-py3-none-any.whl", hash = "sha256:5634c511926309c7f9789f1433e9ed402616b56836ef9878f01bd59267b4c7a9"}, - {file = "kombu-5.3.7.tar.gz", hash = "sha256:011c4cd9a355c14a1de8d35d257314a1d2456d52b7140388561acac3cf1a97bf"}, + {file = "kombu-5.5.0-py3-none-any.whl", hash = "sha256:526c6cf038c986b998639109a1eb762502f831e8da148cc928f1f95cd91eb874"}, + {file = "kombu-5.5.0.tar.gz", hash = "sha256:72e65c062e903ee1b4e8b68d348f63c02afc172eda409e3aca85867752e79c0b"}, ] [package.dependencies] amqp = ">=5.1.1,<6.0.0" -vine = "*" +tzdata = {version = "2025.1", markers = "python_version >= \"3.9\""} +vine = "5.1.0" [package.extras] azureservicebus = ["azure-servicebus (>=7.10.0)"] azurestoragequeues = ["azure-identity (>=1.12.0)", "azure-storage-queue (>=12.6.0)"] confluentkafka = ["confluent-kafka (>=2.2.0)"] -consul = ["python-consul2"] +consul = ["python-consul2 (==0.1.5)"] +gcpubsub = ["google-cloud-monitoring (>=2.16.0)", "google-cloud-pubsub (>=2.18.4)", "grpcio (==1.67.0)", "protobuf (==4.25.5)"] librabbitmq = ["librabbitmq (>=2.0.0) ; python_version < \"3.11\""] mongodb = ["pymongo (>=4.1.1)"] -msgpack = ["msgpack"] -pyro = ["pyro4"] +msgpack = ["msgpack (==1.1.0)"] +pyro = ["pyro4 (==4.82)"] qpid = ["qpid-python (>=0.26)", "qpid-tools (>=0.26)"] -redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2)"] +redis = ["redis (>=4.5.2,!=4.5.5,!=5.0.2,<=5.2.1)"] slmq = ["softlayer-messaging (>=1.0.3)"] sqlalchemy = ["sqlalchemy (>=1.4.48,<2.1)"] -sqs = ["boto3 (>=1.26.143)", "pycurl (>=7.43.0.5) ; sys_platform != \"win32\" and platform_python_implementation == \"CPython\"", "urllib3 (>=1.26.16)"] +sqs = ["boto3 (>=1.26.143)", "urllib3 (>=1.26.16)"] yaml = ["PyYAML (>=3.10)"] zookeeper = ["kazoo (>=2.8.0)"] [[package]] name = "lxml" -version = "4.9.1" +version = "5.3.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
+python-versions = ">=3.6"
 groups = ["main"]
 files = [
-    {file = "lxml-4.9.1-cp27-cp27m-macosx_10_15_x86_64.whl", hash = "sha256:98cafc618614d72b02185ac583c6f7796202062c41d2eeecdf07820bad3295ed"},
-    {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c62e8dd9754b7debda0c5ba59d34509c4688f853588d75b53c3791983faa96fc"},
-    {file = "lxml-4.9.1-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:21fb3d24ab430fc538a96e9fbb9b150029914805d551deeac7d7822f64631dfc"},
-    {file = "lxml-4.9.1-cp27-cp27m-win32.whl", hash = "sha256:86e92728ef3fc842c50a5cb1d5ba2bc66db7da08a7af53fb3da79e202d1b2cd3"},
-    {file = "lxml-4.9.1-cp27-cp27m-win_amd64.whl", hash = "sha256:4cfbe42c686f33944e12f45a27d25a492cc0e43e1dc1da5d6a87cbcaf2e95627"},
-    {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dad7b164905d3e534883281c050180afcf1e230c3d4a54e8038aa5cfcf312b84"},
-    {file = "lxml-4.9.1-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:a614e4afed58c14254e67862456d212c4dcceebab2eaa44d627c2ca04bf86837"},
-    {file = "lxml-4.9.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f9ced82717c7ec65a67667bb05865ffe38af0e835cdd78728f1209c8fffe0cad"},
-    {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:d9fc0bf3ff86c17348dfc5d322f627d78273eba545db865c3cd14b3f19e57fa5"},
-    {file = "lxml-4.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e5f66bdf0976ec667fc4594d2812a00b07ed14d1b44259d19a41ae3fff99f2b8"},
-    {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:fe17d10b97fdf58155f858606bddb4e037b805a60ae023c009f760d8361a4eb8"},
-    {file = "lxml-4.9.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8caf4d16b31961e964c62194ea3e26a0e9561cdf72eecb1781458b67ec83423d"},
-    {file = "lxml-4.9.1-cp310-cp310-win32.whl", hash = "sha256:4780677767dd52b99f0af1f123bc2c22873d30b474aa0e2fc3fe5e02217687c7"},
-    {file = "lxml-4.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:b122a188cd292c4d2fcd78d04f863b789ef43aa129b233d7c9004de08693728b"},
-    {file = "lxml-4.9.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:be9eb06489bc975c38706902cbc6888f39e946b81383abc2838d186f0e8b6a9d"},
-    {file = "lxml-4.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f1be258c4d3dc609e654a1dc59d37b17d7fef05df912c01fc2e15eb43a9735f3"},
-    {file = "lxml-4.9.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:927a9dd016d6033bc12e0bf5dee1dde140235fc8d0d51099353c76081c03dc29"},
-    {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9232b09f5efee6a495a99ae6824881940d6447debe272ea400c02e3b68aad85d"},
-    {file = "lxml-4.9.1-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:04da965dfebb5dac2619cb90fcf93efdb35b3c6994fea58a157a834f2f94b318"},
-    {file = "lxml-4.9.1-cp35-cp35m-win32.whl", hash = "sha256:4d5bae0a37af799207140652a700f21a85946f107a199bcb06720b13a4f1f0b7"},
-    {file = "lxml-4.9.1-cp35-cp35m-win_amd64.whl", hash = "sha256:4878e667ebabe9b65e785ac8da4d48886fe81193a84bbe49f12acff8f7a383a4"},
-    {file = "lxml-4.9.1-cp36-cp36m-macosx_10_15_x86_64.whl", hash = "sha256:1355755b62c28950f9ce123c7a41460ed9743c699905cbe664a5bcc5c9c7c7fb"},
-    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:bcaa1c495ce623966d9fc8a187da80082334236a2a1c7e141763ffaf7a405067"},
-    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6eafc048ea3f1b3c136c71a86db393be36b5b3d9c87b1c25204e7d397cee9536"},
-    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:13c90064b224e10c14dcdf8086688d3f0e612db53766e7478d7754703295c7c8"},
-    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:206a51077773c6c5d2ce1991327cda719063a47adc02bd703c56a662cdb6c58b"},
-    {file = "lxml-4.9.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e8f0c9d65da595cfe91713bc1222af9ecabd37971762cb830dea2fc3b3bb2acf"},
-    {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8f0a4d179c9a941eb80c3a63cdb495e539e064f8054230844dcf2fcb812b71d3"},
-    {file = "lxml-4.9.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:830c88747dce8a3e7525defa68afd742b4580df6aa2fdd6f0855481e3994d391"},
-    {file = "lxml-4.9.1-cp36-cp36m-win32.whl", hash = "sha256:1e1cf47774373777936c5aabad489fef7b1c087dcd1f426b621fda9dcc12994e"},
-    {file = "lxml-4.9.1-cp36-cp36m-win_amd64.whl", hash = "sha256:5974895115737a74a00b321e339b9c3f45c20275d226398ae79ac008d908bff7"},
-    {file = "lxml-4.9.1-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:1423631e3d51008871299525b541413c9b6c6423593e89f9c4cfbe8460afc0a2"},
-    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:2aaf6a0a6465d39b5ca69688fce82d20088c1838534982996ec46633dc7ad6cc"},
-    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9f36de4cd0c262dd9927886cc2305aa3f2210db437aa4fed3fb4940b8bf4592c"},
-    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae06c1e4bc60ee076292e582a7512f304abdf6c70db59b56745cca1684f875a4"},
-    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:57e4d637258703d14171b54203fd6822fda218c6c2658a7d30816b10995f29f3"},
-    {file = "lxml-4.9.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6d279033bf614953c3fc4a0aa9ac33a21e8044ca72d4fa8b9273fe75359d5cca"},
-    {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:a60f90bba4c37962cbf210f0188ecca87daafdf60271f4c6948606e4dabf8785"},
-    {file = "lxml-4.9.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2264f341dd81e41f3fffecec6e446aa2121e0b8d026fb5130e02de1402785"},
-    {file = "lxml-4.9.1-cp37-cp37m-win32.whl", hash = "sha256:27e590352c76156f50f538dbcebd1925317a0f70540f7dc8c97d2931c595783a"},
-    {file = "lxml-4.9.1-cp37-cp37m-win_amd64.whl", hash = "sha256:eea5d6443b093e1545ad0210e6cf27f920482bfcf5c77cdc8596aec73523bb7e"},
-    {file = "lxml-4.9.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:f05251bbc2145349b8d0b77c0d4e5f3b228418807b1ee27cefb11f69ed3d233b"},
-    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:487c8e61d7acc50b8be82bda8c8d21d20e133c3cbf41bd8ad7eb1aaeb3f07c97"},
-    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d1a92d8e90b286d491e5626af53afef2ba04da33e82e30744795c71880eaa21"},
-    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:b570da8cd0012f4af9fa76a5635cd31f707473e65a5a335b186069d5c7121ff2"},
-    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ef87fca280fb15342726bd5f980f6faf8b84a5287fcc2d4962ea8af88b35130"},
-    {file = "lxml-4.9.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:93e414e3206779ef41e5ff2448067213febf260ba747fc65389a3ddaa3fb8715"},
-    {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6653071f4f9bac46fbc30f3c7838b0e9063ee335908c5d61fb7a4a86c8fd2036"},
-    {file = "lxml-4.9.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:32a73c53783becdb7eaf75a2a1525ea8e49379fb7248c3eeefb9412123536387"},
-    {file = "lxml-4.9.1-cp38-cp38-win32.whl", hash = "sha256:1a7c59c6ffd6ef5db362b798f350e24ab2cfa5700d53ac6681918f314a4d3b94"},
-    {file = "lxml-4.9.1-cp38-cp38-win_amd64.whl", hash = "sha256:1436cf0063bba7888e43f1ba8d58824f085410ea2025befe81150aceb123e345"},
-    {file = "lxml-4.9.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:4beea0f31491bc086991b97517b9683e5cfb369205dac0148ef685ac12a20a67"},
-    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:41fb58868b816c202e8881fd0f179a4644ce6e7cbbb248ef0283a34b73ec73bb"},
-    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bd34f6d1810d9354dc7e35158aa6cc33456be7706df4420819af6ed966e85448"},
-    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:edffbe3c510d8f4bf8640e02ca019e48a9b72357318383ca60e3330c23aaffc7"},
-    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6d949f53ad4fc7cf02c44d6678e7ff05ec5f5552b235b9e136bd52e9bf730b91"},
-    {file = "lxml-4.9.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:079b68f197c796e42aa80b1f739f058dcee796dc725cc9a1be0cdb08fc45b000"},
-    {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9c3a88d20e4fe4a2a4a84bf439a5ac9c9aba400b85244c63a1ab7088f85d9d25"},
-    {file = "lxml-4.9.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4e285b5f2bf321fc0857b491b5028c5f276ec0c873b985d58d7748ece1d770dd"},
-    {file = "lxml-4.9.1-cp39-cp39-win32.whl", hash = "sha256:ef72013e20dd5ba86a8ae1aed7f56f31d3374189aa8b433e7b12ad182c0d2dfb"},
-    {file = "lxml-4.9.1-cp39-cp39-win_amd64.whl", hash = "sha256:10d2017f9150248563bb579cd0d07c61c58da85c922b780060dcc9a3aa9f432d"},
-    {file = "lxml-4.9.1-pp37-pypy37_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0538747a9d7827ce3e16a8fdd201a99e661c7dee3c96c885d8ecba3c35d1032c"},
-    {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0645e934e940107e2fdbe7c5b6fb8ec6232444260752598bc4d09511bd056c0b"},
-    {file = "lxml-4.9.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6daa662aba22ef3258934105be2dd9afa5bb45748f4f702a3b39a5bf53a1f4dc"},
-    {file = "lxml-4.9.1-pp38-pypy38_pp73-macosx_10_15_x86_64.whl", hash = "sha256:603a464c2e67d8a546ddaa206d98e3246e5db05594b97db844c2f0a1af37cf5b"},
-    {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c4b2e0559b68455c085fb0f6178e9752c4be3bba104d6e881eb5573b399d1eb2"},
-    {file = "lxml-4.9.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0f3f0059891d3254c7b5fb935330d6db38d6519ecd238ca4fce93c234b4a0f73"},
-    {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:c852b1530083a620cb0de5f3cd6826f19862bafeaf77586f1aef326e49d95f0c"},
-    {file = "lxml-4.9.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:287605bede6bd36e930577c5925fcea17cb30453d96a7b4c63c14a257118dbb9"},
-    {file = "lxml-4.9.1.tar.gz", hash = "sha256:fe749b052bb7233fe5d072fcb549221a8cb1a16725c47c37e42b0b9cb3ff2c3f"},
+    {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"},
+    {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ae5fe5c4b525aa82b8076c1a59d642c17b6e8739ecf852522c6321852178119d"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501d0d7e26b4d261fca8132854d845e4988097611ba2531408ec91cf3fd9d20a"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb66442c2546446944437df74379e9cf9e9db353e61301d1a0e26482f43f0dd8"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e41506fec7a7f9405b14aa2d5c8abbb4dbbd09d88f9496958b6d00cb4d45330"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7d4a670107d75dfe5ad080bed6c341d18c4442f9378c9f58e5851e86eb79965"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41ce1f1e2c7755abfc7e759dc34d7d05fd221723ff822947132dc934d122fe22"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:44264ecae91b30e5633013fb66f6ddd05c006d3e0e884f75ce0b4755b3e3847b"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:3c174dc350d3ec52deb77f2faf05c439331d6ed5e702fc247ccb4e6b62d884b7"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:2dfab5fa6a28a0b60a20638dc48e6343c02ea9933e3279ccb132f555a62323d8"},
+    {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b1c8c20847b9f34e98080da785bb2336ea982e7f913eed5809e5a3c872900f32"},
+    {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2c86bf781b12ba417f64f3422cfc302523ac9cd1d8ae8c0f92a1c66e56ef2e86"},
+    {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c162b216070f280fa7da844531169be0baf9ccb17263cf5a8bf876fcd3117fa5"},
+    {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:36aef61a1678cb778097b4a6eeae96a69875d51d1e8f4d4b491ab3cfb54b5a03"},
+    {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f65e5120863c2b266dbcc927b306c5b78e502c71edf3295dfcb9501ec96e5fc7"},
+    {file = "lxml-5.3.0-cp310-cp310-win32.whl", hash = "sha256:ef0c1fe22171dd7c7c27147f2e9c3e86f8bdf473fed75f16b0c2e84a5030ce80"},
+    {file = "lxml-5.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:052d99051e77a4f3e8482c65014cf6372e61b0a6f4fe9edb98503bb5364cfee3"},
+    {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:74bcb423462233bc5d6066e4e98b0264e7c1bed7541fff2f4e34fe6b21563c8b"},
+    {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a3d819eb6f9b8677f57f9664265d0a10dd6551d227afb4af2b9cd7bdc2ccbf18"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b8f5db71b28b8c404956ddf79575ea77aa8b1538e8b2ef9ec877945b3f46442"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3406b63232fc7e9b8783ab0b765d7c59e7c59ff96759d8ef9632fca27c7ee4"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ecdd78ab768f844c7a1d4a03595038c166b609f6395e25af9b0f3f26ae1230f"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168f2dfcfdedf611eb285efac1516c8454c8c99caf271dccda8943576b67552e"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa617107a410245b8660028a7483b68e7914304a6d4882b5ff3d2d3eb5948d8c"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:69959bd3167b993e6e710b99051265654133a98f20cec1d9b493b931942e9c16"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:bd96517ef76c8654446fc3db9242d019a1bb5fe8b751ba414765d59f99210b79"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ab6dd83b970dc97c2d10bc71aa925b84788c7c05de30241b9e96f9b6d9ea3080"},
+    {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eec1bb8cdbba2925bedc887bc0609a80e599c75b12d87ae42ac23fd199445654"},
+    {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a7095eeec6f89111d03dabfe5883a1fd54da319c94e0fb104ee8f23616b572d"},
+    {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6f651ebd0b21ec65dfca93aa629610a0dbc13dbc13554f19b0113da2e61a4763"},
+    {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f422a209d2455c56849442ae42f25dbaaba1c6c3f501d58761c619c7836642ec"},
+    {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:62f7fdb0d1ed2065451f086519865b4c90aa19aed51081979ecd05a21eb4d1be"},
+    {file = "lxml-5.3.0-cp311-cp311-win32.whl", hash = "sha256:c6379f35350b655fd817cd0d6cbeef7f265f3ae5fedb1caae2eb442bbeae9ab9"},
+    {file = "lxml-5.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c52100e2c2dbb0649b90467935c4b0de5528833c76a35ea1a2691ec9f1ee7a1"},
+    {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e99f5507401436fdcc85036a2e7dc2e28d962550afe1cbfc07c40e454256a859"},
+    {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:384aacddf2e5813a36495233b64cb96b1949da72bef933918ba5c84e06af8f0e"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a216bf6afaf97c263b56371434e47e2c652d215788396f60477540298218f"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65ab5685d56914b9a2a34d67dd5488b83213d680b0c5d10b47f81da5a16b0b0e"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aac0bbd3e8dd2d9c45ceb82249e8bdd3ac99131a32b4d35c8af3cc9db1657179"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b369d3db3c22ed14c75ccd5af429086f166a19627e84a8fdade3f8f31426e52a"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24037349665434f375645fa9d1f5304800cec574d0310f618490c871fd902b3"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:62d172f358f33a26d6b41b28c170c63886742f5b6772a42b59b4f0fa10526cb1"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:c1f794c02903c2824fccce5b20c339a1a14b114e83b306ff11b597c5f71a1c8d"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:5d6a6972b93c426ace71e0be9a6f4b2cfae9b1baed2eed2006076a746692288c"},
+    {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:3879cc6ce938ff4eb4900d901ed63555c778731a96365e53fadb36437a131a99"},
+    {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:74068c601baff6ff021c70f0935b0c7bc528baa8ea210c202e03757c68c5a4ff"},
+    {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ecd4ad8453ac17bc7ba3868371bffb46f628161ad0eefbd0a855d2c8c32dd81a"},
+    {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7e2f58095acc211eb9d8b5771bf04df9ff37d6b87618d1cbf85f92399c98dae8"},
+    {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e63601ad5cd8f860aa99d109889b5ac34de571c7ee902d6812d5d9ddcc77fa7d"},
+    {file = "lxml-5.3.0-cp312-cp312-win32.whl", hash = "sha256:17e8d968d04a37c50ad9c456a286b525d78c4a1c15dd53aa46c1d8e06bf6fa30"},
+    {file = "lxml-5.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:c1a69e58a6bb2de65902051d57fde951febad631a20a64572677a1052690482f"},
+    {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c72e9563347c7395910de6a3100a4840a75a6f60e05af5e58566868d5eb2d6a"},
+    {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e92ce66cd919d18d14b3856906a61d3f6b6a8500e0794142338da644260595cd"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d04f064bebdfef9240478f7a779e8c5dc32b8b7b0b2fc6a62e39b928d428e51"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c2fb570d7823c2bbaf8b419ba6e5662137f8166e364a8b2b91051a1fb40ab8b"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c120f43553ec759f8de1fee2f4794452b0946773299d44c36bfe18e83caf002"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:562e7494778a69086f0312ec9689f6b6ac1c6b65670ed7d0267e49f57ffa08c4"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:423b121f7e6fa514ba0c7918e56955a1d4470ed35faa03e3d9f0e3baa4c7e492"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c00f323cc00576df6165cc9d21a4c21285fa6b9989c5c39830c3903dc4303ef3"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:1fdc9fae8dd4c763e8a31e7630afef517eab9f5d5d31a278df087f307bf601f4"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:658f2aa69d31e09699705949b5fc4719cbecbd4a97f9656a232e7d6c7be1a367"},
+    {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1473427aff3d66a3fa2199004c3e601e6c4500ab86696edffdbc84954c72d832"},
+    {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a87de7dd873bf9a792bf1e58b1c3887b9264036629a5bf2d2e6579fe8e73edff"},
+    {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0d7b36afa46c97875303a94e8f3ad932bf78bace9e18e603f2085b652422edcd"},
+    {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cf120cce539453ae086eacc0130a324e7026113510efa83ab42ef3fcfccac7fb"},
+    {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:df5c7333167b9674aa8ae1d4008fa4bc17a313cc490b2cca27838bbdcc6bb15b"},
+    {file = "lxml-5.3.0-cp313-cp313-win32.whl", hash = "sha256:c802e1c2ed9f0c06a65bc4ed0189d000ada8049312cfeab6ca635e39c9608957"},
+    {file = "lxml-5.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:406246b96d552e0503e17a1006fd27edac678b3fcc9f1be71a2f94b4ff61528d"},
+    {file = "lxml-5.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8f0de2d390af441fe8b2c12626d103540b5d850d585b18fcada58d972b74a74e"},
+    {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1afe0a8c353746e610bd9031a630a95bcfb1a720684c3f2b36c4710a0a96528f"},
+    {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56b9861a71575f5795bde89256e7467ece3d339c9b43141dbdd54544566b3b94"},
+    {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:9fb81d2824dff4f2e297a276297e9031f46d2682cafc484f49de182aa5e5df99"},
+    {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2c226a06ecb8cdef28845ae976da407917542c5e6e75dcac7cc33eb04aaeb237"},
+    {file = "lxml-5.3.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:7d3d1ca42870cdb6d0d29939630dbe48fa511c203724820fc0fd507b2fb46577"},
+    {file = "lxml-5.3.0-cp36-cp36m-win32.whl", hash = "sha256:094cb601ba9f55296774c2d57ad68730daa0b13dc260e1f941b4d13678239e70"},
+    {file = "lxml-5.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:eafa2c8658f4e560b098fe9fc54539f86528651f61849b22111a9b107d18910c"},
+    {file = "lxml-5.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb83f8a875b3d9b458cada4f880fa498646874ba4011dc974e071a0a84a1b033"},
+    {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25f1b69d41656b05885aa185f5fdf822cb01a586d1b32739633679699f220391"},
+    {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23e0553b8055600b3bf4a00b255ec5c92e1e4aebf8c2c09334f8368e8bd174d6"},
+    {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ada35dd21dc6c039259596b358caab6b13f4db4d4a7f8665764d616daf9cc1d"},
+    {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:81b4e48da4c69313192d8c8d4311e5d818b8be1afe68ee20f6385d0e96fc9512"},
+    {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:2bc9fd5ca4729af796f9f59cd8ff160fe06a474da40aca03fcc79655ddee1a8b"},
+    {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07da23d7ee08577760f0a71d67a861019103e4812c87e2fab26b039054594cc5"},
+    {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ea2e2f6f801696ad7de8aec061044d6c8c0dd4037608c7cab38a9a4d316bfb11"},
+    {file = "lxml-5.3.0-cp37-cp37m-win32.whl", hash = "sha256:5c54afdcbb0182d06836cc3d1be921e540be3ebdf8b8a51ee3ef987537455f84"},
+    {file = "lxml-5.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f2901429da1e645ce548bf9171784c0f74f0718c3f6150ce166be39e4dd66c3e"},
+    {file = "lxml-5.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c56a1d43b2f9ee4786e4658c7903f05da35b923fb53c11025712562d5cc02753"},
+    {file = "lxml-5.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ee8c39582d2652dcd516d1b879451500f8db3fe3607ce45d7c5957ab2596040"},
+    {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdf3a3059611f7585a78ee10399a15566356116a4288380921a4b598d807a22"},
+    {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:146173654d79eb1fc97498b4280c1d3e1e5d58c398fa530905c9ea50ea849b22"},
+    {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0a7056921edbdd7560746f4221dca89bb7a3fe457d3d74267995253f46343f15"},
+    {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:9e4b47ac0f5e749cfc618efdf4726269441014ae1d5583e047b452a32e221920"},
+    {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f914c03e6a31deb632e2daa881fe198461f4d06e57ac3d0e05bbcab8eae01945"},
+    {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:213261f168c5e1d9b7535a67e68b1f59f92398dd17a56d934550837143f79c42"},
+    {file = "lxml-5.3.0-cp38-cp38-win32.whl", hash = "sha256:218c1b2e17a710e363855594230f44060e2025b05c80d1f0661258142b2add2e"},
+    {file = "lxml-5.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:315f9542011b2c4e1d280e4a20ddcca1761993dda3afc7a73b01235f8641e903"},
+    {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1ffc23010330c2ab67fac02781df60998ca8fe759e8efde6f8b756a20599c5de"},
+    {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2b3778cb38212f52fac9fe913017deea2fdf4eb1a4f8e4cfc6b009a13a6d3fcc"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b0c7a688944891086ba192e21c5229dea54382f4836a209ff8d0a660fac06be"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:747a3d3e98e24597981ca0be0fd922aebd471fa99d0043a3842d00cdcad7ad6a"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86a6b24b19eaebc448dc56b87c4865527855145d851f9fc3891673ff97950540"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b11a5d918a6216e521c715b02749240fb07ae5a1fefd4b7bf12f833bc8b4fe70"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b87753c784d6acb8a25b05cb526c3406913c9d988d51f80adecc2b0775d6aa"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:109fa6fede314cc50eed29e6e56c540075e63d922455346f11e4d7a036d2b8cf"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:02ced472497b8362c8e902ade23e3300479f4f43e45f4105c85ef43b8db85229"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:6b038cc86b285e4f9fea2ba5ee76e89f21ed1ea898e287dc277a25884f3a7dfe"},
+    {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:7437237c6a66b7ca341e868cda48be24b8701862757426852c9b3186de1da8a2"},
+    {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7f41026c1d64043a36fda21d64c5026762d53a77043e73e94b71f0521939cc71"},
+    {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:482c2f67761868f0108b1743098640fbb2a28a8e15bf3f47ada9fa59d9fe08c3"},
+    {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1483fd3358963cc5c1c9b122c80606a3a79ee0875bcac0204149fa09d6ff2727"},
+    {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dec2d1130a9cda5b904696cec33b2cfb451304ba9081eeda7f90f724097300a"},
+    {file = "lxml-5.3.0-cp39-cp39-win32.whl", hash = "sha256:a0eabd0a81625049c5df745209dc7fcef6e2aea7793e5f003ba363610aa0a3ff"},
+    {file = "lxml-5.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:89e043f1d9d341c52bf2af6d02e6adde62e0a46e6755d5eb60dc6e4f0b8aeca2"},
+    {file = "lxml-5.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7b1cd427cb0d5f7393c31b7496419da594fe600e6fdc4b105a54f82405e6626c"},
+    {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51806cfe0279e06ed8500ce19479d757db42a30fd509940b1701be9c86a5ff9a"},
+    {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee70d08fd60c9565ba8190f41a46a54096afa0eeb8f76bd66f2c25d3b1b83005"},
+    {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:8dc2c0395bea8254d8daebc76dcf8eb3a95ec2a46fa6fae5eaccee366bfe02ce"},
+    {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6ba0d3dcac281aad8a0e5b14c7ed6f9fa89c8612b47939fc94f80b16e2e9bc83"},
+    {file = "lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba"},
+    {file = "lxml-5.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:94d6c3782907b5e40e21cadf94b13b0842ac421192f26b84c45f13f3c9d5dc27"},
+    {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c300306673aa0f3ed5ed9372b21867690a17dba38c68c44b287437c362ce486b"},
+    {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d9b952e07aed35fe2e1a7ad26e929595412db48535921c5013edc8aa4a35ce"},
+    {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:01220dca0d066d1349bd6a1726856a78f7929f3878f7e2ee83c296c69495309e"},
+    {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2d9b8d9177afaef80c53c0a9e30fa252ff3036fb1c6494d427c066a4ce6a282f"},
+    {file = "lxml-5.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:20094fc3f21ea0a8669dc4c61ed7fa8263bd37d97d93b90f28fc613371e7a875"},
+    {file = "lxml-5.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ace2c2326a319a0bb8a8b0e5b570c764962e95818de9f259ce814ee666603f19"},
+    {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92e67a0be1639c251d21e35fe74df6bcc40cba445c2cda7c4a967656733249e2"},
+    {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5350b55f9fecddc51385463a4f67a5da829bc741e38cf689f38ec9023f54ab"},
+    {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c1fefd7e3d00921c44dc9ca80a775af49698bbfd92ea84498e56acffd4c5469"},
+    {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:71a8dd38fbd2f2319136d4ae855a7078c69c9a38ae06e0c17c73fd70fc6caad8"},
+    {file = "lxml-5.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:97acf1e1fd66ab53dacd2c35b319d7e548380c2e9e8c54525c6e76d21b1ae3b1"},
+    {file = "lxml-5.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:68934b242c51eb02907c5b81d138cb977b2129a0a75a8f8b60b01cb8586c7b21"},
+    {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b710bc2b8292966b23a6a0121f7a6c51d45d2347edcc75f016ac123b8054d3f2"},
+    {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18feb4b93302091b1541221196a2155aa296c363fd233814fa11e181adebc52f"},
+    {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:3eb44520c4724c2e1a57c0af33a379eee41792595023f367ba3952a2d96c2aab"},
+    {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:609251a0ca4770e5a8768ff902aa02bf636339c5a93f9349b48eb1f606f7f3e9"},
+    {file = "lxml-5.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:516f491c834eb320d6c843156440fe7fc0d50b33e44387fcec5b02f0bc118a4c"},
+    {file = "lxml-5.3.0.tar.gz", hash = "sha256:4e109ca30d1edec1ac60cdbe341905dc3b8f55b16855e03a54aaf59e51ec8c6f"},
 ]
 
 [package.extras]
 cssselect = ["cssselect (>=0.7)"]
+html-clean = ["lxml-html-clean"]
 html5 = ["html5lib"]
 htmlsoup = ["BeautifulSoup4"]
-source = ["Cython (>=0.29.7)"]
+source = ["Cython (>=3.0.11)"]
 
 [[package]]
 name = "markdown2"
-version = "2.4.10"
+version = "2.5.3"
 description = "A fast and complete Python implementation of Markdown"
 optional = false
-python-versions = ">=3.5, <4"
+python-versions = "<4,>=3.9"
 groups = ["main"]
 files = [
-    {file = "markdown2-2.4.10-py2.py3-none-any.whl", hash = "sha256:e6105800483783831f5dc54f827aa5b44eb137ecef5a70293d8ecfbb4109ecc6"},
-    {file = "markdown2-2.4.10.tar.gz", hash = "sha256:cdba126d90dc3aef6f4070ac342f974d63f415678959329cc7909f96cc235d72"},
+    {file = "markdown2-2.5.3-py3-none-any.whl", hash = "sha256:a8ebb7e84b8519c37bf7382b3db600f1798a22c245bfd754a1f87ca8d7ea63b3"},
+    {file = "markdown2-2.5.3.tar.gz", hash = "sha256:4d502953a4633408b0ab3ec503c5d6984d1b14307e32b325ec7d16ea57524895"},
 ]
 
 [package.extras]
-all = ["pygments (>=2.7.3)", "wavedrom ; python_version >= \"3.7\""]
+all = ["latex2mathml ; python_version >= \"3.8.1\"", "pygments (>=2.7.3)", "wavedrom"]
 code-syntax-highlighting = ["pygments (>=2.7.3)"]
-wavedrom = ["wavedrom ; python_version >= \"3.7\""]
+latex = ["latex2mathml ; python_version >= \"3.8.1\""]
+wavedrom = ["wavedrom"]
 
 [[package]]
 name = "mccabe"
@@ -1216,27 +1389,41 @@ files = [
 
 [[package]]
 name = "newrelic"
-version = "8.4.0"
+version = "10.7.0"
 description = "New Relic Python Agent"
 optional = false
-python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*"
-groups = ["main"]
+python-versions = ">=3.7"
+groups = ["dist"]
 files = [
-    {file = "newrelic-8.4.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:ede2b43cf395fef31c3a43bd8d6db6cb56496516c54123b534f0027d582446ef"},
-    {file = "newrelic-8.4.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:dc095f45496c03681bcf01692c85817243ccb500a5281410cdf7194be9308444"},
-    {file = "newrelic-8.4.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:88063cd39672a93d2201f9f6568e9b996b1a1b9ca052f83eac9a4ca0a36fa7a9"},
-    {file = "newrelic-8.4.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:492a06949af21b82072095c1055051985cddadc9458f1752a2c040228918fabc"},
-    {file = "newrelic-8.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ec531aa73f1ae12724031a965805a535585fd06c617acb68b8b1a4534912e31"},
-    {file = "newrelic-8.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4c831a8f084b8168afdc4e9770ac062d6fdac16f41aa7a122ce87e5e448bbe9"},
-    {file = "newrelic-8.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:385829c819bccde7edc002ea73b784194312824c3b3d9511c1015194498f2658"},
-    {file = "newrelic-8.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c465d7366cb081498bb6918f4b91157b54e70b446639d1a3ec86fc72062e6c3"},
-    {file = "newrelic-8.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbedd2040a003e2f712184fab672e2987cf34da59f64f548d121c1a02ea1f4c"},
-    {file = "newrelic-8.4.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0b9fcf84b3735b7d699df9e27822ab5a62a1ed12c39540140e2d40be3be8e9"},
-    {file = "newrelic-8.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97f8f288dc131b3260220aef3716ae5e78aeb08edcd2e43e947a675a0d08d21b"},
-    {file = "newrelic-8.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:592b622945e4ff4f14cca0e01dd08553a1f1e29cea25826b9e30101133296993"},
-    {file = "newrelic-8.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01b37112a6f48d2da8e98268dfc273a33981297a2b43ac7674d5993dee31885d"},
-    {file = "newrelic-8.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ea8850cc5c05edba753cd85800bb6d3411bcd7425614c59e5550f35c4f2cc6"},
-    {file = "newrelic-8.4.0.tar.gz", hash = "sha256:4abf147b5148ac1d284aba46582c92840a521bf5086a29af36494bd53778136f"},
+    {file = "newrelic-10.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08e959814e0b23a8f96383955cceecb6180dc66f240279c45ee8484058f96eb4"},
+    {file = "newrelic-10.7.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e12b7e88e0d78497b4e3dfca0411a76a548ee15842b9d6ef971035bbdc91693"},
+    {file = "newrelic-10.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4125d02c016c3b4a0f88d5ce184d78d4101531d1f76525f7f1ee750e453603f1"},
+    {file = "newrelic-10.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:680a75e3dd37e86bf1eef2b77408dd1953c94a7b803b56e6f8c3f42164580e35"},
+    {file = "newrelic-10.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:877b53049be9dfb3ad2845f00c57a3eb1aadeaec700f09a8f6c7fec1f6e71c2b"},
+    {file = "newrelic-10.7.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02916527900f6d209682d3dd77c964fb38ca7d84c31a293085e4a84fa35957d"},
+    {file = "newrelic-10.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:683b5158203e28e46b348f9657054eb25cfb7367e21524a457235d9c5a5cc4ed"},
+    {file = "newrelic-10.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dac3003f22e1edd564f7d7556c84f1fb2f61140c46040befa626bdc8f69a4a89"},
+    {file = "newrelic-10.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fd86115079045e5a9630168ae1a48fdef7f2782c9268d1f04a7ae7716a6129d"},
+    {file = "newrelic-10.7.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecf6a0b560b10c72fd592c1dcb6ea8812e7876d6e30709b6c5184fdd4e399d62"},
+    {file = "newrelic-10.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:30b1c668beb12b9627bac6b037f9a2e3f374e678a75c57f63566a4a7ea055e9e"},
+    {file = "newrelic-10.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9319828bc2b46b9a25a88a97fab1a9e05a4c9d4bed484206f59e04e2f7cbd1cd"},
+    {file = "newrelic-10.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6855485d0de0f02271f67617c7a63b44c44f46e50763f37a016a1a117ae8af56"},
+    {file = "newrelic-10.7.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e78adc209e24cc223ac19104616caa3b8cb789d1ed97d082472d3b7e9d04023d"},
+    {file = "newrelic-10.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bc1f090a115688fe7829e7aea1dcf37913a24450813908d9ce6b4eb0831cbbbf"},
+    {file = "newrelic-10.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cf6b8f12abf9c984a4e62b0de66d85e2c5153f367dd6d4149544d931e59bcb8d"},
+    {file = "newrelic-10.7.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64708227cf2d56f96f1d2697b23cc5be4952bbd790f0ba63164bedcdbbb457fc"},
+    {file = "newrelic-10.7.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a03832922d05530088aab9acb84bc7758cc8196305852652abb6face3c346ede"},
+    {file = "newrelic-10.7.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:e97b3239159d9a178c07043e9da56e841de2b56b947070b7038ddcb93f99fba0"},
+    {file = "newrelic-10.7.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:32f089e276f36b73de62c61ba7648d77de70893fe4d9a7c15f95e20f4978f461"},
+    {file = "newrelic-10.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83bdce11a0a5929ed5ab5db420f54224662c97fbce4fb6efbe27633ad54d30e2"},
+    {file = "newrelic-10.7.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a6b906d8098cd15639f02152a3c94c879c5a841b177b7ee2e6e13ca3a0f37cf"},
+    {file = "newrelic-10.7.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:96f805812a912a8a4008b6f28e109e0d8943c80dd136980a9d3914be5e75a416"},
+    {file = "newrelic-10.7.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:8d47f6041c3f28844eaa9cdf0905415fe5fc617ee6623c391532830a1205133e"},
+    {file = "newrelic-10.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ad5b78a6997ce237185e3911d9a616de0781f600031d53ecce1edeafcca0c79"},
+    {file = "newrelic-10.7.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:772a3c1b5fae12253629771cf677197be48c481c4c6ee7a6233a469dc7e37057"},
+    {file = "newrelic-10.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:650e1818ee404ace26efb2935e6326dbcbf754fbea496710da3889e224c4bcf1"},
+    {file = "newrelic-10.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fd2f3d2d290555764b587d35700069581dece2158b73e865f9adc6ccbba4375b"},
+    {file = "newrelic-10.7.0.tar.gz", hash = "sha256:ac9716c115ddcf54b54115391a84ed2c318ae943b4f598b4d0248cd6edb12414"},
 ]
 
 [package.extras]
@@ -1326,25 +1513,22 @@ files = [
 
 [[package]]
 name = "psycopg2"
-version = "2.9.5"
+version = "2.9.10"
 description = "psycopg2 - Python-PostgreSQL Database Adapter"
 optional = false
-python-versions = ">=3.6"
+python-versions = ">=3.8"
 groups = ["main"]
 files = [
-    {file = "psycopg2-2.9.5-cp310-cp310-win32.whl", hash = "sha256:d3ef67e630b0de0779c42912fe2cbae3805ebaba30cda27fea2a3de650a9414f"},
-    {file = "psycopg2-2.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:4cb9936316d88bfab614666eb9e32995e794ed0f8f6b3b718666c22819c1d7ee"},
-    {file = "psycopg2-2.9.5-cp311-cp311-win32.whl", hash = "sha256:093e3894d2d3c592ab0945d9eba9d139c139664dcf83a1c440b8a7aa9bb21955"},
-    {file = "psycopg2-2.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:920bf418000dd17669d2904472efeab2b20546efd0548139618f8fa305d1d7ad"},
-    {file = "psycopg2-2.9.5-cp36-cp36m-win32.whl", hash = "sha256:b9ac1b0d8ecc49e05e4e182694f418d27f3aedcfca854ebd6c05bb1cffa10d6d"},
-    {file = "psycopg2-2.9.5-cp36-cp36m-win_amd64.whl", hash = "sha256:fc04dd5189b90d825509caa510f20d1d504761e78b8dfb95a0ede180f71d50e5"},
-    {file = "psycopg2-2.9.5-cp37-cp37m-win32.whl", hash = "sha256:922cc5f0b98a5f2b1ff481f5551b95cd04580fd6f0c72d9b22e6c0145a4840e0"},
-    {file = "psycopg2-2.9.5-cp37-cp37m-win_amd64.whl", hash = "sha256:1e5a38aa85bd660c53947bd28aeaafb6a97d70423606f1ccb044a03a1203fe4a"},
-    {file = "psycopg2-2.9.5-cp38-cp38-win32.whl", hash = "sha256:f5b6320dbc3cf6cfb9f25308286f9f7ab464e65cfb105b64cc9c52831748ced2"},
-    {file = "psycopg2-2.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:1a5c7d7d577e0eabfcf15eb87d1e19314c8c4f0e722a301f98e0e3a65e238b4e"},
-    {file = "psycopg2-2.9.5-cp39-cp39-win32.whl", hash = "sha256:322fd5fca0b1113677089d4ebd5222c964b1760e361f151cbb2706c4912112c5"},
-    {file = "psycopg2-2.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:190d51e8c1b25a47484e52a79638a8182451d6f6dff99f26ad9bd81e5359a0fa"},
-    {file = "psycopg2-2.9.5.tar.gz", hash = "sha256:a5246d2e683a972e2187a8714b5c2cf8156c064629f9a9b1a873c1730d9e245a"},
+    {file = "psycopg2-2.9.10-cp310-cp310-win32.whl", hash = "sha256:5df2b672140f95adb453af93a7d669d7a7bf0a56bcd26f1502329166f4a61716"},
+    {file = "psycopg2-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:c6f7b8561225f9e711a9c47087388a97fdc948211c10a4bccbf0ba68ab7b3b5a"},
+    {file = "psycopg2-2.9.10-cp311-cp311-win32.whl", hash = "sha256:47c4f9875125344f4c2b870e41b6aad585901318068acd01de93f3677a6522c2"},
+    {file = "psycopg2-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:0435034157049f6846e95103bd8f5a668788dd913a7c30162ca9503fdf542cb4"},
+    {file = "psycopg2-2.9.10-cp312-cp312-win32.whl", hash = "sha256:65a63d7ab0e067e2cdb3cf266de39663203d38d6a8ed97f5ca0cb315c73fe067"},
+    {file = "psycopg2-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:4a579d6243da40a7b3182e0430493dbd55950c493d8c68f4eec0b302f6bbf20e"},
+    {file = "psycopg2-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:91fd603a2155da8d0cfcdbf8ab24a2d54bca72795b90d2a3ed2b6da8d979dee2"},
+    {file = "psycopg2-2.9.10-cp39-cp39-win32.whl", hash = "sha256:9d5b3b94b79a844a986d029eee38998232451119ad653aea42bb9220a8c5066b"},
+    {file = "psycopg2-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:88138c8dedcbfa96408023ea2b0c369eda40fe5d75002c0964c78f46f11fa442"},
+    {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"},
 ]
 
 [[package]]
@@ -1394,7 +1578,7 @@ files = [
     {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"},
     {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"},
 ]
-markers = {dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
+markers = {main = "platform_python_implementation != \"PyPy\"", dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""}
 
 [[package]]
 name = "pyflakes"
@@ -1536,95 +1720,96 @@ files = [
 [package.dependencies]
 six = ">=1.5"
 
-[[package]]
-name = "pytz"
-version = "2025.2"
-description = "World timezone definitions, modern and historical"
-optional = false
-python-versions = "*"
-groups = ["main", "dev"]
-files = [
-    {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"},
-    {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"},
-]
-
 [[package]]
 name = "rdflib"
-version = "7.0.0"
+version = "7.1.3"
 description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
 optional = false
-python-versions = ">=3.8.1,<4.0.0"
+python-versions = "<4.0.0,>=3.8.1"
 groups = ["main"]
 files = [
-    {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"},
-    {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"},
+    {file = "rdflib-7.1.3-py3-none-any.whl", hash = "sha256:5402310a9f0f3c07d453d73fd0ad6ba35616286fe95d3670db2b725f3f539673"},
+    {file = "rdflib-7.1.3.tar.gz", hash = "sha256:f3dcb4c106a8cd9e060d92f43d593d09ebc3d07adc244f4c7315856a12e383ee"},
 ]
 
 [package.dependencies]
-isodate = ">=0.6.0,<0.7.0"
 pyparsing = ">=2.1.0,<4"
 
 [package.extras]
 berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"]
-html = ["html5lib (>=1.0,<2.0)"]
-lxml = ["lxml (>=4.3.0,<5.0.0)"]
-networkx = ["networkx (>=2.0.0,<3.0.0)"]
+html = ["html5rdf (>=1.2,<2)"]
+lxml = ["lxml (>=4.3,<6.0)"]
+networkx = ["networkx (>=2,<4)"]
+orjson = ["orjson (>=3.9.14,<4)"]
 
 [[package]]
 name = "requests"
-version = "2.25.1"
+version = "2.32.3"
 description = "Python HTTP for Humans."
 optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+python-versions = ">=3.8"
 groups = ["main", "dev"]
 files = [
-    {file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"},
-    {file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"},
+    {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"},
+    {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"},
 ]
 
 [package.dependencies]
 certifi = ">=2017.4.17"
-chardet = ">=3.0.2,<5"
-idna = ">=2.5,<3"
-urllib3 = ">=1.21.1,<1.27"
+charset-normalizer = ">=2,<4"
+idna = ">=2.5,<4"
+urllib3 = ">=1.21.1,<3"
 
 [package.extras]
-security = ["cryptography (>=1.3.4)", "pyOpenSSL (>=0.14)"]
-socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton ; sys_platform == \"win32\" and python_version == \"2.7\""]
+socks = ["PySocks (>=1.5.6,!=1.5.7)"]
+use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
 [[package]]
 name = "sentry-sdk"
-version = "1.22.2"
+version = "2.24.1"
 description = "Python client for Sentry (https://sentry.io)"
 optional = false
-python-versions = "*"
+python-versions = ">=3.6"
 groups = ["main"]
 files = [
-    {file = "sentry-sdk-1.22.2.tar.gz", hash = "sha256:5932c092c6e6035584eb74d77064e4bce3b7935dfc4a331349719a40db265840"},
-    {file = "sentry_sdk-1.22.2-py2.py3-none-any.whl", hash = "sha256:cf89a5063ef84278d186aceaed6fb595bfe67d099298e537634a323664265669"},
+    {file = "sentry_sdk-2.24.1-py2.py3-none-any.whl", hash = "sha256:36baa6a1128b9d98d2adc5e9b2f887eff0a6af558fc2b96ed51919042413556d"},
+    {file = "sentry_sdk-2.24.1.tar.gz", hash = "sha256:8ba3c29990fa48865b908b3b9dc5ae7fa7e72407c7c9e91303e5206b32d7b8b1"},
 ]
 
 [package.dependencies]
 certifi = "*"
 django = {version = ">=1.8", optional = true, markers = "extra == \"django\""}
-urllib3 = {version = ">=1.26.11,<2.0.0", markers = "python_version >= \"3.6\""}
+urllib3 = ">=1.26.11"
 
 [package.extras]
 aiohttp = ["aiohttp (>=3.5)"]
+anthropic = ["anthropic (>=0.16)"]
 arq = ["arq (>=0.23)"]
+asyncpg = ["asyncpg (>=0.23)"]
 beam = ["apache-beam (>=2.12)"]
 bottle = ["bottle (>=0.12.13)"]
 celery = ["celery (>=3)"]
+celery-redbeat = ["celery-redbeat (>=2)"]
 chalice = ["chalice (>=1.16.0)"]
+clickhouse-driver = ["clickhouse-driver (>=0.2.0)"] django = ["django (>=1.8)"] falcon = ["falcon (>=1.4)"] fastapi = ["fastapi (>=0.79.0)"] -flask = ["blinker (>=1.1)", "flask (>=0.11)"] -grpcio = ["grpcio (>=1.21.1)"] +flask = ["blinker (>=1.1)", "flask (>=0.11)", "markupsafe"] +grpcio = ["grpcio (>=1.21.1)", "protobuf (>=3.8.0)"] +http2 = ["httpcore[http2] (==1.*)"] httpx = ["httpx (>=0.16.0)"] huey = ["huey (>=2)"] +huggingface-hub = ["huggingface_hub (>=0.22)"] +langchain = ["langchain (>=0.0.210)"] +launchdarkly = ["launchdarkly-server-sdk (>=9.8.0)"] +litestar = ["litestar (>=2.0.0)"] +loguru = ["loguru (>=0.5)"] +openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"] +openfeature = ["openfeature-sdk (>=0.7.1)"] opentelemetry = ["opentelemetry-distro (>=0.35b0)"] -pure-eval = ["asttokens", "executing", "pure-eval"] +opentelemetry-experimental = ["opentelemetry-distro"] +pure-eval = ["asttokens", "executing", "pure_eval"] pymongo = ["pymongo (>=3.1)"] pyspark = ["pyspark (>=2.4.4)"] quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] @@ -1633,7 +1818,9 @@ sanic = ["sanic (>=0.8)"] sqlalchemy = ["sqlalchemy (>=1.2)"] starlette = ["starlette (>=0.19.1)"] starlite = ["starlite (>=1.48)"] -tornado = ["tornado (>=5)"] +statsig = ["statsig (>=0.55.3)"] +tornado = ["tornado (>=6)"] +unleash = ["UnleashClient (>=6.0.1)"] [[package]] name = "setuptools" @@ -1714,24 +1901,24 @@ version = "4.13.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" -groups = ["main", "dev"] +groups = ["main"] files = [ {file = "typing_extensions-4.13.0-py3-none-any.whl", hash = "sha256:c8dd92cc0d6425a97c18fbb9d1954e5ff92c1ca881a309c45f06ebc0b79058e5"}, {file = "typing_extensions-4.13.0.tar.gz", hash = "sha256:0a4ac55a5820789d87e297727d229866c9650f6521b64206413c4fbada24d95b"}, ] -markers = {dev = "python_version < \"3.11\""} [[package]] name = "tzdata" -version = "2025.2" +version = "2025.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" -groups = ["main"] +groups = ["main", "dev"] files = [ - {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, - {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, + {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, + {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, ] +markers = {dev = "sys_platform == \"win32\""} [[package]] name = "urllib3" @@ -1861,5 +2048,5 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" -python-versions = ">=3.10,<3.12" -content-hash = "9c0c9c439701e05cce8574c8ad4068312219dbfadb8ce55fe9cd99d502793583" +python-versions = ">=3.13,<3.14" +content-hash = "f7be2d5f032b5455141b352dec94484ade3397d17d0afad7249c9f406e8437e1" diff --git a/pyproject.toml b/pyproject.toml index 39f8658ca..3ac3a0a49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ "django-extensions==3.2.3", # MIT "django-filter==25.1", # BSD "django-oauth-toolkit==3.0.1", # BSD - "django==5.2", # BSD 3 Clause + "django==5.1.*", # BSD 3 Clause "elasticsearch8==8.17.2", # Apache 2.0 "lxml==5.3.0", # BSD "kombu==5.5.0", # BSD 3 Clause @@ -42,7 +42,7 @@ optional = true [tool.poetry.group.dist.dependencies] uwsgi = "2.0.28" newrelic = "10.7.0" # 
newrelic APM agent, Custom License -gevent = "24.11.2" # MIT +gevent = "24.11.1" # MIT psycogreen = "1.0.2" # BSD From 95790ca66132c7229b18e46f1d49ca0fcec31961 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 12:11:11 -0400 Subject: [PATCH 37/46] poetry config: explicit package-mode --- .github/workflows/run_tests.yml | 3 +-- Dockerfile | 6 +++--- pyproject.toml | 6 ++++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index cad4b7da5..8fd2886e4 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -64,7 +64,7 @@ jobs: cache: 'poetry' - name: install despondencies - run: poetry install --no-root --with dev + run: poetry install --with dev - name: flake it run: poetry run flake8 . @@ -76,7 +76,6 @@ jobs: env: DATABASE_PASSWORD: postgres ELASTICSEARCH8_URL: http://localhost:9208/ - # ELASTICSEARCH5_URL: http://localhost:9205/ - name: coveralls uses: coverallsapp/github-action@v2 diff --git a/Dockerfile b/Dockerfile index c893097c8..17a029675 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,7 +39,7 @@ RUN $POETRY_HOME/bin/pip install poetry==2.1.1 COPY pyproject.toml . COPY poetry.lock . -RUN $POETRY_HOME/bin/poetry install --no-root --compile +RUN $POETRY_HOME/bin/poetry install --compile RUN apt-get remove -y \ gcc \ @@ -59,9 +59,9 @@ CMD ["python", "manage.py", "--help"] ### Dist FROM app AS dist -RUN $POETRY_HOME/bin/poetry install --no-root --compile --only dist +RUN $POETRY_HOME/bin/poetry install --compile --only dist ### Dev FROM app AS dev -RUN $POETRY_HOME/bin/poetry install --no-root --compile --only dev +RUN $POETRY_HOME/bin/poetry install --compile --only dev diff --git a/pyproject.toml b/pyproject.toml index 3ac3a0a49..2d7a36bda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,6 @@ [project] name = "shtrove" version = "25.1.1" # TODO: get version from share/version.py -package-mode = false description = "" authors = [ {name = "Abram Axel Booth", email = "abram@cos.io"} @@ -29,13 +28,16 @@ dependencies = [ "primitive_metadata==0.2025.1", # MIT "sentry-sdk[django]==2.24.1", # MIT - # to be removed in future work: + # to be removed in near-future work: "djangorestframework==3.16.0", # BSD "djangorestframework-jsonapi==7.1.0", # BSD "elasticsearch5==5.5.6", # Apache 2.0 "PyJWE==1.0.0", # Apache 2.0 ] +[tool.poetry] +package-mode = false + # "dist" dependency group relevant only in deployment: [tool.poetry.group.dist] optional = true From 2448562fde34b30b1d5b1eeb0824400e25ddcb76 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 12:23:36 -0400 Subject: [PATCH 38/46] remove elasticsearch5 use --- .docker-compose.env | 1 - project/settings.py | 4 - share/search/index_strategy/__init__.py | 10 - .../search/index_strategy/sharev2_elastic5.py | 456 --------- .../index_strategy/trove_indexcard_flats.py | 953 ------------------ tests/share/search/conftest.py | 8 +- .../index_strategy/test_sharev2_elastic5.py | 89 -- .../index_strategy/test_strategy_selection.py | 2 - 8 files changed, 2 insertions(+), 1521 deletions(-) delete mode 100644 share/search/index_strategy/sharev2_elastic5.py delete mode 100644 share/search/index_strategy/trove_indexcard_flats.py delete mode 100644 tests/share/search/index_strategy/test_sharev2_elastic5.py diff --git a/.docker-compose.env b/.docker-compose.env index 4154eca9f..beceae1b8 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -1,7 +1,6 @@ 
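# (editor's note, not part of the upstream diff: this patch removes the last
# elasticsearch5 wiring in one sweep — the ELASTICSEARCH5_URL env var in the
# hunk below, the settings.py fallback that also honored the legacy
# ELASTICSEARCH_URL name for backcompat, the Sharev2Elastic5IndexStrategy
# registration and its 456-line module, and its tests — leaving
# ELASTICSEARCH8_URL as the only search-backend setting.)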
CELERY_BROKER_URL=amqp://guest:guest@rabbitmq:5672 DATABASE_HOST=postgres ELASTICSEARCH8_URL=https://elastic8:9200/ -# ELASTICSEARCH5_URL=http://elasticsearch:9200/ LOGIN_REDIRECT_URL=http://localhost:8003/ OSF_API_URL=http://localhost:8000 RABBITMQ_HOST=rabbitmq diff --git a/project/settings.py b/project/settings.py index ae09107f9..c7334e897 100644 --- a/project/settings.py +++ b/project/settings.py @@ -312,10 +312,6 @@ def split(string, delim): 'MAX_RETRIES': int(os.environ.get('ELASTICSEARCH_MAX_RETRIES', 7)), 'POST_INDEX_DELAY': int(os.environ.get('ELASTICSEARCH_POST_INDEX_DELAY', 3)), } -ELASTICSEARCH5_URL = ( - os.environ.get('ELASTICSEARCH5_URL') - or os.environ.get('ELASTICSEARCH_URL') # backcompat -) ELASTICSEARCH8_URL = os.environ.get('ELASTICSEARCH8_URL') ELASTICSEARCH8_CERT_PATH = os.environ.get('ELASTICSEARCH8_CERT_PATH') ELASTICSEARCH8_USERNAME = os.environ.get('ELASTICSEARCH8_USERNAME', 'elastic') diff --git a/share/search/index_strategy/__init__.py b/share/search/index_strategy/__init__.py index c00d2fbf1..f5c346fd6 100644 --- a/share/search/index_strategy/__init__.py +++ b/share/search/index_strategy/__init__.py @@ -7,7 +7,6 @@ from share.search.exceptions import IndexStrategyError from share.models import FeatureFlag from trove.trovesearch import search_params -from .sharev2_elastic5 import Sharev2Elastic5IndexStrategy from .sharev2_elastic8 import Sharev2Elastic8IndexStrategy from .trove_indexcard_flats import TroveIndexcardFlatsIndexStrategy from .trovesearch_denorm import TrovesearchDenormIndexStrategy @@ -32,10 +31,6 @@ class _AvailableStrategies(enum.Enum): (don't import this enum directly -- access via the other functions in this module) ''' - - if settings.ELASTICSEARCH5_URL: - sharev2_elastic5 = Sharev2Elastic5IndexStrategy('sharev2_elastic5') - if settings.ELASTICSEARCH8_URL: sharev2_elastic8 = Sharev2Elastic8IndexStrategy('sharev2_elastic8') trove_indexcard_flats = TroveIndexcardFlatsIndexStrategy('trove_indexcard_flats') @@ -81,11 +76,6 @@ def get_strategy( def get_strategy_for_sharev2_search(requested_name: str | None = None) -> IndexStrategy: if requested_name: _name = requested_name - elif ( - settings.ELASTICSEARCH5_URL - and not FeatureFlag.objects.flag_is_up(FeatureFlag.ELASTIC_EIGHT_DEFAULT) - ): - _name = _AvailableStrategies.sharev2_elastic5.name elif settings.ELASTICSEARCH8_URL: _name = _AvailableStrategies.sharev2_elastic8.name else: diff --git a/share/search/index_strategy/sharev2_elastic5.py b/share/search/index_strategy/sharev2_elastic5.py deleted file mode 100644 index c8cb990dd..000000000 --- a/share/search/index_strategy/sharev2_elastic5.py +++ /dev/null @@ -1,456 +0,0 @@ -from __future__ import annotations -import functools -import json -import logging - -from django.conf import settings -import elasticsearch5 -import elasticsearch5.helpers - -from share.models import SourceUniqueIdentifier -from share.search import exceptions, messages -from share.search.index_status import IndexStatus -from share.search.index_strategy._base import IndexStrategy -from share.search.index_strategy._util import timestamp_to_readable_datetime -from share.util import IDObfuscator -from share.util.checksum_iri import ChecksumIri -from trove import models as trove_db -from trove.vocab.namespaces import SHAREv2 - - -logger = logging.getLogger(__name__) - - -def get_doc_id(suid_id): - return IDObfuscator.encode_id(suid_id, SourceUniqueIdentifier) - - -# using a static, single-index strategy to represent the existing "share_postrend_backcompat" -# search index in 
elastic5, with intent to put new work in elastic8+ and drop elastic5 soon. -# (see share.search.index_strategy.sharev2_elastic8 for this same index in elastic8) -class Sharev2Elastic5IndexStrategy(IndexStrategy): - CURRENT_STRATEGY_CHECKSUM = ChecksumIri( - checksumalgorithm_name='sha-256', - salt='Sharev2Elastic5IndexStrategy', - hexdigest='7b6620bfafd291489e2cfea7e645b8311c2485a3012e467abfee4103f7539cc4', - ) - STATIC_INDEXNAME = 'share_postrend_backcompat' - - # perpetuated optimizations from times long past - MAX_CHUNK_BYTES = 10 * 1024 ** 2 # 10 megs - - @classmethod - @functools.cache - def _get_elastic5_client(cls) -> elasticsearch5.Elasticsearch: - should_sniff = settings.ELASTICSEARCH['SNIFF'] - return elasticsearch5.Elasticsearch( - settings.ELASTICSEARCH5_URL, - retry_on_timeout=True, - timeout=settings.ELASTICSEARCH['TIMEOUT'], - # sniff before doing anything - sniff_on_start=should_sniff, - # refresh nodes after a node fails to respond - sniff_on_connection_fail=should_sniff, - # and also every 60 seconds - sniffer_timeout=60 if should_sniff else None, - ) - - @property - def es5_client(self): - return self._get_elastic5_client() # cached classmethod for shared client - - @property - def single_index(self) -> IndexStrategy.SpecificIndex: - return self.get_index(self.STATIC_INDEXNAME) - - # abstract method from IndexStrategy - @classmethod - def each_index_subname(self): - yield self.STATIC_INDEXNAME - - # override IndexStrategy - @property - def nonurgent_messagequeue_name(self): - return 'es-share-postrend-backcompat' - - # override IndexStrategy - @property - def urgent_messagequeue_name(self): - return f'{self.nonurgent_messagequeue_name}.urgent' - - # override IndexStrategy - @property - def indexname_prefix(self): - return self.STATIC_INDEXNAME - - # abstract method from IndexStrategy - def compute_strategy_checksum(self): - return ChecksumIri.digest_json( - 'sha-256', - salt=self.__class__.__name__, - raw_json={ - 'indexname': self.STATIC_INDEXNAME, - 'mappings': self._index_mappings(), - 'settings': self._index_settings(), - } - ) - - # abstract method from IndexStrategy - def pls_make_default_for_searching(self): - pass # the one index is the only one - - # abstract method from IndexStrategy - def pls_get_default_for_searching(self): - return self - - # abstract method from IndexStrategy - def each_existing_index(self, *args, **kwargs): - _index = self.single_index - if _index.pls_check_exists(): - yield _index - - # abstract method from IndexStrategy - def each_live_index(self, *args, **kwargs): - yield self.single_index - - # abstract method from IndexStrategy - def each_subnamed_index(self): - yield self.single_index - - # abstract method from IndexStrategy - def pls_handle_messages_chunk(self, messages_chunk): - logger.debug('got messages_chunk %s', messages_chunk) - self.assert_message_type(messages_chunk.message_type) - bulk_stream = elasticsearch5.helpers.streaming_bulk( - self.es5_client, - self._build_elastic_actions(messages_chunk), - max_chunk_bytes=self.MAX_CHUNK_BYTES, - raise_on_error=False, - ) - for (ok, response) in bulk_stream: - op_type, response_body = next(iter(response.items())) - message_target_id = self._get_message_target_id(response_body['_id']) - is_done = ok or (op_type == 'delete' and response_body.get('status') == 404) - error_text = None if is_done else str(response_body) - yield messages.IndexMessageResponse( - is_done=is_done, - index_message=messages.IndexMessage(messages_chunk.message_type, message_target_id), - 
status_code=response_body.get('status'), - error_text=error_text, - ) - - # abstract method from IndexStrategy - @property - def supported_message_types(self): - return { - messages.MessageType.INDEX_SUID, - messages.MessageType.BACKFILL_SUID, - } - - # abstract method from IndexStrategy - @property - def backfill_message_type(self): - return messages.MessageType.BACKFILL_SUID - - def _index_settings(self): - return { - 'analysis': { - 'filter': { - 'autocomplete_filter': { - 'type': 'edge_ngram', - 'min_gram': 1, - 'max_gram': 20 - } - }, - 'analyzer': { - 'default': { - # same as 'standard' analyzer, plus html_strip - 'type': 'custom', - 'tokenizer': 'standard', - 'filter': ['lowercase', 'stop'], - 'char_filter': ['html_strip'] - }, - 'autocomplete': { - 'type': 'custom', - 'tokenizer': 'standard', - 'filter': [ - 'lowercase', - 'autocomplete_filter' - ] - }, - 'subject_analyzer': { - 'type': 'custom', - 'tokenizer': 'subject_tokenizer', - 'filter': [ - 'lowercase', - ] - }, - 'subject_search_analyzer': { - 'type': 'custom', - 'tokenizer': 'keyword', - 'filter': [ - 'lowercase', - ] - }, - }, - 'tokenizer': { - 'subject_tokenizer': { - 'type': 'path_hierarchy', - 'delimiter': '|', - } - } - } - } - - def _index_mappings(self): - autocomplete_field = { - 'autocomplete': { - 'type': 'string', - 'analyzer': 'autocomplete', - 'search_analyzer': 'standard', - 'include_in_all': False - } - } - exact_field = { - 'exact': { - 'type': 'keyword', - # From Elasticsearch documentation: - # The value for ignore_above is the character count, but Lucene counts bytes. - # If you use UTF-8 text with many non-ASCII characters, you may want to set the limit to 32766 / 3 = 10922 since UTF-8 characters may occupy at most 3 bytes - 'ignore_above': 10922 - } - } - return { - 'creativeworks': { - 'dynamic': 'strict', - 'properties': { - 'affiliations': {'type': 'text', 'fields': exact_field}, - 'contributors': {'type': 'text', 'fields': exact_field}, - 'date': {'type': 'date', 'format': 'strict_date_optional_time', 'include_in_all': False}, - 'date_created': {'type': 'date', 'format': 'strict_date_optional_time', 'include_in_all': False}, - 'date_modified': {'type': 'date', 'format': 'strict_date_optional_time', 'include_in_all': False}, - 'date_published': {'type': 'date', 'format': 'strict_date_optional_time', 'include_in_all': False}, - 'date_updated': {'type': 'date', 'format': 'strict_date_optional_time', 'include_in_all': False}, - 'description': {'type': 'text'}, - 'funders': {'type': 'text', 'fields': exact_field}, - 'hosts': {'type': 'text', 'fields': exact_field}, - 'id': {'type': 'keyword', 'include_in_all': False}, - 'identifiers': {'type': 'text', 'fields': exact_field}, - 'justification': {'type': 'text', 'include_in_all': False}, - 'language': {'type': 'keyword', 'include_in_all': False}, - 'publishers': {'type': 'text', 'fields': exact_field}, - 'registration_type': {'type': 'keyword', 'include_in_all': False}, - 'retracted': {'type': 'boolean', 'include_in_all': False}, - 'source_config': {'type': 'keyword', 'include_in_all': False}, - 'source_unique_id': {'type': 'keyword'}, - 'sources': {'type': 'keyword', 'include_in_all': False}, - 'subjects': {'type': 'text', 'include_in_all': False, 'analyzer': 'subject_analyzer', 'search_analyzer': 'subject_search_analyzer'}, - 'subject_synonyms': {'type': 'text', 'include_in_all': False, 'analyzer': 'subject_analyzer', 'search_analyzer': 'subject_search_analyzer', 'copy_to': 'subjects'}, - 'tags': {'type': 'text', 'fields': exact_field}, - 'title': 
{'type': 'text', 'fields': exact_field}, - 'type': {'type': 'keyword', 'include_in_all': False}, - 'types': {'type': 'keyword', 'include_in_all': False}, - 'withdrawn': {'type': 'boolean', 'include_in_all': False}, - 'osf_related_resource_types': {'type': 'object', 'dynamic': True, 'include_in_all': False}, - 'lists': {'type': 'object', 'dynamic': True, 'include_in_all': False}, - }, - 'dynamic_templates': [ - {'exact_field_on_lists_strings': {'path_match': 'lists.*', 'match_mapping_type': 'string', 'mapping': {'type': 'text', 'fields': exact_field}}}, - ] - }, - 'agents': { - 'dynamic': False, - 'properties': { - 'id': {'type': 'keyword', 'include_in_all': False}, - 'identifiers': {'type': 'text', 'fields': exact_field}, - 'name': {'type': 'text', 'fields': {**autocomplete_field, **exact_field}}, - 'family_name': {'type': 'text', 'include_in_all': False}, - 'given_name': {'type': 'text', 'include_in_all': False}, - 'additional_name': {'type': 'text', 'include_in_all': False}, - 'suffix': {'type': 'text', 'include_in_all': False}, - 'location': {'type': 'text', 'include_in_all': False}, - 'sources': {'type': 'keyword', 'include_in_all': False}, - 'type': {'type': 'keyword', 'include_in_all': False}, - 'types': {'type': 'keyword', 'include_in_all': False}, - } - }, - 'sources': { - 'dynamic': False, - 'properties': { - 'id': {'type': 'keyword', 'include_in_all': False}, - 'name': {'type': 'text', 'fields': {**autocomplete_field, **exact_field}}, - 'short_name': {'type': 'keyword', 'include_in_all': False}, - 'type': {'type': 'keyword', 'include_in_all': False}, - } - }, - 'tags': { - 'dynamic': False, - 'properties': { - 'id': {'type': 'keyword', 'include_in_all': False}, - 'name': {'type': 'text', 'fields': {**autocomplete_field, **exact_field}}, - 'type': {'type': 'keyword', 'include_in_all': False}, - } - }, - } - - def _get_message_target_id(self, doc_id): - return IDObfuscator.decode_id(doc_id) - - def _build_elastic_actions(self, messages_chunk): - action_template = { - '_index': self.STATIC_INDEXNAME, - '_type': 'creativeworks', - } - suid_ids = set(messages_chunk.target_ids_chunk) - _derived_qs = ( - trove_db.DerivedIndexcard.objects - .filter(upriver_indexcard__source_record_suid_id=suid_ids) - .filter(deriver_identifier__in=( - trove_db.ResourceIdentifier.objects - .queryset_for_iri(SHAREv2.sharev2_elastic) - )) - .select_related('upriver_indexcard') - ) - for _derived_card in _derived_qs: - _suid_id = _derived_card.upriver_indexcard.source_record_suid_id - doc_id = get_doc_id(_suid_id) - suid_ids.remove(_suid_id) - source_doc = json.loads(_derived_card.derived_text) - assert source_doc['id'] == doc_id - if source_doc.pop('is_deleted', False): - action = { - **action_template, - '_id': doc_id, - '_op_type': 'delete', - } - else: - action = { - **action_template, - '_id': doc_id, - '_op_type': 'index', - '_source': source_doc, - } - logger.debug('built action for suid_id=%s: %s', _suid_id, action) - yield action - # delete any that don't have the expected DerivedIndexcard - for leftover_suid_id in suid_ids: - logger.debug('deleting suid_id=%s', leftover_suid_id) - action = { - **action_template, - '_id': get_doc_id(leftover_suid_id), - '_op_type': 'delete', - } - yield action - - # optional method from IndexStrategy - def pls_handle_search__passthru(self, request_body=None, request_queryparams=None) -> dict: - '''the definitive sharev2-search api: passthru to elasticsearch version 5 - ''' - if request_queryparams: - request_queryparams.pop('indexStrategy', None) - try: - return 
self.es5_client.search( - index=self.STATIC_INDEXNAME, - body=request_body or {}, - params=request_queryparams or {}, - ) - except elasticsearch5.TransportError as error: - raise exceptions.IndexStrategyError() from error # TODO: error messaging - - class SpecificIndex(IndexStrategy.SpecificIndex): - index_strategy: Sharev2Elastic5IndexStrategy # narrow type - - # override IndexStrategy.SpecificIndex - @property - def full_index_name(self): - return self.index_strategy.STATIC_INDEXNAME - - # abstract method from IndexStrategy.SpecificIndex - def pls_create(self): - # check index exists (if not, create) - logger.debug('Ensuring index %s', self.full_index_name) - indices_api = self.index_strategy.es5_client.indices - if not indices_api.exists(index=self.full_index_name): - indices_api.create( - self.full_index_name, - body={ - 'settings': self.index_strategy._index_settings(), - 'mappings': self.index_strategy._index_mappings(), - }, - ) - self.pls_refresh() - logger.debug('Waiting for yellow status') - ( - self.index_strategy.es5_client.cluster - .health(wait_for_status='yellow') - ) - logger.info('Finished setting up Elasticsearch index %s', self.full_index_name) - - # abstract method from IndexStrategy.SpecificIndex - def pls_start_keeping_live(self): - pass # there is just the one index, always kept live - - # abstract method from IndexStrategy.SpecificIndex - def pls_stop_keeping_live(self): - raise exceptions.IndexStrategyError( - f'{self.__class__.__qualname__} is implemented for only one index, ' - f'"{self.full_index_name}", which is always kept live (until elasticsearch5 ' - 'support is dropped)' - ) - - # abstract method from IndexStrategy.SpecificIndex - def pls_refresh(self): - ( - self.index_strategy.es5_client.indices - .refresh(index=self.full_index_name) - ) - logger.info('Refreshed index %s', self.full_index_name) - - # abstract method from IndexStrategy.SpecificIndex - def pls_delete(self): - logger.warning(f'{self.__class__.__name__}: deleting index {self.full_index_name}') - ( - self.index_strategy.es5_client.indices - .delete(index=self.full_index_name, ignore=[400, 404]) - ) - - # abstract method from IndexStrategy.SpecificIndex - def pls_check_exists(self): - return bool( - self.index_strategy.es5_client.indices - .exists(index=self.full_index_name) - ) - - # abstract method from IndexStrategy.SpecificIndex - def pls_get_status(self) -> IndexStatus: - try: - stats = ( - self.index_strategy.es5_client.indices - .stats(index=self.full_index_name, metric='docs') - ) - existing_indexes = ( - self.index_strategy.es5_client.indices - .get_settings(index=self.full_index_name, name='index.creation_date') - ) - index_settings = existing_indexes[self.full_index_name] - index_stats = stats['indices'][self.full_index_name] - except (KeyError, elasticsearch5.exceptions.NotFoundError): - # not yet created - return IndexStatus( - index_subname=self.subname, - specific_indexname=self.full_index_name, - is_kept_live=False, - is_default_for_searching=False, - creation_date='', - doc_count=0, - ) - return IndexStatus( - index_subname=self.subname, - specific_indexname=self.full_index_name, - is_kept_live=True, - is_default_for_searching=True, - creation_date=timestamp_to_readable_datetime( - index_settings['settings']['index']['creation_date'], - ), - doc_count=index_stats['primaries']['docs']['count'], - ) diff --git a/share/search/index_strategy/trove_indexcard_flats.py b/share/search/index_strategy/trove_indexcard_flats.py deleted file mode 100644 index 49874d189..000000000 --- 
a/share/search/index_strategy/trove_indexcard_flats.py +++ /dev/null @@ -1,953 +0,0 @@ -import base64 -from collections import defaultdict -import dataclasses -import datetime -import json -import logging -import re -import uuid -from typing import Iterable, Iterator, Any - -from django.conf import settings -import elasticsearch8 -from primitive_metadata import primitive_rdf - -from share.search import exceptions -from share.search import messages -from share.search.index_strategy._base import IndexStrategy -from share.search.index_strategy.elastic8 import Elastic8IndexStrategy -from share.util.checksum_iri import ChecksumIri -from trove import models as trove_db -from trove.trovesearch.page_cursor import ( - MANY_MORE, - OffsetCursor, - PageCursor, - ReproduciblyRandomSampleCursor, -) -from trove.trovesearch.search_params import ( - CardsearchParams, - ValuesearchParams, - SearchFilter, - Textsegment, - SortParam, - GLOB_PATHSTEP, -) -from trove.trovesearch.search_handle import ( - CardsearchHandle, - ValuesearchHandle, - TextMatchEvidence, - CardsearchResult, - ValuesearchResult, - PropertypathUsage, -) -from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri, iri_path_as_keyword -from trove.vocab import osfmap -from trove.vocab.namespaces import RDF, OWL -from ._trovesearch_util import ( - latest_rdf_for_indexcard_pks, - GraphWalk, - KEYWORD_LENGTH_MAX, -) - - -logger = logging.getLogger(__name__) - - -class TroveIndexcardFlatsIndexStrategy(Elastic8IndexStrategy): - CURRENT_STRATEGY_CHECKSUM = ChecksumIri( - checksumalgorithm_name='sha-256', - salt='TroveIndexcardFlatsIndexStrategy', - hexdigest='bdec536873e1ed0c58facaa5d1145bef73bba09d671deef48e45c019def5c5a5', - ) - - # abstract method from IndexStrategy - @property - def supported_message_types(self): - return { - messages.MessageType.UPDATE_INDEXCARD, - messages.MessageType.BACKFILL_INDEXCARD, - } - - # abstract method from IndexStrategy - @property - def backfill_message_type(self): - return messages.MessageType.BACKFILL_INDEXCARD - - @classmethod - def define_current_indexes(cls): - return { # empty index subname, for backcompat - '': cls.IndexDefinition( - mappings=cls.index_mappings(), - settings=cls.index_settings(), - ), - } - - @classmethod - def index_settings(cls): - return {} - - @classmethod - def index_mappings(cls): - _capped_keyword = { - 'type': 'keyword', - 'ignore_above': KEYWORD_LENGTH_MAX, - } - _common_nested_keywords = { - 'path_from_focus': _capped_keyword, - 'suffuniq_path_from_focus': _capped_keyword, - 'property_iri': _capped_keyword, - 'distance_from_focus': {'type': 'keyword'}, # numeric value as keyword (used for 'term' filter) - } - return { - 'dynamic': 'false', - 'properties': { - 'indexcard_uuid': _capped_keyword, - 'focus_iri': _capped_keyword, - 'suffuniq_focus_iri': _capped_keyword, - 'source_record_identifier': _capped_keyword, - 'source_config_label': _capped_keyword, - 'flat_iri_values': { - 'type': 'flattened', - 'ignore_above': KEYWORD_LENGTH_MAX, - }, - 'flat_iri_values_suffuniq': { - 'type': 'flattened', - 'ignore_above': KEYWORD_LENGTH_MAX, - }, - 'iri_paths_present': _capped_keyword, - 'iri_paths_present_suffuniq': _capped_keyword, - 'nested_iri': { - 'type': 'nested', - 'properties': { - **_common_nested_keywords, - 'iri_value': _capped_keyword, - 'suffuniq_iri_value': _capped_keyword, - 'value_type_iri': _capped_keyword, - 'value_name_text': { - 'type': 'text', - 'fields': {'raw': _capped_keyword}, - 'copy_to': 'nested_iri.value_namelike_text', - }, - 
'value_title_text': { - 'type': 'text', - 'fields': {'raw': _capped_keyword}, - 'copy_to': 'nested_iri.value_namelike_text', - }, - 'value_label_text': { - 'type': 'text', - 'fields': {'raw': _capped_keyword}, - 'copy_to': 'nested_iri.value_namelike_text', - }, - 'value_namelike_text': {'type': 'text'}, - }, - }, - 'nested_date': { - 'type': 'nested', - 'properties': { - **_common_nested_keywords, - 'date_value': { - 'type': 'date', - 'format': 'strict_date_optional_time', - }, - }, - }, - 'nested_text': { - 'type': 'nested', - 'properties': { - **_common_nested_keywords, - 'language_iri': _capped_keyword, - 'text_value': { - 'type': 'text', - 'index_options': 'offsets', # for faster highlighting - 'store': True, # avoid loading _source to render highlights - 'fields': {'raw': _capped_keyword}, - }, - }, - }, - }, - } - - @property - def __index(self) -> IndexStrategy.SpecificIndex: - # this is a single-index strategy -- for back-compat, that index has empty subname - return self.get_index('') - - def _build_sourcedoc(self, indexcard_rdf): - _rdfdoc = indexcard_rdf.as_rdfdoc_with_supplements() - if _should_skip_card(indexcard_rdf, _rdfdoc): - return None # will be deleted from the index - _nested_iris = defaultdict(set) - _nested_dates = defaultdict(set) - _nested_texts = defaultdict(set) - _walk = GraphWalk(_rdfdoc, indexcard_rdf.focus_iri) - for _walk_path, _walk_iris in _walk.iri_values.items(): - for _iri_obj in _walk_iris: - _nested_iris[_NestedIriKey.for_iri_at_path(_walk_path, _iri_obj, _rdfdoc)].add(_iri_obj) - for _walk_path, _walk_dates in _walk.date_values.items(): - for _date_obj in _walk_dates: - _nested_dates[_walk_path].add(datetime.date.isoformat(_date_obj)) - for _walk_path, _walk_texts in _walk.text_values.items(): - for _text_obj in _walk_texts: - _nested_texts[(_walk_path, tuple(_text_obj.datatype_iris))].add(_text_obj.unicode_value) - _focus_iris = {indexcard_rdf.focus_iri} - _suffuniq_focus_iris = {get_sufficiently_unique_iri(indexcard_rdf.focus_iri)} - for _identifier in indexcard_rdf.indexcard.focus_identifier_set.all(): - _focus_iris.update(_identifier.raw_iri_list) - _suffuniq_focus_iris.add(_identifier.sufficiently_unique_iri) - return { - 'indexcard_uuid': str(indexcard_rdf.indexcard.uuid), - 'focus_iri': list(_focus_iris), - 'suffuniq_focus_iri': list(_suffuniq_focus_iris), - 'source_record_identifier': indexcard_rdf.indexcard.source_record_suid.identifier, - 'source_config_label': indexcard_rdf.indexcard.source_record_suid.source_config.label, - 'flat_iri_values': self._flattened_iris(_nested_iris), - 'flat_iri_values_suffuniq': self._flattened_iris_suffuniq(_nested_iris), - 'iri_paths_present': [ - iri_path_as_keyword(_path) - for _path in _walk.paths_walked - ], - 'iri_paths_present_suffuniq': [ - iri_path_as_keyword(_path, suffuniq=True) - for _path in _walk.paths_walked - ], - 'nested_iri': list(filter(bool, ( - self._iri_nested_sourcedoc(_nested_iri_key, _iris, _rdfdoc) - for _nested_iri_key, _iris in _nested_iris.items() - ))), - 'nested_date': [ - { - **_iri_path_as_indexable_fields(_path), - 'date_value': list(_value_set), - } - for _path, _value_set in _nested_dates.items() - ], - 'nested_text': [ - { - **_iri_path_as_indexable_fields(_path), - 'language_iri': _language_iris, - 'text_value': list(_value_set), - } - for (_path, _language_iris), _value_set in _nested_texts.items() - ], - } - - def _iri_nested_sourcedoc(self, iri_key: '_NestedIriKey', iris, rdfdoc): - _iris_with_synonyms = set(filter(is_worthwhile_iri, iris)) - for _iri in iris: - 
_iris_with_synonyms.update( - filter(is_worthwhile_iri, rdfdoc.q(_iri, OWL.sameAs)), - ) - if not _iris_with_synonyms: - return None - _sourcedoc = { - **iri_key.as_indexable_fields(), - 'iri_value': list(_iris_with_synonyms), - 'suffuniq_iri_value': [ - get_sufficiently_unique_iri(_iri) - for _iri in _iris_with_synonyms - ], - } - return _sourcedoc - - def _flattened_iris_by_path(self, nested_iris: dict['_NestedIriKey', set[str]]): - _by_path = defaultdict(set) - for _iri_key, _iris in nested_iris.items(): - _by_path[_iri_key.path].update(_iris) - return _by_path - - def _flattened_iris(self, nested_iris: dict['_NestedIriKey', set[str]]): - return { - _iri_path_as_flattened_key(_path): list(_iris) - for _path, _iris in self._flattened_iris_by_path(nested_iris).items() - } - - def _flattened_iris_suffuniq(self, nested_iris: dict['_NestedIriKey', set[str]]): - return { - _iri_path_as_flattened_key(_path): [ - get_sufficiently_unique_iri(_iri) - for _iri in _iris - ] - for _path, _iris in self._flattened_iris_by_path(nested_iris).items() - } - - def build_elastic_actions(self, messages_chunk: messages.MessagesChunk): - def _make_actionset(indexcard_id, *actions): - return self.MessageActionSet(indexcard_id, {'': actions}) - _indexcard_rdf_qs = latest_rdf_for_indexcard_pks(messages_chunk.target_ids_chunk) - _remaining_indexcard_ids = set(messages_chunk.target_ids_chunk) - for _indexcard_rdf in _indexcard_rdf_qs: - _suid = _indexcard_rdf.indexcard.source_record_suid - if _suid.has_forecompat_replacement(): - continue # skip this one, let it get deleted - _sourcedoc = self._build_sourcedoc(_indexcard_rdf) - if _sourcedoc: - _index_action = self.build_index_action( - doc_id=_indexcard_rdf.indexcard.get_iri(), - doc_source=_sourcedoc, - ) - _remaining_indexcard_ids.discard(_indexcard_rdf.indexcard_id) - yield _make_actionset(_indexcard_rdf.indexcard_id, _index_action) - # delete any that don't have "latest" rdf and derived osfmap_json - _leftovers = trove_db.Indexcard.objects.filter(id__in=_remaining_indexcard_ids) - for _indexcard in _leftovers: - yield _make_actionset(_indexcard.id, self.build_delete_action(_indexcard.get_iri())) - - def pls_handle_search__passthru(self, request_body=None, request_queryparams=None) -> dict: - return self.es8_client.search( - index=self.__index.full_index_name, - body={ - **(request_body or {}), - 'track_total_hits': True, - }, - params=(request_queryparams or {}), - ) - - def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchHandle: - _cursor = self._cardsearch_cursor(cardsearch_params) - _sort = self._cardsearch_sort(cardsearch_params.sort_list) - _query = self._cardsearch_query( - cardsearch_params.cardsearch_filter_set, - cardsearch_params.cardsearch_textsegment_set, - cardsearch_cursor=_cursor, - ) - _from_offset = ( - _cursor.start_offset - if _cursor.is_first_page() or not isinstance(_cursor, ReproduciblyRandomSampleCursor) - else _cursor.start_offset - len(_cursor.first_page_ids) - ) - _search_kwargs = dict( - query=_query, - aggs=self._cardsearch_aggs(cardsearch_params), - sort=_sort, - from_=_from_offset, - size=_cursor.bounded_page_size, - source=False, # no need to get _source; _id is enough - ) - if settings.DEBUG: - logger.info(json.dumps(_search_kwargs, indent=2)) - try: - _es8_response = self.es8_client.search( - index=self.__index.full_index_name, - **_search_kwargs, - ) - except elasticsearch8.TransportError as error: - raise exceptions.IndexStrategyError() from error # TODO: error messaging - return 
self._cardsearch_handle(cardsearch_params, _es8_response, _cursor) - - def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchHandle: - _cursor = OffsetCursor.from_cursor(valuesearch_params.page_cursor) - _is_date_search = osfmap.is_date_property(valuesearch_params.valuesearch_propertypath[-1]) - _search_kwargs = dict( - query=self._cardsearch_query( - valuesearch_params.cardsearch_filter_set, - valuesearch_params.cardsearch_textsegment_set, - additional_filters=[{'term': {'iri_paths_present': iri_path_as_keyword( - valuesearch_params.valuesearch_propertypath, - )}}], - ), - size=0, # ignore cardsearch hits; just want the aggs - aggs=( - self._valuesearch_date_aggs(valuesearch_params) - if _is_date_search - else self._valuesearch_iri_aggs(valuesearch_params, _cursor) - ), - ) - if settings.DEBUG: - logger.info(json.dumps(_search_kwargs, indent=2)) - try: - _es8_response = self.es8_client.search( - index=self.__index.full_index_name, - **_search_kwargs, - ) - except elasticsearch8.TransportError as error: - raise exceptions.IndexStrategyError() from error # TODO: error messaging - return self._valuesearch_handle(valuesearch_params, _es8_response, _cursor) - - ### - # query implementation - - def _cardsearch_cursor(self, cardsearch_params: CardsearchParams) -> OffsetCursor: - _request_cursor = cardsearch_params.page_cursor - if ( - _request_cursor.is_basic() - and not cardsearch_params.sort_list - and not cardsearch_params.cardsearch_textsegment_set - ): - return ReproduciblyRandomSampleCursor.from_cursor(_request_cursor) - return OffsetCursor.from_cursor(_request_cursor) - - def _cardsearch_query( - self, - filter_set, textsegment_set, *, - additional_filters=None, - cardsearch_cursor: PageCursor | None = None, - ) -> dict: - _bool_query = { - 'filter': additional_filters or [], - 'must': [], - 'must_not': [], - 'should': [], - } - for _searchfilter in filter_set: - if _searchfilter.operator == SearchFilter.FilterOperator.NONE_OF: - _bool_query['must_not'].append(self._cardsearch_iri_filter(_searchfilter)) - elif _searchfilter.operator == SearchFilter.FilterOperator.ANY_OF: - _bool_query['filter'].append(self._cardsearch_iri_filter(_searchfilter)) - elif _searchfilter.operator == SearchFilter.FilterOperator.IS_PRESENT: - _bool_query['filter'].append(self._cardsearch_presence_query(_searchfilter)) - elif _searchfilter.operator == SearchFilter.FilterOperator.IS_ABSENT: - _bool_query['must_not'].append(self._cardsearch_presence_query(_searchfilter)) - elif _searchfilter.operator.is_date_operator(): - _bool_query['filter'].append(self._cardsearch_date_filter(_searchfilter)) - else: - raise ValueError(f'unknown filter operator {_searchfilter.operator}') - _textq_builder = self._NestedTextQueryBuilder( - relevance_matters=not isinstance(cardsearch_cursor, ReproduciblyRandomSampleCursor), - ) - for _textsegment in textsegment_set: - for _boolkey, _textqueries in _textq_builder.textsegment_boolparts(_textsegment).items(): - _bool_query[_boolkey].extend(_textqueries) - if not isinstance(cardsearch_cursor, ReproduciblyRandomSampleCursor): - # no need for randomness - return {'bool': _bool_query} - if not cardsearch_cursor.first_page_ids: - # independent random sample - return { - 'function_score': { - 'query': {'bool': _bool_query}, - 'boost_mode': 'replace', - 'random_score': {}, # default random_score is fast and unpredictable - }, - } - _firstpage_uuid_query = {'terms': {'indexcard_uuid': cardsearch_cursor.first_page_ids}} - if cardsearch_cursor.is_first_page(): - # 
returning to a first page previously visited - _bool_query['filter'].append(_firstpage_uuid_query) - return {'bool': _bool_query} - # get a subsequent page using reproducible randomness - _bool_query['must_not'].append(_firstpage_uuid_query) - return { - 'function_score': { - 'query': {'bool': _bool_query}, - 'boost_mode': 'replace', - 'random_score': { - 'seed': ''.join(cardsearch_cursor.first_page_ids), - 'field': 'indexcard_uuid', - }, - }, - } - - def _cardsearch_aggs(self, cardsearch_params): - _aggs = {} - if cardsearch_params.related_property_paths: - _aggs['related_propertypath_usage'] = {'terms': { - 'field': 'iri_paths_present', - 'include': [ - iri_path_as_keyword(_path) - for _path in cardsearch_params.related_property_paths - ], - 'size': len(cardsearch_params.related_property_paths), - }} - return _aggs - - def _valuesearch_iri_aggs(self, valuesearch_params: ValuesearchParams, cursor: OffsetCursor): - _nested_iri_bool: dict[str, Any] = { - 'filter': [{'term': {'nested_iri.suffuniq_path_from_focus': iri_path_as_keyword( - valuesearch_params.valuesearch_propertypath, - suffuniq=True, - )}}], - 'must': [], - 'must_not': [], - 'should': [], - } - _nested_terms_agg = { - 'field': 'nested_iri.iri_value', - # WARNING: terribly inefficient pagination (part one) - 'size': cursor.start_offset + cursor.bounded_page_size + 1, - } - _iris = list(valuesearch_params.valuesearch_iris()) - if _iris: - _nested_iri_bool['filter'].append({'terms': { - 'nested_iri.iri_value': _iris, - }}) - _nested_terms_agg['size'] = len(_iris) - _nested_terms_agg['include'] = _iris - _type_iris = list(valuesearch_params.valuesearch_type_iris()) - if _type_iris: - _nested_iri_bool['filter'].append({'terms': { - 'nested_iri.value_type_iri': _type_iris, - }}) - _textq_builder = self._SimpleTextQueryBuilder('nested_iri.value_namelike_text') - for _textsegment in valuesearch_params.valuesearch_textsegment_set: - for _boolkey, _textqueries in _textq_builder.textsegment_boolparts(_textsegment).items(): - _nested_iri_bool[_boolkey].extend(_textqueries) - return { - 'in_nested_iri': { - 'nested': {'path': 'nested_iri'}, - 'aggs': { - 'value_at_propertypath': { - 'filter': {'bool': _nested_iri_bool}, - 'aggs': { - 'iri_values': { - 'terms': _nested_terms_agg, - 'aggs': { - 'type_iri': {'terms': { - 'field': 'nested_iri.value_type_iri', - }}, - 'name_text': {'terms': { - 'field': 'nested_iri.value_name_text.raw', - }}, - 'title_text': {'terms': { - 'field': 'nested_iri.value_title_text.raw', - }}, - 'label_text': {'terms': { - 'field': 'nested_iri.value_label_text.raw', - }}, - }, - }, - }, - }, - }, - }, - } - - def _valuesearch_date_aggs(self, valuesearch_params: ValuesearchParams): - _aggs = { - 'in_nested_date': { - 'nested': {'path': 'nested_date'}, - 'aggs': { - 'value_at_propertypath': { - 'filter': {'term': { - 'nested_date.suffuniq_path_from_focus': iri_path_as_keyword( - valuesearch_params.valuesearch_propertypath, - suffuniq=True, - ), - }}, - 'aggs': { - 'count_by_year': { - 'date_histogram': { - 'field': 'nested_date.date_value', - 'calendar_interval': 'year', - 'format': 'yyyy', - 'order': {'_key': 'desc'}, - 'min_doc_count': 1, - }, - }, - }, - }, - }, - }, - } - return _aggs - - def _valuesearch_handle( - self, - valuesearch_params: ValuesearchParams, - es8_response: dict, - cursor: OffsetCursor, - ): - _iri_aggs = es8_response['aggregations'].get('in_nested_iri') - if _iri_aggs: - _buckets = _iri_aggs['value_at_propertypath']['iri_values']['buckets'] - _bucket_count = len(_buckets) - # WARNING: terribly 
inefficient pagination (part two) - _page_end_index = cursor.start_offset + cursor.bounded_page_size - _bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages - cursor.total_count = ( - MANY_MORE - if (_bucket_count > _page_end_index) # agg includes one more, if there - else _bucket_count - ) - return ValuesearchHandle( - cursor=cursor, - search_result_page=[ - self._valuesearch_iri_result(_iri_bucket) - for _iri_bucket in _bucket_page - ], - search_params=valuesearch_params, - ) - else: # assume date - _year_buckets = ( - es8_response['aggregations']['in_nested_date'] - ['value_at_propertypath']['count_by_year']['buckets'] - ) - return ValuesearchHandle( - cursor=PageCursor(len(_year_buckets)), - search_result_page=[ - self._valuesearch_date_result(_year_bucket) - for _year_bucket in _year_buckets - ], - search_params=valuesearch_params, - ) - - def _valuesearch_iri_result(self, iri_bucket): - return ValuesearchResult( - value_iri=iri_bucket['key'], - value_type=_bucketlist(iri_bucket['type_iri']), - name_text=_bucketlist(iri_bucket['name_text']), - title_text=_bucketlist(iri_bucket['title_text']), - label_text=_bucketlist(iri_bucket['label_text']), - match_count=iri_bucket['doc_count'], - ) - - def _valuesearch_date_result(self, date_bucket): - return ValuesearchResult( - value_iri=None, - value_value=date_bucket['key_as_string'], - label_text=(date_bucket['key_as_string'],), - match_count=date_bucket['doc_count'], - ) - - def _cardsearch_presence_query(self, search_filter) -> dict: - _filters = [ - self._cardsearch_path_presence_query(_path) - for _path in search_filter.propertypath_set - ] - if len(_filters) == 1: - return _filters[0] - return {'bool': { - 'minimum_should_match': 1, - 'should': _filters, - }} - - def _cardsearch_path_presence_query(self, path: tuple[str, ...]): - if all(_pathstep == GLOB_PATHSTEP for _pathstep in path): - return {'nested': { - 'path': 'nested_iri', - 'query': {'term': {'nested_iri.distance_from_focus': len(path)}}, - }} - return {'term': { - 'iri_paths_present_suffuniq': iri_path_as_keyword(path, suffuniq=True), - }} - - def _cardsearch_iri_filter(self, search_filter) -> dict: - _filters = [ - self._cardsearch_path_iri_query(_path, search_filter.value_set) - for _path in search_filter.propertypath_set - ] - if len(_filters) == 1: - return _filters[0] - return {'bool': { - 'minimum_should_match': 1, - 'should': _filters, - }} - - def _cardsearch_path_iri_query(self, path, value_set): - _suffuniq_values = [ - get_sufficiently_unique_iri(_iri) - for _iri in value_set - ] - if all(_pathstep == GLOB_PATHSTEP for _pathstep in path): - return {'nested': { - 'path': 'nested_iri', - 'query': {'bool': { - 'must': [ # both - {'term': {'nested_iri.distance_from_focus': len(path)}}, - {'terms': {'nested_iri.suffuniq_iri_value': _suffuniq_values}}, - ], - }}, - }} - # without a glob-path, can use the flattened keyword field - return {'terms': {_iri_path_as_flattened_field(path): _suffuniq_values}} - - def _cardsearch_date_filter(self, search_filter): - return {'nested': { - 'path': 'nested_date', - 'query': {'bool': {'filter': list(self._iter_nested_date_filters(search_filter))}}, - }} - - def _iter_nested_date_filters(self, search_filter) -> Iterator[dict]: - # filter by requested paths - yield _pathset_as_nestedvalue_filter(search_filter.propertypath_set, 'nested_date') - # filter by requested value/operator - if search_filter.operator == SearchFilter.FilterOperator.BEFORE: - _value = min(search_filter.value_set) # rely on 
string-comparable isoformat - yield {'range': {'nested_date.date_value': { - 'lt': _daterange_value_and_format(_value) - }}} - elif search_filter.operator == SearchFilter.FilterOperator.AFTER: - _value = max(search_filter.value_set) # rely on string-comparable isoformat - yield {'range': {'nested_date.date_value': { - 'gt': _daterange_value_and_format(_value) - }}} - elif search_filter.operator == SearchFilter.FilterOperator.AT_DATE: - for _value in search_filter.value_set: - _filtervalue = _daterange_value_and_format(_value) - yield {'range': {'nested_date.date_value': { - 'gte': _filtervalue, - 'lte': _filtervalue, - }}} - else: - raise ValueError(f'invalid date filter operator (got {search_filter.operator})') - - def _cardsearch_sort(self, sort_list: tuple[SortParam, ...]): - if not sort_list: - return None - return [ - {'nested_date.date_value': { - 'order': ('desc' if _sortparam.descending else 'asc'), - 'nested': { - 'path': 'nested_date', - 'filter': {'term': { - 'nested_date.suffuniq_path_from_focus': iri_path_as_keyword( - _sortparam.propertypath, - suffuniq=True, - ), - }}, - }, - }} - for _sortparam in sort_list - ] - - def _cardsearch_handle( - self, - cardsearch_params: CardsearchParams, - es8_response: dict, - cursor: OffsetCursor, - ) -> CardsearchHandle: - _es8_total = es8_response['hits']['total'] - if _es8_total['relation'] != 'eq': - cursor.total_count = MANY_MORE - elif isinstance(cursor, ReproduciblyRandomSampleCursor) and not cursor.is_first_page(): - # account for the filtered-out first page - cursor.total_count = _es8_total['value'] + len(cursor.first_page_ids) - else: # exact (and small) count - cursor.total_count = _es8_total['value'] - _results = [] - for _es8_hit in es8_response['hits']['hits']: - _card_iri = _es8_hit['_id'] - _results.append(CardsearchResult( - card_iri=_card_iri, - text_match_evidence=list(self._gather_textmatch_evidence(_es8_hit)), - )) - _relatedproperty_list: list[PropertypathUsage] = [] - if cardsearch_params.related_property_paths: - _relatedproperty_list.extend( - PropertypathUsage(property_path=_path, usage_count=0) - for _path in cardsearch_params.related_property_paths - ) - _relatedproperty_by_path = { - _result.property_path: _result - for _result in _relatedproperty_list - } - for _bucket in es8_response['aggregations']['related_propertypath_usage']['buckets']: - _path = tuple(json.loads(_bucket['key'])) - _relatedproperty_by_path[_path].usage_count += _bucket['doc_count'] - return CardsearchHandle( - cursor=cursor, - search_result_page=_results, - related_propertypath_results=_relatedproperty_list, - search_params=cardsearch_params, - ) - - def _gather_textmatch_evidence(self, es8_hit) -> Iterable[TextMatchEvidence]: - for _innerhit_group in es8_hit.get('inner_hits', {}).values(): - for _innerhit in _innerhit_group['hits']['hits']: - _property_path = tuple( - json.loads(_innerhit['fields']['nested_text.path_from_focus'][0]), - ) - try: - _language_iris = _innerhit['fields']['nested_text.language_iri'] - except KeyError: - _language_iris = () - for _highlight in _innerhit['highlight']['nested_text.text_value']: - yield TextMatchEvidence( - property_path=_property_path, - matching_highlight=primitive_rdf.literal(_highlight, datatype_iris=_language_iris), - card_iri=_innerhit['_id'], - ) - - class _SimpleTextQueryBuilder: - def __init__( - self, text_field, *, - relevance_matters=False, - ): - self._text_field = text_field - self._relevance_matters = relevance_matters - - def textsegment_boolparts(self, textsegment: Textsegment) -> 
dict[str, list]: - if textsegment.is_negated: - return {'must_not': [self.exact_text_query(textsegment.text)]} - if not textsegment.is_fuzzy: - return {'must': [self.exact_text_query(textsegment.text)]} - if not self._relevance_matters: - return {'must': [self.fuzzy_text_must_query(textsegment.text)]} - return { - 'must': [self.fuzzy_text_must_query(textsegment.text)], - 'should': [self.fuzzy_text_should_query(textsegment.text)], - } - - def exact_text_query(self, text: str) -> dict: - # TODO: textsegment.is_openended (prefix query) - return {'match_phrase': { - self._text_field: {'query': text}, - }} - - def fuzzy_text_must_query(self, text: str) -> dict: - # TODO: textsegment.is_openended (prefix query) - return {'match': { - self._text_field: { - 'query': text, - 'fuzziness': 'AUTO', - # TODO: 'operator': 'and' (by query param FilterOperator, `cardSearchText[*][every-word]=...`) - }, - }} - - def fuzzy_text_should_query(self, text: str): - return {'match_phrase': { - self._text_field: { - 'query': text, - 'slop': len(text.split()), - }, - }} - - class _NestedTextQueryBuilder(_SimpleTextQueryBuilder): - def __init__(self, **kwargs): - super().__init__('nested_text.text_value', **kwargs) - - def textsegment_boolparts(self, textsegment: Textsegment) -> dict[str, list]: - return { - _boolkey: [ - self._make_nested_query(textsegment, _query) - for _query in _queries - ] - for _boolkey, _queries in super().textsegment_boolparts(textsegment).items() - } - - def _make_nested_query(self, textsegment, query): - _nested_q = {'nested': { - 'path': 'nested_text', - 'query': {'bool': { - 'filter': _pathset_as_nestedvalue_filter(textsegment.propertypath_set, 'nested_text'), - 'must': query, - }}, - }} - if self._relevance_matters: - _nested_q['nested']['inner_hits'] = self._inner_hits() - return _nested_q - - def _inner_hits(self, *, highlight_query=None) -> dict: - _highlight = { - 'type': 'unified', - 'fields': {'nested_text.text_value': {}}, - } - if highlight_query is not None: - _highlight['highlight_query'] = highlight_query - return { - 'name': str(uuid.uuid4()), # avoid inner-hit name collisions - 'highlight': _highlight, - '_source': False, # _source is expensive for nested docs - 'docvalue_fields': [ - 'nested_text.path_from_focus', - 'nested_text.language_iri', - ], - } - - -### -# module-local utils - -def _should_skip_card(indexcard_rdf, rdfdoc): - # skip cards without some value for name/title/label - return not any(rdfdoc.q(indexcard_rdf.focus_iri, osfmap.NAMELIKE_PROPERTIES)) - - -def _bucketlist(agg_result: dict) -> list[str]: - return [ - _bucket['key'] - for _bucket in agg_result['buckets'] - ] - - -def _daterange_value_and_format(datevalue: str): - _cleanvalue = datevalue.strip() - if re.fullmatch(r'\d{4,}', _cleanvalue): - return f'{_cleanvalue}||/y' - if re.fullmatch(r'\d{4,}-\d{2}', _cleanvalue): - return f'{_cleanvalue}||/M' - if re.fullmatch(r'\d{4,}-\d{2}-\d{2}', _cleanvalue): - return f'{_cleanvalue}||/d' - raise ValueError(f'bad date value "{datevalue}"') - - -def _iri_path_as_indexable_fields(path: tuple[str, ...]): - assert path, 'path should not be empty' - return { - 'path_from_focus': iri_path_as_keyword(path), - 'suffuniq_path_from_focus': iri_path_as_keyword(path, suffuniq=True), - 'property_iri': path[-1], - 'distance_from_focus': len(path), - } - - -def _iri_path_as_flattened_key(path: tuple[str, ...]) -> str: - return base64.b16encode(json.dumps(path).encode()).decode() - - -def _iri_path_as_flattened_field(path: tuple[str, ...]) -> str: - return 
f'flat_iri_values_suffuniq.{_iri_path_as_flattened_key(path)}' - - -def _pathset_as_nestedvalue_filter(propertypath_set: frozenset[tuple[str, ...]], nested_path: str): - _suffuniq_iri_paths = [] - _glob_path_lengths = [] - for _path in propertypath_set: - if all(_pathstep == GLOB_PATHSTEP for _pathstep in _path): - _glob_path_lengths.append(len(_path)) - else: - _suffuniq_iri_paths.append(iri_path_as_keyword(_path, suffuniq=True)) - if _suffuniq_iri_paths and _glob_path_lengths: - return {'bool': { - 'minimum_should_match': 1, - 'should': [ - {'terms': {f'{nested_path}.distance_from_focus': _glob_path_lengths}}, - {'terms': {f'{nested_path}.suffuniq_path_from_focus': _suffuniq_iri_paths}}, - ], - }} - if _glob_path_lengths: - return {'terms': {f'{nested_path}.distance_from_focus': _glob_path_lengths}} - return {'terms': {f'{nested_path}.suffuniq_path_from_focus': _suffuniq_iri_paths}} - - -@dataclasses.dataclass(frozen=True) -class _NestedIriKey: - '''if this is the same for multiple iri values, they can be combined in one `nested_iri` doc - ''' - path: tuple[str, ...] - type_iris: frozenset[str] - label_text: frozenset[str] - title_text: frozenset[str] - name_text: frozenset[str] - - @classmethod - def for_iri_at_path(cls, path: tuple[str, ...], iri: str, rdfdoc): - return cls( - path=path, - type_iris=frozenset(rdfdoc.q(iri, RDF.type)), - # TODO: don't discard language for name/title/label - name_text=frozenset( - _text.unicode_value - for _text in rdfdoc.q(iri, osfmap.NAME_PROPERTIES) - if isinstance(_text, primitive_rdf.Literal) - ), - title_text=frozenset( - _text.unicode_value - for _text in rdfdoc.q(iri, osfmap.TITLE_PROPERTIES) - if isinstance(_text, primitive_rdf.Literal) - ), - label_text=frozenset( - _text.unicode_value - for _text in rdfdoc.q(iri, osfmap.LABEL_PROPERTIES) - if isinstance(_text, primitive_rdf.Literal) - ), - ) - - def as_indexable_fields(self): - # matches fields in the mapping for `nested_iri`, above - return { - **_iri_path_as_indexable_fields(self.path), - 'value_type_iri': list(self.type_iris), - 'value_label_text': list(self.label_text), - 'value_title_text': list(self.title_text), - 'value_name_text': list(self.name_text), - } diff --git a/tests/share/search/conftest.py b/tests/share/search/conftest.py index 3cba6ba08..0b10d906f 100644 --- a/tests/share/search/conftest.py +++ b/tests/share/search/conftest.py @@ -6,12 +6,8 @@ @pytest.fixture def mock_elastic_clients(settings): # set elastic urls to non-empty but non-usable values - settings.ELASTICSEARCH5_URL = 'fake://bleh' settings.ELASTICSEARCH8_URL = 'fake://bluh' - with mock.patch('share.search.index_strategy.sharev2_elastic5.elasticsearch5'): - with mock.patch('share.search.index_strategy.elastic8.elasticsearch8'): - yield + with mock.patch('share.search.index_strategy.elastic8.elasticsearch8'): + yield from share.search.index_strategy.elastic8 import Elastic8IndexStrategy Elastic8IndexStrategy._get_elastic8_client.cache_clear() - from share.search.index_strategy.sharev2_elastic5 import Sharev2Elastic5IndexStrategy - Sharev2Elastic5IndexStrategy._get_elastic5_client.cache_clear() diff --git a/tests/share/search/index_strategy/test_sharev2_elastic5.py b/tests/share/search/index_strategy/test_sharev2_elastic5.py deleted file mode 100644 index 016330c84..000000000 --- a/tests/share/search/index_strategy/test_sharev2_elastic5.py +++ /dev/null @@ -1,89 +0,0 @@ -import unittest - -from django.conf import settings -from primitive_metadata import primitive_rdf as rdf - -from share.search import messages -from 
share.search.index_strategy.sharev2_elastic5 import Sharev2Elastic5IndexStrategy -from tests.trove.factories import create_indexcard -from trove.vocab.namespaces import DCTERMS, SHAREv2, RDF, BLARG -from ._with_real_services import RealElasticTestCase - - -@unittest.skipUnless(settings.ELASTICSEARCH5_URL, 'missing ELASTICSEARCH5_URL setting') -class TestSharev2Elastic5(RealElasticTestCase): - # for RealElasticTestCase - def get_index_strategy(self): - index_strategy = Sharev2Elastic5IndexStrategy('test_sharev2_elastic5') - if not index_strategy.STATIC_INDEXNAME.startswith('test_'): - index_strategy.STATIC_INDEXNAME = f'test_{index_strategy.STATIC_INDEXNAME}' - return index_strategy - - def setUp(self): - super().setUp() - self.__indexcard = create_indexcard( - BLARG.hello, - { - BLARG.hello: { - RDF.type: {SHAREv2.CreativeWork}, - DCTERMS.title: {rdf.literal('hello', language='en')}, - }, - }, - deriver_iris=[SHAREv2.sharev2_elastic], - ) - - def test_without_daemon(self): - _formatted_record = self._get_formatted_record() - _messages_chunk = messages.MessagesChunk( - messages.MessageType.INDEX_SUID, - [_formatted_record.suid_id], - ) - self._assert_happypath_without_daemon( - _messages_chunk, - expected_doc_count=1, - ) - - def test_with_daemon(self): - _formatted_record = self._get_formatted_record() - _messages_chunk = messages.MessagesChunk( - messages.MessageType.INDEX_SUID, - [_formatted_record.suid_id], - ) - self._assert_happypath_with_daemon( - _messages_chunk, - expected_doc_count=1, - ) - - # override RealElasticTestCase to match hacks done with assumptions - # (single index that will not be updated again before being deleted) - def _assert_happypath_until_ingest(self): - # initial - _index = next(self.index_strategy.each_subnamed_index()) - assert not _index.pls_check_exists() - index_status = _index.pls_get_status() - assert not index_status.creation_date - assert not index_status.is_kept_live - assert not index_status.is_default_for_searching - assert not index_status.doc_count - # create index - _index.pls_create() - assert _index.pls_check_exists() - index_status = _index.pls_get_status() - assert index_status.creation_date - assert index_status.is_kept_live # change from base class - assert index_status.is_default_for_searching # change from base class - assert not index_status.doc_count - # keep index live (with ingested updates) - self.index_strategy.pls_start_keeping_live() # now a no-op - index_status = _index.pls_get_status() - assert index_status.creation_date - assert index_status.is_kept_live - assert index_status.is_default_for_searching # change from base class - assert not index_status.doc_count - # default index for searching - self.index_strategy.pls_make_default_for_searching() # now a no-op - index_status = _index.pls_get_status() - assert index_status.creation_date - assert index_status.is_kept_live - assert index_status.is_default_for_searching - assert not index_status.doc_count diff --git a/tests/share/search/index_strategy/test_strategy_selection.py b/tests/share/search/index_strategy/test_strategy_selection.py index b4d8a1045..4c04ba58b 100644 --- a/tests/share/search/index_strategy/test_strategy_selection.py +++ b/tests/share/search/index_strategy/test_strategy_selection.py @@ -6,7 +6,6 @@ IndexStrategy, each_strategy, get_strategy, - sharev2_elastic5, sharev2_elastic8, trove_indexcard_flats, trovesearch_denorm, @@ -19,7 +18,6 @@ @pytest.fixture def patched_strategies(mock_elastic_clients): _strategies = [ - 
sharev2_elastic5.Sharev2Elastic5IndexStrategy('sharev2_elastic5'), sharev2_elastic8.Sharev2Elastic8IndexStrategy('sharev2_elastic8'), trove_indexcard_flats.TroveIndexcardFlatsIndexStrategy('trove_indexcard_flats'), trovesearch_denorm.TrovesearchDenormIndexStrategy('trovesearch_denorm'), From 3b7176b06d3b8e4f04581e0ed6dbc8e9869fd437 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 12:24:02 -0400 Subject: [PATCH 39/46] remove trove_indexcard_flats index strategy --- share/search/index_strategy/__init__.py | 11 ++-------- .../index_strategy/test_strategy_selection.py | 2 -- .../test_trove_indexcard_flats.py | 21 ------------------- 3 files changed, 2 insertions(+), 32 deletions(-) delete mode 100644 tests/share/search/index_strategy/test_trove_indexcard_flats.py diff --git a/share/search/index_strategy/__init__.py b/share/search/index_strategy/__init__.py index f5c346fd6..b56a8de1a 100644 --- a/share/search/index_strategy/__init__.py +++ b/share/search/index_strategy/__init__.py @@ -5,10 +5,8 @@ from django.conf import settings from share.search.exceptions import IndexStrategyError -from share.models import FeatureFlag from trove.trovesearch import search_params from .sharev2_elastic8 import Sharev2Elastic8IndexStrategy -from .trove_indexcard_flats import TroveIndexcardFlatsIndexStrategy from .trovesearch_denorm import TrovesearchDenormIndexStrategy from ._base import IndexStrategy from ._indexnames import parse_indexname_parts @@ -33,7 +31,6 @@ class _AvailableStrategies(enum.Enum): ''' if settings.ELASTICSEARCH8_URL: sharev2_elastic8 = Sharev2Elastic8IndexStrategy('sharev2_elastic8') - trove_indexcard_flats = TroveIndexcardFlatsIndexStrategy('trove_indexcard_flats') trovesearch_denorm = TrovesearchDenormIndexStrategy('trovesearch_denorm') @@ -86,12 +83,8 @@ def get_strategy_for_sharev2_search(requested_name: str | None = None) -> IndexS def get_strategy_for_trovesearch(params: search_params.CardsearchParams) -> IndexStrategy: if params.index_strategy_name: # specific strategy requested _strategy = parse_strategy_name(params.index_strategy_name, for_search=True) - else: - _strategy_name = ( - _AvailableStrategies.trovesearch_denorm.name - if FeatureFlag.objects.flag_is_up(FeatureFlag.TROVESEARCH_DENORMILY) - else _AvailableStrategies.trove_indexcard_flats.name - ) + else: # static default: + _strategy_name = _AvailableStrategies.trovesearch_denorm.name _strategy = get_strategy(_strategy_name, for_search=True) return _strategy diff --git a/tests/share/search/index_strategy/test_strategy_selection.py b/tests/share/search/index_strategy/test_strategy_selection.py index 4c04ba58b..5f5e1df48 100644 --- a/tests/share/search/index_strategy/test_strategy_selection.py +++ b/tests/share/search/index_strategy/test_strategy_selection.py @@ -7,7 +7,6 @@ each_strategy, get_strategy, sharev2_elastic8, - trove_indexcard_flats, trovesearch_denorm, parse_strategy_name, ) @@ -19,7 +18,6 @@ def patched_strategies(mock_elastic_clients): _strategies = [ sharev2_elastic8.Sharev2Elastic8IndexStrategy('sharev2_elastic8'), - trove_indexcard_flats.TroveIndexcardFlatsIndexStrategy('trove_indexcard_flats'), trovesearch_denorm.TrovesearchDenormIndexStrategy('trovesearch_denorm'), ] with patch_index_strategies(_strategies): diff --git a/tests/share/search/index_strategy/test_trove_indexcard_flats.py b/tests/share/search/index_strategy/test_trove_indexcard_flats.py deleted file mode 100644 index 0718ad346..000000000 --- a/tests/share/search/index_strategy/test_trove_indexcard_flats.py +++ /dev/null @@ 
-1,21 +0,0 @@ -from share.search.index_strategy.trove_indexcard_flats import TroveIndexcardFlatsIndexStrategy - -from . import _common_trovesearch_tests - - -class TestTroveIndexcardFlats(_common_trovesearch_tests.CommonTrovesearchTests): - # for RealElasticTestCase - def get_index_strategy(self): - return TroveIndexcardFlatsIndexStrategy('test_trove_indexcard_flats') - - def cardsearch_integer_cases(self): - yield from () # integers not indexed by this strategy - - def cardsearch_trailingslash_cases(self): - yield from () # trailing-slash handling improved in trovesearch_denorm - - def valuesearch_sameas_cases(self): - yield from () # sameas handling improved in trovesearch_denorm - - def valuesearch_trailingslash_cases(self): - yield from () # trailing-slash handling improved in trovesearch_denorm From 2a500778fcd96c7a713e66ddd8e5053e839f20bc Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 13:29:06 -0400 Subject: [PATCH 40/46] use python deps from the built image --- docker-compose.yml | 25 ------------------------- how-to/run-locally.md | 16 +++------------- 2 files changed, 3 insertions(+), 38 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6a7468b61..015bd8851 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,8 +17,6 @@ volumes: external: false share_dist_vol: external: false - share_requirements_vol: - external: false networks: share_network: @@ -142,26 +140,6 @@ services: # SHARE software # ################## - requirements: - build: - context: . - dockerfile: Dockerfile - command: - - /bin/bash - - -c - - apt-get update && - apt-get install -y gcc && - $POETRY_HOME/bin/poetry install --no-root --compile --with dev && - rm -Rf /python3.13/* && - apt-get remove -y gcc && - cp -Rf -p /usr/local/lib/python3.13 / - restart: 'no' - volumes: - - ./:/code:cached - - share_requirements_vol:/python3.13 - networks: - - share_network - worker: build: context: . @@ -176,7 +154,6 @@ services: - indexer volumes: - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.13 - elastic8_cert_vol:/elastic8_certs env_file: - .docker-compose.env @@ -200,7 +177,6 @@ services: - elastic8 volumes: - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.13 - elastic8_cert_vol:/elastic8_certs env_file: - .docker-compose.env @@ -222,7 +198,6 @@ services: - elastic8 volumes: - ./:/code:cached - - share_requirements_vol:/usr/local/lib/python3.13 - elastic8_cert_vol:/elastic8_certs env_file: - .docker-compose.env diff --git a/how-to/run-locally.md b/how-to/run-locally.md index 99e4a523d..f308f22c3 100644 --- a/how-to/run-locally.md +++ b/how-to/run-locally.md @@ -22,17 +22,7 @@ the rest of this guide assumes your working directory is the SHARE repository ro cd ./share ``` -### 1. download several bits -download docker images (depending on your internet connection, this may take a beat): -``` -docker-compose pull -``` -install python dependencies (in a shared docker volume): -``` -docker-compose up requirements -``` - -### 2. structured data +### 1. 
structured data
 there are two services that store more-or-less persistent data: `postgres` and `elastic8`
 
 let's start them from the host machine:
@@ -52,12 +42,12 @@ from within that worker shell, use django's `migrate` command to set up tables i
 ```
 python manage.py migrate
 ```
-...and use `sharectl` to set up indexes in elasticsearch:
+...and use the `shtrove_search_setup` management command to set up indexes in elasticsearch:
 ```
 python manage.py shtrove_search_setup --initial
 ```
 
-### 3. start 'em up
+### 2. start 'em up
 all other services can now be started from the host machine (upping `worker` ups all)
 ```
 docker-compose up -d worker
 ```

From 28e507fb542bbb61663b9bb6e40be3d0a53c334b Mon Sep 17 00:00:00 2001
From: abram axel booth
Date: Wed, 9 Apr 2025 13:51:34 -0400
Subject: [PATCH 41/46] Revert "use python deps from the built image"

This reverts commit 5776adc340b43d4dbc5a22f61971b7d68f4dd8d2.
---
 docker-compose.yml    | 25 +++++++++++++++++++++++++
 how-to/run-locally.md | 16 +++++++++++++---
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 015bd8851..6a7468b61 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -17,6 +17,8 @@ volumes:
     external: false
   share_dist_vol:
     external: false
+  share_requirements_vol:
+    external: false
 
 networks:
   share_network:
@@ -140,6 +142,26 @@ services:
 # SHARE software #
 ##################
 
+  requirements:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command:
+      - /bin/bash
+      - -c
+      - apt-get update &&
+        apt-get install -y gcc &&
+        $POETRY_HOME/bin/poetry install --no-root --compile --with dev &&
+        rm -Rf /python3.13/* &&
+        apt-get remove -y gcc &&
+        cp -Rf -p /usr/local/lib/python3.13 /
+    restart: 'no'
+    volumes:
+      - ./:/code:cached
+      - share_requirements_vol:/python3.13
+    networks:
+      - share_network
+
   worker:
     build:
       context: .
@@ -154,6 +176,7 @@ services:
       - indexer
     volumes:
       - ./:/code:cached
+      - share_requirements_vol:/usr/local/lib/python3.13
       - elastic8_cert_vol:/elastic8_certs
     env_file:
       - .docker-compose.env
@@ -177,6 +200,7 @@ services:
      - elastic8
     volumes:
       - ./:/code:cached
+      - share_requirements_vol:/usr/local/lib/python3.13
       - elastic8_cert_vol:/elastic8_certs
     env_file:
       - .docker-compose.env
@@ -198,6 +222,7 @@ services:
       - elastic8
     volumes:
       - ./:/code:cached
+      - share_requirements_vol:/usr/local/lib/python3.13
       - elastic8_cert_vol:/elastic8_certs
     env_file:
       - .docker-compose.env
diff --git a/how-to/run-locally.md b/how-to/run-locally.md
index f308f22c3..99e4a523d 100644
--- a/how-to/run-locally.md
+++ b/how-to/run-locally.md
@@ -22,7 +22,17 @@ the rest of this guide assumes your working directory is the SHARE repository ro
 ```
 cd ./share
 ```
 
-### 1. structured data
+### 1. download several bits
+download docker images (depending on your internet connection, this may take a beat):
+```
+docker-compose pull
+```
+install python dependencies (in a shared docker volume):
+```
+docker-compose up requirements
+```
+
+### 2. structured data
 there are two services that store more-or-less persistent data: `postgres` and `elastic8`
 
 let's start them from the host machine:
@@ -42,12 +52,12 @@ from within that worker shell, use django's `migrate` command to set up tables i
 ```
 python manage.py migrate
 ```
-...and use the `shtrove_search_setup` management command to set up indexes in elasticsearch:
+...and use `sharectl` to set up indexes in elasticsearch:
 ```
 python manage.py shtrove_search_setup --initial
 ```
 
-### 2. start 'em up
+### 3. 
start 'em up all other services can now be started from the host machine (upping `worker` ups all) ``` docker-compose up -d worker From 8502306f29c0d6f70623772d01e7d1b5d3212b5c Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 9 Apr 2025 13:53:18 -0400 Subject: [PATCH 42/46] fix requirements in docker-compose --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6a7468b61..5ba18f63f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -151,7 +151,7 @@ services: - -c - apt-get update && apt-get install -y gcc && - $POETRY_HOME/bin/poetry install --no-root --compile --with dev && + $$POETRY_HOME/bin/poetry install --compile --with dev && rm -Rf /python3.13/* && apt-get remove -y gcc && cp -Rf -p /usr/local/lib/python3.13 / From 25bf800490bd1a39ab072557d5cfbf65d0fe56fe Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 10 Apr 2025 11:51:10 -0400 Subject: [PATCH 43/46] clarify poetry venv usage --- Dockerfile | 14 +++- poetry.lock | 219 +++++++++++++++++-------------------------------- pyproject.toml | 6 +- 3 files changed, 86 insertions(+), 153 deletions(-) diff --git a/Dockerfile b/Dockerfile index 17a029675..217b745b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,12 +25,17 @@ RUN update-ca-certificates RUN mkdir -p /code WORKDIR /code +### +# python dependencies + +# note: installs dependencies on the system, roundabouts `/usr/local/lib/python3.13/site-packages/` + ENV POETRY_NO_INTERACTION=1 \ POETRY_VIRTUALENVS_OPTIONS_ALWAYS_COPY=1 \ POETRY_VIRTUALENVS_CREATE=0 \ - POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=0 \ POETRY_CACHE_DIR=/tmp/poetry-cache \ - POETRY_HOME=/tmp/poetry + POETRY_HOME=/tmp/poetry-venv RUN python -m venv $POETRY_HOME @@ -43,7 +48,8 @@ RUN $POETRY_HOME/bin/poetry install --compile RUN apt-get remove -y \ gcc \ - zlib1g-dev + zlib1g-dev \ + && apt-get autoremove -y COPY ./ /code/ @@ -59,7 +65,7 @@ CMD ["python", "manage.py", "--help"] ### Dist FROM app AS dist -RUN $POETRY_HOME/bin/poetry install --compile --only dist +RUN $POETRY_HOME/bin/poetry install --compile --only deploy ### Dev FROM app AS dev diff --git a/poetry.lock b/poetry.lock index a64b8416f..10716908c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. [[package]] name = "amqp" @@ -30,38 +30,6 @@ files = [ [package.extras] tests = ["mypy (>=0.800)", "pytest", "pytest-asyncio"] -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." 
-optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -groups = ["dev"] -markers = "sys_platform == \"win32\"" -files = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] - -[[package]] -name = "attrs" -version = "25.3.0" -description = "Classes Without Boilerplate" -optional = false -python-versions = ">=3.8" -groups = ["dev"] -files = [ - {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, - {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, -] - -[package.extras] -benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier"] -tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.10\""] - [[package]] name = "bcrypt" version = "4.3.0" @@ -214,7 +182,7 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." 
optional = false python-versions = ">=3.8" -groups = ["main", "dist"] +groups = ["main", "deploy"] files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -284,7 +252,7 @@ files = [ {file = "cffi-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662"}, {file = "cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824"}, ] -markers = {main = "platform_python_implementation != \"PyPy\"", dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} +markers = {main = "platform_python_implementation != \"PyPy\"", deploy = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} [package.dependencies] pycparser = "*" @@ -556,18 +524,18 @@ toml = ["toml"] [[package]] name = "coveralls" -version = "3.1.0" +version = "3.3.1" description = "Show coverage stats online via coveralls.io" optional = false python-versions = ">= 3.5" groups = ["dev"] files = [ - {file = "coveralls-3.1.0-py2.py3-none-any.whl", hash = "sha256:172fb79c5f61c6ede60554f2cac46deff6d64ee735991fb2124fb414e188bdb4"}, - {file = "coveralls-3.1.0.tar.gz", hash = "sha256:9b3236e086627340bf2c95f89f757d093cbed43d17179d3f4fb568c347e7d29a"}, + {file = "coveralls-3.3.1-py2.py3-none-any.whl", hash = "sha256:f42015f31d386b351d4226389b387ae173207058832fbf5c8ec4b40e27b16026"}, + {file = "coveralls-3.3.1.tar.gz", hash = "sha256:b32a8bb5d2df585207c119d6c01567b81fba690c9c10a753bfe27a335bfc43ea"}, ] [package.dependencies] -coverage = ">=4.1,<6.0" +coverage = ">=4.1,<6.0.dev0 || >6.1,<6.1.1 || >6.1.1,<7.0" docopt = ">=0.6.1" requests = ">=1.0.0" @@ -728,19 +696,19 @@ django = ">=4.2" [[package]] name = "django-debug-toolbar" -version = "3.2.1" +version = "5.1.0" description = "A configurable set of panels that display various debug information about the current request/response." optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "django-debug-toolbar-3.2.1.tar.gz", hash = "sha256:a5ff2a54f24bf88286f9872836081078f4baa843dc3735ee88524e89f8821e33"}, - {file = "django_debug_toolbar-3.2.1-py3-none-any.whl", hash = "sha256:e759e63e3fe2d3110e0e519639c166816368701eab4a47fed75d7de7018467b9"}, + {file = "django_debug_toolbar-5.1.0-py3-none-any.whl", hash = "sha256:c0591e338ee9603bdfce5aebf8d18ca7341fdbb69595e2b0b34869be5857180e"}, + {file = "django_debug_toolbar-5.1.0.tar.gz", hash = "sha256:8a3b9da4aeab8d384a366e20304bd939a451f0242523c5b7b402248ad474eed2"}, ] [package.dependencies] -Django = ">=2.2" -sqlparse = ">=0.2.0" +django = ">=4.2.9" +sqlparse = ">=0.2" [[package]] name = "django-extensions" @@ -916,55 +884,54 @@ vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"] [[package]] name = "factory-boy" -version = "3.2.0" +version = "3.3.3" description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." 
optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "factory_boy-3.2.0-py2.py3-none-any.whl", hash = "sha256:1d3db4b44b8c8c54cdd8b83ae4bdb9aeb121e464400035f1f03ae0e1eade56a4"}, - {file = "factory_boy-3.2.0.tar.gz", hash = "sha256:401cc00ff339a022f84d64a4339503d1689e8263a4478d876e58a3295b155c5b"}, + {file = "factory_boy-3.3.3-py2.py3-none-any.whl", hash = "sha256:1c39e3289f7e667c4285433f305f8d506efc2fe9c73aaea4151ebd5cdea394fc"}, + {file = "factory_boy-3.3.3.tar.gz", hash = "sha256:866862d226128dfac7f2b4160287e899daf54f2612778327dd03d0e2cb1e3d03"}, ] [package.dependencies] Faker = ">=0.7.0" [package.extras] -dev = ["Django", "Pillow", "SQLAlchemy", "coverage", "flake8", "isort", "mongoengine", "tox", "wheel (>=0.32.0)", "zest.releaser[recommended]"] +dev = ["Django", "Pillow", "SQLAlchemy", "coverage", "flake8", "isort", "mongoengine", "mongomock", "mypy", "tox", "wheel (>=0.32.0)", "zest.releaser[recommended]"] doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] [[package]] name = "faker" -version = "8.10.0" +version = "37.1.0" description = "Faker is a Python package that generates fake data for you." optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "Faker-8.10.0-py3-none-any.whl", hash = "sha256:7df5697bc712bdd2f98051246ffd7bbac10104602727053b736e90d8adcaa5ad"}, - {file = "Faker-8.10.0.tar.gz", hash = "sha256:198684f146590986cde75307f12f378c899379ef9f2bc962bd25ddd005b4e7c3"}, + {file = "faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c"}, + {file = "faker-37.1.0.tar.gz", hash = "sha256:ad9dc66a3b84888b837ca729e85299a96b58fdaef0323ed0baace93c9614af06"}, ] [package.dependencies] -python-dateutil = ">=2.4" -text-unidecode = "1.3" +tzdata = "*" [[package]] name = "flake8" -version = "5.0.4" +version = "7.2.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false -python-versions = ">=3.6.1" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"}, - {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"}, + {file = "flake8-7.2.0-py2.py3-none-any.whl", hash = "sha256:93b92ba5bdb60754a6da14fa3b93a9361fd00a59632ada61fd7b130436c40343"}, + {file = "flake8-7.2.0.tar.gz", hash = "sha256:fa558ae3f6f7dbf2b4f22663e5343b6b6023620461f8d4ff2019ef4b5ee70426"}, ] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.9.0,<2.10.0" -pyflakes = ">=2.5.0,<2.6.0" +pycodestyle = ">=2.13.0,<2.14.0" +pyflakes = ">=3.3.0,<3.4.0" [[package]] name = "gevent" @@ -972,7 +939,7 @@ version = "24.11.1" description = "Coroutine-based network library" optional = false python-versions = ">=3.9" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "gevent-24.11.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:92fe5dfee4e671c74ffaa431fd7ffd0ebb4b339363d24d0d944de532409b935e"}, {file = "gevent-24.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b7bfcfe08d038e1fa6de458891bca65c1ada6d145474274285822896a858c870"}, @@ -1033,7 +1000,7 @@ version = "3.1.1" description = "Lightweight in-process concurrent programming" optional = false python-versions = ">=3.7" -groups = ["dist"] +groups = ["deploy"] markers = "platform_python_implementation == \"CPython\"" files = [ {file = 
"greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, @@ -1393,7 +1360,7 @@ version = "10.7.0" description = "New Relic Python Agent" optional = false python-versions = ">=3.7" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "newrelic-10.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08e959814e0b23a8f96383955cceecb6180dc66f240279c45ee8484058f96eb4"}, {file = "newrelic-10.7.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e12b7e88e0d78497b4e3dfca0411a76a548ee15842b9d6ef971035bbdc91693"}, @@ -1460,18 +1427,19 @@ files = [ [[package]] name = "pluggy" -version = "0.13.1" +version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "pluggy-0.13.1-py2.py3-none-any.whl", hash = "sha256:966c145cd83c96502c3c3868f50408687b38434af77734af1e9ca461a4081d2d"}, - {file = "pluggy-0.13.1.tar.gz", hash = "sha256:15b2acde666561e1298d71b523007ed7364de07029219b604cf808bfa1c765b0"}, + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] [package.extras] dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] [[package]] name = "primitive-metadata" @@ -1506,7 +1474,7 @@ version = "1.0.2" description = "psycopg2 integration with coroutine libraries" optional = false python-versions = "*" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "psycogreen-1.0.2.tar.gz", hash = "sha256:c429845a8a49cf2f76b71265008760bcd7c7c77d80b806db4dc81116dbcd130d"}, ] @@ -1531,18 +1499,6 @@ files = [ {file = "psycopg2-2.9.10.tar.gz", hash = "sha256:12ec0b40b0273f95296233e8750441339298e6a572f7039da5b260e3c8b60e11"}, ] -[[package]] -name = "py" -version = "1.11.0" -description = "library with cross-python path, ini-parsing, io, code, log facilities" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -groups = ["dev"] -files = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] - [[package]] name = "py-cpuinfo" version = "9.0.0" @@ -1557,14 +1513,14 @@ files = [ [[package]] name = "pycodestyle" -version = "2.9.1" +version = "2.13.0" description = "Python style guide checker" optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"}, - {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"}, + {file = "pycodestyle-2.13.0-py2.py3-none-any.whl", hash = "sha256:35863c5974a271c7a726ed228a14a4f6daf49df369d8c50cd9a6f58a5e143ba9"}, + {file = "pycodestyle-2.13.0.tar.gz", hash = "sha256:c8415bf09abe81d9c7f872502a6eee881fbe85d8763dd5b9924bb0a01d67efae"}, ] [[package]] @@ -1573,23 +1529,23 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" -groups = ["main", "dist"] +groups = 
["main", "deploy"] files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] -markers = {main = "platform_python_implementation != \"PyPy\"", dist = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} +markers = {main = "platform_python_implementation != \"PyPy\"", deploy = "platform_python_implementation == \"CPython\" and sys_platform == \"win32\""} [[package]] name = "pyflakes" -version = "2.5.0" +version = "3.3.2" description = "passive checker of Python programs" optional = false -python-versions = ">=3.6" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"}, - {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"}, + {file = "pyflakes-3.3.2-py2.py3-none-any.whl", hash = "sha256:5039c8339cbb1944045f4ee5466908906180f13cc99cc9949348d10f82a5c32a"}, + {file = "pyflakes-3.3.2.tar.gz", hash = "sha256:6dfd61d87b97fba5dcfaaf781171ac16be16453be6d816147989e7f6e6a9576b"}, ] [[package]] @@ -1623,67 +1579,63 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pytest" -version = "6.2.4" +version = "8.3.5" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "pytest-6.2.4-py3-none-any.whl", hash = "sha256:91ef2131a9bd6be8f76f1f08eac5c5317221d6ad1e143ae03894b862e8976890"}, - {file = "pytest-6.2.4.tar.gz", hash = "sha256:50bcad0a0b9c5a72c8e4e7c9855a3ad496ca6a881a3641b4260605450772c54b"}, + {file = "pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820"}, + {file = "pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845"}, ] [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<1.0.0a1" -py = ">=1.8.2" -toml = "*" +pluggy = ">=1.5,<2" [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] +dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] [[package]] name = "pytest-benchmark" -version = "3.4.1" +version = "5.1.0" description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer." 
optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = ">=3.9" groups = ["dev"] files = [ - {file = "pytest-benchmark-3.4.1.tar.gz", hash = "sha256:40e263f912de5a81d891619032983557d62a3d85843f9a9f30b98baea0cd7b47"}, - {file = "pytest_benchmark-3.4.1-py2.py3-none-any.whl", hash = "sha256:36d2b08c4882f6f997fd3126a3d6dfd70f3249cde178ed8bbc0b73db7c20f809"}, + {file = "pytest-benchmark-5.1.0.tar.gz", hash = "sha256:9ea661cdc292e8231f7cd4c10b0319e56a2118e2c09d9f50e1b3d150d2aca105"}, + {file = "pytest_benchmark-5.1.0-py3-none-any.whl", hash = "sha256:922de2dfa3033c227c96da942d1878191afa135a29485fb942e85dff1c592c89"}, ] [package.dependencies] py-cpuinfo = "*" -pytest = ">=3.8" +pytest = ">=8.1" [package.extras] aspect = ["aspectlib"] elasticsearch = ["elasticsearch"] -histogram = ["pygal", "pygaljs"] +histogram = ["pygal", "pygaljs", "setuptools"] [[package]] name = "pytest-django" -version = "4.4.0" +version = "4.11.1" description = "A Django plugin for pytest." optional = false -python-versions = ">=3.5" +python-versions = ">=3.8" groups = ["dev"] files = [ - {file = "pytest-django-4.4.0.tar.gz", hash = "sha256:b5171e3798bf7e3fc5ea7072fe87324db67a4dd9f1192b037fed4cc3c1b7f455"}, - {file = "pytest_django-4.4.0-py3-none-any.whl", hash = "sha256:65783e78382456528bd9d79a35843adde9e6a47347b20464eb2c885cb0f1f606"}, + {file = "pytest_django-4.11.1-py3-none-any.whl", hash = "sha256:1b63773f648aa3d8541000c26929c1ea63934be1cfa674c76436966d73fe6a10"}, + {file = "pytest_django-4.11.1.tar.gz", hash = "sha256:a949141a1ee103cb0e7a20f1451d355f83f5e4a5d07bdd4dcfdd1fd0ff227991"}, ] [package.dependencies] -pytest = ">=5.4.0" +pytest = ">=7.0.0" [package.extras] -docs = ["sphinx", "sphinx-rtd-theme"] +docs = ["sphinx", "sphinx_rtd_theme"] testing = ["Django", "django-configurations (>=2.0)"] [[package]] @@ -1711,7 +1663,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "dev"] +groups = ["main"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -1828,7 +1780,7 @@ version = "78.1.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.9" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "setuptools-78.1.0-py3-none-any.whl", hash = "sha256:3e386e96793c8702ae83d17b853fb93d3e09ef82ec62722e61da5cd22376dcd8"}, {file = "setuptools-78.1.0.tar.gz", hash = "sha256:18fd474d4a82a5f83dac888df697af65afa82dec7323d09c3e37d1f14288da54"}, @@ -1849,7 +1801,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "dev"] +groups = ["main"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -1871,30 +1823,6 @@ files = [ dev = ["build", "hatch"] doc = ["sphinx"] -[[package]] -name = "text-unidecode" -version = "1.3" -description = "The most basic Text::Unidecode port" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ - {file 
= "text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93"}, - {file = "text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8"}, -] - -[[package]] -name = "toml" -version = "0.10.2" -description = "Python Library for Tom's Obvious, Minimal Language" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["dev"] -files = [ - {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, - {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, -] - [[package]] name = "typing-extensions" version = "4.13.0" @@ -1918,7 +1846,6 @@ files = [ {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, ] -markers = {dev = "sys_platform == \"win32\""} [[package]] name = "urllib3" @@ -1943,7 +1870,7 @@ version = "2.0.28" description = "The uWSGI server" optional = false python-versions = "*" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "uwsgi-2.0.28.tar.gz", hash = "sha256:79ca1891ef2df14508ab0471ee8c0eb94bd2d51d03f32f90c4bbe557ab1e99d0"}, ] @@ -1978,7 +1905,7 @@ version = "5.0" description = "Very basic event publishing system" optional = false python-versions = ">=3.7" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "zope.event-5.0-py3-none-any.whl", hash = "sha256:2832e95014f4db26c47a13fdaef84cef2f4df37e66b59d8f1f4a8f319a632c26"}, {file = "zope.event-5.0.tar.gz", hash = "sha256:bac440d8d9891b4068e2b5a2c5e2c9765a9df762944bda6955f96bb9b91e67cd"}, @@ -1997,7 +1924,7 @@ version = "7.2" description = "Interfaces for Python" optional = false python-versions = ">=3.8" -groups = ["dist"] +groups = ["deploy"] files = [ {file = "zope.interface-7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ce290e62229964715f1011c3dbeab7a4a1e4971fd6f31324c4519464473ef9f2"}, {file = "zope.interface-7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05b910a5afe03256b58ab2ba6288960a2892dfeef01336dc4be6f1b9ed02ab0a"}, @@ -2049,4 +1976,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = ">=3.13,<3.14" -content-hash = "f7be2d5f032b5455141b352dec94484ade3397d17d0afad7249c9f406e8437e1" +content-hash = "668191024e988e7adb33a94f69036fcc98c1672ac8784364f1edbc18a2339874" diff --git a/pyproject.toml b/pyproject.toml index 2d7a36bda..92d9ecdfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,10 +38,10 @@ dependencies = [ [tool.poetry] package-mode = false -# "dist" dependency group relevant only in deployment: -[tool.poetry.group.dist] +# "deploy" dependency group relevant only in deployment: +[tool.poetry.group.deploy] optional = true -[tool.poetry.group.dist.dependencies] +[tool.poetry.group.deploy.dependencies] uwsgi = "2.0.28" newrelic = "10.7.0" # newrelic APM agent, Custom License gevent = "24.11.1" # MIT From bd055bbef5115a5b336904445be49dfd513baad6 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 10 Apr 2025 15:06:56 -0400 Subject: [PATCH 44/46] finish removing elasticsearch5 --- .github/workflows/run_tests.yml | 6 ------ poetry.lock | 20 +------------------- pyproject.toml | 1 - 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/.github/workflows/run_tests.yml 
b/.github/workflows/run_tests.yml index 8fd2886e4..902d069e0 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -37,12 +37,6 @@ jobs: cluster.initial_master_nodes: singlenode ports: - 9208:9200 - elasticsearch5: - image: elasticsearch:5.4 - env: - ES_JAVA_OPTS: "-Xms512m -Xmx512m" - ports: - - 9205:9200 rabbitmq: image: rabbitmq:management ports: diff --git a/poetry.lock b/poetry.lock index 10716908c..caa54c383 100644 --- a/poetry.lock +++ b/poetry.lock @@ -840,24 +840,6 @@ urllib3 = ">=1.26.2,<3" [package.extras] develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"] -[[package]] -name = "elasticsearch5" -version = "5.5.6" -description = "Python client for Elasticsearch" -optional = false -python-versions = "*" -groups = ["main"] -files = [ - {file = "elasticsearch5-5.5.6-py2.py3-none-any.whl", hash = "sha256:3d95aef3317b1e28288ab8dd2ee38e2a6aae96df14ffcd4ecbea4f681dc4891d"}, - {file = "elasticsearch5-5.5.6.tar.gz", hash = "sha256:331ce226182c75cfdf6b823f9f30b5a555fa91b85f1d05ac9958758150e2e8c7"}, -] - -[package.dependencies] -urllib3 = ">=1.21.1" - -[package.extras] -develop = ["coverage", "mock", "nose", "nosexcover", "pyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-rtd-theme"] - [[package]] name = "elasticsearch8" version = "8.17.2" @@ -1976,4 +1958,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = ">=3.13,<3.14" -content-hash = "668191024e988e7adb33a94f69036fcc98c1672ac8784364f1edbc18a2339874" +content-hash = "30485a69a2a345196896058384368072dcfd8b232699b1c06b28c381c1de721e" diff --git a/pyproject.toml b/pyproject.toml index 92d9ecdfa..d547a2e96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,6 @@ dependencies = [ # to be removed in near-future work: "djangorestframework==3.16.0", # BSD "djangorestframework-jsonapi==7.1.0", # BSD - "elasticsearch5==5.5.6", # Apache 2.0 "PyJWE==1.0.0", # Apache 2.0 ] From 19344e1a1d7738a01882926bad91ed1077f0b97b Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 15 Apr 2025 15:34:15 -0400 Subject: [PATCH 45/46] fix: dockerfile warnings --- Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 217b745b1..dc06aeee8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.13-slim-bullseye as app +FROM python:3.13-slim-bullseye AS app RUN apt-get update \ && apt-get install -y \ @@ -57,8 +57,8 @@ RUN python manage.py collectstatic --noinput ARG GIT_TAG= ARG GIT_COMMIT= -ENV VERSION ${GIT_TAG} -ENV GIT_COMMIT ${GIT_COMMIT} +ENV VERSION=${GIT_TAG} +ENV GIT_COMMIT=${GIT_COMMIT} CMD ["python", "manage.py", "--help"] From e698c8d1c12b73674c8e6bb0bc10318edbc5c751 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 17 Apr 2025 10:08:54 -0400 Subject: [PATCH 46/46] add toml (for coveralls) --- poetry.lock | 14 +++++++++++++- pyproject.toml | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index caa54c383..54c36b738 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1805,6 +1805,18 @@ files = [ dev = ["build", "hatch"] doc = ["sphinx"] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +groups = 
["dev"] +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "typing-extensions" version = "4.13.0" @@ -1958,4 +1970,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = ">=3.13,<3.14" -content-hash = "30485a69a2a345196896058384368072dcfd8b232699b1c06b28c381c1de721e" +content-hash = "7d0393d82289b6ad36d4c0c857126f1e330d7e830800d844e7f1a48c76c15fb6" diff --git a/pyproject.toml b/pyproject.toml index d547a2e96..6ebe81f6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ psycogreen = "1.0.2" # BSD optional = true [tool.poetry.group.dev.dependencies] coveralls = "3.3.1" +toml = "0.10.2" # until coveralls/coverage update to tomllib django-debug-toolbar = "5.1.0" factory-boy = "3.3.3" faker = "37.1.0"