Skip to content
This repository has been archived by the owner on Oct 2, 2019. It is now read-only.

Commit

Permalink
pickled VOCABS working
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Car committed Apr 11, 2019
1 parent 209052a commit 03afcac
Show file tree
Hide file tree
Showing 15 changed files with 701 additions and 215 deletions.
8 changes: 4 additions & 4 deletions DATA_SOURCES.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ Here you see vocabularies with IDs 'rva-50', 'rva-52', 'tenement_type' & 'Test_R

The controlled list of source types (`VocabSource.FILE`, `VocabSource.VOCBENCH` etc.) are handled by dedicated *source* Python code classes that present a standard set of methods for each type. The files currently implemented, all in the `data/` folder, are:

* `source_RVA.py` - RVA
* `source_FILE.py` - FILE
* `source_VOCBENCH.py` - VOCBENCH
* `RVA.py` - RVA
* `FILE.py` - FILE
* `VOCBENCH.py` - VOCBENCH

Additional source files for other vocabulary data sources can be made by creating new `source_*.py` files inheriting from `source.py`.

The specific requirements for each source are contained within their particular files but, summarising the requirements for the sources already catered for, Vocabularies from RVA need to have endpoints specified in the vocab source file `data/source_RVA.py` so VocPrez knows where to get info from. RDF files in `data/` will automatically be picked up by VocPrez so don;t need any more config than a title, provided the ID matched the file name, minus file extension. Vocabs from VocBench require that a `VB_ENDPOINT`, `VB_USER` & `VB_PASSWORD` are all given in the config file.
The specific requirements for each source are contained within their particular files but, summarising the requirements for the sources already catered for, Vocabularies from RVA need to have endpoints specified in the vocab source file `data/RVA.py` so VocPrez knows where to get info from. RDF files in `data/` will automatically be picked up by VocPrez so don't need any more config than a title, provided the ID matches the file name, minus file extension. Vocabs from VocBench require that a `VB_ENDPOINT`, `VB_USER` & `VB_PASSWORD` are all given in the config file.
4 changes: 1 addition & 3 deletions _config/template.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from os import path
from data.source_FILE import FILE
from data.source_RVA import RVA

# RVA doesn't need to be imported as its list_vocabularies method isn't used - vocabs from that are statically listed
from data.source_VOCBENCH import VOCBENCH

APP_DIR = path.dirname(path.dirname(path.realpath(__file__)))
TEMPLATES_DIR = path.join(APP_DIR, 'view', 'templates')
Expand Down
57 changes: 40 additions & 17 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,49 @@
import logging
import _config
from flask import Flask
import _config as config
from flask import Flask, g
from controller import routes
import helper
from data.source_FILE import FILE
from data.source_RVA import RVA
from data.source_VOCBENCH import VOCBENCH
import data.source as source
import os
import pickle

app = Flask(__name__, template_folder=_config.TEMPLATES_DIR, static_folder=_config.STATIC_DIR)
app = Flask(__name__, template_folder=config.TEMPLATES_DIR, static_folder=config.STATIC_DIR)

app.register_blueprint(routes.routes)


@app.before_first_request
def start_up_tasks():
VOCBENCH.init()
RVA.init()
FILE.init()
# extend this instances' list of vocabs by using the known sources
VOCABS = {**_config.VOCABS, **FILE.list_vocabularies()} # picks up all vocab RDF (turtle) files in data/
# VOCABS = {**VOCABS, **VOCBENCH.list_vocabularies()} # picks up all vocabs at the relevant VocBench instance
print('Finished startup tasks.')
@app.before_request
def before_request():
    """
    Runs before every request and populates the vocab index (g.VOCABS).

    The index is taken from the pickled VOCABS.p file in config.APP_DIR when that file exists. Otherwise it is
    rebuilt by calling collect() on the source class named by each entry in config.VOCAB_SOURCES, and the result
    is pickled to VOCABS.p for future requests.

    :return: nothing
    """
    # check to see if g.VOCABS exists, if so, do nothing
    # NOTE(review): Flask's g normally lives for a single application context, so this guard is presumably
    # only hit if something earlier in the same request has already set g.VOCABS - confirm
    if hasattr(g, 'VOCABS'):
        return

    # we have no g.VOCABS so try and load it from a pickled VOCABS.p file
    vocabs_file_path = os.path.join(config.APP_DIR, 'VOCABS.p')
    if os.path.isfile(vocabs_file_path):
        # the with block closes the file, so no explicit close() is needed
        with open(vocabs_file_path, 'rb') as f:
            g.VOCABS = pickle.load(f)
        return

    # we haven't been able to load from VOCABS.p so run collect() on each vocab source to recreate it

    # check each vocab source and,
    # using the appropriate class (from details['source']),
    # load all the vocabs from it into this session's (g) VOCABS variable
    # NOTE(review): collect()'s return value is not used here, so it is presumably expected to add its
    # vocabs to g.VOCABS itself - verify against the source classes
    g.VOCABS = {}
    for details in config.VOCAB_SOURCES.values():
        getattr(source, details['source']).collect(details)

    # also load all vocabs into VOCABS.p on disk for future use
    with open(vocabs_file_path, 'wb') as f:
        pickle.dump(g.VOCABS, f)


@app.context_processor
Expand All @@ -35,9 +58,9 @@ def context_processor():

# run the Flask app
if __name__ == '__main__':
logging.basicConfig(filename=_config.LOGFILE,
logging.basicConfig(filename=config.LOGFILE,
level=logging.DEBUG,
datefmt='%Y-%m-%d %H:%M:%S',
format='%(asctime)s %(levelname)s %(filename)s:%(lineno)s %(message)s')

app.run(debug=_config.DEBUG, threaded=True)
app.run(debug=config.DEBUG, threaded=True)
63 changes: 45 additions & 18 deletions controller/routes.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,24 @@
from flask import Blueprint, Response, request, render_template
from flask import Blueprint, Response, request, render_template, Markup, g
from model.vocabulary import VocabularyRenderer
from model.concept import ConceptRenderer
from model.collection import CollectionRenderer
from model.skos_register import SkosRegisterRenderer
import _config as config
import markdown
from flask import Markup
from data.source import Source
from data.source_VOCBENCH import VbException
from data.source._source import Source
from data.source.VOCBENCH import VbException
import json

routes = Blueprint('routes', __name__)


def render_invalid_vocab_id_response():
msg = """The vocabulary ID that was supplied was not known. It must be one of these: \n\n* """ + '\n* '.join(config.VOCABS.keys())
msg = """The vocabulary ID that was supplied was not known. It must be one of these: \n\n* """ + '\n* '.join(g.VOCABS.keys())
msg = Markup(markdown.markdown(msg))
return render_template('error.html', title='Error - invalid vocab id', heading='Invalid Vocab ID', msg=msg)
# return Response(
# 'The vocabulary ID you\'ve supplied is not known. Must be one of:\n ' +
# '\n'.join(config.VOCABS.keys()),
# '\n'.join(g.VOCABS.keys()),
# status=400,
# mimetype='text/plain'
# )
Expand All @@ -43,14 +42,14 @@ def render_invalid_object_class_response(vocab_id, uri, c_type):
return render_template('error.html', title='Error - Object Class URI', heading='Concept Class Type Error', msg=msg)


def get_a_vocab_source_key():
def get_a_vocab_key():
"""
Get the first key from the config.VOCABS dictionary.
Get the first key from the g.VOCABS dictionary.
:return: Key name
:rtype: str
"""
return next(iter(config.VOCABS))
return next(iter(g.VOCABS))


@routes.route('/')
Expand All @@ -64,6 +63,16 @@ def index():
)


def get_a_vocab_source_key():
    """
    Get the first key from the g.VOCABS dictionary.

    Raises StopIteration if g.VOCABS is empty, since next() is called without a default.

    :return: Key name
    :rtype: str
    """
    return next(iter(g.VOCABS))


def match(vocabs, query):
"""
Generate a generator of vocabulary items that match the search query
Expand All @@ -84,15 +93,15 @@ def vocabularies():
per_page = int(request.values.get('per_page')) if request.values.get('per_page') is not None else 20

# TODO: replace this logic with the following
# 1. read all static vocabs from config.VOCABS
# 1. read all static vocabs from g.VOCABS
# get this instance's list of vocabs
vocabs = []
for k, v in config.VOCABS.items():
vocabs = [] # local copy (to this request) for sorting
for k, v in g.VOCABS.items():
v['vocab_id'] = k
v['uri'] = request.base_url + k
vocabs.append(v)
vocabs.sort(key=lambda item: item['title'])
total = len(config.VOCABS.items())
total = len(g.VOCABS.items())

# Search
query = request.values.get('search')
Expand Down Expand Up @@ -124,7 +133,7 @@ def vocabularies():

@routes.route('/vocabulary/<vocab_id>')
def vocabulary(vocab_id):
if vocab_id not in config.VOCABS.keys():
if vocab_id not in g.VOCABS.keys():
return render_invalid_vocab_id_response()

# get vocab details using appropriate source handler
Expand All @@ -141,7 +150,7 @@ def vocabulary(vocab_id):

@routes.route('/vocabulary/<vocab_id>/concept/')
def vocabulary_list(vocab_id):
if vocab_id not in config.VOCABS.keys():
if vocab_id not in g.VOCABS.keys():
return render_invalid_vocab_id_response()

v = Source(vocab_id, request)
Expand Down Expand Up @@ -169,7 +178,7 @@ def vocabulary_list(vocab_id):
request,
[],
concepts,
config.VOCABS[vocab_id]['title'] + ' concepts',
g.VOCABS[vocab_id]['title'] + ' concepts',
total,
search_query=query,
search_enabled=True,
Expand Down Expand Up @@ -206,10 +215,10 @@ def object():
uri = request.values.get('uri')

# check this vocab ID is known
if vocab_id not in config.VOCABS.keys():
if vocab_id not in g.VOCABS.keys():
return Response(
'The vocabulary ID you\'ve supplied is not known. Must be one of:\n ' +
'\n'.join(config.VOCABS.keys()),
'\n'.join(g.VOCABS.keys()),
status=400,
mimetype='text/plain'
)
Expand Down Expand Up @@ -264,3 +273,21 @@ def about():
navs={},
content=content
)


@routes.route('/test')
def test():
    """
    Debug route: returns the contents of the pickled vocab index (VOCABS.p) as plain text.

    If VOCABS.p does not exist in config.APP_DIR, an empty plain-text response is returned.

    :return: a text/plain Response containing str() of the unpickled object, or an empty body
    """
    txt = ''
    # for vocab_id, details in g.VOCABS.items():
    #     txt = txt + '{}: {}\n'.format(vocab_id, details['title'])

    # imported locally, as in the original, so this route does not rely on module-level imports
    import os
    import pickle
    vocabs_file_path = os.path.join(config.APP_DIR, 'VOCABS.p')
    if os.path.isfile(vocabs_file_path):
        # the with block closes the file, so no explicit close() is needed
        with open(vocabs_file_path, 'rb') as f:
            txt = str(pickle.load(f))

    return Response(txt, mimetype='text/plain')
33 changes: 16 additions & 17 deletions data/source_FILE.py → data/source/FILE.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from data.source import Source
from os.path import dirname, realpath, join, abspath
from data.source._source import Source
from os.path import join
import _config as config
from rdflib import Graph, URIRef, RDF
from rdflib.namespace import SKOS, DCTERMS, DC, OWL
from rdflib.namespace import SKOS, DCTERMS, OWL
import os
import pickle
from helper import APP_DIR, make_title
Expand Down Expand Up @@ -46,9 +46,9 @@ def init():
f.close()

# Get register item metadata
for vocab_id in config.VOCABS:
if vocab_id in config.VOCABS:
if config.VOCABS[vocab_id]['source'] != config.VocabSource.FILE:
for vocab_id in g.VOCABS:
if vocab_id in g.VOCABS:
if g.VOCABS[vocab_id]['source'] != config.VocabSource.FILE:
continue

# Creators
Expand All @@ -58,7 +58,7 @@ def init():
for creator in g.objects(uri, DCTERMS.creator):
creators.append(str(creator))
break
config.VOCABS[vocab_id]['creators'] = creators
g.VOCABS[vocab_id]['creators'] = creators

# Date Created
date_created = None
Expand All @@ -71,22 +71,21 @@ def init():
for uri in g.subjects(RDF.type, SKOS.ConceptScheme):
for date in g.objects(uri, DCTERMS.date):
date_created = str(date)[:10]
config.VOCABS[vocab_id]['date_created'] = date_created
g.VOCABS[vocab_id]['date_created'] = date_created

# Date Modified
date_modified = None
for uri in g.subjects(RDF.type, SKOS.ConceptScheme):
for date in g.objects(uri, DCTERMS.modified):
date_modified = str(date)[:10]
config.VOCABS[vocab_id]['date_modified'] = date_modified
g.VOCABS[vocab_id]['date_modified'] = date_modified

# Version
version = None
for uri in g.subjects(RDF.type, SKOS.ConceptScheme):
for versionInfo in g.objects(uri, OWL.versionInfo):
version = versionInfo
config.VOCABS[vocab_id]['version'] = version

g.VOCABS[vocab_id]['version'] = version

@classmethod
def list_vocabularies(self):
Expand All @@ -103,8 +102,8 @@ def list_vocabularies(self):
# TODO: Move this to list_concepts() method
# list concepts
vocabs = {}
# for v in config.VOCABS:
# if config.VOCABS[v]['source'] == config.VocabSource.FILE:
# for v in g.VOCABS:
# if g.VOCABS[v]['source'] == config.VocabSource.FILE:
# g = FILE.load_pickle(v)
# for s, p, o in g.triples((None, SKOS.inScheme, None)):
# if s not in vocabs:
Expand Down Expand Up @@ -267,8 +266,8 @@ def get_collection(self, uri):
pass

def get_concept(self, uri):
if config.VOCABS[self.vocab_id].get('turtle'):
g = Graph().parse(config.VOCABS[self.vocab_id]['turtle'])
if g.VOCABS[self.vocab_id].get('turtle'):
g = Graph().parse(g.VOCABS[self.vocab_id]['turtle'])
else:
g = Graph().parse(os.path.join(APP_DIR, 'vocab_files', self.vocab_id + '.ttl'), format='turtle')

Expand Down Expand Up @@ -523,8 +522,8 @@ def build_concept_hierarchy(vocab_id):
raise Exception('topConcept not found')

def get_object_class(self, uri):
if config.VOCABS[self.vocab_id].get('turtle'):
g = Graph().parse(config.VOCABS[self.vocab_id]['turtle'], format='turtle')
if g.VOCABS[self.vocab_id].get('turtle'):
g = Graph().parse(g.VOCABS[self.vocab_id]['turtle'], format='turtle')
else:
g = Graph().parse(os.path.join(APP_DIR, 'vocab_files', self.vocab_id + '.ttl'), format='turtle')
for s, p, o in g.triples((URIRef(uri), RDF.type, SKOS.Concept)):
Expand Down
2 changes: 1 addition & 1 deletion data/source_GITHUB.py → data/source/GITHUB.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from data.source import Source
from data.source._source import Source
from os.path import dirname, realpath, join, abspath
import _config as config
from rdflib import Graph
Expand Down
Loading

0 comments on commit 03afcac

Please sign in to comment.