diff --git a/Dockerfile b/Dockerfile
index 490f6cc..9e2e26f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,73 @@
-FROM python:3
+FROM python:3 AS pylode-cli
+
+# Upgrade the base-OS packages (every apt-get call that may prompt passes -y: image builds have no TTY to answer)
+RUN --mount=target=/var/lib/apt/lists,type=cache,sharing=locked \
+    --mount=target=/var/cache/apt,type=cache,sharing=locked \
+    apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y upgrade \
+    && apt-get clean \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y autoremove --purge \
+    && rm -rf /var/lib/apt/lists/*
+
+# Initialise the python environment
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONIOENCODING="utf-8"
 
 WORKDIR /usr/src/app
+COPY ./requirements.txt ./
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+# Install pyLODE source code as an editable package
 COPY . ./
-COPY schema.ttl ./
-RUN pip3.10 install --no-cache-dir -r requirements.txt
+RUN pip install -e ./
+
+# Run the pylode-cli command when containers built from this image are launched
+ENTRYPOINT ["python", "-m", "pylode.cli"]
+CMD ["--help"]
+
+
+FROM pylode-cli AS pylode-server
+
+# Install some additional packages required by the pylode server (gunicorn is added by the pylode-gunicorn stage)
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install bs4 falcon validators
+
+# Predefine default (empty) values for environment variables that can be used
+# to customise the appearance and behaviour of the pylode-server
+ENV \
+    # Optional URL for a custom CSS stylesheet to be referenced in HTML responses
+    CSS_URL="" \
+    # MIME type for the resource at FAVICON_URL, if that is set (e.g. `image/png`)
+    FAVICON_MIME="image/x-icon" \
+    # Optional URL for a custom favicon image to be referenced in HTML responses
+    FAVICON_URL="" \
+    # Optional Google Analytics tag ID to be used to track requests to this server
+    GTAGID="" \
+    # Log threshold for pyLODE server log messages to appear in the logs.
+    LOG_LEVEL="INFO" \
+    # Port that the web server will listen for requests on
+    PORT="8000"
+
+# Run the pyLODE server when the container launches
+ENTRYPOINT ["python", "-m", "pylode.server"]
+CMD []
+
+FROM pylode-server AS pylode-gunicorn
+
+# Add GUnicorn to the installation
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install gunicorn
+
+# Define additional GUnicorn-configuration environment variables
+# Note: GUnicorn also respects `PORT` from the base image
+ENV \
+    # Configuration options for the GUnicorn web server that make it compatible with docker.
+    # (Log to stdout and stderr, and ensure workers use tmpfs storage)
+    GUNICORN_CMD_ARGS="--access-logfile - --error-logfile - --worker-tmp-dir /dev/shm" \
+    # Number of worker processes that the GUnicorn web server will use to handle requests.
+    # In production, you should set this to 2-4 x the number of CPU cores.
+    WEB_CONCURRENCY=1
+
+ENTRYPOINT ["gunicorn"]
+CMD ["pylode.server:api"]
diff --git a/README.rst b/README.rst index 9c4d90f..8612ff6 100644 --- a/README.rst +++ b/README.rst @@ -119,20 +119,21 @@ match exactly the file ``examples/minimal.html``. * as a docker container -build the docker image: +build the docker image for the pyLODE Client: .. code-block:: bash - docker build -t pylode:latest . + docker build --target=pylode-cli -t pylode-cli:latest . -copy the example directory, mount it to the container and run cli.py in the container: +copy the example directory, mount it to the container and run the container with arguments to `pylode.cli` .. code-block:: bash - docker run --mount 'type=bind,src=,target=/app/pylode/data' pylode:latest python3.10 pylode/cli.py data/ -o data/ + docker run --rm --mount 'type=bind,src=,target=/usr/src/app/data' pylode-cli:latest data/ -o data/ Note: ```` must be absolute + Module Use ^^^^^^^^^^ @@ -168,6 +169,74 @@ for SKOS: This will read from ``some-ontology-file.ttl`` to produce the file ``some-resulting-html-file.html`` in this directory.
+ +Local Server Use +^^^^^^^^^^^^^^^^ + +The pyLODE server uses the popular `Falcon framework <https://falconframework.org/>`__ to implement a lightweight web api. +It can be run standalone as a single-thread, single process HTTP server, or more robustly as a WSGI application with +`GUnicorn <https://gunicorn.org/>`__. + +In all launch methods listed here, the server will be available at http://localhost:8000 for the landing page and http://localhost:8000/pylode for the active endpoint. + +The active endpoint accepts the following querystring parameters: +* ``url`` for the absolute URL of the ontology document that you wish to render. The server hosting that ontology document must be capable of responding to Content Negotiation, +i.e. it must supply RDF according to an HTTP `Accept` request for `text/turtle`, `application/rdf+xml` etc. +* ``profile`` for the profile to use to generate HTML. Must be one of: + * ``ontpub`` (https://w3id.org/profile/ontpub) for ontologies. This is the default if no ``profile`` is provided. + * ``vocpub`` (https://w3id.org/profile/vocpub) for SKOS vocabularies + * ``supermodel`` for profiles of profiles +* ``sort`` to indicate whether subjects should be sorted in the rendered output. Must be one of: + * ``true`` to sort the subjects (this is the default) + * ``false`` to NOT sort the subjects + + +.. code-block:: bash + + curl 'localhost:8000/pylode?profile=ontpub&sort=false&url=http://sweetontology.net/sweetAll.ttl' + + +The LODE responses generated by the server can be globally customised by setting the following optional environment variables: + +* ``CSS_URL`` can be set to the absolute URL of a CSS stylesheet hosted elsewhere that should be referenced by pyLODE documents +* ``FAVICON_URL`` can be set to the absolute URL of a favicon image hosted elsewhere that should be referenced by pyLODE documents +* ``FAVICON_MIME`` should be set to the MIME type of the resource at ``FAVICON_URL`` if that has been configured (e.g. 
``image/png``) +* ``GTAGID`` can be set to a Google Analytics Tag ID that you would like to use for tracking requests to your server. + + +**Launch the pyLODE server standalone from your local directory:** + +You will need a few extra python modules installed locally: + +.. code-block:: bash + + pip install bs4 falcon validators + +You can then run the pyLODE Server in standalone mode like this: + +.. code-block:: bash + + python -m pylode.server + + +* Launch the pyLODE server standalone as a docker container + +**OR Build and run the docker image for the pyLODE Standalone Server:** + +.. code-block:: bash + + docker build --target=pylode-server -t pylode-server:latest . + docker run --rm -p 8000:8000 pylode-server:latest + +* Launch the pyLODE server under GUnicorn as a docker container + +**OR Build and run the docker image for the pyLODE GUnicorn Server:** + +.. code-block:: bash + + docker build --target=pylode-gunicorn -t pylode-gunicorn:latest . + docker run --rm -p 8000:8000 pylode-gunicorn:latest + + Examples ======== diff --git a/pylode/server.py b/pylode/server.py new file mode 100644 index 0000000..35fa8ae --- /dev/null +++ b/pylode/server.py @@ -0,0 +1,164 @@ + +import logging +import os +import sys + +from bs4 import BeautifulSoup as Soup +import falcon +import validators +from wsgiref.simple_server import make_server + +from .profiles.ontpub import OntPub +from .profiles.vocpub import VocPub +from .profiles.supermodel.html import Supermodel + + +# Configure logging before anything else +if __name__ == '__main__': + # Log configuration should be up to this application + for handler in logging.root.handlers[:]: + # remove existing handler added by some import somewhere... 
+        logging.root.removeHandler(handler)
+    logging.basicConfig(
+        format=f'%(asctime)s.%(msecs)03d %(levelname)s [pid=%(process)d %(threadName)s %(name)s:%(lineno)s] %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S',
+        level=os.getenv('LOG_LEVEL', 'INFO').upper(),
+        stream=sys.stdout
+    )
+elif 'gunicorn.error' in logging.Logger.manager.loggerDict:
+    # This application is running under GUnicorn, which has already initialised logging.
+    # Ensure pylode server logs are directed to the GUnicorn error log.
+    root_logger = logging.getLogger()
+    root_logger.setLevel(os.getenv('LOG_LEVEL', 'INFO').upper())
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    for gunicorn_handler in gunicorn_logger.handlers:
+        root_logger.addHandler(gunicorn_handler)
+
+
+_logger = logging.getLogger(__name__)
+
+
+class HtmlResponseCustomiser:
+    """Falcon middleware that injects optional customisations into HTML responses.
+
+    The customisations are driven by the `CSS_URL`, `FAVICON_URL`, `FAVICON_MIME`
+    and `GTAGID` environment variables, which are read on every response.
+    """
+
+    def process_response(self, req: falcon.Request, resp: falcon.Response, resource: object, req_succeeded: bool) -> None:
+        """Post-processing of a Falcon app response (after routing).
+
+        Appends a stylesheet link, a favicon link and/or a Google Analytics
+        snippet to the `<head>` element of HTML responses, depending on which
+        environment variables are set. Responses with a non-HTML content type,
+        or without a `<head>` element, are left untouched.
+
+        Args:
+            req: Request object.
+            resp: Response object.
+            resource: Resource object to which the request was
+                routed. May be None if no route was found
+                for the request.
+            req_succeeded: True if no exceptions were raised while
+                the framework processed and routed the request;
+                otherwise False.
+        """
+        content_type = resp.get_header('content-type', None)
+        # Compare the bare media type only: a header such as
+        # `text/html; charset=utf-8` still describes an HTML response
+        media_type = None if content_type is None else content_type.split(';', 1)[0].strip().lower()
+        if media_type is not None and media_type != 'text/html':
+            _logger.debug(f'Skipping HTML customisations for {content_type} response')
+            return
+
+        # Determine whether any customisation has been required
+        css_url = os.getenv('CSS_URL')
+        favicon_url = os.getenv('FAVICON_URL')
+        gtag_id = os.getenv('GTAGID')
+        if not (css_url or favicon_url or gtag_id):
+            _logger.debug('No HTML cutomisations have been configured')
+            return
+
+        # Customisations *have* been configured
+        # Modify whatever HTML is currently in the response object
+        soup = Soup(resp.text or '', features='html.parser')
+        head = soup.find('head')
+        if head is None:
+            # Nothing to attach the tags to (e.g. the response has no text body)
+            _logger.debug('Skipping HTML customisations for response without a <head> element')
+            return
+
+        if css_url:
+            _logger.debug('Injecting custom css link')
+            css_tag = soup.new_tag('link')
+            css_tag['rel'] = 'stylesheet'
+            css_tag['href'] = css_url
+            head.append(css_tag)
+
+        if favicon_url:
+            _logger.debug('Injecting custom favicon link')
+            favicon_tag = soup.new_tag('link')
+            favicon_tag['rel'] = 'icon'
+            favicon_tag['type'] = os.getenv('FAVICON_MIME', 'image/x-icon')
+            favicon_tag['href'] = favicon_url
+            head.append(favicon_tag)
+
+        if gtag_id:
+            _logger.debug('Injecting custom google analytics tag')
+            async_tag = soup.new_tag('script')
+            # `async` and `src` are two separate attributes; a single
+            # 'async src' key would produce one invalid attribute name
+            async_tag['async'] = ''
+            async_tag['src'] = f'https://www.googletagmanager.com/gtag/js?id={gtag_id}'
+            gtag = soup.new_tag('script')
+            gtag.string = f"""window.dataLayer = window.dataLayer || [];
+            function gtag(){{dataLayer.push(arguments);}}
+            gtag('js', new Date());
+            gtag('config', '{gtag_id}');"""
+            head.append(async_tag)
+            async_tag.insert_after(gtag)
+
+        resp.text = soup.prettify(formatter='html')
+        if content_type is None:
+            resp.content_type = 'text/html'
+
+
+class InfoResource:
+    def on_get(self, req: falcon.Request, resp: falcon.Response) -> None:
+        """Landing page for the pyLODE Web Service."""
+
+        resp.text = """ + + pyLODE Server + + +

pyLODE Server

+

This is pyLODE online via Falcon.

+

To use this server to document ontologies, supply an ontology RDF file to it via the /pylode endpoint.

+

For example, to document the PHS ontology, do this:

+
    +
  • http://localhost:8000/pylode?url=https://linked.data.gov.au/def/phs
  • +
+

Note that this server will use Content Negotiation to try and get an RDF response from the URI supplied so the ontology must be served with the appropriate Media Type.

+ + """
+        resp.set_header("content-type", "text/html")
+        resp.status = falcon.HTTP_200
+
+
+class DocResource:
+    def on_get(self, req: falcon.Request, resp: falcon.Response) -> None:
+        """Render the RDF document named by the `url` request parameter as pyLODE HTML.
+
+        Query parameters read from the request:
+            url: required; must pass `validators.url`.
+            sort: optional boolean, defaults to True.
+            profile: optional, case-insensitive; one of `ontpub` (default),
+                `supermodel` or `vocpub`.
+
+        Raises:
+            falcon.HTTPBadRequest: if `url` fails validation or `profile`
+                is not one of the supported names.
+        """
+        url = req.get_param('url', required=True)
+        if not validators.url(url):
+            _logger.error(f"Failing pyLODE request for invalid url parameter '{url}'")
+            raise falcon.HTTPBadRequest(
+                description='`url` parameter value must be a valid absolute URL'
+            )
+
+        sort_subjects = req.get_param_as_bool('sort', required=False, blank_as_true=True, default=True)
+        profile = req.get_param("profile", required=False, default="ontpub").lower()
+        _logger.info(f"Processing pyLODE request for '{profile}' rendering of '{url}' with sort_subjects = {sort_subjects}")
+
+        # Each supported profile name maps to the class that renders it
+        renderers = {
+            "ontpub": OntPub,
+            "supermodel": Supermodel,
+            "vocpub": VocPub,
+        }
+        renderer = renderers.get(profile)
+        if renderer is None:
+            _logger.error(f"Failing pyLODE request for invalid profile parameter '{profile}'")
+            raise falcon.HTTPBadRequest(
+                description='Unrecognised `profile` parameter value'
+            )
+
+        rendered_doc = renderer(url, sort_subjects=sort_subjects)
+        resp.text = rendered_doc.make_html(include_css=True)
+        resp.set_header("content-type", "text/html")
+        resp.status = falcon.HTTP_200
+
+
+# Initialise the Web Application
+api = falcon.App(middleware=[HtmlResponseCustomiser()])
+for _route, _resource in (("/", InfoResource()), ("/pylode", DocResource())):
+    api.add_route(_route, _resource)
+
+
+if __name__ == '__main__':
+    # Launch a standalone HTTP server on the port named by PORT (default 8000)
+    with make_server('', int(os.getenv('PORT', 8000)), api) as standalone_server:
+        # Serve until process is killed
+        standalone_server.serve_forever()