RDFLib · sharon-tickell · Sep 25, 2025 · Sep 25, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,73 @@
-FROM python:3
+FROM python:3 AS pylode-cli
+
+# Upgrade the base-OS packages.
+# Both apt directories are BuildKit cache mounts: their contents never become part of the
+# image layer, so no `apt-get clean` / `rm -rf /var/lib/apt/lists/*` is needed here
+# (running them would only empty the shared build cache).
+# `-y` is required on every apt-get call: without a terminal, apt-get aborts at its
+# confirmation prompt whenever it has something to do.
+RUN --mount=target=/var/lib/apt/lists,type=cache,sharing=locked \
+    --mount=target=/var/cache/apt,type=cache,sharing=locked \
+    apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y upgrade \
+    && DEBIAN_FRONTEND=noninteractive apt-get -y autoremove --purge
+
+# Initialise the python environment
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONIOENCODING="utf-8"
 
 WORKDIR /usr/src/app
+# Install the dependencies before copying the sources so that this layer is only
+# rebuilt when requirements.txt changes
+COPY ./requirements.txt ./
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -r requirements.txt
+
+# Install pyLODE source code as an editable package
 COPY . ./
-COPY schema.ttl ./
-RUN pip3.10 install --no-cache-dir -r requirements.txt
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install -e ./
+
+# Run the pylode-cli command when containers built from this image are launched
+ENTRYPOINT ["python", "-m", "pylode.cli"]
+CMD ["--help"]
+
+
+FROM pylode-cli AS pylode-server
+
+# Install the additional packages required by the standalone pylode server.
+# GUnicorn is deliberately not installed here: the standalone server does not use it,
+# and the pylode-gunicorn stage installs it for itself.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install bs4 falcon validators
+
+# Predefine default values for environment variables that can be used
+# to customise the appearance and behaviour of the pylode-server
+ENV \
+    # Optional URL for a custom CSS stylesheet to be referenced in HTML responses
+    CSS_URL="" \
+    # MIME type for the resource at FAVICON_URL, if that is set (e.g. `image/png`)
+    FAVICON_MIME="image/x-icon" \
+    # Optional URL for a custom favicon image to be referenced in HTML responses
+    FAVICON_URL="" \
+    # Optional Google Analytics tag ID to be used to track requests to this server
+    GTAGID="" \
+    # Log threshold for pyLODE server log messages to appear in the logs.
+    LOG_LEVEL="INFO" \
+    # Port that the web server will listen for requests on
+    PORT="8000"
+
+# Document the default listening port (matches the default value of PORT above)
+EXPOSE 8000
+
+# Run the pyLODE server when the container launches.
+# The empty CMD clears the `--help` default argument inherited from the pylode-cli stage.
+ENTRYPOINT ["python", "-m", "pylode.server"]
+CMD []
+
+FROM pylode-server AS pylode-gunicorn
+
+# Add GUnicorn to the installation (it is the entrypoint of this image)
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install gunicorn
+
+# Define additional GUnicorn-configuration environment variables
+# Note: GUnicorn also respects `PORT` from the base image (when PORT is set and no explicit
+# bind address is given, GUnicorn's default bind address is 0.0.0.0:$PORT)
+ENV \
+    # Configuration options for the GUnicorn web server that make it compatible with docker.
+    # (Log to stdout and stderr, and ensure workers use tmpfs storage)
+    GUNICORN_CMD_ARGS="--access-logfile - --error-logfile - --worker-tmp-dir /dev/shm" \
+    # Number of worker processes that the GUnicorn web server will use to handle requests.
+    # In production, you should set this to 2-4 x the number of CPU cores.
+    WEB_CONCURRENCY=1
+
+# Serve the `api` WSGI application from the pylode.server module under GUnicorn.
+# The application path is kept in CMD so that it can be replaced at `docker run` time.
+ENTRYPOINT ["gunicorn"]
+CMD ["pylode.server:api"]
diff --git a/README.rst b/README.rst
@@ -119,20 +119,21 @@ match exactly the file ``examples/minimal.html``.
 
 * as a docker container
 
-build the docker image:
+build the docker image for the pyLODE Client:
 
 .. code-block:: bash
 
-    docker build -t pylode:latest .
+    docker build --target=pylode-cli -t pylode-cli:latest .
 
-copy the example directory, mount it to the container and run cli.py in the container:
+copy the example directory, mount it to the container and run the container with arguments to ``pylode.cli``:
 
 .. code-block:: bash
 
-    docker  run  --mount 'type=bind,src=<ttl_directory>,target=/app/pylode/data' pylode:latest  python3.10 pylode/cli.py data/<ttl_file> -o data/<html_file>
+    docker run --rm --mount 'type=bind,src=<ttl_directory>,target=/usr/src/app/data' pylode-cli:latest  data/<ttl_file> -o data/<html_file>
 
 Note: ``<ttl_directory>`` must be absolute
 
+
 Module Use
 ^^^^^^^^^^
 
@@ -168,6 +169,74 @@ for SKOS:
 
 This will read from ``some-ontology-file.ttl`` to produce the file ``some-resulting-html-file.html`` in this directory.
 
+
+Local Server Use
+^^^^^^^^^^^^^^^^
+
+The pyLODE server uses the popular `Falcon framework <https://falconframework.org/>`__ to implement a lightweight web api.
+It can be run standalone as a single-threaded, single-process HTTP server, or more robustly as a WSGI application with
+`GUnicorn <https://gunicorn.org/>`__.
+
+In all launch methods listed here, the server will be available at http://localhost:8000 for the landing page and http://localhost:8000/pylode for the active endpoint.
+
+The active endpoint accepts the following query string parameters:
+
+* ``url`` for the absolute URL of the ontology document that you wish to render. The server hosting that ontology document must be capable of responding to Content Negotiation,
+  i.e. it must supply RDF in response to an HTTP ``Accept`` request header for ``text/turtle``, ``application/rdf+xml`` etc.
+* ``profile`` for the profile to use to generate HTML. Must be one of:
+
+  * ``ontpub`` (https://w3id.org/profile/ontpub) for ontologies. This is the default if no ``profile`` is provided.
+  * ``vocpub`` (https://w3id.org/profile/vocpub) for SKOS vocabularies
+  * ``supermodel`` for profiles of profiles
+
+* ``sort`` to indicate whether subjects should be sorted in the rendered output. Must be one of:
+
+  * ``true`` to sort the subjects (this is the default)
+  * ``false`` to NOT sort the subjects
+
+
+.. code-block:: bash
+
+    curl 'localhost:8000/pylode?profile=ontpub&sort=false&url=http://sweetontology.net/sweetAll.ttl'
+
+
+The LODE responses generated by the server can be globally customised by setting the following optional environment variables:
+
+* ``CSS_URL`` can be set to the absolute URL of a CSS stylesheet hosted elsewhere that should be referenced by pyLODE documents
+* ``FAVICON_URL`` can be set to the absolute URL of a favicon image hosted elsewhere that should be referenced by pyLODE documents
+* ``FAVICON_MIME`` should be set to the MIME type of the resource at ``FAVICON_URL`` if that has been configured (e.g. ``image/png``)
+* ``GTAGID`` can be set to a Google Analytics Tag ID that you would like to use for tracking requests to your server.
+
+
+**Launch the pyLODE server standalone from your local directory:**
+
+You will need a few extra python modules installed locally:
+
+.. code-block:: bash
+
+    pip install bs4 falcon validators
+
+You can then run the pyLODE Server in standalone mode like this:
+
+.. code-block:: bash
+
+    python -m pylode.server
+
+
+* Launch the pyLODE server standalone as a docker container
+
+**OR Build and run the docker image for the pyLODE Standalone Server:**
+
+.. code-block:: bash
+
+    docker build --target=pylode-server -t pylode-server:latest .
+    docker run --rm -p 8000:8000 pylode-server:latest
+
+* Launch the pyLODE server under GUnicorn as a docker container
+
+**OR Build and run the docker image for the pyLODE GUnicorn Server:**
+
+.. code-block:: bash
+
+    docker build --target=pylode-gunicorn -t pylode-gunicorn:latest .
+    docker run --rm -p 8000:8000 pylode-gunicorn:latest
+
+
 Examples
 ========
 

diff --git a/pylode/server.py b/pylode/server.py
@@ -0,0 +1,164 @@
+
+import logging
+import os
+import sys
+
+from bs4 import BeautifulSoup as Soup
+import falcon
+import validators
+from wsgiref.simple_server import make_server
+
+from .profiles.ontpub import OntPub
+from .profiles.vocpub import VocPub
+from .profiles.supermodel.html import Supermodel
+
+
+# Configure logging before anything else.
+# An unset *or empty* LOG_LEVEL falls back to INFO: an empty level name would otherwise
+# make the logging module raise a ValueError at import time.
+_log_level = (os.getenv('LOG_LEVEL') or 'INFO').upper()
+if __name__ == '__main__':
+    # Log configuration should be up to this application: `force=True` removes (and closes)
+    # any existing root handlers added by some import somewhere...
+    logging.basicConfig(
+        format='%(asctime)s.%(msecs)03d %(levelname)s [pid=%(process)d %(threadName)s %(name)s:%(lineno)s] %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S',
+        level=_log_level,
+        stream=sys.stdout,
+        force=True
+    )
+elif 'gunicorn.error' in logging.Logger.manager.loggerDict:
+    # This application is running under GUnicorn, which has already initialised logging.
+    # Ensure pylode server logs are directed to the GUnicorn error log.
+    root_logger = logging.getLogger()
+    root_logger.setLevel(_log_level)
+    gunicorn_logger = logging.getLogger('gunicorn.error')
+    for gunicorn_handler in gunicorn_logger.handlers:
+        root_logger.addHandler(gunicorn_handler)
+
+
+# Module-level logger used by the middleware and resources below
+_logger = logging.getLogger(__name__)
+
+
+class HtmlResponseCustomiser:
+    """Falcon middleware that injects operator-configured tags into the ``<head>`` of HTML responses.
+
+    The customisations are read from the ``CSS_URL``, ``FAVICON_URL`` (together with
+    ``FAVICON_MIME``) and ``GTAGID`` environment variables each time a response is processed.
+    """
+
+    def process_response(self, req: falcon.Request, resp: falcon.Response, resource: object, req_succeeded: bool) -> None:
+        """Post-processing of a Falcon app response (after routing).
+
+        The response is left untouched when it is not HTML, when no customisation has
+        been configured, when it has no text body, or when its body contains no
+        ``<head>`` element to inject into.
+
+        Args:
+            req: Request object.
+            resp: Response object.
+            resource: Resource object to which the request was
+                routed. May be None if no route was found
+                for the request.
+            req_succeeded: True if no exceptions were raised while
+                the framework processed and routed the request;
+                otherwise False.
+        """
+        content_type = resp.get_header('content-type', None)
+        # Compare the bare media type only, so that parameters (e.g. `; charset=utf-8`)
+        # and letter case do not stop HTML responses from being recognised.
+        media_type = None if content_type is None else content_type.split(';', 1)[0].strip().lower()
+        if media_type not in (None, 'text/html'):
+            _logger.debug(f'Skipping HTML customisations for {content_type} response')
+            return
+
+        # Determine whether any customisation has been required
+        css_url = os.getenv('CSS_URL')
+        favicon_url = os.getenv('FAVICON_URL')
+        gtag_id = os.getenv('GTAGID')
+        if not (css_url or favicon_url or gtag_id):
+            _logger.debug('No HTML customisations have been configured')
+            return
+
+        if not resp.text:
+            # Nothing to parse (e.g. a response without a text body): parsing `None`
+            # would raise and turn the response into a server error.
+            _logger.debug('Skipping HTML customisations for a response without a text body')
+            return
+
+        # Customisations *have* been configured
+        # Modify whatever HTML is currently in the response object
+        soup = Soup(resp.text, features='html.parser')
+        head = soup.find('head')
+        if head is None:
+            _logger.debug('Skipping HTML customisations for a response without a <head> element')
+            return
+
+        if css_url:
+            _logger.debug('Injecting custom css link')
+            css_tag = soup.new_tag('link')
+            css_tag['rel'] = 'stylesheet'
+            css_tag['href'] = css_url
+            head.append(css_tag)
+
+        if favicon_url:
+            _logger.debug('Injecting custom favicon link')
+            favicon_tag = soup.new_tag('link')
+            favicon_tag['rel'] = 'icon'
+            # An unset *or empty* FAVICON_MIME falls back to the default icon type
+            favicon_tag['type'] = os.getenv('FAVICON_MIME') or 'image/x-icon'
+            favicon_tag['href'] = favicon_url
+            head.append(favicon_tag)
+
+        if gtag_id:
+            _logger.debug('Injecting custom google analytics tag')
+            async_tag = soup.new_tag('script')
+            # `async` is a boolean attribute: set it separately from `src` instead of
+            # packing both into a single (invalid) attribute name.
+            async_tag['async'] = ''
+            async_tag['src'] = f'https://www.googletagmanager.com/gtag/js?id={gtag_id}'
+            gtag = soup.new_tag('script')
+            gtag.string = f"""window.dataLayer = window.dataLayer || [];
+                        function gtag(){{dataLayer.push(arguments);}}
+                        gtag('js', new Date());
+                        gtag('config', '{gtag_id}');"""
+            head.append(async_tag)
+            async_tag.insert_after(gtag)
+
+        resp.text = soup.prettify(formatter='html')
+        if content_type is None:
+            resp.content_type = 'text/html'
+
+
+class InfoResource:
+    """Falcon resource serving the static landing page of the pyLODE server."""
+
+    def on_get(self, req: falcon.Request, resp: falcon.Response) -> None:
+        """Landing page for the pyLODE Web Service.
+
+        Args:
+            req: Request object (not used).
+            resp: Response object that receives the landing page HTML.
+        """
+
+        resp.text = """<html>
+            <head>
+                <title>pyLODE Server</title>
+            </head>
+            <body>
+                <h1>pyLODE Server</h1>
+                <p>This is <a href="https://github.com/rdflib/pyLODE/">pyLODE</a> online via Falcon.</p>
+                <p>To use this server to document ontologies, supply an ontology RDF file to it via the <a href="/pylode">/pylode</a> endpoint.</p>
+                <p>For example, to document the PHS ontology, do this:</p>
+                <ul>
+                    <li><code>http://localhost:8000/pylode?url=https://linked.data.gov.au/def/phs</code></li>
+                </ul>
+                <p>Note that this server will use <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Content_negotiation">Content Negotiation</a> to try and get an RDF response from the URI supplied so the ontology must be served with the appropriate Media Type.</p>
+            </body>
+        </html>"""
+        resp.set_header("content-type", "text/html")
+        resp.status = falcon.HTTP_200
+
+
+class DocResource:
+    """Falcon resource that renders a remote RDF document as pyLODE HTML."""
+
+    def on_get(self, req: falcon.Request, resp: falcon.Response) -> None:
+        """Serves up a pyLODE-converted copy of the RDF document provided by the `url` request parameter
+
+        Query string parameters read from the request:
+            url: required; must pass `validators.url`.
+            sort: optional boolean, defaults to true (a blank value counts as true).
+            profile: optional, case-insensitive; one of `ontpub` (default),
+                `supermodel` or `vocpub`.
+
+        Raises:
+            falcon.HTTPBadRequest: if `url` is not a valid URL or `profile` is not recognised.
+        """
+        url = req.get_param('url', required=True)
+        if not validators.url(url):
+            _logger.error(f"Failing pyLODE request for invalid url parameter '{url}'")
+            raise falcon.HTTPBadRequest(
+                description='`url` parameter value must be a valid absolute URL'
+            )
+
+        sort_subjects = req.get_param_as_bool('sort', required=False, blank_as_true=True, default=True)
+        profile = req.get_param("profile", required=False, default="ontpub").lower()
+        _logger.info(f"Processing pyLODE request for '{profile}' rendering of '{url}' with sort_subjects = {sort_subjects}")
+
+        # Dispatch table from profile name to the class that renders it
+        renderers = {
+            "ontpub": OntPub,
+            "supermodel": Supermodel,
+            "vocpub": VocPub,
+        }
+        renderer = renderers.get(profile)
+        if renderer is None:
+            _logger.error(f"Failing pyLODE request for invalid profile parameter '{profile}'")
+            raise falcon.HTTPBadRequest(
+                description='Unrecognised `profile` parameter value'
+            )
+
+        # Render first, so that the response is only touched once rendering has succeeded
+        rendered_html = renderer(url, sort_subjects=sort_subjects).make_html(include_css=True)
+        resp.text = rendered_html
+        resp.set_header("content-type", "text/html")
+        resp.status = falcon.HTTP_200
+
+
+# Initialise the Web Application: the middleware customises HTML responses,
+# "/" serves the landing page and "/pylode" serves the rendered documents.
+api = falcon.App(middleware=[HtmlResponseCustomiser()])
+api.add_route("/", InfoResource())
+api.add_route("/pylode", DocResource())
+
+
+if __name__ == '__main__':
+    # Launch a standalone HTTP server.
+    # An unset *or empty* PORT falls back to 8000 (`int('')` would raise a ValueError).
+    listen_port = int(os.getenv('PORT') or 8000)
+    # An empty host makes the server listen on all interfaces
+    with make_server('', listen_port, api) as httpd:
+        _logger.info(f'pyLODE server listening on port {listen_port}')
+        # Serve until process is killed
+        httpd.serve_forever()