Skip to content

Refactor RdfsImportEngine #152

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
1d99dbc
Refactor postgres into an extra
multimeric Dec 17, 2024
0630620
Clean up other dependencies
multimeric Dec 17, 2024
ff6ec53
Slots correctly being attached to classes
multimeric Dec 18, 2024
517f475
Refactor slot and class generation into separate methods for clarity
multimeric Dec 18, 2024
c512723
Fix all type errors
multimeric Dec 18, 2024
fa08fc7
Add RDFS domain and range to default mappings
multimeric Jan 6, 2025
9b2c83a
Normalise schema.org to HTTP
multimeric Jan 6, 2025
5219645
Remove some unused prefix namespaces
multimeric Jan 7, 2025
41f2ac4
Use --format in CLI
multimeric Jan 7, 2025
9fe5c15
Remove unused prefixes
multimeric Jan 7, 2025
65869ba
Don't delete prefixes used in imports
multimeric Jan 8, 2025
8f682d8
Remove everything mkdocs related
multimeric Dec 18, 2024
3e31b42
add stub of a dbml importer
multimeric Jan 31, 2025
fccf9ef
remove uk processing for now
sierra-moxon Jan 9, 2025
a3b1f23
add tests
sierra-moxon Jan 10, 2025
2676364
fix tests
sierra-moxon Jan 10, 2025
5472613
fix tests
sierra-moxon Jan 10, 2025
f2b5a8f
remove commented out multi-column unique indexes
sierra-moxon Jan 10, 2025
ef9f794
Merge branch 'main' of github.com:linkml/schema-automator into cleanu…
multimeric Jan 31, 2025
e44be56
Merge branch 'cleanup-deps' of github.com:multimeric/schema-automator…
multimeric Jan 31, 2025
d1ba4b1
Use a property set
multimeric Jan 31, 2025
49c78ac
Correctly pass --schema-name flag to importer
multimeric Jan 31, 2025
9a65077
FOAF test
multimeric Feb 2, 2025
9c36fa4
Infer schema metadata from RDFS
multimeric Feb 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions .github/workflows/build-deploy-documentation.yaml

This file was deleted.

3,632 changes: 1,824 additions & 1,808 deletions poetry.lock

Large diffs are not rendered by default.

17 changes: 11 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -33,25 +33,29 @@ packages = [
[tool.poetry.dependencies]
python = "^3.9"
linkml = "^1.7.4"
mkdocs = ">=1.2.3"
pandas = ">=1.3.5"
python-dateutil = ">=2.8.2"
jsonpatch = ">=1.32"
quantulum3 = ">=0.7.9"
funowl = ">=0.2.3"
click-log = ">=0.4.0"
psycopg2-binary = "^2.9.2"
psycopg2-binary = { version = "^2.9.2", optional = true }
strsimpy = ">=0.2.1"
requests = ">=2.26.0"
oaklib = ">=0.5.25"
pandera = ">=0.12.0"
tomlkit = ">=0.11.4"
inflect = ">=6.0.0"
schemasheets = ">=0.1.24"
xmltodict = "^0.13.0"
click-default-group = "^1.2.4"
linkml-runtime = "^1.7.2"
duckdb = "^0.10.1"
duckdb = { version = "^0.10.1", optional = true }
click = "^8.1.7"
deprecated = "^1.2.15"
mariadb = {version = "^1.1.11", optional = true}
sqlalchemy = "^2.0.36"
ruamel-yaml = "^0.18.6"
rdflib = "^7.1.1"
jsonasobj2 = "^1.0.4"
deprecation = "^2.1.0"
numpy = "<2.0"
pydbml = "^1.1.2"

@@ -84,6 +88,7 @@ extract-schema = "schema_automator.utils.schema_extractor:cli"
[tool.poetry.extras]
# Every dependency marked `optional = true` needs an extra here; Poetry only
# installs optional dependencies when an extra that names them is requested.
docs = ["Sphinx", "sphinx-pdj-theme", "sphinxcontrib-mermaid"]
duckdb = ["duckdb"]
mariadb = ["mariadb"]
postgres = ["psycopg2-binary"]

[tool.codespell]
# Ref: https://github.com/codespell-project/codespell#using-a-config-file
6 changes: 3 additions & 3 deletions schema_automator/cli.py
Original file line number Diff line number Diff line change
@@ -473,15 +473,15 @@ def import_owl(owlfile, output, **args):
@click.argument('rdfsfile')
@output_option
@schema_name_option
@click.option('--input-type', '-I',
@click.option('--format', '-f',
default='turtle',
help="Input format, eg. turtle")
@click.option('--identifier', '-I', help="Slot to use as identifier")
@click.option('--model-uri', help="Model URI prefix")
@click.option('--metamodel-mappings',
help="Path to metamodel mappings YAML dictionary")
@click.option('--output', '-o', help="Path to saved yaml schema")
def import_rdfs(rdfsfile, output, metamodel_mappings, **args):
def import_rdfs(rdfsfile: str, output: str, metamodel_mappings: str, schema_name: str, **args):
"""
Import an RDFS schema to LinkML

@@ -494,7 +494,7 @@ def import_rdfs(rdfsfile, output, metamodel_mappings, **args):
with open(metamodel_mappings) as f:
mappings_obj = yaml.safe_load(f)
sie = RdfsImportEngine(initial_metamodel_mappings=mappings_obj)
schema = sie.convert(rdfsfile, **args)
schema = sie.convert(rdfsfile, name=schema_name, **args)
write_schema(schema, output)

@main.command()
304 changes: 204 additions & 100 deletions schema_automator/importers/rdfs_import_engine.py

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions tests/resources/foaf_snippet.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

foaf:knows rdf:type owl:ObjectProperty ;
rdfs:domain foaf:Person ;
rdfs:range foaf:Person ;
rdfs:comment "A person known by this person (indicating some level of reciprocated interaction between the parties)." ;
rdfs:isDefinedBy <http://xmlns.com/foaf/0.1/> ;
rdfs:label "knows" .

foaf:Person rdf:type owl:Class ;
rdfs:subClassOf <http://www.w3.org/2003/01/geo/wgs84_pos#SpatialThing> ,
foaf:Agent ;
owl:disjointWith foaf:Project ;
rdfs:comment "A person." ;
rdfs:isDefinedBy <http://xmlns.com/foaf/0.1/> ;
rdfs:label "Person" .
53 changes: 48 additions & 5 deletions tests/test_importers/test_rdfs_importer.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

"""Test the module can be imported."""

from io import StringIO
import unittest
import os
import yaml
@@ -16,8 +17,55 @@

REPRO = os.path.join(INPUT_DIR, 'reproschema.ttl')
OUTSCHEMA = os.path.join(OUTPUT_DIR, 'reproschema-from-ttl.yaml')
FOAF = os.path.join(INPUT_DIR, 'foaf_snippet.ttl')


def test_import_foaf():
    """Convert the FOAF snippet and check class/slot counts, the slot range and the prefix."""
    view = SchemaView(RdfsImportEngine().convert(FOAF))
    # Size checks on the generated schema.
    class_count = len(view.all_classes())
    assert class_count == 3
    slot_count = len(view.all_slots())
    assert slot_count == 1
    # The single slot should point at the Person class.
    knows_slot = view.get_slot("knows")
    assert knows_slot.range == "Person"
    # Prefix information is read from the underlying schema object.
    generated = view.schema
    assert generated.default_prefix == "foaf"
    assert "foaf" in generated.prefixes

def test_comment_description():
    """
    An rdfs:comment on a class should end up as that class's description.
    """
    turtle_source = """
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
<http://example.org/Class> a rdfs:Class ;
rdfs:comment "A class." .
"""
    converted = RdfsImportEngine().convert(StringIO(turtle_source))
    described_class = SchemaView(converted).get_class("Class")
    assert described_class.description == "A class."

def test_infer_prefix():
    """
    With no name, id or default prefix supplied, the importer should derive all three from the prefixes used in the input.
    """
    turtle_source = """
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix foo: <https://foo.com> .
foo:Class a rdfs:Class ;
rdfs:comment "A class." .
foo:prop a rdfs:Property ;
rdfs:comment "A property." .
"""
    inferred = RdfsImportEngine().convert(StringIO(turtle_source))
    # Nothing was passed explicitly, so every value below must come from the "foo" prefix.
    assert inferred.default_prefix == "foo"
    assert inferred.id == "https://foo.com"
    assert inferred.name == "foo"

def test_from_rdfs():
"""Test OWL conversion."""
@@ -26,7 +74,6 @@ def test_from_rdfs():
write_schema(schema, OUTSCHEMA)
# roundtrip
s = YAMLGenerator(OUTSCHEMA).serialize()
print(s[0:100])
sv = SchemaView(OUTSCHEMA)
activity = sv.get_class("Activity")
assert activity
@@ -36,7 +83,3 @@ def test_from_rdfs():
assert len(slots) == 1
slot = slots[0]
assert slot.name == "id"