Skip to content

Commit

Permalink
small fixes; plus new function p89_falls_within
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Dec 13, 2024
1 parent 8e852c9 commit af6f38d
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 33 deletions.
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ exclude =
build
dist
env
venv
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,41 @@ g.serialize(format="ttl")
rdfs:label "Tallinn"@und ;
ns1:P2_has_type <http://hansi/4/ever/alt-label> .
```

### link a place (E53_Place) to its parent place with P89_falls_within

```python
domain = "https://foo/bar/"
subj = URIRef(f"{domain}place__237979")
sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<place xml:id="place__237979">
<placeName>Lerchenfelder G&#252;rtel 48</placeName>
<desc type="entity_type">Wohngeb&#228;ude (K.WHS)</desc>
<desc type="entity_type_id">36</desc>
<location type="coords">
<geo>48,209035 16,339257</geo>
</location>
<location>
<placeName ref="place__50">Wien</placeName>
<geo>48,208333 16,373056</geo>
</location>
</place>
</TEI>"""
doc = ET.fromstring(sample)
node = doc.xpath(".//tei:place[1]", namespaces=NSMAP)[0]
g = p89_falls_within(
subj, node, domain, location_id_xpath="./tei:location/tei:placeName/@ref"
)
result = g.serialize(format="ttl")
```
returns
```ttl
@prefix ns1: <http://www.cidoc-crm.org/cidoc-crm/> .
<https://foo/bar/place__237979> ns1:P89_falls_within <https://foo/bar/place__50> .
```

### normalize_string

```python
Expand Down
50 changes: 33 additions & 17 deletions acdh_cidoc_pyutils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,24 +343,13 @@ def make_e42_identifiers(
type_domain = f"{type_domain}/"
app_uri = URIRef(f"{subj}/identifier/{xml_id}")
type_uri = URIRef(f"{type_domain}idno/xml-id")
approx_uri = URIRef(f"{type_domain}date/approx")
g.add((approx_uri, RDF.type, CIDOC["E55_Type"]))
g.add((approx_uri, RDFS.label, Literal("approx")))
g.add((type_uri, RDF.type, CIDOC["E55_Type"]))
g.add((subj, CIDOC["P1_is_identified_by"], app_uri))
g.add((app_uri, RDF.type, CIDOC["E42_Identifier"]))
g.add((app_uri, RDFS.label, Literal(label_value, lang=lang)))
g.add((app_uri, RDF.value, Literal(normalize_string(xml_id))))
g.add((app_uri, CIDOC["P2_has_type"], type_uri))
events_types = {}
for i, x in enumerate(node.xpath(".//tei:event[@type]", namespaces=NSMAP)):
events_types[x.attrib["type"]] = x.attrib["type"]
if events_types:
for i, x in enumerate(events_types.keys()):
event_type_uri = URIRef(f"{type_domain}event/{x}")
g.add((event_type_uri, RDF.type, CIDOC["E55_Type"]))
g.add((event_type_uri, RDFS.label, Literal(x, lang=default_lang)))
for i, x in enumerate(node.xpath(".//tei:idno", namespaces=NSMAP)):
for i, x in enumerate(node.xpath("./tei:idno", namespaces=NSMAP)):
idno_type_base_uri = f"{type_domain}idno"
if x.text:
idno_uri = URIRef(f"{subj}/identifier/idno/{i}")
Expand Down Expand Up @@ -398,7 +387,7 @@ def make_occupations(
id_xpath=False,
default_lang="de",
not_known_value="undefined",
special_label=None
special_label=None,
):
g = Graph()
occ_uris = []
Expand All @@ -423,7 +412,9 @@ def make_occupations(
occ_uris.append(occ_uri)
g.add((occ_uri, RDF.type, FRBROO["F51_Pursuit"]))
if special_label:
g.add((occ_uri, RDFS.label, Literal(f"{special_label}{occ_text}", lang=lang)))
g.add(
(occ_uri, RDFS.label, Literal(f"{special_label}{occ_text}", lang=lang))
)
else:
g.add((occ_uri, RDFS.label, Literal(occ_text, lang=lang)))
g.add((subj, CIDOC["P14i_performed"], occ_uri))
Expand All @@ -450,9 +441,6 @@ def make_affiliations(
lang="en",
):
g = Graph()
xml_id = node.attrib["{http://www.w3.org/XML/1998/namespace}id"]
item_id = f"{domain}{xml_id}"
subj = URIRef(item_id)
for i, x in enumerate(node.xpath(".//tei:affiliation", namespaces=NSMAP)):
try:
affiliation_id = x.xpath(org_id_xpath, namespaces=NSMAP)[0]
Expand Down Expand Up @@ -556,3 +544,31 @@ def make_birth_death_entities(
place_uri = URIRef(f"{domain}{place_node}")
g.add((event_uri, CIDOC["P7_took_place_at"], place_uri))
return (g, event_uri, time_stamp_uri)


def p89_falls_within(
    subj: URIRef,
    node: Element,
    domain: URIRef,
    location_id_xpath="./tei:location[@type='located_in_place']/tei:placeName/@key",
) -> Graph:
    """Link a place to the place that contains it via P89_falls_within.

    The containing place's ID is read from ``node`` with ``location_id_xpath``;
    only the first match is used. Its URI is built as ``{domain}{ID}``.

    Args:
        subj (URIRef): URI of the place that falls within another place.
        node (Element): The tei:place element the XPath is evaluated against.
        domain (URIRef): Prefix prepended to the matched ID to form the URI
            of the containing place.
        location_id_xpath (str, optional): XPath selecting the ID of the
            containing place. Defaults to
            "./tei:location[@type='located_in_place']/tei:placeName/@key".

    Returns:
        Graph: A graph holding the single P89_falls_within triple, or an
        empty graph when the XPath yields no match.
    """
    graph = Graph()
    matches = node.xpath(location_id_xpath, namespaces=NSMAP)
    try:
        parent_id = matches[0]
    except IndexError:
        # Nothing matched: hand back the empty graph untouched.
        pass
    else:
        parent_uri = URIRef(f"{domain}{parent_id}")
        graph.add((subj, CIDOC["P89_falls_within"], parent_uri))
    return graph
6 changes: 4 additions & 2 deletions acdh_cidoc_pyutils/namespaces.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from rdflib import Namespace

CIDOC = Namespace("http://www.cidoc-crm.org/cidoc-crm/")
FRBROO = Namespace("https://cidoc-crm.org/frbroo/sites/default/files/FRBR2.4-draft.rdfs#")
FRBROO = Namespace(
"https://cidoc-crm.org/frbroo/sites/default/files/FRBR2.4-draft.rdfs#"
)
INT = Namespace("https://w3id.org/lso/intro/beta202304#")
SCHEMA = Namespace("https://schema.org/")

Expand All @@ -20,5 +22,5 @@
"to": "end",
"to-iso": "end",
"when": "when",
"when-iso": "when"
"when-iso": "when",
}
11 changes: 5 additions & 6 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
black
coverage>=6.4.4,<7
flake8>=5.0.4,<6
pytest>=7.1.3,<8
lxml
acdh_tei_pyutils
python-slugify>=8.0.1
coverage
flake8
pytest
acdh-tei-pyutils
python-slugify
rdflib
58 changes: 50 additions & 8 deletions tests/test_cidoc_pyutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
make_birth_death_entities,
make_occupations,
make_affiliations,
p89_falls_within,
)
from acdh_cidoc_pyutils.namespaces import NSMAP, CIDOC

Expand Down Expand Up @@ -141,12 +142,12 @@ def test_004_create_e52(self):
e52 = create_e52(uri, end_of_end=begin_of_begin)
self.assertTrue(begin_of_begin in graph_string)
e52 = create_e52(uri, begin_of_begin=begin_of_begin, end_of_end=begin_of_begin)
e52.serialize('e52.ttl')
e52.serialize("e52.ttl")
self.assertTrue('rdfs:label "1234-05-06"^^xsd:string' in f"{e52.serialize()}")
e52 = create_e52(uri, begin_of_begin="1222", end_of_end=begin_of_begin)
e52.serialize('e52.ttl')
e52.serialize("e52.ttl")
self.assertFalse('rdfs:label "1234-05-06"^^xsd:string' in f"{e52.serialize()}")
e52.serialize('e521.ttl')
e52.serialize("e521.ttl")

def test_005_normalize_string(self):
string = """\n\nhallo
Expand Down Expand Up @@ -228,7 +229,10 @@ def test_007_make_appellations(self):
subj = URIRef(item_id)
g.add((subj, RDF.type, CIDOC["hansi"]))
g += make_appellations(
subj, x, type_domain="https://sk.acdh.oeaw.ac.at/types", default_lang="it"
subj,
x,
type_domain="https://sk.acdh.oeaw.ac.at/types",
default_lang="it",
)
data = g.serialize(format="turtle")
g.serialize("appellation.ttl", format="turtle")
Expand All @@ -247,7 +251,10 @@ def test_008_make_e42_identifiers(self):
subj = URIRef(item_id)
g.add((subj, RDF.type, CIDOC["hansi"]))
g += make_e42_identifiers(
subj, x, type_domain="https://sk.acdh.oeaw.ac.at/types", default_lang="it"
subj,
x,
type_domain="https://sk.acdh.oeaw.ac.at/types",
default_lang="it",
)
data = g.serialize(format="turtle")
g = Graph()
Expand Down Expand Up @@ -283,7 +290,7 @@ def test_008_make_e42_identifiers(self):
default_lang="it",
set_lang=True,
same_as=False,
default_prefix=default_prefix
default_prefix=default_prefix,
)
data = g.serialize(format="turtle")
self.assertTrue("@it" in data)
Expand Down Expand Up @@ -413,7 +420,9 @@ def test_011_occupation(self):
g, uris = make_occupations(subj, x)
self.assertFalse("occupation/hansi" in g.serialize(format="turtle"))
g.serialize("occupations.ttl")
g1, uris = make_occupations(subj, x, id_xpath="@key", not_known_value="ronjaundhanna")
g1, uris = make_occupations(
subj, x, id_xpath="@key", not_known_value="ronjaundhanna"
)
g1.serialize("occupations1.ttl")
self.assertTrue("occupation/hansi" in g1.serialize(format="turtle"))
self.assertTrue("ronjaundhanna" in g1.serialize(format="turtle"))
Expand Down Expand Up @@ -474,6 +483,39 @@ def test_012_affiliations(self):
domain,
person_label=person_label,
org_id_xpath="./tei:orgName[1]/@key",
org_label_xpath="./tei:orgName[1]//text()"
org_label_xpath="./tei:orgName[1]//text()",
)
g.serialize("affiliations1.ttl")

def test_013_p89_falls_within(self):
    """p89_falls_within emits the parent link only when the XPath matches.

    The sample place carries its parent ID in ``@ref`` and has no ``@key``,
    so the ``@ref`` XPath must yield the parent URI and the ``@key`` XPath
    must not.
    """
    domain = "https://foo/bar/"
    subj = URIRef(f"{domain}place__237979")
    expected_parent = "https://foo/bar/place__50"
    xpath_with_match = "./tei:location/tei:placeName/@ref"
    xpath_without_match = "./tei:location/tei:placeName/@key"
    sample = """
<TEI xmlns="http://www.tei-c.org/ns/1.0">
<place xml:id="place__237979">
<placeName>Lerchenfelder G&#252;rtel 48</placeName>
<desc type="entity_type">Wohngeb&#228;ude (K.WHS)</desc>
<desc type="entity_type_id">36</desc>
<location type="coords">
<geo>48,209035 16,339257</geo>
</location>
<location>
<placeName ref="place__50">Wien</placeName>
<geo>48,208333 16,373056</geo>
</location>
</place>
</TEI>"""
    tree = ET.fromstring(sample)
    place = tree.xpath(".//tei:place[1]", namespaces=NSMAP)[0]

    # XPath that hits the @ref attribute: the parent URI must be present.
    linked = p89_falls_within(
        subj, place, domain, location_id_xpath=xpath_with_match
    )
    ttl = linked.serialize(format="ttl")
    self.assertTrue(expected_parent in ttl)
    linked.serialize("p89.ttl")

    # XPath that targets a non-existent @key: the graph must stay empty of it.
    unlinked = p89_falls_within(
        subj, place, domain, location_id_xpath=xpath_without_match
    )
    ttl = unlinked.serialize(format="ttl")
    self.assertFalse(expected_parent in ttl)

0 comments on commit af6f38d

Please sign in to comment.