Skip to content

Commit 0489a5d

Browse files
author
Evgenii Grigorev
committed
Add custom .ttl serialiser
1 parent 8dab7c8 commit 0489a5d

File tree

7 files changed

+416
-16
lines changed

7 files changed

+416
-16
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package cz.cvut.spipes.util;
2+
3+
import org.apache.jena.riot.*;
4+
5+
public class CustomLangs {
6+
public static final Lang SPIPES_TURTLE = LangBuilder.create("SPIPES-TURTLE", "text/spipes+turtle").build();
7+
public static final RDFFormat SPIPES_FORMAT = new RDFFormat(SPIPES_TURTLE);
8+
9+
static {
10+
RDFWriterRegistry.register(SPIPES_FORMAT, (WriterGraphRIOTFactory) (lang) -> new SPipesTurtleWriter());
11+
}
12+
}

s-pipes-core/src/main/java/cz/cvut/spipes/util/JenaUtils.java

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,7 @@
44
import org.apache.jena.graph.Graph;
55
import org.apache.jena.graph.compose.MultiUnion;
66
import org.apache.jena.rdf.model.*;
7-
import org.apache.jena.riot.Lang;
8-
import org.apache.jena.riot.RDFWriter;
9-
import org.apache.jena.riot.RIOT;
7+
import org.apache.jena.riot.*;
108
import org.apache.jena.util.FileUtils;
119
import org.apache.jena.util.iterator.ExtendedIterator;
1210
import org.apache.jena.vocabulary.OWL;
@@ -15,15 +13,10 @@
1513
import org.slf4j.Logger;
1614
import org.slf4j.LoggerFactory;
1715

18-
import java.io.ByteArrayInputStream;
19-
import java.io.IOException;
20-
import java.io.InputStream;
21-
import java.io.OutputStream;
16+
import java.io.*;
2217
import java.nio.file.Files;
2318
import java.nio.file.Path;
24-
import java.util.ArrayList;
25-
import java.util.Comparator;
26-
import java.util.List;
19+
import java.util.*;
2720
import java.util.stream.Stream;
2821

2922
public class JenaUtils {
@@ -154,12 +147,11 @@ public static void write(OutputStream outputStream, Model model, Lang lang){
154147
* @param outputStream output stream to write data to
155148
* @param model rdf data to write
156149
*/
157-
public static void writeScript(OutputStream outputStream, Model model){
150+
public static void writeScript(OutputStream outputStream, Model model) {
158151
RDFWriter.create()
159-
.source(model)
160-
.lang(Lang.TTL)
161-
.set(RIOT.multilineLiterals, true)
162-
.output(outputStream);
152+
.format(new RDFFormat(CustomLangs.SPIPES_TURTLE))
153+
.source(model)
154+
.output(outputStream);
163155
}
164156

165157
/**
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
package cz.cvut.spipes.util;
2+
3+
import org.apache.jena.datatypes.xsd.XSDDatatype;
4+
import org.apache.jena.rdf.model.*;
5+
import org.apache.jena.vocabulary.RDF;
6+
import org.jetbrains.annotations.NotNull;
7+
8+
import java.io.OutputStream;
9+
import java.io.OutputStreamWriter;
10+
import java.io.PrintWriter;
11+
import java.nio.charset.StandardCharsets;
12+
import java.util.*;
13+
import java.util.stream.Collectors;
14+
15+
public class SPipesFormatter {
16+
17+
private static final Comparator<Property> PREDICATE_ORDER = (p1, p2) -> {
18+
if (p1.equals(p2)) return 0;
19+
if (RDF.type.equals(p1)) return -1;
20+
if (RDF.type.equals(p2)) return 1;
21+
return p1.getURI().compareTo(p2.getURI());
22+
};
23+
24+
private final Model model;
25+
private final Map<String, String> ns;
26+
private final Map<Resource, Map<Property, List<RDFNode>>> subjectMap = new LinkedHashMap<>();
27+
28+
private final Map<Resource, Integer> inDegree = new HashMap<>();
29+
private final Map<Resource, String> bnodeLabels = new LinkedHashMap<>();
30+
private int bCounter = 0;
31+
32+
public SPipesFormatter(Model model) {
33+
this.model = model;
34+
this.ns = model.getNsPrefixMap();
35+
buildSubjectMap();
36+
}
37+
38+
public void writeTo(OutputStream outputStream) {
39+
var writer = new PrintWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8), false);
40+
writePrefixes(writer);
41+
writeTriples(writer);
42+
writer.flush();
43+
}
44+
45+
private void buildSubjectMap() {
46+
StmtIterator stmtIter = model.listStatements();
47+
while (stmtIter.hasNext()) {
48+
Statement stmt = stmtIter.nextStatement();
49+
Resource subj = stmt.getSubject();
50+
Property pred = stmt.getPredicate();
51+
RDFNode obj = stmt.getObject();
52+
53+
subjectMap
54+
.computeIfAbsent(subj, k -> new LinkedHashMap<>())
55+
.computeIfAbsent(pred, k -> new ArrayList<>())
56+
.add(obj);
57+
58+
if (obj.isAnon()) {
59+
Resource br = obj.asResource();
60+
inDegree.merge(br, 1, Integer::sum);
61+
}
62+
}
63+
for (Resource subj : subjectMap.keySet()) {
64+
if (subj.isAnon() && inDegreeOf(subj) > 1) {
65+
allocLabel(subj);
66+
}
67+
}
68+
}
69+
70+
private void writePrefixes(PrintWriter writer) {
71+
List<String> priority = List.of("owl", "rdf", "rdfs", "skos", "sm", "sml", "sp", "spin", "xsd");
72+
73+
Comparator<Map.Entry<String, String>> prefixComparator = (e1, e2) -> {
74+
if (e1.getKey().isEmpty() && !e2.getKey().isEmpty()) return -1;
75+
if (!e1.getKey().isEmpty() && e2.getKey().isEmpty()) return 1;
76+
int i1 = priority.indexOf(e1.getKey());
77+
int i2 = priority.indexOf(e2.getKey());
78+
if (i1 != -1 && i2 != -1) return Integer.compare(i1, i2);
79+
if (i1 != -1) return -1;
80+
if (i2 != -1) return 1;
81+
return e1.getKey().compareToIgnoreCase(e2.getKey());
82+
};
83+
84+
ns.entrySet().stream()
85+
.sorted(prefixComparator)
86+
.forEach(e -> writer.printf("@prefix %s: <%s> .%n", e.getKey(), e.getValue()));
87+
88+
writer.println();
89+
}
90+
91+
92+
private void writeTriples(PrintWriter writer) {
93+
List<Resource> subjects = getSubjects();
94+
95+
for (Resource subject : subjects) {
96+
if (subject.isAnon() && !hasLabel(subject) && inDegreeOf(subject) >= 1) {
97+
continue;
98+
}
99+
if (subject.isAnon() && !hasLabel(subject) && inDegreeOf(subject) == 0) {
100+
writer.println(formatBNodeAsPropertyList(subject, new HashSet<>()));
101+
continue;
102+
}
103+
104+
writer.println(formatNode(subject));
105+
106+
Map<Property, List<RDFNode>> predMap = new TreeMap<>(PREDICATE_ORDER);
107+
predMap.putAll(subjectMap.get(subject));
108+
109+
if (predMap.isEmpty()) {
110+
writer.println(" .\n");
111+
continue;
112+
}
113+
114+
List<Map.Entry<Property, List<RDFNode>>> predEntries = new ArrayList<>(predMap.entrySet());
115+
for (Map.Entry<Property, List<RDFNode>> predEntry : predEntries) {
116+
String predStr = RDF.type.equals(predEntry.getKey()) ? "a" : formatNode(predEntry.getKey());
117+
118+
List<String> objStrs = predEntry.getValue().stream()
119+
.map(this::formatNode)
120+
.toList();
121+
122+
for (String objStr : objStrs) {
123+
writer.println(" " + predStr + " " + objStr + " ;");
124+
}
125+
}
126+
127+
writer.println(" .");
128+
}
129+
}
130+
131+
@NotNull
132+
private List<Resource> getSubjects() {
133+
List<Resource> subjects = new ArrayList<>(subjectMap.keySet());
134+
subjects.sort((a, b) -> {
135+
int ca = a.isURIResource() ? 0 : (hasLabel(a) ? 1 : 2);
136+
int cb = b.isURIResource() ? 0 : (hasLabel(b) ? 1 : 2);
137+
if (ca != cb) return Integer.compare(ca, cb);
138+
if (a.isURIResource() && b.isURIResource()) return a.getURI().compareTo(b.getURI());
139+
if (hasLabel(a) && hasLabel(b)) return getLabel(a).compareTo(getLabel(b));
140+
return 0;
141+
});
142+
return subjects;
143+
}
144+
145+
private String formatNode(RDFNode node) {
146+
if (node.isLiteral()) {
147+
return formatLiteral(node.asLiteral());
148+
} else if (node.isAnon()) {
149+
Resource br = node.asResource();
150+
if (hasLabel(br)) return getLabel(br);
151+
return formatBNodeAsPropertyList(br, new HashSet<>());
152+
} else if (node.isURIResource()) {
153+
return formatURI(node.asResource());
154+
} else {
155+
return node.toString();
156+
}
157+
}
158+
159+
private String formatURI(Resource res) {
160+
String uri = res.getURI();
161+
for (var e : ns.entrySet()) {
162+
if (uri.startsWith(e.getValue())) {
163+
return e.getKey() + ":" + uri.substring(e.getValue().length());
164+
}
165+
}
166+
return "<" + uri + ">";
167+
}
168+
169+
private String formatLiteral(Literal lit) {
170+
String value = lit.getString();
171+
boolean multiline = value.contains("\n") || value.contains("\r");
172+
String escaped = escapeString(value, multiline);
173+
String lex = multiline ? "\"\"\"" + escaped + "\"\"\"" : "\"" + escaped + "\"";
174+
175+
String lang = lit.getLanguage();
176+
if (lang != null && !lang.isEmpty()) {
177+
return lex + "@" + lang;
178+
}
179+
180+
String dt = lit.getDatatypeURI();
181+
if (dt != null && !dt.equals(XSDDatatype.XSDstring.getURI())) {
182+
return lex + "^^" + formatURI(ResourceFactory.createResource(dt));
183+
}
184+
185+
return lex;
186+
}
187+
188+
private String escapeString(String s, boolean multiline) {
189+
StringBuilder b = new StringBuilder();
190+
for (int i = 0; i < s.length(); i++) {
191+
char c = s.charAt(i);
192+
switch (c) {
193+
case '\\': b.append("\\\\"); break;
194+
case '"':
195+
if (!multiline) {
196+
b.append("\\\"");
197+
} else {
198+
if (i + 2 < s.length() && s.charAt(i+1) == '"' && s.charAt(i+2) == '"') {
199+
b.append("\\\"\\\"\\\"");
200+
i += 2;
201+
} else {
202+
b.append('"');
203+
}
204+
}
205+
break;
206+
case '\n': b.append(multiline ? "\n" : "\\n"); break;
207+
case '\r': b.append(multiline ? "\r" : "\\r"); break;
208+
case '\t': b.append("\\t"); break;
209+
case '\b': b.append("\\b"); break;
210+
case '\f': b.append("\\f"); break;
211+
default:
212+
if (c < 0x20) b.append(String.format("\\u%04X", (int) c));
213+
else b.append(c);
214+
}
215+
}
216+
return b.toString();
217+
}
218+
219+
private String formatBNodeAsPropertyList(Resource blank, Set<Resource> path) {
220+
if (hasLabel(blank)) return getLabel(blank);
221+
if (!path.add(blank)) return allocLabel(blank);
222+
223+
List<Statement> props = model.listStatements(blank, null, (RDFNode) null).toList();
224+
if (props.isEmpty()) return "[]";
225+
226+
props.sort(Comparator
227+
.comparing(Statement::getPredicate, PREDICATE_ORDER)
228+
.thenComparing(s -> formatNode(s.getObject())));
229+
230+
StringBuilder builder = new StringBuilder("[ ");
231+
for (Statement stmt : props) {
232+
String predStr = stmt.getPredicate().equals(RDF.type) ? "a" : formatNode(stmt.getPredicate());
233+
String objStr = formatNodeWithPath(stmt.getObject(), path);
234+
builder.append(predStr).append(" ").append(objStr).append(" ; ");
235+
}
236+
builder.append("]");
237+
path.remove(blank);
238+
return builder.toString();
239+
}
240+
241+
private String formatNodeWithPath(RDFNode node, Set<Resource> path) {
242+
if (node.isAnon()) {
243+
Resource br = node.asResource();
244+
if (hasLabel(br)) return getLabel(br);
245+
if (inDegreeOf(br) <= 1) return formatBNodeAsPropertyList(br, path);
246+
return allocLabel(br);
247+
}
248+
return formatNode(node);
249+
}
250+
251+
private int inDegreeOf(Resource r) { return inDegree.getOrDefault(r, 0); }
252+
private boolean hasLabel(Resource r) { return bnodeLabels.containsKey(r); }
253+
private String getLabel(Resource r) { return bnodeLabels.get(r); }
254+
private String allocLabel(Resource r) {
255+
return bnodeLabels.computeIfAbsent(r, k -> "_:b" + (bCounter++));
256+
}
257+
258+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package cz.cvut.spipes.util;
2+
3+
import org.apache.commons.io.output.WriterOutputStream;
4+
import org.apache.jena.graph.Graph;
5+
import org.apache.jena.rdf.model.Model;
6+
import org.apache.jena.rdf.model.ModelFactory;
7+
import org.apache.jena.riot.Lang;
8+
import org.apache.jena.riot.RiotException;
9+
import org.apache.jena.riot.WriterGraphRIOT;
10+
import org.apache.jena.riot.system.PrefixMap;
11+
import org.apache.jena.sparql.util.Context;
12+
13+
import java.io.IOException;
14+
import java.io.OutputStream;
15+
import java.io.Writer;
16+
import java.nio.charset.StandardCharsets;
17+
18+
public class SPipesTurtleWriter implements WriterGraphRIOT {
19+
20+
@Override
21+
public void write(OutputStream out, Graph graph, PrefixMap prefixMap, String baseURI, Context context) {
22+
Model model = ModelFactory.createModelForGraph(graph);
23+
SPipesFormatter formatter = new SPipesFormatter(model);
24+
formatter.writeTo(out);
25+
try {
26+
out.flush();
27+
} catch (IOException e) {
28+
throw new RiotException("Failed to flush output", e);
29+
}
30+
}
31+
32+
@Override
33+
public void write(Writer writer, Graph graph, PrefixMap prefixMap, String baseURI, Context context) {
34+
write(new WriterOutputStream(writer, StandardCharsets.UTF_8), graph, prefixMap, baseURI, context);
35+
}
36+
37+
@Override
38+
public Lang getLang() {
39+
return CustomLangs.SPIPES_TURTLE;
40+
}
41+
}

0 commit comments

Comments
 (0)