Skip to content

Commit dacec8e

Browse files
author
Evgenii Grigorev
committed
Refactor the code
1 parent 0489a5d commit dacec8e

File tree

5 files changed

+276
-218
lines changed

5 files changed

+276
-218
lines changed
Lines changed: 57 additions & 207 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,61 @@
11
package cz.cvut.spipes.util;
22

3-
import org.apache.jena.datatypes.xsd.XSDDatatype;
4-
import org.apache.jena.rdf.model.*;
3+
import org.apache.jena.graph.Node;
4+
import org.apache.jena.graph.Triple;
5+
import org.apache.jena.rdf.model.Model;
56
import org.apache.jena.vocabulary.RDF;
6-
import org.jetbrains.annotations.NotNull;
7+
import org.apache.jena.atlas.io.AWriter;
78

89
import java.io.OutputStream;
910
import java.io.OutputStreamWriter;
1011
import java.io.PrintWriter;
1112
import java.nio.charset.StandardCharsets;
1213
import java.util.*;
13-
import java.util.stream.Collectors;
1414

1515
public class SPipesFormatter {
1616

17-
private static final Comparator<Property> PREDICATE_ORDER = (p1, p2) -> {
18-
if (p1.equals(p2)) return 0;
19-
if (RDF.type.equals(p1)) return -1;
20-
if (RDF.type.equals(p2)) return 1;
21-
return p1.getURI().compareTo(p2.getURI());
22-
};
23-
2417
private final Model model;
2518
private final Map<String, String> ns;
26-
private final Map<Resource, Map<Property, List<RDFNode>>> subjectMap = new LinkedHashMap<>();
27-
28-
private final Map<Resource, Integer> inDegree = new HashMap<>();
29-
private final Map<Resource, String> bnodeLabels = new LinkedHashMap<>();
19+
private final Map<Node, Map<Node, List<Node>>> subjectMap = new LinkedHashMap<>();
20+
private final Map<String, Integer> inDegree = new HashMap<>();
21+
private final Map<String, String> bnodeLabels = new LinkedHashMap<>();
3022
private int bCounter = 0;
3123

24+
private final SPipesNodeFormatter nodeFormatter;
25+
3226
public SPipesFormatter(Model model) {
3327
this.model = model;
3428
this.ns = model.getNsPrefixMap();
29+
this.nodeFormatter = new SPipesNodeFormatter(model, ns, inDegree, bnodeLabels, bCounter);
3530
buildSubjectMap();
36-
}
37-
38-
public void writeTo(OutputStream outputStream) {
39-
var writer = new PrintWriter(new OutputStreamWriter(outputStream, StandardCharsets.UTF_8), false);
40-
writePrefixes(writer);
41-
writeTriples(writer);
42-
writer.flush();
31+
for (Node subj : subjectMap.keySet())
32+
if (subj.isBlank() && inDegreeOf(subj) > 1) bnodeLabels.put(subj.getBlankNodeLabel(), "_:b" + (bCounter++));
4333
}
4434

4535
private void buildSubjectMap() {
46-
StmtIterator stmtIter = model.listStatements();
47-
while (stmtIter.hasNext()) {
48-
Statement stmt = stmtIter.nextStatement();
49-
Resource subj = stmt.getSubject();
50-
Property pred = stmt.getPredicate();
51-
RDFNode obj = stmt.getObject();
36+
Iterator<Triple> it = model.getGraph().find();
37+
while (it.hasNext()) {
38+
Triple t = it.next();
39+
Node s = t.getSubject(), p = t.getPredicate(), o = t.getObject();
40+
subjectMap.computeIfAbsent(s, k -> new LinkedHashMap<>())
41+
.computeIfAbsent(p, k -> new ArrayList<>()).add(o);
42+
if (o.isBlank()) inDegree.merge(o.getBlankNodeLabel(), 1, Integer::sum);
43+
}
44+
}
5245

53-
subjectMap
54-
.computeIfAbsent(subj, k -> new LinkedHashMap<>())
55-
.computeIfAbsent(pred, k -> new ArrayList<>())
56-
.add(obj);
46+
private int inDegreeOf(Node n) { return n.isBlank() ? inDegree.getOrDefault(n.getBlankNodeLabel(), 0) : 0; }
47+
private boolean hasLabel(Node n) { return n.isBlank() && bnodeLabels.containsKey(n.getBlankNodeLabel()); }
5748

58-
if (obj.isAnon()) {
59-
Resource br = obj.asResource();
60-
inDegree.merge(br, 1, Integer::sum);
61-
}
62-
}
63-
for (Resource subj : subjectMap.keySet()) {
64-
if (subj.isAnon() && inDegreeOf(subj) > 1) {
65-
allocLabel(subj);
66-
}
67-
}
49+
public void writeTo(OutputStream out) {
50+
PrintWriter pw = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8), false);
51+
AWriter aw = new SimpleAWriter(pw);
52+
writePrefixes(aw);
53+
writeTriples(aw);
54+
aw.flush();
6855
}
6956

70-
private void writePrefixes(PrintWriter writer) {
57+
private void writePrefixes(AWriter w) {
7158
List<String> priority = List.of("owl", "rdf", "rdfs", "skos", "sm", "sml", "sp", "spin", "xsd");
72-
7359
Comparator<Map.Entry<String, String>> prefixComparator = (e1, e2) -> {
7460
if (e1.getKey().isEmpty() && !e2.getKey().isEmpty()) return -1;
7561
if (!e1.getKey().isEmpty() && e2.getKey().isEmpty()) return 1;
@@ -83,176 +69,40 @@ private void writePrefixes(PrintWriter writer) {
8369

8470
ns.entrySet().stream()
8571
.sorted(prefixComparator)
86-
.forEach(e -> writer.printf("@prefix %s: <%s> .%n", e.getKey(), e.getValue()));
72+
.forEach(e -> w.print(String.format("@prefix %s: <%s> .%n", e.getKey(), e.getValue())));
8773

88-
writer.println();
74+
w.println();
8975
}
9076

77+
private final Comparator<Node> PRED_ORDER = (p1,p2)->{
78+
if(p1.equals(p2)) return 0;
79+
if(RDF.type.asNode().equals(p1)) return -1;
80+
if(RDF.type.asNode().equals(p2)) return 1;
81+
return (p1.isURI()?p1.getURI():p1.toString()).compareTo(p2.isURI()?p2.getURI():p2.toString());
82+
};
9183

92-
private void writeTriples(PrintWriter writer) {
93-
List<Resource> subjects = getSubjects();
94-
95-
for (Resource subject : subjects) {
96-
if (subject.isAnon() && !hasLabel(subject) && inDegreeOf(subject) >= 1) {
97-
continue;
98-
}
99-
if (subject.isAnon() && !hasLabel(subject) && inDegreeOf(subject) == 0) {
100-
writer.println(formatBNodeAsPropertyList(subject, new HashSet<>()));
101-
continue;
102-
}
103-
104-
writer.println(formatNode(subject));
105-
106-
Map<Property, List<RDFNode>> predMap = new TreeMap<>(PREDICATE_ORDER);
107-
predMap.putAll(subjectMap.get(subject));
108-
109-
if (predMap.isEmpty()) {
110-
writer.println(" .\n");
111-
continue;
112-
}
113-
114-
List<Map.Entry<Property, List<RDFNode>>> predEntries = new ArrayList<>(predMap.entrySet());
115-
for (Map.Entry<Property, List<RDFNode>> predEntry : predEntries) {
116-
String predStr = RDF.type.equals(predEntry.getKey()) ? "a" : formatNode(predEntry.getKey());
117-
118-
List<String> objStrs = predEntry.getValue().stream()
119-
.map(this::formatNode)
120-
.toList();
121-
122-
for (String objStr : objStrs) {
123-
writer.println(" " + predStr + " " + objStr + " ;");
124-
}
125-
}
126-
127-
writer.println(" .");
128-
}
129-
}
130-
131-
@NotNull
132-
private List<Resource> getSubjects() {
133-
List<Resource> subjects = new ArrayList<>(subjectMap.keySet());
134-
subjects.sort((a, b) -> {
135-
int ca = a.isURIResource() ? 0 : (hasLabel(a) ? 1 : 2);
136-
int cb = b.isURIResource() ? 0 : (hasLabel(b) ? 1 : 2);
137-
if (ca != cb) return Integer.compare(ca, cb);
138-
if (a.isURIResource() && b.isURIResource()) return a.getURI().compareTo(b.getURI());
139-
if (hasLabel(a) && hasLabel(b)) return getLabel(a).compareTo(getLabel(b));
84+
private void writeTriples(AWriter w) {
85+
List<Node> subjects = new ArrayList<>(subjectMap.keySet());
86+
subjects.sort((a,b)->{
87+
int ca=a.isURI()?0:hasLabel(a)?1:2, cb=b.isURI()?0:hasLabel(b)?1:2;
88+
if(ca!=cb) return Integer.compare(ca, cb);
89+
if(a.isURI() && b.isURI()) return a.getURI().compareTo(b.getURI());
90+
if(hasLabel(a)&&hasLabel(b)) return bnodeLabels.get(a.getBlankNodeLabel())
91+
.compareTo(bnodeLabels.get(b.getBlankNodeLabel()));
14092
return 0;
14193
});
142-
return subjects;
143-
}
144-
145-
private String formatNode(RDFNode node) {
146-
if (node.isLiteral()) {
147-
return formatLiteral(node.asLiteral());
148-
} else if (node.isAnon()) {
149-
Resource br = node.asResource();
150-
if (hasLabel(br)) return getLabel(br);
151-
return formatBNodeAsPropertyList(br, new HashSet<>());
152-
} else if (node.isURIResource()) {
153-
return formatURI(node.asResource());
154-
} else {
155-
return node.toString();
156-
}
157-
}
158-
159-
private String formatURI(Resource res) {
160-
String uri = res.getURI();
161-
for (var e : ns.entrySet()) {
162-
if (uri.startsWith(e.getValue())) {
163-
return e.getKey() + ":" + uri.substring(e.getValue().length());
164-
}
165-
}
166-
return "<" + uri + ">";
167-
}
168-
169-
private String formatLiteral(Literal lit) {
170-
String value = lit.getString();
171-
boolean multiline = value.contains("\n") || value.contains("\r");
172-
String escaped = escapeString(value, multiline);
173-
String lex = multiline ? "\"\"\"" + escaped + "\"\"\"" : "\"" + escaped + "\"";
174-
175-
String lang = lit.getLanguage();
176-
if (lang != null && !lang.isEmpty()) {
177-
return lex + "@" + lang;
178-
}
179-
180-
String dt = lit.getDatatypeURI();
181-
if (dt != null && !dt.equals(XSDDatatype.XSDstring.getURI())) {
182-
return lex + "^^" + formatURI(ResourceFactory.createResource(dt));
183-
}
184-
185-
return lex;
186-
}
187-
188-
private String escapeString(String s, boolean multiline) {
189-
StringBuilder b = new StringBuilder();
190-
for (int i = 0; i < s.length(); i++) {
191-
char c = s.charAt(i);
192-
switch (c) {
193-
case '\\': b.append("\\\\"); break;
194-
case '"':
195-
if (!multiline) {
196-
b.append("\\\"");
197-
} else {
198-
if (i + 2 < s.length() && s.charAt(i+1) == '"' && s.charAt(i+2) == '"') {
199-
b.append("\\\"\\\"\\\"");
200-
i += 2;
201-
} else {
202-
b.append('"');
203-
}
204-
}
205-
break;
206-
case '\n': b.append(multiline ? "\n" : "\\n"); break;
207-
case '\r': b.append(multiline ? "\r" : "\\r"); break;
208-
case '\t': b.append("\\t"); break;
209-
case '\b': b.append("\\b"); break;
210-
case '\f': b.append("\\f"); break;
211-
default:
212-
if (c < 0x20) b.append(String.format("\\u%04X", (int) c));
213-
else b.append(c);
94+
for(Node subject : subjects){
95+
if(subject.isBlank() && !hasLabel(subject) && inDegreeOf(subject)>=1) continue;
96+
nodeFormatter.format(w, subject); w.println();
97+
Map<Node,List<Node>> predMap = new TreeMap<>(PRED_ORDER);
98+
predMap.putAll(subjectMap.getOrDefault(subject, Collections.emptyMap()));
99+
if(predMap.isEmpty()){w.println(" .\n"); continue;}
100+
for(Map.Entry<Node,List<Node>> e:predMap.entrySet()){
101+
Node pred=e.getKey(); boolean isA = RDF.type.asNode().equals(pred);
102+
w.print(" "); if(isA) w.print("a "); else nodeFormatter.format(w,pred); w.print(isA?"":" ");
103+
for(Node obj:e.getValue()){ nodeFormatter.formatNodeWithPath(w,obj,new HashSet<>()); w.println(" ;"); w.print(" "); }
214104
}
105+
w.println(" .\n");
215106
}
216-
return b.toString();
217-
}
218-
219-
private String formatBNodeAsPropertyList(Resource blank, Set<Resource> path) {
220-
if (hasLabel(blank)) return getLabel(blank);
221-
if (!path.add(blank)) return allocLabel(blank);
222-
223-
List<Statement> props = model.listStatements(blank, null, (RDFNode) null).toList();
224-
if (props.isEmpty()) return "[]";
225-
226-
props.sort(Comparator
227-
.comparing(Statement::getPredicate, PREDICATE_ORDER)
228-
.thenComparing(s -> formatNode(s.getObject())));
229-
230-
StringBuilder builder = new StringBuilder("[ ");
231-
for (Statement stmt : props) {
232-
String predStr = stmt.getPredicate().equals(RDF.type) ? "a" : formatNode(stmt.getPredicate());
233-
String objStr = formatNodeWithPath(stmt.getObject(), path);
234-
builder.append(predStr).append(" ").append(objStr).append(" ; ");
235-
}
236-
builder.append("]");
237-
path.remove(blank);
238-
return builder.toString();
239-
}
240-
241-
private String formatNodeWithPath(RDFNode node, Set<Resource> path) {
242-
if (node.isAnon()) {
243-
Resource br = node.asResource();
244-
if (hasLabel(br)) return getLabel(br);
245-
if (inDegreeOf(br) <= 1) return formatBNodeAsPropertyList(br, path);
246-
return allocLabel(br);
247-
}
248-
return formatNode(node);
249107
}
250-
251-
private int inDegreeOf(Resource r) { return inDegree.getOrDefault(r, 0); }
252-
private boolean hasLabel(Resource r) { return bnodeLabels.containsKey(r); }
253-
private String getLabel(Resource r) { return bnodeLabels.get(r); }
254-
private String allocLabel(Resource r) {
255-
return bnodeLabels.computeIfAbsent(r, k -> "_:b" + (bCounter++));
256-
}
257-
258108
}

0 commit comments

Comments
 (0)