diff --git a/build.sbt b/build.sbt
index 2e169da..9332727 100644
--- a/build.sbt
+++ b/build.sbt
@@ -8,7 +8,7 @@ name := "gstore"
 version := "0.2.0-SNAPSHOT"
 
 val ScalatraVersion = "2.6.3"
-val jenaVersion = "3.17.0"
+val jenaVersion = "4.10.0"
 val jettyVersion = "9.4.9.v20180320"
 
 libraryDependencies ++= Seq(
diff --git a/src/main/scala/org/dbpedia/databus/ApiImpl.scala b/src/main/scala/org/dbpedia/databus/ApiImpl.scala
index 40bf0a3..84881b2 100644
--- a/src/main/scala/org/dbpedia/databus/ApiImpl.scala
+++ b/src/main/scala/org/dbpedia/databus/ApiImpl.scala
@@ -8,6 +8,7 @@ import javax.servlet.http.HttpServletRequest
 import org.apache.jena.rdf.model.Model
 import org.apache.jena.riot.Lang
 import org.apache.jena.shared.JenaException
+import org.apache.jena.sys.JenaSystem
 import org.dbpedia.databus.ApiImpl.Config
 import org.dbpedia.databus.RdfConversions.{contextUrl, generateGraphId, graphToBytes, jenaJsonLdContextWithFallbackForLocalhost, mapContentType, readModel}
 import org.dbpedia.databus.swagger.api.DatabusApi
@@ -26,8 +27,14 @@ class ApiImpl(config: Config) extends DatabusApi {
   import ApiImpl._
 
   private val client: GitClient = initGitClient(config)
-  private val defaultLang = Lang.JSONLD
+  private val defaultLang = Lang.JSONLD10
   private lazy val sparqlClient: SparqlClient = SparqlClient.get(config)
+  init()
+
+  /** Calls JenaSystem.init(); also invoked once while this instance is constructed. */
+  def init(): Unit = JenaSystem.init()
+  /** Calls JenaSystem.shutdown(). */
+  def stop(): Unit = JenaSystem.shutdown()
 
 
   override def dataidSubgraph(body: String)(request: HttpServletRequest): Try[String] =
diff --git a/src/main/scala/org/dbpedia/databus/SparqlClient.scala b/src/main/scala/org/dbpedia/databus/SparqlClient.scala
index ba88bd2..dc39fac 100644
--- a/src/main/scala/org/dbpedia/databus/SparqlClient.scala
+++ b/src/main/scala/org/dbpedia/databus/SparqlClient.scala
@@ -9,10 +9,11 @@ import com.github.jsonldjava.utils.JsonUtils
 import com.mchange.v2.c3p0.ComboPooledDataSource
 import org.apache.jena.atlas.json.JsonString
 import org.apache.jena.graph.{Graph, Node}
+import org.apache.jena.iri.ViolationCodes
 import org.apache.jena.rdf.model.{Model, ModelFactory}
-import org.apache.jena.riot.lang.JsonLDReader
+import org.apache.jena.riot.lang.LangJSONLD10
 import org.apache.jena.riot.system.{ErrorHandler, ErrorHandlerFactory, StreamRDFLib}
-import org.apache.jena.riot.writer.JsonLDWriter
+import org.apache.jena.riot.writer.JsonLD10Writer
 import org.apache.jena.riot.{Lang, RDFDataMgr, RDFFormat, RDFLanguages, RDFParserBuilder, RDFWriter, RIOT}
 import org.apache.jena.shacl.{ShaclValidator, Shapes, ValidationReport}
 import org.apache.jena.sparql.util
@@ -187,7 +188,7 @@ object RdfConversions {
     context.foreach(ctx => {
       val jctx = jenaContext(CachingContext.parse(ctx.toString))
       builder.context(jctx)
-      builder.set(JsonLDWriter.JSONLD_CONTEXT_SUBSTITUTION, new JsonString(ctx.toString))
+      builder.set(JsonLD10Writer.JSONLD_CONTEXT_SUBSTITUTION, new JsonString(ctx.toString))
     })
 
     builder
@@ -201,6 +202,7 @@ object RdfConversions {
       ShaclValidator.get()
         .validate(Shapes.parse(shacl), model.getGraph)
     )
+
   def validateWithShacl(file: Array[Byte], modelLang: Lang, shaclGraph: Graph, fileCtx: Option[util.Context]): Try[ValidationReport] =
     for {
       (model, _) <- readModel(file, modelLang, fileCtx)
@@ -222,7 +224,9 @@ object RdfConversions {
   def langToFormat(lang: Lang): RDFFormat = lang match {
     case RDFLanguages.TURTLE => RDFFormat.TURTLE_PRETTY
     case RDFLanguages.TTL => RDFFormat.TTL
-    case RDFLanguages.JSONLD => RDFFormat.JSONLD_FLATTEN_PRETTY
+    case RDFLanguages.JSONLD => RDFFormat.JSONLD10
+    case RDFLanguages.JSONLD10 => RDFFormat.JSONLD10
+    case RDFLanguages.JSONLD11 => RDFFormat.JSONLD11
     case RDFLanguages.TRIG => RDFFormat.TRIG_PRETTY
     case RDFLanguages.RDFXML => RDFFormat.RDFXML_PRETTY
     case RDFLanguages.RDFTHRIFT => RDFFormat.RDF_THRIFT
@@ -249,7 +253,7 @@ object RdfConversions {
     case "text/turtle" => Lang.TURTLE
     case "application/rdf+xml" => Lang.RDFXML
     case "application/n-triples" => Lang.NTRIPLES
-    case "application/ld+json" => Lang.JSONLD
+    case "application/ld+json" => Lang.JSONLD10
     case "text/trig" => Lang.TRIG
     case "application/n-quads" => Lang.NQUADS
     case "application/trix+xml" => Lang.TRIX
@@ -302,7 +306,7 @@ object RdfConversions {
   }
 
   def contextUrl(data: Array[Byte], lang: Lang): Option[URL] =
-    if (lang == Lang.JSONLD) {
+    if (lang == Lang.JSONLD10) {
       jsonLdContextUrl(data)
         .get
     } else {
@@ -342,8 +346,8 @@ object RdfConversions {
   private def jenaContext(jsonLdCtx: core.Context) = {
     val context: util.Context = RIOT.getContext.copy()
     jsonLdCtx.putAll(jsonLdCtx.getPrefixes(true))
-    context.put(JsonLDWriter.JSONLD_CONTEXT, jsonLdCtx)
-    context.put(JsonLDReader.JSONLD_CONTEXT, jsonLdCtx)
+    context.put(JsonLD10Writer.JSONLD_CONTEXT, jsonLdCtx)
+    context.put(LangJSONLD10.JSONLD_CONTEXT, jsonLdCtx)
     context
   }
 
@@ -430,9 +434,50 @@ object RdfConversions {
 
     import org.apache.jena.riot.SysRIOT.fmtMessage
 
+    // Fragments of parser warning messages that must be reported as errors: one per
+    // jena-iri violation code listed below, plus the text of the extra check for spaces.
+    // NOTE(review): assumes jena-iri prints violations as "Code: <number>/<NAME> ...";
+    // the trailing "/" keeps "Code: 1" from also matching "Code: 10" - confirm format.
+    private val reportAsError: Set[String] = List(
+      ViolationCodes.ILLEGAL_CHARACTER,
+      ViolationCodes.CONTROL_CHARACTER,
+      ViolationCodes.NON_XML_CHARACTER,
+      ViolationCodes.EMPTY_SCHEME,
+      ViolationCodes.SCHEME_MUST_START_WITH_LETTER,
+      ViolationCodes.BIDI_FORMATTING_CHARACTER,
+      ViolationCodes.WHITESPACE,
+      ViolationCodes.DOUBLE_WHITESPACE,
+      ViolationCodes.NOT_XML_SCHEMA_WHITESPACE,
+      ViolationCodes.NOT_DNS_NAME,
+      ViolationCodes.ILLEGAL_PERCENT_ENCODING,
+      ViolationCodes.LONE_SURROGATE,
+      ViolationCodes.DNS_LABEL_DASH_START_OR_END,
+      ViolationCodes.BAD_IDN,
+      ViolationCodes.HAS_PASSWORD,
+      ViolationCodes.UNREGISTERED_IANA_SCHEME,
+      ViolationCodes.UNREGISTERED_NONIETF_SCHEME_TREE,
+      ViolationCodes.DEPRECATED_UNICODE_CHARACTER,
+      ViolationCodes.UNDEFINED_UNICODE_CHARACTER,
+      ViolationCodes.PRIVATE_USE_CHARACTER,
+      ViolationCodes.UNICODE_CONTROL_CHARACTER,
+      ViolationCodes.UNICODE_WHITESPACE,
+      ViolationCodes.COMPATIBILITY_CHARACTER,
+      ViolationCodes.REQUIRED_COMPONENT_MISSING,
+      ViolationCodes.PROHIBITED_COMPONENT_PRESENT,
+      ViolationCodes.SCHEME_REQUIRES_LOWERCASE,
+      ViolationCodes.SCHEME_PATTERN_MATCH_FAILED
+    ).map(code => s"Code: $code/").toSet +
+      // there is a weird additional URI check for spaces
+      // org.apache.jena.riot.system.ParserProfileStd method internalMakeIRI line 95
+      // {@link org.apache.jena.riot.system.ParserProfileStd#internalMakeIRI}
+      // Added with Set.+ on purpose: List has no "+", so calling ".+(...)" on the list
+      // resolves to Predef.any2stringadd and a following ".toSet" yields a Set[Char].
+      "Spaces are not legal in URIs/IRIs"
+
+
     override def warning(message: String, line: Long, col: Long): Unit =
       // Fix for https://github.com/dbpedia/databus/issues/156, need to convert this to error
-      if (message.contains("Spaces are not legal in URIs/IRIs")) {
+      if (reportAsError.exists(fragment => message.contains(fragment))) {
         error(message, line, col)
       } else {
         warnings = warnings :+ Warning(fmtMessage(message, line, col))
diff --git a/src/test/resources/newline_in_iri.jsonld b/src/test/resources/newline_in_iri.jsonld
new file mode 100644
index 0000000..cfb3976
--- /dev/null
+++ b/src/test/resources/newline_in_iri.jsonld
@@ -0,0 +1,39 @@
+{
+  "@context": "https://raw.githubusercontent.com/dbpedia/databus/master/server/app/common/res/context.jsonld",
+  "@graph": [
+    {
+      "@id": "https://databus.coypu.org/narndt/coypu",
+      "@type": "Group",
+      "title": "CoyPu"
+    },
+    {
+      "@id": "https://databus.coypu.org/narndt/coypu/countries",
+      "@type": "Artifact",
+      "title": "Raise VirtuosoException",
+      "abstract": "Counties and regions",
+      "description": "Counties and regions"
+    },
+    {
+      "@type": [
+        "Version",
+        "Dataset"
+      ],
+      "@id": "https://databus.coypu.org/narndt/coypu/countries/2023-09-18T122214Z",
+      "hasVersion": "2023-09-18T122214Z",
+      "title": "Countries",
+      "abstract": "Countries\n2023-09-18T12:22:14Z",
+      "description": "Countries\n2023-09-18T12:22:14Z",
+      "license": "https://dalicc.net/licenselibrary/Cc010Universal",
+      "wasDerivedFrom": "https://metadata.coypu.org/dataset/wikidata-distribution\nWikidataQueryService\nhttps://query.wikidata.org/",
+      "distribution": [
+        {
+          "@type": "Part",
+          "formatExtension": "ttl",
+          "compression": "none",
+          "downloadURL": "https://databus.coypu.org/dav/narndt/coypu/countries/2023-09-18T122214Z/countries_freqency=static.ttl",
+          "dcv:frequency": "static"
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/src/test/resources/report_syntax_err.jsonld b/src/test/resources/space_in_iri.jsonld
similarity index 100%
rename from src/test/resources/report_syntax_err.jsonld
rename to src/test/resources/space_in_iri.jsonld
diff --git a/src/test/scala/org/dbpedia/databus/CacheTests.scala b/src/test/scala/org/dbpedia/databus/CacheTests.scala
index 14732b2..432e9c5 100644
--- a/src/test/scala/org/dbpedia/databus/CacheTests.scala
+++ b/src/test/scala/org/dbpedia/databus/CacheTests.scala
@@ -1,11 +1,19 @@
 package org.dbpedia.databus
 
-import java.util.UUID
+import org.apache.jena.sys.JenaSystem
 
+import java.util.UUID
 import org.dbpedia.databus.CachingJsonldContext.ApproxSizeStringKeyCache
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}
+
+class CacheTests extends FlatSpec with Matchers with BeforeAndAfter {
 
-class CacheTests extends FlatSpec with Matchers {
+  before {
+    JenaSystem.init()
+  }
+  after {
+    JenaSystem.shutdown()
+  }
 
 
   "CacheKey" should "be sorted by time of creation" in {
diff --git a/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala b/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala
index d305d6c..d872e8d 100644
--- a/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala
+++ b/src/test/scala/org/dbpedia/databus/DatabusScalatraTest.scala
@@ -1,6 +1,8 @@
 package org.dbpedia.databus
 
 
+import org.apache.jena.iri.ViolationCodes
+
 import java.io.ByteArrayInputStream
 import java.nio.file.{Files, Paths}
 import org.apache.jena.rdf.model.ModelFactory
@@ -21,9 +23,11 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
   val dir = Files.createDirectories(Paths.get("target", "test_dir-git"))
 
   before {
+    impl.init()
     Files.createDirectories(Paths.get("target", "test_dir-git"))
   }
   after {
+    impl.stop()
     Directory(Path.jfile2path(dir.toFile)).deleteRecursively()
   }
 
@@ -63,8 +67,8 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
 
     get("/databus/graph/read?repo=kuckuck&path=pa/fl.jsonld") {
       status should equal(200)
-      val respCtx = RdfConversions.contextUrl(bodyBytes, Lang.JSONLD)
-      respCtx should equal(RdfConversions.contextUrl(bytes, Lang.JSONLD))
+      val respCtx = RdfConversions.contextUrl(bodyBytes, Lang.JSONLD10)
+      respCtx should equal(RdfConversions.contextUrl(bytes, Lang.JSONLD10))
       respCtx.get.toString.nonEmpty should equal(true)
     }
 
@@ -73,19 +77,34 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
 
   "File save" should "report problems in input" in {
 
-    val file = "report_syntax_err.jsonld"
+    val file = "space_in_iri.jsonld"
     val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile))
 
     post("/databus/graph/save?repo=kuckuck&path=pa/syntax_err.jsonld", bytes) {
-      (status >= 400) should equal(true)
+      (status >= 400) should equal(true)
       response.body.contains("Spaces are not legal in URIs/IRIs") should equal(true)
     }
 
   }
 
-  "Shacl validation" should "report problems in input" in {
+  "Shacl validation" should "report problems in input with spaces in IRIs" in {
+
+    val file = "space_in_iri.jsonld"
+    val sha = "test.shacl"
+    val bytes = Paths.get(getClass.getClassLoader.getResource(file).getFile).toFile
+    val shacl = Paths.get(getClass.getClassLoader.getResource(sha).getFile).toFile
+
+    post("/databus/shacl/validate", Map.empty, Map("shacl" -> shacl, "graph" -> bytes)) {
+      status should equal(400)
+      body should include("Bad IRI")
+      body should include("Spaces are not legal")
+    }
+
+  }
+
+  "Shacl validation" should "report problems in input with newlines in IRIs" in {
 
-    val file = "report_syntax_err.jsonld"
+    val file = "newline_in_iri.jsonld"
     val sha = "test.shacl"
     val bytes = Paths.get(getClass.getClassLoader.getResource(file).getFile).toFile
     val shacl = Paths.get(getClass.getClassLoader.getResource(sha).getFile).toFile
@@ -93,6 +112,7 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
     post("/databus/shacl/validate", Map.empty, Map("shacl" -> shacl, "graph" -> bytes)) {
       status should equal(400)
       body should include("Bad IRI")
+      body should include(s"Code: ${ViolationCodes.CONTROL_CHARACTER}")
     }
 
   }
@@ -141,7 +161,7 @@ class DatabusScalatraTest extends ScalatraFlatSpec with BeforeAndAfter {
 
       val model = ModelFactory.createDefaultModel()
       val dataStream = new ByteArrayInputStream(version)
-      RDFDataMgr.read(model, dataStream, Lang.JSONLD)
+      RDFDataMgr.read(model, dataStream, Lang.JSONLD10)
       val tr = Tractate.extract(model.getGraph, TractateV1.Version)
       body should equal(tr.get.stringForSigning)
     }
diff --git a/src/test/scala/org/dbpedia/databus/TractateTest.scala b/src/test/scala/org/dbpedia/databus/TractateTest.scala
index 6178ab4..25844f6 100644
--- a/src/test/scala/org/dbpedia/databus/TractateTest.scala
+++ b/src/test/scala/org/dbpedia/databus/TractateTest.scala
@@ -2,12 +2,20 @@ package org.dbpedia.databus
 
 import java.io.ByteArrayInputStream
 import java.nio.file.{Files, Paths}
-
 import org.apache.jena.rdf.model.ModelFactory
 import org.apache.jena.riot.{Lang, RDFDataMgr}
-import org.scalatest.{FlatSpec, Matchers}
+import org.apache.jena.sys.JenaSystem
+import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}
+
+class TractateTest extends FlatSpec with Matchers with BeforeAndAfter {
 
-class TractateTest extends FlatSpec with Matchers {
+  before {
+    JenaSystem.init()
+  }
+
+  after {
+    JenaSystem.shutdown()
+  }
 
 
   "Tractate" should "be extracted from dataid" in {
@@ -15,7 +23,7 @@ class TractateTest extends FlatSpec with Matchers {
     val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile))
     val model = ModelFactory.createDefaultModel()
     val dataStream = new ByteArrayInputStream(bytes)
-    RDFDataMgr.read(model, dataStream, Lang.JSONLD)
+    RDFDataMgr.read(model, dataStream, Lang.JSONLD10)
     val t = Tractate.extract(model.getGraph, TractateV1.Version)
     val expected =
       """Databus Tractate V1
diff --git a/src/test/scala/org/dbpedia/databus/ValidationTest.scala b/src/test/scala/org/dbpedia/databus/ValidationTest.scala
index 6f00abf..ef5c58c 100644
--- a/src/test/scala/org/dbpedia/databus/ValidationTest.scala
+++ b/src/test/scala/org/dbpedia/databus/ValidationTest.scala
@@ -1,15 +1,21 @@
 package org.dbpedia.databus
 
 import java.nio.file.{Files, Paths}
-import org.apache.jena.query.ARQ
 import org.apache.jena.riot.Lang
+import org.apache.jena.sys.JenaSystem
 import org.dbpedia.databus.RdfConversions.{contextUrl, jenaJsonLdContextWithFallbackForLocalhost}
-import org.scalatest.{FlatSpec, Matchers}
+import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}
 
-class ValidationTest extends FlatSpec with Matchers {
+class ValidationTest extends FlatSpec with Matchers with BeforeAndAfter {
 
-  ARQ.init()
-  val lang = Lang.JSONLD
+  before {
+    JenaSystem.init()
+  }
+  after {
+    JenaSystem.shutdown()
+  }
+
+  val lang = Lang.JSONLD10
 
   "SHACL validation" should "work for version" in {
     val shacl = "https://raw.githubusercontent.com/dbpedia/databus-git-mockup/main/dev/dataid-shacl.ttl"
diff --git a/src/test/scala/org/dbpedia/databus/VirtuosoQueriesTest.scala b/src/test/scala/org/dbpedia/databus/VirtuosoQueriesTest.scala
index a716e6f..e6358bd 100644
--- a/src/test/scala/org/dbpedia/databus/VirtuosoQueriesTest.scala
+++ b/src/test/scala/org/dbpedia/databus/VirtuosoQueriesTest.scala
@@ -2,22 +2,27 @@ package org.dbpedia.databus
 
 import java.io.ByteArrayInputStream
 import java.nio.file.{Files, Paths}
-
 import org.apache.jena.rdf.model.ModelFactory
 import org.apache.jena.riot.{Lang, RDFDataMgr}
-import org.scalatest.{FlatSpec, Matchers}
-import sttp.client3.{DigestAuthenticationBackend, HttpURLConnectionBackend}
-import sttp.model.Uri
+import org.apache.jena.sys.JenaSystem
+import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}
 
-class VirtuosoQueriesTest extends FlatSpec with Matchers {
+class VirtuosoQueriesTest extends FlatSpec with Matchers with BeforeAndAfter {
   import collection.JavaConverters._
 
+  before {
+    JenaSystem.init()
+  }
+  after {
+    JenaSystem.shutdown()
+  }
+
   "Generator" should "work" in {
     val file = "version.jsonld"
     val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile))
     val model = ModelFactory.createDefaultModel()
     val dataStream = new ByteArrayInputStream(bytes)
-    RDFDataMgr.read(model, dataStream, Lang.JSONLD)
+    RDFDataMgr.read(model, dataStream, Lang.JSONLD10)
 
     val bld = RdfConversions.makeInsertSparqlQuery(model.getGraph.find().asScala.toSeq, "http://randomGraphId")
 
@@ -30,7 +35,7 @@ class VirtuosoQueriesTest extends FlatSpec with Matchers {
     val bytes = Files.readAllBytes(Paths.get(getClass.getClassLoader.getResource(file).getFile))
     val model = ModelFactory.createDefaultModel()
     val dataStream = new ByteArrayInputStream(bytes)
-    RDFDataMgr.read(model, dataStream, Lang.JSONLD)
+    RDFDataMgr.read(model, dataStream, Lang.JSONLD10)
 
     val bld = RdfConversions.makeInsertSparqlQuery(model.getGraph.find().asScala.toSeq, "http://randomGraphId")
 