diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e69de29 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..13edb27 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @artob diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3b14619 --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +# macOS +.DS_Store + +# Visual Studio Code +.vscode/ + +# Editor backup files +*~ + +# Ruby artifacts +.bundle +.irb_history +.yardoc +Gemfile.lock +*.gem diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..280fb8d --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +* Arto Bendiken diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..16d7438 --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,8 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## 0.0.0 - 2025-01-07 diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..77effbc --- /dev/null +++ b/Gemfile @@ -0,0 +1,13 @@ +source "https://rubygems.org" + +gemspec + +gem 'rdf', git: "https://github.com/ruby-rdf/rdf", branch: "develop" + +group :development do + gem "byebug", platforms: :mri +end + +group :development, :test do + gem 'rdf-spec', git: "https://github.com/ruby-rdf/rdf-spec", branch: "develop" +end diff --git a/README.md b/README.md new file mode 100644 index 0000000..0e97643 --- /dev/null +++ b/README.md @@ -0,0 +1,43 @@ +# RDF/Borsh for Ruby + +[![License](https://img.shields.io/badge/license-Public%20Domain-blue.svg)](https://unlicense.org) +[![Compatibility](https://img.shields.io/badge/ruby-3.0%2B-blue)](https://rubygems.org/gems/rdf-borsh) +[![Package](https://img.shields.io/gem/v/rdf-borsh)](https://rubygems.org/gems/rdf-borsh) + +A Ruby library for encoding and decoding RDF data using the [Borsh] +binary serialization format. + +[Borsh]: https://borsh.io + +## 🛠️ Prerequisites + +- [Ruby](https://ruby-lang.org) 3.0+ + +## ⬇️ Installation + +### Installation via RubyGems + +```bash +gem install rdf-borsh +``` + +## 👉 Examples + +### Importing the library + +```ruby +require 'rdf/borsh' +``` + +## 👨‍💻 Development + +```bash +git clone https://github.com/ruby-rdf/rdf-borsh.git +``` + +- - - + +[![Share on Twitter](https://img.shields.io/badge/share%20on-twitter-03A9F4?logo=twitter)](https://twitter.com/share?url=https://github.com/ruby-rdf/rdf-borsh&text=RDF%2FBorsh+for+Ruby) +[![Share on Reddit](https://img.shields.io/badge/share%20on-reddit-red?logo=reddit)](https://reddit.com/submit?url=https://github.com/ruby-rdf/rdf-borsh&title=RDF%2FBorsh+for+Ruby) +[![Share on Hacker News](https://img.shields.io/badge/share%20on-hacker%20news-orange?logo=ycombinator)](https://news.ycombinator.com/submitlink?u=https://github.com/ruby-rdf/rdf-borsh&t=RDF%2FBorsh+for+Ruby) +[![Share on 
Facebook](https://img.shields.io/badge/share%20on-facebook-1976D2?logo=facebook)](https://www.facebook.com/sharer/sharer.php?u=https://github.com/ruby-rdf/rdf-borsh) diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..efb9808 --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors. We intend this dedication to be an overt act of +relinquishment in perpetuity of all present and future rights to this +software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. + +For more information, please refer to <https://unlicense.org/> diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..77d6f4c --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.0.0 diff --git a/lib/rdf/borsh.rb b/lib/rdf/borsh.rb new file mode 100644 index 0000000..f35e119 --- /dev/null +++ b/lib/rdf/borsh.rb @@ -0,0 +1,13 @@ +# This is free and unencumbered software released into the public domain. + +module RDF + ## + # RDF/Borsh. 
module RDF::Borsh
  ##
  # Format specification for RDF/Borsh.
  #
  # Registers the `application/x-rdf+borsh` content type and the `.borsh`
  # file extension, and wires up the reader and writer classes lazily.
  class Format < RDF::Format
    content_type 'application/x-rdf+borsh', extension: :borsh
    reader { RDF::Borsh::Reader }
    writer { RDF::Borsh::Writer }

    # Four-byte signature that opens every RDF/Borsh stream.
    MAGIC   = 'RDFB'.freeze
    # Version byte following the signature (the character code of "1").
    VERSION = '1'.ord
    # Flags byte following the version byte.
    FLAGS   = 0b00000111

    ##
    # @return [String] the human-readable name of this format
    def self.name
      "RDF/Borsh"
    end

    ##
    # Checks whether a sample begins with the RDF/Borsh signature and
    # version byte.
    #
    # @param  [String] sample the leading portion of some input
    # @return [Boolean] `true` if the first five characters match
    def self.detect(sample)
      signature = [MAGIC, VERSION].pack('a4C')
      sample[0, 5] == signature
    end
  end # Format
end # RDF::Borsh
##
# Reads the terms dictionary from `@input`.
#
# The dictionary is a little-endian u32 term count followed by one record
# per term. Every record starts with a u8 kind and a u32-length-prefixed
# string; kinds 4 and 5 carry a second u32-length-prefixed string (the
# datatype IRI or the language tag, respectively).
#
# @return [Array<RDF::Term>] the decoded terms, in dictionary order
# @raise  [RDF::ReaderError] if the data is truncated or a record has an
#   unknown kind
def read_terms
  # Reads exactly `size` bytes, turning a short read into a reader error
  # instead of a NoMethodError on nil further down.
  read_exact = lambda do |size|
    chunk = @input.read(size)
    if chunk.nil? || chunk.bytesize < size
      raise RDF::ReaderError, "truncated RDF/Borsh terms dictionary"
    end
    chunk
  end
  read_u32 = -> { read_exact.call(4).unpack1('V') }
  # Length-limited reads return binary strings; the writer emitted the
  # bytes of ordinary Ruby strings, so tag them as UTF-8 again.
  read_string = -> { read_exact.call(read_u32.call).force_encoding(Encoding::UTF_8) }

  read_u32.call.times.map do
    term_kind   = read_exact.call(1).unpack1('C')
    term_string = read_string.call

    case term_kind
    when 1 then RDF::URI(term_string)
    when 2 then RDF::Node(term_string)
    when 3 then RDF::Literal(term_string)
    when 4 then RDF::Literal(term_string, datatype: RDF::URI(read_string.call))
    when 5 then RDF::Literal(term_string, language: read_string.call)
    else
      raise RDF::ReaderError, "unknown RDF/Borsh term type: #{term_kind}"
    end
  end
end
##
# Reads and validates the uncompressed header from `@input`.
#
# The header is a 4-byte magic string, a u8 version, a u8 flags byte and
# a little-endian u32 quad count. Each of the first three fields is
# compared against `MAGIC`, `VERSION` and `FLAGS` as soon as it is read.
#
# @return [Array(Integer, Integer, Integer)] the version, flags and quad
#   count, in that order
# @raise  [RDF::ReaderError] if the header is truncated or any field does
#   not match the expected value
def read_header
  # A short read returns nil or fewer bytes than requested; report that
  # as a reader error rather than failing on nil.
  read_field = lambda do |size, directive|
    bytes = @input.read(size)
    if bytes.nil? || bytes.bytesize < size
      raise RDF::ReaderError, "truncated RDF/Borsh header"
    end
    bytes.unpack1(directive)
  end

  magic = read_field.call(4, 'a4')
  raise RDF::ReaderError, "invalid RDF/Borsh header: #{magic.inspect}" if magic != MAGIC

  version = read_field.call(1, 'C')
  raise RDF::ReaderError, "invalid RDF/Borsh version: #{version}" if version != VERSION

  flags = read_field.call(1, 'C')
  raise RDF::ReaderError, "invalid RDF/Borsh flags: #{flags}" if flags != FLAGS

  quad_count = read_field.call(4, 'V')
  [version, flags, quad_count]
end
##
# Writes the compressed terms dictionary.
#
# The uncompressed payload is a little-endian u32 term count followed by
# one record per entry of `@terms_dict`, in order:
#
#   kind 1 (IRI):                     u8 kind, u32 size, IRI string
#   kind 2 (blank node):              u8 kind, u32 size, node identifier
#   kind 3 (plain literal):           u8 kind, u32 size, lexical value
#   kind 4 (datatyped literal):       as kind 3, then u32 size, datatype IRI
#   kind 5 (language-tagged literal): as kind 3, then u32 size, language tag
#
# The payload is passed through `compress` and written to `@output`,
# prefixed with the compressed size in bytes as a little-endian u32.
#
# @raise [RDF::WriterError] if a term is not an IRI, blank node or literal
def write_terms
  # Packs one record: the kind byte, then each string prefixed with its
  # length in bytes.
  encode = lambda do |kind, *strings|
    strings.reduce([kind].pack('C')) do |record, string|
      record << [string.bytesize, string].pack('Va*')
    end
  end

  payload = [@terms_dict.size].pack('V')
  @terms_dict.each do |term|
    record =
      if term.iri?
        encode.call(1, term.to_s)
      elsif term.node?
        encode.call(2, term.id.to_s)
      elsif term.literal? && term.language?
        # Tested before `plain?`: RDF.rb appears to treat language-tagged
        # literals as plain too (see the RDF::Literal docs), which would
        # otherwise drop the language tag.
        encode.call(5, term.value.to_s, term.language.to_s)
      elsif term.literal? && term.plain?
        encode.call(3, term.value.to_s)
      elsif term.literal? && term.datatype?
        encode.call(4, term.value.to_s, term.datatype.to_s)
      else
        raise RDF::WriterError, "unsupported RDF/Borsh term type: #{term.inspect}"
      end
    payload << record
  end

  buffer = compress(payload)
  @output.write([buffer.bytesize].pack('V'))
  @output.write(buffer)
end
+ gem.metadata = { + 'bug_tracker_uri' => "https://github.com/ruby-rdf/rdf-borsh/issues", + 'changelog_uri' => "https://github.com/ruby-rdf/rdf-borsh/blob/master/CHANGES.md", + 'documentation_uri' => "https://github.com/ruby-rdf/rdf-borsh/blob/master/README.md", + 'homepage_uri' => gem.homepage, + 'source_code_uri' => "https://github.com/ruby-rdf/rdf-borsh", + } + + gem.author = "Arto Bendiken" + gem.email = "public-rdf-ruby@w3.org" + + gem.platform = Gem::Platform::RUBY + gem.files = %w(AUTHORS CHANGES.md README.md UNLICENSE VERSION) + Dir.glob('lib/**/*.rb') + gem.bindir = %q(bin) + gem.executables = %w() + + gem.required_ruby_version = '>= 3.0' # RDF.rb 3.3 + gem.add_runtime_dependency 'extlz4', '~> 0.3' + gem.add_runtime_dependency 'rdf', '~> 3.3' + gem.add_runtime_dependency 'sorted_set', '~> 1.0' + gem.add_development_dependency 'rdf-spec', '~> 3.3' + gem.add_development_dependency 'rspec', '~> 3.12' + gem.add_development_dependency 'yard' , '~> 0.9' +end diff --git a/spec/.gitkeep b/spec/.gitkeep new file mode 100644 index 0000000..e69de29