diff --git a/Gemfile b/Gemfile
index 3d89ccc0..9f4d0abf 100644
--- a/Gemfile
+++ b/Gemfile
@@ -19,7 +19,7 @@ gem "connection_pool"
 
 # gem "sequel"
 # gem "sqlite3"
-# gem "aws-sdk-s3", "~> 1"
+gem "aws-sdk-s3", require: false
 
 # Caching - Rails 5.2 shipped with a redis cache for fragments, but doesn't
 # provide session storage via redis too, which redis-actionpack does.
@@ -34,7 +34,7 @@ gem "rack-attack"
 gem "bcrypt"
 
 # Server
-gem "puma"#, "~> 3.11"
+gem "puma"
 
 # Windows does not include zoneinfo files, so bundle the tzinfo-data gem
 gem "tzinfo-data"
@@ -63,11 +63,10 @@ gem "kaminari"
 gem "jbuilder"
 
 # ActiveJob Worker, Cron Schedulers
-# gem "sidekiq"
 gem "good_job"
 
 # Logs
-gem "logster", github: "discourse/logster", branch: "redis_4_6"
+gem "logster"
 
 # Auth
 # Provider
@@ -92,9 +91,7 @@ gem "nokogiri"
 # gem "ruby-readability"
 # gem "stopwords-filter"
 gem "marcel", "~> 1.0"
-# gem "parslet"
-
-gem "aws-sdk-s3", require: false
+gem "parslet"
 
 gem "exception_notification"
 gem "slack-notifier"
diff --git a/Gemfile.lock b/Gemfile.lock
index 856ede56..6c9a5492 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -1,10 +1,3 @@
-GIT
-  remote: https://github.com/discourse/logster.git
-  revision: fbc7cc43e58305d5f46d6fc243091158c57ee814
-  branch: redis_4_6
-  specs:
-    logster (2.11.0)
-
 GIT
   remote: https://github.com/elabs/pundit.git
   revision: 856d74d55d79a87102cbaa2df64d87c94dc7e3bc
@@ -135,9 +128,9 @@ GEM
       activerecord (>= 5.a)
       database_cleaner-core (~> 2.0.0)
     database_cleaner-core (2.0.1)
-    debug (1.4.0)
+    debug (1.6.2)
       irb (>= 1.3.6)
-      reline (>= 0.2.7)
+      reline (>= 0.3.1)
     diff-lcs (1.5.0)
     docile (1.4.0)
     domain_name (0.5.20190701)
@@ -227,7 +220,7 @@ GEM
       activesupport (>= 3.0)
       nokogiri (>= 1.6)
     io-console (0.5.11)
-    irb (1.4.1)
+    irb (1.4.2)
       reline (>= 0.3.0)
     jbuilder (2.11.5)
       actionview (>= 5.0.0)
@@ -257,6 +250,7 @@ GEM
     llhttp-ffi (0.4.0)
       ffi-compiler (~> 1.0)
       rake (~> 13.0)
+    logster (2.11.3)
     loofah (2.19.0)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
@@ -302,6 +296,7 @@ GEM
     parallel (1.21.0)
     parser (3.1.0.0)
       ast (~> 2.4.1)
+    parslet (2.0.0)
     pg (1.3.1)
     propshaft (0.6.1)
       actionpack (>= 7.0.0)
@@ -532,10 +527,11 @@ DEPENDENCIES
   json_matchers
   kaminari
   listen
-  logster!
+  logster
   mailcatcher
   marcel (~> 1.0)
   nokogiri
+  parslet
   pg
   propshaft
   puma
diff --git a/app/models/bookmark.rb b/app/models/bookmark.rb
index dcc2026a..58da77ce 100644
--- a/app/models/bookmark.rb
+++ b/app/models/bookmark.rb
@@ -49,28 +49,32 @@ class Bookmark < ApplicationRecord
     where(Arel::Nodes.build_quoted(query).eq(any_host))
   }
 
+  # Searches the title for the query OR where the query is similar to
+  # words in an existing bookmark's title
   scope :title_search, ->(query) { where(<<~SQL.squish, { query: }) }
     search_title @@ websearch_to_tsquery(:query)
     OR search_title @@ to_tsquery(
-      (
-        select
-          word
-        from ts_stat('select search_title from bookmarks')
-        where similarity(:query, word) > 0.5
-        order by similarity(:query, word) > 0.5 desc
-        limit 1
-      )
+      (SELECT
+        word
+      FROM ts_stat('SELECT search_title FROM bookmarks')
+      WHERE similarity(:query, word) > 0.5
+      ORDER BY similarity(:query, word) DESC
+      LIMIT 1)
     )
   SQL
 
   scope :search, lambda { |query|
-    left_joins(:tags)
+    joins(:tags)
       .uri_search(query)
       .or(breakdown_search(query))
       .or(title_search(query))
       .or(tag_search(query))
   }
 
+  scope :advanced_search, lambda { |query|
+    BookmarksSearcher.new.search query
+  }
+
   # This has potential performance costs if we start retrying lots of times
   def self.for(user, uri)= find_or_initialize_by(user:, uri:)
 
diff --git a/app/searchers/application_searcher.rb b/app/searchers/application_searcher.rb
new file mode 100644
index 00000000..f8541a3c
--- /dev/null
+++ b/app/searchers/application_searcher.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+# Base class for searchers: subclasses declare an index, a compiler and an
+# execution block; #search parses the query against the index, compiles the
+# resulting AST and hands the compiled result to the execution block.
+class ApplicationSearcher
+  class << self
+    attr_reader :index, :compiler, :search
+
+    def define_index(&)
+      @index = QueryGrammar::Index.build(&)
+    end
+
+    def use_compiler val
+      @compiler = val
+    end
+
+    def execute_query &block
+      @search = block
+    end
+  end
+
+  def search query
+    query
+      .then { QueryGrammar.parse _1, index: self.class.index }
+      .then { self.class.compiler.compile _1 }
+      .then { QueryGrammar::Cloaker.new(bind: self).cloak _1, &self.class.search }
+  end
+end
diff --git a/app/searchers/bookmarks_searcher.rb b/app/searchers/bookmarks_searcher.rb
new file mode 100644
index 00000000..7c33264e
--- /dev/null
+++ b/app/searchers/bookmarks_searcher.rb
@@ -0,0 +1,200 @@
+# frozen_string_literal: true
+
+# Searcher for bookmarks: declares the searchable fields and operators and
+# runs the compiled query against Bookmark.
+class BookmarksSearcher < ApplicationSearcher
+  define_index do
+    # Defines the available types of specialized prefix searches
+    # in the query. The symbol name gets mapped to a function that
+    # registers a prefix
+    type :text do |clause|
+      clause.values.map do |value|
+        QueryGrammar::Ast::MatchClause.new(
+          field: clause.prefix,
+          value:,
+          origin: clause
+        )
+      end
+    end
+
+    type :keyword do |clause|
+      clause.values.map do |value|
+        QueryGrammar::Ast::EqualClause.new(
+          field: clause.prefix,
+          value:,
+          origin: clause
+        )
+      end
+    end
+
+    type :number do |clause|
+      clause.values.map do |value|
+        QueryGrammar::Ast::EqualClause.new(
+          field: clause.prefix,
+          value:,
+          origin: clause
+        )
+      end
+    end
+
+    type :date do |clause|
+      clause.values.map do |value|
+        QueryGrammar::Ast::EqualClause.new(
+          field: clause.prefix,
+          value:,
+          origin: clause
+        )
+      end
+    end
+
+    # This sets up the prefixes that are available in the query
+    # as well as what "type" they parse as, as defined by the types
+    # above
+    field :uri,
+      type: :keyword,
+      name: "URI",
+      description: ""
+
+    field :host,
+      type: :keyword,
+      name: "Host",
+      description: "",
+      sortable: true
+
+    field :title,
+      type: :text,
+      name: "Title",
+      description: "",
+      sortable: true
+
+    field :description,
+      type: :text,
+      name: "Description/Notes",
+      description: "",
+      existable: true
+
+    field :tags,
+      type: :keyword,
+      name: "Tags",
+      description: "",
+      aliases: [ "tag" ],
+      existable: true
+
+    field :created_at,
+      type: :date,
+      name: "Created at Date",
+      description: "",
+      aliases: [ "created_date" ],
+      sortable: true
+
+    # Handles custom prefixes for various other operations such as breaking apart
+    # the created_date field into two pseudo fields "after" and "before" or an
+    # existence "has" or sort helpers
+    operator :after do
+      name "Created After Date"
+      description <<~DESC
+      DESC
+
+      arity 1
+      types :date
+
+      parse do |clause|
+        QueryGrammar::Ast::GtRangeClause.new(
+          field: :created_at,
+          value: clause.values.first,
+          origin: clause
+        )
+      end
+    end
+
+    operator :before do
+      name "Created Before Date"
+      description <<~DESC
+      DESC
+
+      arity 1
+      types :date
+
+      parse do |clause|
+        QueryGrammar::Ast::LtRangeClause.new(
+          field: :created_at,
+          value: clause.values.first,
+          origin: clause
+        )
+      end
+    end
+
+    operator :between do
+      name "Created Between Dates"
+      description <<~DESC
+      DESC
+
+      arity 2
+      types :date
+
+      parse do |clause|
+        QueryGrammar::Ast::RangeClause.new(
+          field: :created_at,
+          low: clause.values.first,
+          high: clause.values.second,
+          origin: clause
+        )
+      end
+    end
+
+    operator :has do
+      name "Has property"
+      description <<~DESC
+      DESC
+
+      parse do |clause|
+        clause.values.map do |value|
+          field = resolve_field value
+
+          QueryGrammar::Ast::ExistClause.new(
+            field:,
+            origin: clause
+          )
+        end
+      end
+    end
+
+    operator :sort do
+      name "Sort Field and Direction"
+      description <<~DESC
+      DESC
+
+      parse do |clause|
+        clause.values.map do |value|
+          field = resolve_field value
+
+          QueryGrammar::Ast::SortClause.new(
+            field:,
+            direction: (clause.unary == "+" ? :asc : :desc),
+            origin: clause
+          )
+        end
+      end
+    end
+
+    fallback do |clause|
+      QueryGrammar::Ast::MatchClause.new(
+        field: :title,
+        value: "#{ clause.unary }#{ clause.prefix }:#{ clause.values.join ' ' }",
+        origin: clause
+      )
+    end
+
+    # Default fields to search on
+    default :title, :tags, :uri
+  end
+
+  use_compiler QueryGrammar::Compiler::Arel.new(Bookmark)
+
+  execute_query do |compiled_query|
+    # pp compiled_query
+    Bookmark.left_joins(:tags)
+      .where(compiled_query[:query])
+      .order(compiled_query[:order])
+  end
+end
diff --git a/app/searchers/query_grammar.rb b/app/searchers/query_grammar.rb
new file mode 100644
index 00000000..cd30d5a7
--- /dev/null
+++ b/app/searchers/query_grammar.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  # Base error for query parsing. Carries the position in the input where the
+  # problem was detected and, when available, the underlying exception.
+  class Error < StandardError
+    attr_reader :line, :column, :original
+
+    def initialize message, line, column, original=nil, *args
+      super(message, *args)
+
+      @line = line
+      @column = column
+      @original = original
+    end
+  end
+
+  ScanError = Class.new Error
+
+  autoload :Cloaker, "query_grammar/cloaker"
+  autoload :Ast, "query_grammar/ast"
+
+  autoload :Index, "query_grammar/index"
+
+  autoload :Parser, "query_grammar/parser"
+  autoload :Transformer, "query_grammar/transformer"
+  autoload :Compiler, "query_grammar/compiler"
+
+  class << self
+    # Scans and parses an input string to generate an AST of the query against
+    # the index
+    #
+    # TODO: Error handling
+    def parse input, index:
+      ast = QueryGrammar::Transformer.new(index).apply scan(input)
+      ast ||= QueryGrammar::Ast::Group.new joiner: :and, items: [], origin: nil
+
+      ast
+    end
+
+    # Generates a parslet parser tree from the input, which is then used with the
+    # transformer to generate an AST of the query, based off of the index
+    #
+    # @see .parse
+    def scan input
+      QueryGrammar::Parser.new.parse input.strip
+    rescue Parslet::ParseFailed => e
+      deepest = deepest_cause e.parse_failure_cause
+      line, column = deepest.source.line_and_column deepest.pos
+
+      message = <<~MSG
+        unexpected input at line #{ line } column #{ column } - #{ deepest.message } #{ input[(column - 1)..] }
+      MSG
+
+      fail ScanError.new(message, line, column, e)
+    rescue SystemStackError => e
+      fail ScanError.new("unexpected input at line 1 column 1 - #{ e }: #{ input }", 1, 1, e)
+    end
+
+    protected
+
+    # Grabs the cause in a tree of causes which has the furthest position in the
+    # input string, assuming it's the clearest and best error message about what
+    # went wrong while parsing the input query
+    def deepest_cause cause
+      return cause unless cause.children.any?
+
+      cause.children
+        .map { |xcause| deepest_cause xcause }
+        .max { |xcause, other| xcause.pos.bytepos <=> other.pos.bytepos }
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast.rb b/app/searchers/query_grammar/ast.rb
new file mode 100644
index 00000000..e7a7995a
--- /dev/null
+++ b/app/searchers/query_grammar/ast.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    autoload :Node, "query_grammar/ast/node"
+
+    autoload :Negator, "query_grammar/ast/negator"
+    autoload :Group, "query_grammar/ast/group"
+
+    autoload :FieldValueClause, "query_grammar/ast/field_value_clause"
+
+    autoload :GtRangeClause, "query_grammar/ast/gt_range_clause"
+    autoload :LtRangeClause, "query_grammar/ast/lt_range_clause"
+    autoload :RangeClause, "query_grammar/ast/range_clause"
+    autoload :MatchClause, "query_grammar/ast/match_clause"
+    autoload :EqualClause, "query_grammar/ast/equal_clause"
+    autoload :ExistClause, "query_grammar/ast/exist_clause"
+    autoload :SortClause, "query_grammar/ast/sort_clause"
+  end
+end
diff --git a/app/searchers/query_grammar/ast/equal_clause.rb b/app/searchers/query_grammar/ast/equal_clause.rb
new file mode 100644
index 00000000..3801e5f2
--- /dev/null
+++ b/app/searchers/query_grammar/ast/equal_clause.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class EqualClause < FieldValueClause
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/exist_clause.rb b/app/searchers/query_grammar/ast/exist_clause.rb
new file mode 100644
index 00000000..5afa6010
--- /dev/null
+++ b/app/searchers/query_grammar/ast/exist_clause.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class ExistClause < Node
+      attr_reader :field
+
+      def initialize field:, **opts
+        super(**opts)
+
+        @field = field
+      end
+
+      def to_s
+        "has:#{ field }"
+      end
+
+      def to_h
+        {
+          exist: {
+            field:,
+          },
+        }
+      end
+
+      def == other
+        return false unless other.is_a? ExistClause
+
+        field == other.field
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/field_value_clause.rb b/app/searchers/query_grammar/ast/field_value_clause.rb
new file mode 100644
index 00000000..0bbf9e05
--- /dev/null
+++ b/app/searchers/query_grammar/ast/field_value_clause.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class FieldValueClause < Node
+      attr_reader :field, :value
+
+      def initialize field: nil, value: nil, **opts
+        super(**opts)
+
+        @field = field
+        @value = value
+      end
+
+      def to_s
+        val = Array(value).map do |entry|
+          next entry.to_s if entry.is_a? Date
+          next "\"#{ entry }\"" if entry.index(" ")
+
+          entry
+        end.join " "
+
+        val = "(#{ val })" if value.is_a? Array
+
+        "#{ field }:#{ val }"
+      end
+
+      def to_h
+        values = Array(value).map do |val|
+          type_hash_value val
+        end
+
+        {
+          self.class.name.to_s.demodulize.underscore => {
+            field:,
+            values:,
+          },
+        }
+      end
+
+      def == other
+        return false unless other.is_a? self.class
+
+        field == other.field && value == other.value
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/group.rb b/app/searchers/query_grammar/ast/group.rb
new file mode 100644
index 00000000..e472891d
--- /dev/null
+++ b/app/searchers/query_grammar/ast/group.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class Group < Node
+      attr_reader :items, :joiner
+
+      def initialize items:, joiner:, **opts
+        super(**opts)
+
+        @items = items
+        @joiner = joiner
+      end
+
+      def to_h
+        {
+          joiner => items.map(&:to_h),
+        }
+      end
+
+      def to_s
+        inside = items.map(&:to_s).join(" #{ joiner.to_s.upcase } ")
+        "(#{ inside })"
+      end
+
+      def == other
+        return false unless other.is_a? Group
+
+        items == other.items && joiner == other.joiner
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/gt_range_clause.rb b/app/searchers/query_grammar/ast/gt_range_clause.rb
new file mode 100644
index 00000000..09a73295
--- /dev/null
+++ b/app/searchers/query_grammar/ast/gt_range_clause.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class GtRangeClause < FieldValueClause
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/lt_range_clause.rb b/app/searchers/query_grammar/ast/lt_range_clause.rb
new file mode 100644
index 00000000..54bd2f1d
--- /dev/null
+++ b/app/searchers/query_grammar/ast/lt_range_clause.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class LtRangeClause < FieldValueClause
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/match_clause.rb b/app/searchers/query_grammar/ast/match_clause.rb
new file mode 100644
index 00000000..5d115d09
--- /dev/null
+++ b/app/searchers/query_grammar/ast/match_clause.rb
@@ -0,0 +1,8 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class MatchClause < FieldValueClause
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/negator.rb b/app/searchers/query_grammar/ast/negator.rb
new file mode 100644
index 00000000..8b0ebf75
--- /dev/null
+++ b/app/searchers/query_grammar/ast/negator.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class Negator < Node
+      attr_reader :items
+
+      def initialize items:, **opts
+        super(**opts)
+
+        @items = items
+      end
+
+      def to_h
+        {
+          not: items.to_h,
+        }
+      end
+
+      def to_s
+        "NOT #{ items }"
+      end
+
+      def == other
+        return false unless other.is_a? Negator
+
+        items == other.items
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/node.rb b/app/searchers/query_grammar/ast/node.rb
new file mode 100644
index 00000000..9b646b74
--- /dev/null
+++ b/app/searchers/query_grammar/ast/node.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class Node
+      attr_reader :origin
+
+      def initialize origin:
+        @origin = origin
+      end
+
+      # Should return a generic, human readable version of the query. Useful
+      # for debugging
+      def to_s
+        fail NotImplementedError
+      end
+
+      # Should return a generic, machine and human readable version of the
+      # query. Useful for debugging and storing
+      def to_h
+        fail NotImplementedError
+      end
+
+      def == _other
+        fail NotImplementedError
+      end
+
+      def accept visitor
+        visitor.send :"visit_#{ self.class.name.to_s.demodulize.underscore }", self
+      end
+
+      def as_json(*)
+        to_h.deep_stringify_keys
+      end
+
+      protected
+
+      def type_hash_value val
+        type = :date if val.is_a? Date
+        type ||= val.index(" ") ? :phrase : :term
+
+        {
+          type:,
+          value: val.as_json,
+        }
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/range_clause.rb b/app/searchers/query_grammar/ast/range_clause.rb
new file mode 100644
index 00000000..886b2e9a
--- /dev/null
+++ b/app/searchers/query_grammar/ast/range_clause.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class RangeClause < Node
+      attr_reader :field, :low, :high
+
+      def initialize field:, low:, high:, **opts
+        super(**opts)
+
+        @field = field
+        @low = low
+        @high = high
+      end
+
+      def to_s
+        "#{ field }:(#{ low } #{ high })"
+      end
+
+      def to_h
+        {
+          range: {
+            field:,
+            low: type_hash_value(low),
+            high: type_hash_value(high),
+          },
+        }
+      end
+
+      def == other
+        return false unless other.is_a? RangeClause
+
+        field == other.field && low == other.low && high == other.high
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/ast/sort_clause.rb b/app/searchers/query_grammar/ast/sort_clause.rb
new file mode 100644
index 00000000..46d80ef4
--- /dev/null
+++ b/app/searchers/query_grammar/ast/sort_clause.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  module Ast
+    class SortClause < Node
+      attr_reader :field, :direction
+
+      def initialize field:, direction:, **opts
+        super(**opts)
+
+        @field = field
+        @direction = direction
+      end
+
+      def to_s
+        direction_unary = direction == :asc ? "+" : "-"
+
+        "#{ direction_unary }sort:#{ field }"
+      end
+
+      def to_h
+        {
+          sort: {
+            field:,
+            direction:,
+          },
+        }
+      end
+
+      def == other
+        return false unless other.is_a? SortClause
+
+        field == other.field && direction == other.direction
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/cloaker.rb b/app/searchers/query_grammar/cloaker.rb
new file mode 100644
index 00000000..78a4d0d1
--- /dev/null
+++ b/app/searchers/query_grammar/cloaker.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  # Ruby blocks don't closure over more than local variables, but we can
+  # get around that with this "cloaker" hack. First we make an
+  # unboundmethod which we rebind to our current instance. We also keep the
+  # blocks original binding and with the method missing magic, proxy out to
+  # it, which allows us to closure over methods and more from where the
+  # block was defined at.
+  class Cloaker < BasicObject
+    attr_reader :bind, :closure
+
+    def initialize bind:, closure: nil
+      @bind = bind
+      @closure = closure
+    end
+
+    def cloak *args, &block
+      # Read the ivar directly: a bare `closure ||=` creates a new local that
+      # shadows the reader, so an explicitly supplied closure was ignored
+      closure = @closure || block.binding
+
+      executor = bind.class.class_eval do
+        define_method :dsl_executor, &block
+        meth = instance_method :dsl_executor
+        remove_method :dsl_executor
+        meth
+      end
+
+      with_closure_from closure do
+        executor.bind(bind).call(*args)
+      end
+    end
+
+    def with_closure_from binding
+      @_parent_binding = binding
+      yield
+    ensure
+      # Always drop the reference, even when the block raises
+      @_parent_binding = nil
+    end
+
+    def respond_to_missing? *args
+      @_parent_binding.respond_to_missing?(*args)
+    end
+
+    def method_missing(method, *args, **opts, &)
+      args << opts if opts.any?
+      @_parent_binding.send(method, *args, &)
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/compiler.rb b/app/searchers/query_grammar/compiler.rb
new file mode 100644
index 00000000..8c6a596a
--- /dev/null
+++ b/app/searchers/query_grammar/compiler.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  class Compiler
+    autoload :Arel, "query_grammar/compiler/arel"
+
+    class << self
+      def visitors
+        @visitors ||= ancestor_hash :visitors
+      end
+
+      def visit klass, &block
+        visitors[ klass ] = block
+      end
+
+      protected
+
+      # Allow classes to inherit their ancestors fields, joiners, types and
+      # operations. This allows for progressively building searchers up, such as
+      # having an ElasticsearchSearcher that defines the operations to build ES
+      # queries, a BookmarksSearcher that inherits those operations while
+      # defining public user facing searchable fields, and an
+      # Admin::BookmarksSearcher which defines additional fields that are
+      # available within the admin panel
+      def ancestor_hash func
+        (ancestors - [self])
+          .select { |i| i.ancestors.include? QueryGrammar::Compiler }
+          .inject({}) { |memo, i| memo.merge i.send func }
+      end
+    end
+
+    def visitors
+      self.class.visitors
+    end
+
+    def compile
+      fail NotImplementedError
+    end
+
+    def respond_to_missing? func, *args
+      name = func.to_s.gsub(%r{^visit_}, "").to_sym
+
+      return true if visitors.key? name
+
+      super
+    end
+
+    # NOTE: rubocop flags this, but it does fall back to super
+    def method_missing func, *args
+      name = func.to_s.gsub(%r{^visit_}, "").to_sym
+
+      return super unless visitors.key? name
+
+      QueryGrammar::Cloaker.new(bind: self).cloak(*args, &visitors[name])
+    rescue => e
+      # Hack to remove this method_missing from the backtrace
+      e.set_backtrace e.backtrace[1..]
+      fail e
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/compiler/arel.rb b/app/searchers/query_grammar/compiler/arel.rb
new file mode 100644
index 00000000..fc7818b6
--- /dev/null
+++ b/app/searchers/query_grammar/compiler/arel.rb
@@ -0,0 +1,141 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  class Compiler
+    # Converts a given query into an ActiveRecord Arel query
+    class Arel < Compiler
+      def initialize model
+        super()
+
+        @model = model
+        @arel_table = model.arel_table
+      end
+
+      def context
+        @context ||= {
+          query: {},
+          order: {},
+        }
+      end
+
+      def compile ast
+        # A single compiler instance is registered per searcher class, so
+        # reset the context or sort clauses leak from one query to the next
+        @context = nil
+        context[:query] = ast.accept self
+
+        context
+      end
+
+      visit :negator do |negator|
+        inside = Array(negator.items).map { |i| i.accept self }.compact
+        # puts "================================================================="
+        # pp negator
+        # puts "------------------"
+        # pp inside
+        # pp inside.any?
+
+        case inside
+        in []
+          nil
+        in [only]
+          only.not
+        in [first, *parts]
+          # Negate the conjunction as a whole rather than passing it through
+          parts.reduce(first) do |memo, part|
+            memo.and part
+          end.not
+        end
+
+        # {
+        #   bool: {
+        #     must_not: inside,
+        #   },
+        # }
+      end
+
+      visit :group do |group|
+        inside = group.items.map { |i| i.accept self }.compact
+        # puts "================================================================="
+        # pp group
+        # puts "------------------"
+        # pp inside
+        # pp inside.any?
+
+        case inside
+        in []
+          nil
+        in [only]
+          only
+        in [first, *parts]
+          parts.reduce(first) do |memo, part|
+            memo.send group.joiner, part
+          end
+        end
+
+        # {
+        #   bool: {
+        #     joiner => inside,
+        #   },
+        # }
+      end
+
+      visit :sort_clause do |clause|
+        context[:order][ clause.field ] = clause.direction
+
+        nil
+      end
+
+      visit :match_clause do |clause|
+        # debugger
+        # type = clause.value.index(" ") ? :match_phrase : :match
+
+        # { type => { clause.field => clause.value } }
+        # TODO: use a tsvector here
+        # @arel_table[clause.field].eq(clause.value)
+
+        # Field names resolved through the index are strings, while the
+        # fallback passes a symbol, so compare on the string form
+        field = clause.field
+        field = :search_title if field.to_s == "title"
+
+        quoted = ::Arel::Nodes.build_quoted(clause.value)
+        tsquery = ::Arel::Nodes::NamedFunction.new("websearch_to_tsquery", [quoted])
+
+        ::Arel::Nodes::InfixOperation.new("@@", @arel_table[field], tsquery)
+
+        # TODO: Add in an option for the field to specify if it should also
+        # include matches from the most similar word in the existing set of
+        # bookmarks or not
+      end
+
+      visit :equal_clause do |clause|
+        # { term: { clause.field => clause.value } }
+        @arel_table[clause.field].eq(clause.value)
+      end
+
+      visit :gt_range_clause do |clause|
+        # { range: { clause.field => { gt: clause.value } } }
+        @arel_table[clause.field].gt(clause.value)
+      end
+
+      visit :lt_range_clause do |clause|
+        # { range: { clause.field => { lt: clause.value } } }
+        @arel_table[clause.field].lt(clause.value)
+      end
+
+      visit :range_clause do |clause|
+        # { range: { clause.field => { gte: clause.low, lte: clause.high } } }
+        @arel_table[clause.field].gteq(clause.low).and(@arel_table[clause.field].lteq(clause.high))
+      end
+
+      visit :exist_clause do |clause|
+        # { exists: { field: clause.field } }
+        # debugger
+        return @model.reflections[clause.field].klass.arel_table[:id].not_eq(nil) if @model.reflections.key? clause.field
+
+        @arel_table[clause.field].not_eq(nil)
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/index.rb b/app/searchers/query_grammar/index.rb
new file mode 100644
index 00000000..4d5b2bbb
--- /dev/null
+++ b/app/searchers/query_grammar/index.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  class Index
+    autoload :Dsl, "query_grammar/index/dsl"
+    autoload :OperatorDsl, "query_grammar/index/operator_dsl"
+
+    def self.build(&) = Dsl.build(&)
+
+    def types
+      @types ||= {}
+    end
+
+    def fields
+      @fields ||= {}
+    end
+
+    def operators
+      @operators ||= {}
+    end
+
+    attr_writer :fallback
+
+    def fallback
+      @fallback ||= ->(v) { v }
+    end
+
+    def sortable_fields
+      fields.filter_map do |(field, data)|
+        [ field, data[:aliases] ] if data[:sortable]
+      end
+    end
+
+    def existable_fields
+      fields.filter_map do |(field, data)|
+        [ field, data[:aliases] ] if data[:existable]
+      end
+    end
+
+    def default_fields
+      fields.filter_map do |(field, data)|
+        field if data[:default]
+      end
+    end
+
+    def aliases
+      # alias => field
+      fields.each_with_object({}) do |(field, data), memo|
+        data[:aliases].each do |aliass|
+          memo[ aliass ] = field
+        end
+      end
+    end
+
+    def resolve_field field_or_alias
+      return field_or_alias unless aliases.key? field_or_alias
+
+      aliases[ field_or_alias ]
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/index/dsl.rb b/app/searchers/query_grammar/index/dsl.rb
new file mode 100644
index 00000000..1b55fdc1
--- /dev/null
+++ b/app/searchers/query_grammar/index/dsl.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  class Index
+    class Dsl
+      def self.build(&)
+        new.tap do |obj|
+          obj.instance_eval(&)
+        end.index
+      end
+
+      def index
+        @index ||= QueryGrammar::Index.new
+      end
+
+      def type key, &block
+        index.types[ key ] = block
+      end
+
+      def field key, type:, **opts
+        fail "Type #{ type } not defined yet!" unless index.types[ type ]
+
+        field = {
+          aliases: (opts.delete(:aliases) || []).map(&:to_s),
+          existable: false,
+          sortable: false,
+          default: false,
+          type:,
+          parse: index.types[ type ],
+          **opts,
+        }
+
+        # rubocop:disable Style/OpenStructUse
+        # This isn't built out of user supplied data and it's a lot more
+        # convenient to use OpenStruct while prototyping
+        index.fields[ key.to_s ] = OpenStruct.new field
+        # rubocop:enable Style/OpenStructUse
+
+        field
+      end
+
+      def operator(prefix, **opts, &)
+        index.operators[ prefix.to_s ] = QueryGrammar::Index::OperatorDsl.build(**opts, &)
+      end
+
+      def fallback(&block)
+        index.fallback = block
+      end
+
+      def default *args
+        args.each do |field|
+          index.fields[ field.to_s ][:default] = true
+        end
+      end
+
+      # def respond_to_missing? func, *args
+      #   return true if index.types[ func ]
+
+      #   super
+      # end
+
+      # def method_missing(func, *args, **opts, &)
+      #   if index.types[ func ]
+      #     field = {
+      #       aliases: (opts.delete(:aliases) || []).map(&:to_s),
+      #       existable: false,
+      #       sortable: false,
+      #       default: false,
+      #       type: func,
+      #       parse: index.types[ func ],
+      #     }.merge opts
+
+      #     # rubocop:disable Style/OpenStructUse
+      #     # This isn't built out of user supplied data and it's a lot more
+      #     # convenient to use OpenStruct while prototyping
+      #     index.fields[ args.first.to_s ] = OpenStruct.new field
+      #     # rubocop:enable Style/OpenStructUse
+
+      #     return field
+      #   end
+
+      #   super
+      # end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/index/operator_dsl.rb b/app/searchers/query_grammar/index/operator_dsl.rb
new file mode 100644
index 00000000..b947ffc6
--- /dev/null
+++ b/app/searchers/query_grammar/index/operator_dsl.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  class Index
+    class OperatorDsl
+      attr_reader :operator
+
+      def self.build(**opts, &)
+        new(**opts).tap do |obj|
+          obj.instance_eval(&)
+        end.operator
+      end
+
+      def initialize **opts
+        op_data = {
+          name: nil,
+          description: nil,
+          arity: nil,
+          type: nil,
+          parse: nil,
+        }.merge opts
+
+        # rubocop:disable Style/OpenStructUse
+        # This isn't built out of user supplied data and it's a lot more
+        # convenient to use OpenStruct while prototyping
+        @operator = OpenStruct.new op_data
+        # rubocop:enable Style/OpenStructUse
+      end
+
+      def respond_to_missing?(*)
+        true
+      end
+
+      def method_missing func, *args, **_opts, &block
+        @operator[ func ] = args.first || block
+      end
+    end
+  end
+end
diff --git a/app/searchers/query_grammar/parser.rb b/app/searchers/query_grammar/parser.rb
new file mode 100644
index 00000000..dc7b1d65
--- /dev/null
+++ b/app/searchers/query_grammar/parser.rb
@@ -0,0 +1,136 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  # Query Grammar Definition
+  # Enables parsing something like this:
+  #
+  #   "middle earth" OR earth AND (tag:computers OR +tag:rendering) AND after:2014-01-01 AND
+  #   before:2018-06-14 AND NOT host:wired.com
+  #
+  # Into a usable data structure that can then be transformed into a usable query AST
+  #
+  # EBNF Grammar:
+  #   Query ::= Expression
+  #
+  #   Expression ::= AndExpression | OrExpression | Group | Clause
+  #
+  #   Group ::= Negator? '(' Space* Expression Space* ')'
+  #
+  #   OrExpression ::= (Clause | Group | AndExpression) (Space+ 'OR' Space+ (Clause | Group | AndExpression))+
+  #
+  #   AndExpression ::= (Group | OrExpression | Clause) ((Space+ 'AND')? Space+ (Group | OrExpression | Clause))+
+  #
+  #   Clause ::= Negator? (Prefix (( Term | TermList )?) | Term)
+  #
+  #   Prefix ::= Unary? Word ':'
+  #
+  #   Negator ::= 'NOT' Space+
+  #   Unary ::= [^a-fA-F0-9]
+  #
+  #   TermList ::= '[' Space* (Term Space*)+ ']'
+  #
+  #   Term ::= Word | Phrase
+  #   Phrase ::= '"' [^"]* '"'
+  #
+  #   Word ::= [^:")(#x9#xA#xD#x20]+
+  #   Space ::= #x9 | #xA | #xD | #x20
+  class Parser < Parslet::Parser
+    # rubocop:disable Layout/ClosingParenthesisIndentation
+    root :query
+
+    rule :query do
+      any.absent? | expression
+    end
+
+    rule :expression do
+      space? >> (infix_group | group | clause).as(:expression) >> space?
+    end
+
+    rule :group do
+      negator.maybe >> str("(") >> space? >> expression >> space? >> str(")")
+    end
+
+    rule :infix_group do
+      infix_expression(
+        (group | clause),
+        [(space >> str("OR") >> space), 2, :left],
+        [((space >> str("AND")).maybe >> space), 1, :left]
+      ) do |l, o, r|
+        op = o.to_s.strip
+        op = "AND" if op.empty?
+
+        left_joiner = l.dig(:group, :joiner)
+        right_joiner = r.dig(:group, :joiner)
+
+        # Merge similar logic into one array rather than deeply nested
+        if left_joiner == op && !right_joiner
+          values = [ l[:group][:values], r ].flatten
+        elsif left_joiner == op && right_joiner == op
+          values = [ l[:group][:values], r[:group][:values] ].flatten
+        elsif right_joiner == op && !left_joiner
+          values = [ r[:group][:values], l ].flatten
+        else
+          values = [ l, r ]
+        end
+
+        { group: { joiner: op, values: } }
+      end
+    end
+
+    # #################################################################################################################
+    # Base functionality
+    # #################################################################################################################
+
+    rule :clause do
+      negator.maybe >> (unary.as(:unary).maybe >>
+        ((prefix >> ( term | term_list ).maybe.as(:value)) |
+          term.as(:value)
+        )).as(:clause)
+    end
+
+    rule :prefix do
+      word.as(:term).as(:prefix) >> str(":")
+    end
+
+    rule :negator do
+      (str("NOT") >> space.repeat(1)).as(:negator).maybe
+    end
+
+    rule :unary do
+      match["^[:alnum:]\":)(\s"].as(:term)
+    end
+
+    rule :term_list do
+      str("[") >> space? >> (term >> space?).repeat(1).as(:term_list) >> str("]")
+    end
+
+    rule :term do
+      date.as(:date) | word.as(:term) | phrase
+    end
+
+    rule :phrase do
+      str('"') >> match['^"'].repeat.as(:phrase) >> str('"')
+    end
+
+    rule :word do
+      match["^:\s\")("].repeat(1)
+    end
+
+    rule :date do
+      digit.repeat(4) >> str("-") >> digit.repeat(2) >> str("-") >> digit.repeat(2)
+    end
+
+    rule :digit do
+      match("[0-9]")
+    end
+
+    rule :space? do
+      space.maybe
+    end
+
+    rule :space do
+      match["\s"].repeat(1)
+    end
+    # rubocop:enable Layout/ClosingParenthesisIndentation
+  end
+end
diff --git a/app/searchers/query_grammar/transformer.rb b/app/searchers/query_grammar/transformer.rb
new file mode 100644
index 00000000..062cd527
--- /dev/null
+++ b/app/searchers/query_grammar/transformer.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+module QueryGrammar
+  class Transformer < Parslet::Transform
+    attr_reader :index
+
+    def initialize(index, *args, &)
+      super(*args, &)
+
+      @index = index
+    end
+
+    # Override this from parslet to allow binding the block to the transform
+    # instance, so that rules can work with the instances index
+    def call_on_match bindings, block
+      return unless block
+
+      return Cloaker.new(bind: self).cloak(bindings, &block) if block.arity == 1
+
+      Cloaker.new(bind: Context.new(bindings)).cloak(&block)
+    end
+
+    def parse_clause subtree_clause
+      values = Array(subtree_clause[:value])
+      base_clause = OpenStruct.new subtree_clause.merge(values:)
+
+      fields = index.resolve_field base_clause.prefix
+      fields ||= index.default_fields
+
+      inner = Array(fields).map do |field|
+        clause = base_clause.dup.tap do |obj|
+          obj[:prefix] = field
+        end
+
+        field_data = index.fields[ field ]&.parse
+        field_data ||= index.operators[ field ]&.parse
+        field_data ||= index.fallback
+
+        # debugger if not field_data
+
+        Cloaker.new(bind: index).cloak(clause, &field_data)
+      end.flatten
+
+      return inner.first if inner.length == 1
+
+      QueryGrammar::Ast::Group.new joiner: :or, items: inner, origin: subtree_clause
+    end
+
+    rule term: simple(:term) do
+      term.to_s
+    end
+
+    rule date: simple(:date) do
+      Date.strptime date, "%Y-%m-%d"
+    end
+
+    rule phrase: simple(:phrase) do
+      phrase.to_s
+    end
+
+    rule term_list: subtree(:term_list) do
+      term_list
+    end
+
+    rule clause: subtree(:clause) do |subtree|
+      parse_clause subtree[:clause]
+    end
+
+    rule negator: simple(:negator), clause: subtree(:clause) do |subtree|
+      inner = parse_clause subtree[:clause]
+
+      QueryGrammar::Ast::Negator.new items: inner, origin: subtree
+    end
+
+    rule group: subtree(:group) do |subtree|
+      group = subtree[:group]
+
+      QueryGrammar::Ast::Group.new items: group[:values], joiner: group[:joiner].downcase.to_sym, origin: subtree
+    end
+
+    rule expression: subtree(:expression) do
+      expression
+    end
+
+    rule negator: simple(:negator), expression: subtree(:expression) do |subtree|
+      QueryGrammar::Ast::Negator.new items: subtree[:expression], origin: subtree
+    end
+  end
+end