From aaf800f04bce2e29a273d234e94eb93f46d2a1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 11:18:07 +0200 Subject: [PATCH 01/24] Mongoid 7 compatibility --- .travis.yml | 5 +++-- Gemfile | 4 +++- lib/mongoid/full_text_search.rb | 14 +++++++------- mongoid_fulltext.gemspec | 2 +- spec/mongoid/full_text_search_spec.rb | 2 +- spec/spec_helper.rb | 2 +- spec/support/mongoid.rb | 2 +- 7 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d3c306..8e48b6a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,11 +2,12 @@ sudo: false matrix: include: + - rvm: 2.5.0 + env: + - MONGOID_VERSION=7.0 - rvm: 2.3.1 env: - MONGOID_VERSION=6.0 - before_script: - - bundle exec danger - rvm: 2.3.1 env: - MONGOID_VERSION=5.0 diff --git a/Gemfile b/Gemfile index 01dcc47..1a8278b 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,8 @@ source 'http://rubygems.org' -case version = ENV['MONGOID_VERSION'] || '6' +case version = ENV['MONGOID_VERSION'] || '7' +when /7/ + gem 'mongoid', '~> 7.0' when /6/ gem 'mongoid', '~> 6.0' when /5/ diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index b9a3fb4..6c3d84a 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -95,7 +95,7 @@ def fulltext_search_ensure_indexes(index_name, config) all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } next unless keys & correct_keys != correct_keys Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.indexes.drop_one(idef['key']) else coll.indexes.drop(idef['key']) @@ -108,14 +108,14 @@ def fulltext_search_ensure_indexes(index_name, config) end Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.indexes.create_one(Hash[index_definition], name: 'fts_index') else coll.indexes.create(Hash[index_definition], name: 'fts_index') end Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.indexes.create_one('document_id' => 1) # to make removes fast else coll.indexes.create('document_id' => 1) # to make removes fast @@ -282,7 +282,7 @@ def all_ngrams(str, config, bound_number_returned = true) def remove_from_ngram_index mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| coll = collection.database[index_name] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('class' => name).delete_many else coll.find('class' => name).remove_all @@ -328,7 +328,7 @@ def update_ngram_index # remove existing ngrams from external index coll = collection.database[index_name.to_sym] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('document_id' => _id).delete_many else coll.find('document_id' => _id).remove_all @@ -352,7 +352,7 @@ def update_ngram_index ngrams.each_pair do |ngram, score| index_document = { 'ngram' => ngram, 'document_id' => _id, 'score' => score, 'class' => self.class.name } index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.insert_one(index_document) else coll.insert(index_document) @@ -364,7 +364,7 @@ def update_ngram_index def remove_from_ngram_index mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| coll = collection.database[index_name] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('document_id' => _id).delete_many else coll.find('document_id' => _id).remove_all diff --git a/mongoid_fulltext.gemspec b/mongoid_fulltext.gemspec index 361ced4..6c48d70 100644 --- a/mongoid_fulltext.gemspec +++ b/mongoid_fulltext.gemspec @@ -13,7 +13,7 @@ Gem::Specification.new do |s| s.homepage = 'https://github.com/mongoid/mongoid_fulltext' s.licenses = ['MIT'] s.summary = 'Full-text search for the Mongoid ORM, using n-grams extracted from text.' - s.add_dependency 'mongoid', '>= 3.0' + s.add_dependency 'mongoid', '>= 3.0', '< 8' s.add_dependency 'mongoid-compatibility' s.add_dependency 'unicode_utils' end diff --git a/spec/mongoid/full_text_search_spec.rb b/spec/mongoid/full_text_search_spec.rb index a385122..693bbe1 100644 --- a/spec/mongoid/full_text_search_spec.rb +++ b/spec/mongoid/full_text_search_spec.rb @@ -597,7 +597,7 @@ context 'incremental' do it 'removes an existing record' do coll = Mongoid.default_session['mongoid_fulltext.index_basicartwork_0'] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('document_id' => flowers1._id).delete_many else coll.find('document_id' => flowers1._id).remove_all diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 4a41651..46d6cfb 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -24,6 +24,6 @@ end c.before :all do Mongoid.logger.level = Logger::INFO - Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? end end diff --git a/spec/support/mongoid.rb b/spec/support/mongoid.rb index 788b045..d73d317 100644 --- a/spec/support/mongoid.rb +++ b/spec/support/mongoid.rb @@ -2,4 +2,4 @@ module Mongoid def self.default_session default_client end -end if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? +end if Mongoid::Compatibility::Version.mongoid5_or_newer? From 65c22023f494001077323df6aee42d8128403e80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 11:18:45 +0200 Subject: [PATCH 02/24] readd danger --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 8e48b6a..63a9a84 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,8 @@ matrix: - rvm: 2.5.0 env: - MONGOID_VERSION=7.0 + before_script: + - bundle exec danger - rvm: 2.3.1 env: - MONGOID_VERSION=6.0 From 4364224e3be6a32e73017395b87ae2d9ee234c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 11:25:29 +0200 Subject: [PATCH 03/24] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68f51cf..6fd4122 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ### 0.8.1 (Next) +* [#35](https://github.com/mongoid/mongoid_fulltext/pull/35): Mongoid 7 compatibility - [@tomasc](https://github.com/tomasc). * Your contribution here. ### 0.8.0 (1/19/2017) From 6a6fd8c265e54977f288abee1ab53fae50a0e61e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 15:55:48 +0200 Subject: [PATCH 04/24] SCI support --- lib/mongoid/full_text_search.rb | 10 +++++++- spec/models/my_doc.rb | 7 ++++++ spec/models/my_further_inherited_doc.rb | 2 ++ spec/models/my_inherited_doc.rb | 2 ++ spec/mongoid/sci_search_spec.rb | 31 +++++++++++++++++++++++++ spec/spec_helper.rb | 7 +++--- 6 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 spec/models/my_doc.rb create mode 100644 spec/models/my_further_inherited_doc.rb create mode 100644 spec/models/my_inherited_doc.rb create mode 100644 spec/mongoid/sci_search_spec.rb diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index 6c3d84a..b3c3f12 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -143,6 +143,7 @@ def fulltext_search(query_string, options = {}) coll = collection.database[index_name] cursors = ngrams.map do |ngram| query = { 'ngram' => ngram[0] } + query.update(document_type_filters) query.update(map_query_filters options) count = coll.find(query).count { ngram: ngram, count: count, query: query } @@ -296,6 +297,13 @@ def update_ngram_index private + # add filter by type according to SCI classes + def document_type_filters + return {} unless fields['_type'].present? + kls = ([self] + descendants).map(&:to_s) + { 'document_type' => { "$in" => kls } } + end + # Take a list of filters to be mapped so they can update the query # used upon the fulltext search of the ngrams def map_query_filters(filters) @@ -350,7 +358,7 @@ def update_ngram_index end # insert new ngrams in external index ngrams.each_pair do |ngram, score| - index_document = { 'ngram' => ngram, 'document_id' => _id, 'score' => score, 'class' => self.class.name } + index_document = { 'ngram' => ngram, 'document_id' => _id, 'document_type' => model_name.to_s, 'score' => score, 'class' => self.class.name } index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.insert_one(index_document) diff --git a/spec/models/my_doc.rb b/spec/models/my_doc.rb new file mode 100644 index 0000000..aab9e8d --- /dev/null +++ b/spec/models/my_doc.rb @@ -0,0 +1,7 @@ +class MyDoc + include Mongoid::Document + include Mongoid::FullTextSearch + + field :title + fulltext_search_in :title +end diff --git a/spec/models/my_further_inherited_doc.rb b/spec/models/my_further_inherited_doc.rb new file mode 100644 index 0000000..2184c03 --- /dev/null +++ b/spec/models/my_further_inherited_doc.rb @@ -0,0 +1,2 @@ +class MyFurtherInheritedDoc < MyInheritedDoc +end diff --git a/spec/models/my_inherited_doc.rb b/spec/models/my_inherited_doc.rb new file mode 100644 index 0000000..3cd0109 --- /dev/null +++ b/spec/models/my_inherited_doc.rb @@ -0,0 +1,2 @@ +class MyInheritedDoc < MyDoc +end diff --git a/spec/mongoid/sci_search_spec.rb b/spec/mongoid/sci_search_spec.rb new file mode 100644 index 0000000..d7651eb --- /dev/null +++ b/spec/mongoid/sci_search_spec.rb @@ -0,0 +1,31 @@ +# coding: utf-8 +require 'spec_helper' + +describe Mongoid::FullTextSearch do + context 'SCI' do + let!(:my_doc) { MyDoc.create!(title: 'My Doc') } + let!(:my_inherited_doc) { MyInheritedDoc.create!(title: 'My Inherited Doc') } + let!(:my_further_inherited_doc) { MyFurtherInheritedDoc.create!(title: 'My Inherited Doc') } + + context 'root class returns results for subclasses' do + let(:result) { MyDoc.fulltext_search("doc") } + it { expect(result).to include my_doc } + it { expect(result).to include my_inherited_doc } + it { expect(result).to include my_further_inherited_doc } + end + + context 'child class does not return superclass' do + let(:result) { MyInheritedDoc.fulltext_search("doc") } + it { expect(result).not_to include my_doc } + it { expect(result).to include my_inherited_doc } + it { expect(result).to include my_further_inherited_doc } + end + + context 'child class does not return superclass' do + let(:result) { MyFurtherInheritedDoc.fulltext_search("doc") } + it { expect(result).not_to include my_doc } + it { expect(result).not_to include my_inherited_doc } + it { expect(result).to include my_further_inherited_doc } + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 46d6cfb..e313fa9 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -15,6 +15,9 @@ config.connect_to('mongoid_fulltext_test') end +Mongoid.logger.level = Logger::INFO +Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? + RSpec.configure do |c| c.before :each do Mongoid.purge! @@ -22,8 +25,4 @@ c.after :all do Mongoid.purge! end - c.before :all do - Mongoid.logger.level = Logger::INFO - Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? - end end From 9f57c799cb5186f70dd4d9c65b8cd130903547e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 16:25:44 +0200 Subject: [PATCH 05/24] add support for criteria --- lib/mongoid/full_text_search.rb | 6 +++++- spec/models/my_doc.rb | 2 ++ spec/mongoid/criteria_search_spec.rb | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 spec/mongoid/criteria_search_spec.rb diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index b3c3f12..e1f16aa 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -193,7 +193,11 @@ def fulltext_search(query_string, options = {}) end def instantiate_mapreduce_result(result) - result[:clazz].constantize.find(result[:id]) + if criteria.selector.empty? + result[:clazz].constantize.find(result[:id]) + else + criteria.where(_id: result[:id]).first + end end def instantiate_mapreduce_results(results, options) diff --git a/spec/models/my_doc.rb b/spec/models/my_doc.rb index aab9e8d..0a87892 100644 --- a/spec/models/my_doc.rb +++ b/spec/models/my_doc.rb @@ -3,5 +3,7 @@ class MyDoc include Mongoid::FullTextSearch field :title + field :value, type: Integer + fulltext_search_in :title end diff --git a/spec/mongoid/criteria_search_spec.rb b/spec/mongoid/criteria_search_spec.rb new file mode 100644 index 0000000..6854b9f --- /dev/null +++ b/spec/mongoid/criteria_search_spec.rb @@ -0,0 +1,20 @@ +# coding: utf-8 +require 'spec_helper' + +describe Mongoid::FullTextSearch do + context 'Criteria' do + let!(:my_doc_1) { MyDoc.create!(title: 'My Doc 1') } + let!(:my_doc_2) { MyDoc.create!(title: 'My Doc 2', value: 10) } + + let(:result) do + begin + MyDoc.where(value: 10).fulltext_search("doc") + rescue + nil + end + end + + it { expect(result).not_to include my_doc_1 } + it { expect(result).to include my_doc_2 } + end +end From d4f634cd0cba4d9bccbad824efe220cdd0dc2c2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 16:31:14 +0200 Subject: [PATCH 06/24] update README --- README.md | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 04ceb96..ca20e95 100644 --- a/README.md +++ b/README.md @@ -249,6 +249,40 @@ the AND of all of the individual results for each of the fields. Finally, if a f but criteria for that filter aren't passed to `fulltext_search`, the result is as if the filter had never been defined - you see both models that both pass and fail the filter in the results. +SCI Support +----------- + +The search respects SCI. From the spec: + +```ruby +class MyDoc + include Mongoid::Document + include Mongoid::FullTextSearch + + field :title + fulltext_search_in :title +end + +class MyInheritedDoc < MyDoc +end +``` + +```ruby +MyDoc.fulltext_search(…) # => will return both MyDoc as well as MyInheritedDoc documents +MyInheritedDoc.fulltext_search(…) # => will return only MyInheritedDoc documents +``` + +Criteria Support +---------------- + +It is also possible to pre-empt the search with Monogid criteria: + +```ruby +MyDoc.where(value: 10).fulltext_search(…) +``` + +Please not that this will not work in case an index is shared by multiple classes (that are not connected through inheritance). + Indexing Options ---------------- @@ -397,4 +431,3 @@ Copyright and License MIT License, see [LICENSE](LICENSE) for details. (c) 2011-2017 [Artsy Inc.](http://artsy.github.io) - From 70c7ea6d8422c822414861c1efab5e9b7fc59720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 16:31:24 +0200 Subject: [PATCH 07/24] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca20e95..17d76db 100644 --- a/README.md +++ b/README.md @@ -281,7 +281,7 @@ It is also possible to pre-empt the search with Monogid criteria: MyDoc.where(value: 10).fulltext_search(…) ``` -Please not that this will not work in case an index is shared by multiple classes (that are not connected through inheritance). +Please note that this will not work in case an index is shared by multiple classes (that are not connected through inheritance). Indexing Options ---------------- From 8d6afe2cb654b86a6958b3e90f1c912e98386a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 16:34:46 +0200 Subject: [PATCH 08/24] further explain --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 17d76db..b0709e4 100644 --- a/README.md +++ b/README.md @@ -281,7 +281,7 @@ It is also possible to pre-empt the search with Monogid criteria: MyDoc.where(value: 10).fulltext_search(…) ``` -Please note that this will not work in case an index is shared by multiple classes (that are not connected through inheritance). +Please note that this will not work in case an index is shared by multiple classes (that are not connected through inheritance), since a criteria applies only to one class. Indexing Options ---------------- From e6a1a090fc2ba562beab8b126f47f97aeb05b443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 17:04:00 +0200 Subject: [PATCH 09/24] cleanup --- spec/mongoid/criteria_search_spec.rb | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/spec/mongoid/criteria_search_spec.rb b/spec/mongoid/criteria_search_spec.rb index 6854b9f..7c94570 100644 --- a/spec/mongoid/criteria_search_spec.rb +++ b/spec/mongoid/criteria_search_spec.rb @@ -6,13 +6,7 @@ let!(:my_doc_1) { MyDoc.create!(title: 'My Doc 1') } let!(:my_doc_2) { MyDoc.create!(title: 'My Doc 2', value: 10) } - let(:result) do - begin - MyDoc.where(value: 10).fulltext_search("doc") - rescue - nil - end - end + let(:result) { MyDoc.where(value: 10).fulltext_search("doc") } it { expect(result).not_to include my_doc_1 } it { expect(result).to include my_doc_2 } From 94a2361f0b6a00cf324b3dbff326e6ab805a47b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 17:07:11 +0200 Subject: [PATCH 10/24] mongoid-compatibility added _or_newer? in 0.5.1 --- mongoid_fulltext.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongoid_fulltext.gemspec b/mongoid_fulltext.gemspec index 6c48d70..f54c13c 100644 --- a/mongoid_fulltext.gemspec +++ b/mongoid_fulltext.gemspec @@ -14,6 +14,6 @@ Gem::Specification.new do |s| s.licenses = ['MIT'] s.summary = 'Full-text search for the Mongoid ORM, using n-grams extracted from text.' s.add_dependency 'mongoid', '>= 3.0', '< 8' - s.add_dependency 'mongoid-compatibility' + s.add_dependency 'mongoid-compatibility', '>= 0.5.1' s.add_dependency 'unicode_utils' end From 878b210b450ea37308a63940596c63665eb9ad28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 17:11:35 +0200 Subject: [PATCH 11/24] add database_cleaner --- mongoid_fulltext.gemspec | 1 + spec/spec_helper.rb | 15 +++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/mongoid_fulltext.gemspec b/mongoid_fulltext.gemspec index f54c13c..681aa60 100644 --- a/mongoid_fulltext.gemspec +++ b/mongoid_fulltext.gemspec @@ -16,4 +16,5 @@ Gem::Specification.new do |s| s.add_dependency 'mongoid', '>= 3.0', '< 8' s.add_dependency 'mongoid-compatibility', '>= 0.5.1' s.add_dependency 'unicode_utils' + s.add_development_dependency 'database_cleaner' end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 46d6cfb..13e2950 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -3,6 +3,7 @@ require 'rspec' require 'mongoid' +require 'database_cleaner' ENV['MONGOID_ENV'] = 'test' @@ -15,15 +16,17 @@ config.connect_to('mongoid_fulltext_test') end +Mongoid.logger.level = Logger::INFO +Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? + +DatabaseCleaner.orm = :mongoid +DatabaseCleaner.strategy = :truncation + RSpec.configure do |c| c.before :each do - Mongoid.purge! + DatabaseCleaner.clean end c.after :all do - Mongoid.purge! - end - c.before :all do - Mongoid.logger.level = Logger::INFO - Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? + DatabaseCleaner.clean end end From 3bf0a2a85900a64ecc21eee5ab4232e970222ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 18:47:44 +0200 Subject: [PATCH 12/24] localized fields support --- lib/mongoid/full_text_search.rb | 120 ++++++++++++++++---------- spec/models/my_localized_doc.rb | 8 ++ spec/mongoid/localized_fields_spec.rb | 29 +++++++ 3 files changed, 110 insertions(+), 47 deletions(-) create mode 100644 spec/models/my_localized_doc.rb create mode 100644 spec/mongoid/localized_fields_spec.rb diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index e1f16aa..ec518bc 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -70,10 +70,18 @@ def fulltext_search_in(*args) def create_fulltext_indexes return unless mongoid_fulltext_config mongoid_fulltext_config.each_pair do |index_name, fulltext_config| - fulltext_search_ensure_indexes(index_name, fulltext_config) + ::I18n.available_locales.each do |locale| + fulltext_search_ensure_indexes(localized_index_name(index_name, locale), fulltext_config) + end end end + def localized_index_name(index_name, locale) + return index_name unless fields.values.any?(&:localized?) + return index_name unless ::I18n.available_locales.count > 1 + "#{index_name}_#{locale}" + end + def fulltext_search_ensure_indexes(index_name, config) db = collection.database coll = db[index_name] @@ -131,6 +139,7 @@ def fulltext_search(query_string, options = {}) end index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first + loc_index_name = localized_index_name(index_name, ::I18n.locale) # Options hash should only contain filters after this point ngrams = all_ngrams(query_string, mongoid_fulltext_config[index_name]) @@ -140,7 +149,7 @@ def fulltext_search(query_string, options = {}) # get a count of the number of index documents containing that n-gram ordering = { 'score' => -1 } limit = mongoid_fulltext_config[index_name][:max_candidate_set_size] - coll = collection.database[index_name] + coll = collection.database[loc_index_name] cursors = ngrams.map do |ngram| query = { 'ngram' => ngram[0] } query.update(document_type_filters) @@ -286,11 +295,13 @@ def all_ngrams(str, config, bound_number_returned = true) def remove_from_ngram_index mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| - coll = collection.database[index_name] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('class' => name).delete_many - else - coll.find('class' => name).remove_all + ::I18n.available_locales.each do |locale| + coll = collection.database[localized_index_name(index_name, locale)] + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.find('class' => name).delete_many + else + coll.find('class' => name).remove_all + end end end end @@ -329,45 +340,58 @@ def format_query_filter(operator, key, value) def update_ngram_index mongoid_fulltext_config.each_pair do |index_name, fulltext_config| - if condition = fulltext_config[:update_if] - case condition - when Symbol then next unless send condition - when String then next unless instance_eval condition - when Proc then next unless condition.call self - else; next + ::I18n.available_locales.each do |locale| + loc_index_name = self.class.localized_index_name(index_name, locale) + + if condition = fulltext_config[:update_if] + case condition + when Symbol then next unless send condition + when String then next unless instance_eval condition + when Proc then next unless condition.call self + else; next + end end - end - # remove existing ngrams from external index - coll = collection.database[index_name.to_sym] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('document_id' => _id).delete_many - else - coll.find('document_id' => _id).remove_all - end - # extract ngrams from fields - field_values = fulltext_config[:ngram_fields].map { |field| send(field) } - ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) } - return if ngrams.empty? - # apply filters, if necessary - filter_values = nil - if fulltext_config.key?(:filters) - filter_values = Hash[fulltext_config[:filters].map do |key, value| - begin - [key, value.call(self)] - rescue - # Suppress any exceptions caused by filters - end - end.compact] - end - # insert new ngrams in external index - ngrams.each_pair do |ngram, score| - index_document = { 'ngram' => ngram, 'document_id' => _id, 'document_type' => model_name.to_s, 'score' => score, 'class' => self.class.name } - index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) + # remove existing ngrams from external index + coll = collection.database[loc_index_name.to_sym] if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.insert_one(index_document) + coll.find('document_id' => _id).delete_many else - coll.insert(index_document) + coll.find('document_id' => _id).remove_all + end + # extract ngrams from fields + field_values = fulltext_config[:ngram_fields].map do |field_name| + next send(field_name) if field_name == :to_s + next unless field = self.class.fields[field_name.to_s] + if field.localized? + send("#{field_name}_translations")[locale] + else + send(field_name) + end + end + + ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) } + return if ngrams.empty? + # apply filters, if necessary + filter_values = nil + if fulltext_config.key?(:filters) + filter_values = Hash[fulltext_config[:filters].map do |key, value| + begin + [key, value.call(self)] + rescue + # Suppress any exceptions caused by filters + end + end.compact] + end + # insert new ngrams in external index + ngrams.each_pair do |ngram, score| + index_document = { 'ngram' => ngram, 'document_id' => _id, 'document_type' => model_name.to_s, 'score' => score, 'class' => self.class.name } + index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.insert_one(index_document) + else + coll.insert(index_document) + end end end end @@ -375,11 +399,13 @@ def update_ngram_index def remove_from_ngram_index mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| - coll = collection.database[index_name] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('document_id' => _id).delete_many - else - coll.find('document_id' => _id).remove_all + ::I18n.available_locales.each do |locale| + coll = collection.database[self.class.localized_index_name(index_name, locale)] + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.find('document_id' => _id).delete_many + else + coll.find('document_id' => _id).remove_all + end end end end diff --git a/spec/models/my_localized_doc.rb b/spec/models/my_localized_doc.rb new file mode 100644 index 0000000..5c6e116 --- /dev/null +++ b/spec/models/my_localized_doc.rb @@ -0,0 +1,8 @@ +class MyLocalizedDoc + include Mongoid::Document + include Mongoid::FullTextSearch + + field :title, localize: true + + fulltext_search_in :title +end diff --git a/spec/mongoid/localized_fields_spec.rb b/spec/mongoid/localized_fields_spec.rb new file mode 100644 index 0000000..2cf46a8 --- /dev/null +++ b/spec/mongoid/localized_fields_spec.rb @@ -0,0 +1,29 @@ +# coding: utf-8 +require 'spec_helper' + +describe Mongoid::FullTextSearch do + context 'Localized fields' do + let!(:my_doc) { MyLocalizedDoc.create!(title_translations: { en: 'Title', cs: "Nazev" }) } + + before(:each) do + @default_locale = ::I18n.locale + ::I18n.locale = locale + end + + after(:each) do + ::I18n.locale = @default_locale + end + + context 'en' do + let(:locale) { :en } + it { expect(MyLocalizedDoc.fulltext_search("title")).to include my_doc } + it { expect(MyLocalizedDoc.fulltext_search("nazev")).not_to include my_doc } + end + + context 'cs' do + let(:locale) { :cs } + it { expect(MyLocalizedDoc.fulltext_search("title")).not_to include my_doc } + it { expect(MyLocalizedDoc.fulltext_search("nazev")).to include my_doc } + end + end +end From 8e818bd042d1e5523cf2f4e97ec8be9b76b8615f Mon Sep 17 00:00:00 2001 From: dblock Date: Sat, 5 May 2018 15:43:33 -0400 Subject: [PATCH 13/24] Use database_cleaner. --- .travis.yml | 2 +- mongoid_fulltext.gemspec | 3 ++- spec/spec_helper.rb | 15 +++++++++------ 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8d3c306..31acee2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,6 @@ services: mongodb addons: apt: sources: - - mongodb-3.2-precise + - mongodb-3.4-precise packages: - mongodb-org-server diff --git a/mongoid_fulltext.gemspec b/mongoid_fulltext.gemspec index 361ced4..5b8b62a 100644 --- a/mongoid_fulltext.gemspec +++ b/mongoid_fulltext.gemspec @@ -14,6 +14,7 @@ Gem::Specification.new do |s| s.licenses = ['MIT'] s.summary = 'Full-text search for the Mongoid ORM, using n-grams extracted from text.' s.add_dependency 'mongoid', '>= 3.0' - s.add_dependency 'mongoid-compatibility' + s.add_dependency 'mongoid-compatibility', '>= 0.5.1' s.add_dependency 'unicode_utils' + s.add_development_dependency 'database_cleaner' end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 4a41651..849707d 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -3,6 +3,7 @@ require 'rspec' require 'mongoid' +require 'database_cleaner' ENV['MONGOID_ENV'] = 'test' @@ -11,19 +12,21 @@ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f } Dir["#{File.dirname(__FILE__)}/models/**/*.rb"].each { |f| require f } +DatabaseCleaner.orm = :mongoid +DatabaseCleaner.strategy = :truncation + +Mongoid.logger.level = Logger::INFO +Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? + Mongoid.configure do |config| config.connect_to('mongoid_fulltext_test') end RSpec.configure do |c| c.before :each do - Mongoid.purge! + DatabaseCleaner.clean end c.after :all do - Mongoid.purge! - end - c.before :all do - Mongoid.logger.level = Logger::INFO - Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + DatabaseCleaner.clean end end From 0c1b25b40606272ea9264c1d0c9186f7b44a8787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sun, 6 May 2018 09:50:57 +0200 Subject: [PATCH 14/24] split into modules --- lib/mongoid/full_text_search.rb | 425 +-------------------- lib/mongoid/full_text_search/config.rb | 11 + lib/mongoid/full_text_search/indexable.rb | 13 - lib/mongoid/full_text_search/indexes.rb | 73 +++- lib/mongoid/full_text_search/mappings.rb | 131 +++++++ lib/mongoid/full_text_search/ngrams.rb | 98 +++++ lib/mongoid/full_text_search/searchable.rb | 126 ++++++ lib/mongoid_fulltext.rb | 21 + 8 files changed, 468 insertions(+), 430 deletions(-) create mode 100644 lib/mongoid/full_text_search/config.rb delete mode 100644 lib/mongoid/full_text_search/indexable.rb create mode 100644 lib/mongoid/full_text_search/mappings.rb create mode 100644 lib/mongoid/full_text_search/ngrams.rb create mode 100644 lib/mongoid/full_text_search/searchable.rb diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index ec518bc..495a989 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -1,412 +1,17 @@ -require 'mongoid' -require 'mongoid/compatibility' - -if Mongoid::Compatibility::Version.mongoid3? - require 'mongoid/full_text_search/indexes' -else - require 'mongoid/full_text_search/indexable' -end - -require 'unicode_utils' -require 'cgi' - -module Mongoid::FullTextSearch - extend ActiveSupport::Concern - - included do - cattr_accessor :mongoid_fulltext_config - end - - class UnspecifiedIndexError < StandardError; end - class UnknownFilterQueryOperator < StandardError; end - - module ClassMethods - def fulltext_search_in(*args) - self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil? - options = args.last.is_a?(Hash) ? args.pop : {} - if options.key?(:index_name) - index_name = options[:index_name] - else - index_name = 'mongoid_fulltext.index_%s_%s' % [name.downcase, mongoid_fulltext_config.count] - end - - config = { - alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', - word_separators: "-_ \n\t", - ngram_width: 3, - max_ngrams_to_search: 6, - apply_prefix_scoring_to_all_words: true, - index_full_words: true, - index_short_prefixes: false, - max_candidate_set_size: 1000, - remove_accents: true, - reindex_immediately: true, - stop_words: Hash[%w(i a s t me my we he it am is be do an if - or as of at by to up in on no so our you him - his she her its who are was has had did the and - but for out off why how all any few nor not own - too can don now ours your hers they them what whom - this that were been have does with into from down over - then once here when both each more most some such only - same than very will just yours their which these those - being doing until while about after above below under - again there where other myself itself theirs having during - before should himself herself because against between through - further yourself ourselves yourselves themselves).map { |x| [x, true] }] - } - - config.update(options) - - args = [:to_s] if args.empty? - config[:ngram_fields] = args - config[:alphabet] = Hash[config[:alphabet].split('').map { |ch| [ch, ch] }] - config[:word_separators] = Hash[config[:word_separators].split('').map { |ch| [ch, ch] }] - mongoid_fulltext_config[index_name] = config - - before_save(:update_ngram_index) if config[:reindex_immediately] - before_destroy :remove_from_ngram_index - end - - def create_fulltext_indexes - return unless mongoid_fulltext_config - mongoid_fulltext_config.each_pair do |index_name, fulltext_config| - ::I18n.available_locales.each do |locale| - fulltext_search_ensure_indexes(localized_index_name(index_name, locale), fulltext_config) - end - end - end - - def localized_index_name(index_name, locale) - return index_name unless fields.values.any?(&:localized?) - return index_name unless ::I18n.available_locales.count > 1 - "#{index_name}_#{locale}" - end - - def fulltext_search_ensure_indexes(index_name, config) - db = collection.database - coll = db[index_name] - - # The order of filters matters when the same index is used from two or more collections. - filter_indexes = (config[:filters] || []).map do |key, _value| - ["filter_values.#{key}", 1] - end.sort_by { |filter_index| filter_index[0] } - - index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes) - - # Since the definition of the index could have changed, we'll clean up by - # removing any indexes that aren't on the exact. - correct_keys = index_definition.map { |field_def| field_def[0] } - all_filter_keys = filter_indexes.map { |field_def| field_def[0] } - coll.indexes.each do |idef| - keys = idef['key'].keys - next unless keys.member?('ngram') - all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } - next unless keys & correct_keys != correct_keys - Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.indexes.drop_one(idef['key']) - else - coll.indexes.drop(idef['key']) - end - end - - if all_filter_keys.length > filter_indexes.length - filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by { |filter_index| filter_index[0] } - index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes) - end - - Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.indexes.create_one(Hash[index_definition], name: 'fts_index') - else - coll.indexes.create(Hash[index_definition], name: 'fts_index') - end - - Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.indexes.create_one('document_id' => 1) # to make removes fast - else - coll.indexes.create('document_id' => 1) # to make removes fast - end - end - - def fulltext_search(query_string, options = {}) - max_results = options.key?(:max_results) ? options.delete(:max_results) : 10 - return_scores = options.key?(:return_scores) ? options.delete(:return_scores) : false - if mongoid_fulltext_config.count > 1 && !options.key?(:index) - error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter' - fail UnspecifiedIndexError, error_message % name, caller - end - index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first - - loc_index_name = localized_index_name(index_name, ::I18n.locale) - # Options hash should only contain filters after this point - - ngrams = all_ngrams(query_string, mongoid_fulltext_config[index_name]) - return [] if ngrams.empty? - - # For each ngram, construct the query we'll use to pull index documents and - # get a count of the number of index documents containing that n-gram - ordering = { 'score' => -1 } - limit = mongoid_fulltext_config[index_name][:max_candidate_set_size] - coll = collection.database[loc_index_name] - cursors = ngrams.map do |ngram| - query = { 'ngram' => ngram[0] } - query.update(document_type_filters) - query.update(map_query_filters options) - count = coll.find(query).count - { ngram: ngram, count: count, query: query } - end.sort! { |record1, record2| record1[:count] <=> record2[:count] } - - # Using the queries we just constructed and the n-gram frequency counts we - # just computed, pull in about *:max_candidate_set_size* candidates by - # considering the n-grams in order of increasing frequency. When we've - # spent all *:max_candidate_set_size* candidates, pull the top-scoring - # *max_results* candidates for each remaining n-gram. - results_so_far = 0 - candidates_list = cursors.map do |doc| - next if doc[:count] == 0 - query_result = coll.find(doc[:query]) - if results_so_far >= limit - query_result = query_result.sort(ordering).limit(max_results) - elsif doc[:count] > limit - results_so_far - query_result = query_result.sort(ordering).limit(limit - results_so_far) - end - results_so_far += doc[:count] - ngram_score = ngrams[doc[:ngram][0]] - Hash[query_result.map do |candidate| - [candidate['document_id'], - { clazz: candidate['class'], score: candidate['score'] * ngram_score }] - end] - end.compact - - # Finally, score all candidates by matching them up with other candidates that are - # associated with the same document. This is similar to how you might process a - # boolean AND query, except that with an AND query, you'd stop after considering - # the first candidate list and matching its candidates up with candidates from other - # lists, whereas here we want the search to be a little fuzzier so we'll run through - # all candidate lists, removing candidates as we match them up. - all_scores = [] - until candidates_list.empty? - candidates = candidates_list.pop - scores = candidates.map do |candidate_id, data| - { id: candidate_id, - clazz: data[:clazz], - score: data[:score] + candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum - } - end - all_scores.concat(scores) - end - all_scores.sort! { |document1, document2| -document1[:score] <=> -document2[:score] } - instantiate_mapreduce_results(all_scores[0..max_results - 1], return_scores: return_scores) - end - - def instantiate_mapreduce_result(result) - if criteria.selector.empty? - result[:clazz].constantize.find(result[:id]) - else - criteria.where(_id: result[:id]).first - end - end - - def instantiate_mapreduce_results(results, options) - if options[:return_scores] - results.map { |result| [instantiate_mapreduce_result(result), result[:score]] }.find_all { |result| !result[0].nil? } - else - results.map { |result| instantiate_mapreduce_result(result) }.compact - end - end - - def all_ngrams(str, config, bound_number_returned = true) - return {} if str.nil? - - if config[:remove_accents] - if defined?(UnicodeUtils) - str = UnicodeUtils.nfkd(str) - elsif defined?(DiacriticsFu) - str = DiacriticsFu.escape(str) - end - end - - # Remove any characters that aren't in the alphabet and aren't word separators - filtered_str = str.mb_chars.downcase.to_s.split('').find_all { |ch| config[:alphabet][ch] || config[:word_separators][ch] }.join('') - - # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams, - # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter - # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'. - if bound_number_returned - step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max - else - step_size = 1 - end - - # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the - # input string using the step size that we just computed. Let score(x,y) be the score of string x - # compared with string y - assigning scores to ngrams with the square root-based scoring function - # below and multiplying scores of matching ngrams together yields a score function that has the - # property that score(x,y) > score(x,z) for any string z containing y and score(x,y) > score(x,z) - # for any string z contained in y. - ngram_array = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i| - if i == 0 || (config[:apply_prefix_scoring_to_all_words] && \ - config[:word_separators].key?(filtered_str[i - 1].chr)) - score = Math.sqrt(1 + 1.0 / filtered_str.length) - else - score = Math.sqrt(2.0 / filtered_str.length) - end - { ngram: filtered_str[i..i + config[:ngram_width] - 1], score: score } - end - - # If an ngram appears multiple times in the query string, keep the max score - ngram_array = ngram_array.group_by { |h| h[:ngram] }.map { |key, values| { ngram: key, score: values.map { |v| v[:score] }.max } } - - if config[:index_short_prefixes] || config[:index_full_words] - split_regex_def = config[:word_separators].keys.map { |k| Regexp.escape(k) }.join - split_regex = Regexp.compile("[#{split_regex_def}]") - all_words = filtered_str.split(split_regex) - end - - # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1) - if config[:index_short_prefixes] - prefixes_seen = {} - all_words.each do |word| - next if word.length < config[:ngram_width] - 1 - prefix = word[0...config[:ngram_width] - 1] - if prefixes_seen[prefix].nil? && (config[:stop_words][word].nil? || word == filtered_str) - ngram_array << { ngram: prefix, score: 1 + 1.0 / filtered_str.length } - prefixes_seen[prefix] = true - end - end - end - - # Add records to the array of ngrams for each full word in the string that isn't a stop word - if config[:index_full_words] - full_words_seen = {} - all_words.each do |word| - if word.length > 1 && full_words_seen[word].nil? && (config[:stop_words][word].nil? || word == filtered_str) - ngram_array << { ngram: word, score: 1 + 1.0 / filtered_str.length } - full_words_seen[word] = true - end - end - end - - # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of the two scores - Hash[ngram_array.group_by { |h| h[:ngram] }.map { |key, values| [key, values.map { |v| v[:score] }.sum] }] - end - - def remove_from_ngram_index - mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| - ::I18n.available_locales.each do |locale| - coll = collection.database[localized_index_name(index_name, locale)] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('class' => name).delete_many - else - coll.find('class' => name).remove_all - end - end - end - end - - def update_ngram_index - all.each(&:update_ngram_index) - end - - private - - # add filter by type according to SCI classes - def document_type_filters - return {} unless fields['_type'].present? - kls = ([self] + descendants).map(&:to_s) - { 'document_type' => { "$in" => kls } } - end - - # Take a list of filters to be mapped so they can update the query - # used upon the fulltext search of the ngrams - def map_query_filters(filters) - Hash[filters.map do|key, value| - case value - when Hash then - if value.key? :any then format_query_filter('$in', key, value[:any]) - elsif value.key? :all then format_query_filter('$all', key, value[:all]) - else fail UnknownFilterQueryOperator, value.keys.join(','), caller end - else format_query_filter('$all', key, value) - end - end] - end - - def format_query_filter(operator, key, value) - ['filter_values.%s' % key, { operator => [value].flatten }] - end - end - - def update_ngram_index - mongoid_fulltext_config.each_pair do |index_name, fulltext_config| - ::I18n.available_locales.each do |locale| - loc_index_name = self.class.localized_index_name(index_name, locale) - - if condition = fulltext_config[:update_if] - case condition - when Symbol then next unless send condition - when String then next unless instance_eval condition - when Proc then next unless condition.call self - else; next - end - end - - # remove existing ngrams from external index - coll = collection.database[loc_index_name.to_sym] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('document_id' => _id).delete_many - else - coll.find('document_id' => _id).remove_all - end - # extract ngrams from fields - field_values = fulltext_config[:ngram_fields].map do |field_name| - next send(field_name) if field_name == :to_s - next unless field = self.class.fields[field_name.to_s] - if field.localized? - send("#{field_name}_translations")[locale] - else - send(field_name) - end - end - - ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) } - return if ngrams.empty? - # apply filters, if necessary - filter_values = nil - if fulltext_config.key?(:filters) - filter_values = Hash[fulltext_config[:filters].map do |key, value| - begin - [key, value.call(self)] - rescue - # Suppress any exceptions caused by filters - end - end.compact] - end - # insert new ngrams in external index - ngrams.each_pair do |ngram, score| - index_document = { 'ngram' => ngram, 'document_id' => _id, 'document_type' => model_name.to_s, 'score' => score, 'class' => self.class.name } - index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.insert_one(index_document) - else - coll.insert(index_document) - end - end - end - end - end - - def remove_from_ngram_index - mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| - ::I18n.available_locales.each do |locale| - coll = collection.database[self.class.localized_index_name(index_name, locale)] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('document_id' => _id).delete_many - else - coll.find('document_id' => _id).remove_all - end - end - end +require 'mongoid/full_text_search/config' +require 'mongoid/full_text_search/indexes' +require 'mongoid/full_text_search/mappings' +require 'mongoid/full_text_search/ngrams' +require 'mongoid/full_text_search/searchable' + +module Mongoid + module FullTextSearch + extend ActiveSupport::Concern + + include Config + include Indexes + include Mappings + include Ngrams + include Searchable end end diff --git a/lib/mongoid/full_text_search/config.rb b/lib/mongoid/full_text_search/config.rb new file mode 100644 index 0000000..1bcd6ed --- /dev/null +++ b/lib/mongoid/full_text_search/config.rb @@ -0,0 +1,11 @@ +module Mongoid + module FullTextSearch + module Config + extend ActiveSupport::Concern + + included do + cattr_accessor :mongoid_fulltext_config + end + end + end +end diff --git a/lib/mongoid/full_text_search/indexable.rb b/lib/mongoid/full_text_search/indexable.rb deleted file mode 100644 index 976578c..0000000 --- a/lib/mongoid/full_text_search/indexable.rb +++ /dev/null @@ -1,13 +0,0 @@ -# hook onto model index creation to create related FT indexes -module Mongoid - module Indexable - module ClassMethods - alias_method :create_fulltext_indexes_hook, :create_indexes - - def create_indexes - create_fulltext_indexes if respond_to?(:create_fulltext_indexes) - create_fulltext_indexes_hook - end - end - end -end diff --git a/lib/mongoid/full_text_search/indexes.rb b/lib/mongoid/full_text_search/indexes.rb index 9c399ca..5358ff1 100644 --- a/lib/mongoid/full_text_search/indexes.rb +++ b/lib/mongoid/full_text_search/indexes.rb @@ -1,12 +1,71 @@ -# hook onto model index creation to create related FT indexes module Mongoid - module Indexes - module ClassMethods - alias_method :create_fulltext_indexes_hook, :create_indexes + module FullTextSearch + module Indexes + extend ActiveSupport::Concern - def create_indexes - create_fulltext_indexes if respond_to?(:create_fulltext_indexes) - create_fulltext_indexes_hook + module ClassMethods + def create_fulltext_indexes + return unless mongoid_fulltext_config + mongoid_fulltext_config.each_pair do |index_name, fulltext_config| + ::I18n.available_locales.each do |locale| + fulltext_search_ensure_indexes(localized_index_name(index_name, locale), fulltext_config) + end + end + end + + def localized_index_name(index_name, locale) + return index_name unless fields.values.any?(&:localized?) + return index_name unless ::I18n.available_locales.count > 1 + "#{index_name}_#{locale}" + end + + def fulltext_search_ensure_indexes(index_name, config) + db = collection.database + coll = db[index_name] + + # The order of filters matters when the same index is used from two or more collections. + filter_indexes = (config[:filters] || []).map do |key, _value| + ["filter_values.#{key}", 1] + end.sort_by { |filter_index| filter_index[0] } + + index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes) + + # Since the definition of the index could have changed, we'll clean up by + # removing any indexes that aren't on the exact. + correct_keys = index_definition.map { |field_def| field_def[0] } + all_filter_keys = filter_indexes.map { |field_def| field_def[0] } + coll.indexes.each do |idef| + keys = idef['key'].keys + next unless keys.member?('ngram') + all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } + next unless keys & correct_keys != correct_keys + Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.indexes.drop_one(idef['key']) + else + coll.indexes.drop(idef['key']) + end + end + + if all_filter_keys.length > filter_indexes.length + filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by { |filter_index| filter_index[0] } + index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes) + end + + Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.indexes.create_one(Hash[index_definition], name: 'fts_index') + else + coll.indexes.create(Hash[index_definition], name: 'fts_index') + end + + Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.indexes.create_one('document_id' => 1) # to make removes fast + else + coll.indexes.create('document_id' => 1) # to make removes fast + end + end end end end diff --git a/lib/mongoid/full_text_search/mappings.rb b/lib/mongoid/full_text_search/mappings.rb new file mode 100644 index 0000000..48ad96d --- /dev/null +++ b/lib/mongoid/full_text_search/mappings.rb @@ -0,0 +1,131 @@ +module Mongoid + module FullTextSearch + module Mappings + extend ActiveSupport::Concern + + module ClassMethods + def fulltext_search_in(*args) + self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil? + options = args.last.is_a?(Hash) ? args.pop : {} + if options.key?(:index_name) + index_name = options[:index_name] + else + index_name = 'mongoid_fulltext.index_%s_%s' % [name.downcase, mongoid_fulltext_config.count] + end + + config = { + alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', + word_separators: "-_ \n\t", + ngram_width: 3, + max_ngrams_to_search: 6, + apply_prefix_scoring_to_all_words: true, + index_full_words: true, + index_short_prefixes: false, + max_candidate_set_size: 1000, + remove_accents: true, + reindex_immediately: true, + stop_words: Hash[%w(i a s t me my we he it am is be do an if + or as of at by to up in on no so our you him + his she her its who are was has had did the and + but for out off why how all any few nor not own + too can don now ours your hers they them what whom + this that were been have does with into from down over + then once here when both each more most some such only + same than very will just yours their which these those + being doing until while about after above below under + again there where other myself itself theirs having during + before should himself herself because against between through + further yourself ourselves yourselves themselves).map { |x| [x, true] }] + } + + config.update(options) + + args = [:to_s] if args.empty? + config[:ngram_fields] = args + config[:alphabet] = Hash[config[:alphabet].split('').map { |ch| [ch, ch] }] + config[:word_separators] = Hash[config[:word_separators].split('').map { |ch| [ch, ch] }] + mongoid_fulltext_config[index_name] = config + + before_save(:update_ngram_index) if config[:reindex_immediately] + before_destroy :remove_from_ngram_index + end + + def update_ngram_index + all.each(&:update_ngram_index) + end + end + + def update_ngram_index + mongoid_fulltext_config.each_pair do |index_name, fulltext_config| + ::I18n.available_locales.each do |locale| + loc_index_name = self.class.localized_index_name(index_name, locale) + + if condition = fulltext_config[:update_if] + case condition + when Symbol then next unless send condition + when String then next unless instance_eval condition + when Proc then next unless condition.call self + else; next + end + end + + # remove existing ngrams from external index + coll = collection.database[loc_index_name.to_sym] + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.find('document_id' => _id).delete_many + else + coll.find('document_id' => _id).remove_all + end + # extract ngrams from fields + field_values = fulltext_config[:ngram_fields].map do |field_name| + next send(field_name) if field_name == :to_s + next unless field = self.class.fields[field_name.to_s] + if field.localized? + send("#{field_name}_translations")[locale] + else + send(field_name) + end + end + + ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) } + return if ngrams.empty? + # apply filters, if necessary + filter_values = nil + if fulltext_config.key?(:filters) + filter_values = Hash[fulltext_config[:filters].map do |key, value| + begin + [key, value.call(self)] + rescue + # Suppress any exceptions caused by filters + end + end.compact] + end + # insert new ngrams in external index + ngrams.each_pair do |ngram, score| + index_document = { 'ngram' => ngram, 'document_id' => _id, 'document_type' => model_name.to_s, 'score' => score, 'class' => self.class.name } + index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.insert_one(index_document) + else + coll.insert(index_document) + end + end + end + end + end + + def remove_from_ngram_index + mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| + ::I18n.available_locales.each do |locale| + coll = collection.database[self.class.localized_index_name(index_name, locale)] + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.find('document_id' => _id).delete_many + else + coll.find('document_id' => _id).remove_all + end + end + end + end + end + end +end diff --git a/lib/mongoid/full_text_search/ngrams.rb b/lib/mongoid/full_text_search/ngrams.rb new file mode 100644 index 0000000..63a0b85 --- /dev/null +++ b/lib/mongoid/full_text_search/ngrams.rb @@ -0,0 +1,98 @@ +module Mongoid + module FullTextSearch + module Ngrams + extend ActiveSupport::Concern + + module ClassMethods + def all_ngrams(str, config, bound_number_returned = true) + return {} if str.nil? + + if config[:remove_accents] + if defined?(UnicodeUtils) + str = UnicodeUtils.nfkd(str) + elsif defined?(DiacriticsFu) + str = DiacriticsFu.escape(str) + end + end + + # Remove any characters that aren't in the alphabet and aren't word separators + filtered_str = str.mb_chars.downcase.to_s.split('').find_all { |ch| config[:alphabet][ch] || config[:word_separators][ch] }.join('') + + # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams, + # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter + # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'. + if bound_number_returned + step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max + else + step_size = 1 + end + + # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the + # input string using the step size that we just computed. Let score(x,y) be the score of string x + # compared with string y - assigning scores to ngrams with the square root-based scoring function + # below and multiplying scores of matching ngrams together yields a score function that has the + # property that score(x,y) > score(x,z) for any string z containing y and score(x,y) > score(x,z) + # for any string z contained in y. + ngram_array = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i| + if i == 0 || (config[:apply_prefix_scoring_to_all_words] && \ + config[:word_separators].key?(filtered_str[i - 1].chr)) + score = Math.sqrt(1 + 1.0 / filtered_str.length) + else + score = Math.sqrt(2.0 / filtered_str.length) + end + { ngram: filtered_str[i..i + config[:ngram_width] - 1], score: score } + end + + # If an ngram appears multiple times in the query string, keep the max score + ngram_array = ngram_array.group_by { |h| h[:ngram] }.map { |key, values| { ngram: key, score: values.map { |v| v[:score] }.max } } + + if config[:index_short_prefixes] || config[:index_full_words] + split_regex_def = config[:word_separators].keys.map { |k| Regexp.escape(k) }.join + split_regex = Regexp.compile("[#{split_regex_def}]") + all_words = filtered_str.split(split_regex) + end + + # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1) + if config[:index_short_prefixes] + prefixes_seen = {} + all_words.each do |word| + next if word.length < config[:ngram_width] - 1 + prefix = word[0...config[:ngram_width] - 1] + if prefixes_seen[prefix].nil? && (config[:stop_words][word].nil? || word == filtered_str) + ngram_array << { ngram: prefix, score: 1 + 1.0 / filtered_str.length } + prefixes_seen[prefix] = true + end + end + end + + # Add records to the array of ngrams for each full word in the string that isn't a stop word + if config[:index_full_words] + full_words_seen = {} + all_words.each do |word| + if word.length > 1 && full_words_seen[word].nil? && (config[:stop_words][word].nil? || word == filtered_str) + ngram_array << { ngram: word, score: 1 + 1.0 / filtered_str.length } + full_words_seen[word] = true + end + end + end + + # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of the two scores + Hash[ngram_array.group_by { |h| h[:ngram] }.map { |key, values| [key, values.map { |v| v[:score] }.sum] }] + end + + def remove_from_ngram_index + mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| + ::I18n.available_locales.each do |locale| + coll = collection.database[localized_index_name(index_name, locale)] + if Mongoid::Compatibility::Version.mongoid5_or_newer? + coll.find('class' => name).delete_many + else + coll.find('class' => name).remove_all + end + end + end + end + end + end + end +end diff --git a/lib/mongoid/full_text_search/searchable.rb b/lib/mongoid/full_text_search/searchable.rb new file mode 100644 index 0000000..cc8998e --- /dev/null +++ b/lib/mongoid/full_text_search/searchable.rb @@ -0,0 +1,126 @@ +module Mongoid + module FullTextSearch + class UnspecifiedIndexError < StandardError; end + class UnknownFilterQueryOperator < StandardError; end + + module Searchable + extend ActiveSupport::Concern + + module ClassMethods + def fulltext_search(query_string, options = {}) + max_results = options.key?(:max_results) ? options.delete(:max_results) : 10 + return_scores = options.key?(:return_scores) ? options.delete(:return_scores) : false + if mongoid_fulltext_config.count > 1 && !options.key?(:index) + error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter' + fail UnspecifiedIndexError, error_message % name, caller + end + index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first + + loc_index_name = localized_index_name(index_name, ::I18n.locale) + # Options hash should only contain filters after this point + + ngrams = all_ngrams(query_string, mongoid_fulltext_config[index_name]) + return [] if ngrams.empty? + + # For each ngram, construct the query we'll use to pull index documents and + # get a count of the number of index documents containing that n-gram + ordering = { 'score' => -1 } + limit = mongoid_fulltext_config[index_name][:max_candidate_set_size] + coll = collection.database[loc_index_name] + cursors = ngrams.map do |ngram| + query = { 'ngram' => ngram[0] } + query.update(document_type_filters) + query.update(map_query_filters options) + count = coll.find(query).count + { ngram: ngram, count: count, query: query } + end.sort! { |record1, record2| record1[:count] <=> record2[:count] } + + # Using the queries we just constructed and the n-gram frequency counts we + # just computed, pull in about *:max_candidate_set_size* candidates by + # considering the n-grams in order of increasing frequency. When we've + # spent all *:max_candidate_set_size* candidates, pull the top-scoring + # *max_results* candidates for each remaining n-gram. + results_so_far = 0 + candidates_list = cursors.map do |doc| + next if doc[:count] == 0 + query_result = coll.find(doc[:query]) + if results_so_far >= limit + query_result = query_result.sort(ordering).limit(max_results) + elsif doc[:count] > limit - results_so_far + query_result = query_result.sort(ordering).limit(limit - results_so_far) + end + results_so_far += doc[:count] + ngram_score = ngrams[doc[:ngram][0]] + Hash[query_result.map do |candidate| + [candidate['document_id'], + { clazz: candidate['class'], score: candidate['score'] * ngram_score }] + end] + end.compact + + # Finally, score all candidates by matching them up with other candidates that are + # associated with the same document. This is similar to how you might process a + # boolean AND query, except that with an AND query, you'd stop after considering + # the first candidate list and matching its candidates up with candidates from other + # lists, whereas here we want the search to be a little fuzzier so we'll run through + # all candidate lists, removing candidates as we match them up. + all_scores = [] + until candidates_list.empty? + candidates = candidates_list.pop + scores = candidates.map do |candidate_id, data| + { id: candidate_id, + clazz: data[:clazz], + score: data[:score] + candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum + } + end + all_scores.concat(scores) + end + all_scores.sort! { |document1, document2| -document1[:score] <=> -document2[:score] } + instantiate_mapreduce_results(all_scores[0..max_results - 1], return_scores: return_scores) + end + + def instantiate_mapreduce_result(result) + if criteria.selector.empty? + result[:clazz].constantize.find(result[:id]) + else + criteria.where(_id: result[:id]).first + end + end + + def instantiate_mapreduce_results(results, options) + if options[:return_scores] + results.map { |result| [instantiate_mapreduce_result(result), result[:score]] }.find_all { |result| !result[0].nil? } + else + results.map { |result| instantiate_mapreduce_result(result) }.compact + end + end + + private + + # add filter by type according to SCI classes + def document_type_filters + return {} unless fields['_type'].present? + kls = ([self] + descendants).map(&:to_s) + { 'document_type' => { "$in" => kls } } + end + + # Take a list of filters to be mapped so they can update the query + # used upon the fulltext search of the ngrams + def map_query_filters(filters) + Hash[filters.map do|key, value| + case value + when Hash then + if value.key? :any then format_query_filter('$in', key, value[:any]) + elsif value.key? :all then format_query_filter('$all', key, value[:all]) + else fail UnknownFilterQueryOperator, value.keys.join(','), caller end + else format_query_filter('$all', key, value) + end + end] + end + + def format_query_filter(operator, key, value) + ['filter_values.%s' % key, { operator => [value].flatten }] + end + end + end + end +end diff --git a/lib/mongoid_fulltext.rb b/lib/mongoid_fulltext.rb index 25a0538..b741bb8 100644 --- a/lib/mongoid_fulltext.rb +++ b/lib/mongoid_fulltext.rb @@ -1 +1,22 @@ +require 'mongoid' +require 'mongoid/compatibility' + require 'mongoid/full_text_search' + +require 'unicode_utils' +require 'cgi' + +module Mongoid + module CreateIndexesPatch + def create_indexes + create_fulltext_indexes if respond_to?(:create_fulltext_indexes) + super + end + end +end + +if Mongoid::Compatibility::Version.mongoid3? + Mongoid::Indexes::ClassMethods.send(:prepend, Mongoid::CreateIndexesPatch) +else + Mongoid::Indexable::ClassMethods.send(:prepend, Mongoid::CreateIndexesPatch) +end From d1bd3ff8bc970d34fb68bea96681466ad1b9ae58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sun, 6 May 2018 09:53:43 +0200 Subject: [PATCH 15/24] beautify --- lib/mongoid/full_text_search/indexes.rb | 4 +++- lib/mongoid/full_text_search/mappings.rb | 16 ++++++++-------- lib/mongoid/full_text_search/ngrams.rb | 20 ++++++++++---------- lib/mongoid/full_text_search/searchable.rb | 15 +++++++-------- lib/mongoid/full_text_search/version.rb | 2 +- 5 files changed, 29 insertions(+), 28 deletions(-) diff --git a/lib/mongoid/full_text_search/indexes.rb b/lib/mongoid/full_text_search/indexes.rb index 5358ff1..54837a4 100644 --- a/lib/mongoid/full_text_search/indexes.rb +++ b/lib/mongoid/full_text_search/indexes.rb @@ -8,7 +8,9 @@ def create_fulltext_indexes return unless mongoid_fulltext_config mongoid_fulltext_config.each_pair do |index_name, fulltext_config| ::I18n.available_locales.each do |locale| - fulltext_search_ensure_indexes(localized_index_name(index_name, locale), fulltext_config) + fulltext_search_ensure_indexes( + localized_index_name(index_name, locale), fulltext_config + ) end end end diff --git a/lib/mongoid/full_text_search/mappings.rb b/lib/mongoid/full_text_search/mappings.rb index 48ad96d..1e7c4cb 100644 --- a/lib/mongoid/full_text_search/mappings.rb +++ b/lib/mongoid/full_text_search/mappings.rb @@ -7,11 +7,11 @@ module ClassMethods def fulltext_search_in(*args) self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil? options = args.last.is_a?(Hash) ? args.pop : {} - if options.key?(:index_name) - index_name = options[:index_name] - else - index_name = 'mongoid_fulltext.index_%s_%s' % [name.downcase, mongoid_fulltext_config.count] - end + index_name = if options.key?(:index_name) + options[:index_name] + else + format('mongoid_fulltext.index_%s_%s', name.downcase, mongoid_fulltext_config.count) + end config = { alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', @@ -24,7 +24,7 @@ def fulltext_search_in(*args) max_candidate_set_size: 1000, remove_accents: true, reindex_immediately: true, - stop_words: Hash[%w(i a s t me my we he it am is be do an if + stop_words: Hash[%w[i a s t me my we he it am is be do an if or as of at by to up in on no so our you him his she her its who are was has had did the and but for out off why how all any few nor not own @@ -35,7 +35,7 @@ def fulltext_search_in(*args) being doing until while about after above below under again there where other myself itself theirs having during before should himself herself because against between through - further yourself ourselves yourselves themselves).map { |x| [x, true] }] + further yourself ourselves yourselves themselves].map { |x| [x, true] }] } config.update(options) @@ -95,7 +95,7 @@ def update_ngram_index filter_values = Hash[fulltext_config[:filters].map do |key, value| begin [key, value.call(self)] - rescue + rescue StandardError # Suppress any exceptions caused by filters end end.compact] diff --git a/lib/mongoid/full_text_search/ngrams.rb b/lib/mongoid/full_text_search/ngrams.rb index 63a0b85..88e5511 100644 --- a/lib/mongoid/full_text_search/ngrams.rb +++ b/lib/mongoid/full_text_search/ngrams.rb @@ -21,11 +21,11 @@ def all_ngrams(str, config, bound_number_returned = true) # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams, # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'. - if bound_number_returned - step_size = [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max - else - step_size = 1 - end + step_size = if bound_number_returned + [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max + else + 1 + end # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the # input string using the step size that we just computed. Let score(x,y) be the score of string x @@ -34,12 +34,12 @@ def all_ngrams(str, config, bound_number_returned = true) # property that score(x,y) > score(x,z) for any string z containing y and score(x,y) > score(x,z) # for any string z contained in y. ngram_array = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i| - if i == 0 || (config[:apply_prefix_scoring_to_all_words] && \ + score = if i == 0 || (config[:apply_prefix_scoring_to_all_words] && \ config[:word_separators].key?(filtered_str[i - 1].chr)) - score = Math.sqrt(1 + 1.0 / filtered_str.length) - else - score = Math.sqrt(2.0 / filtered_str.length) - end + Math.sqrt(1 + 1.0 / filtered_str.length) + else + Math.sqrt(2.0 / filtered_str.length) + end { ngram: filtered_str[i..i + config[:ngram_width] - 1], score: score } end diff --git a/lib/mongoid/full_text_search/searchable.rb b/lib/mongoid/full_text_search/searchable.rb index cc8998e..cb00985 100644 --- a/lib/mongoid/full_text_search/searchable.rb +++ b/lib/mongoid/full_text_search/searchable.rb @@ -12,7 +12,7 @@ def fulltext_search(query_string, options = {}) return_scores = options.key?(:return_scores) ? options.delete(:return_scores) : false if mongoid_fulltext_config.count > 1 && !options.key?(:index) error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter' - fail UnspecifiedIndexError, error_message % name, caller + raise UnspecifiedIndexError, error_message % name, caller end index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first @@ -30,7 +30,7 @@ def fulltext_search(query_string, options = {}) cursors = ngrams.map do |ngram| query = { 'ngram' => ngram[0] } query.update(document_type_filters) - query.update(map_query_filters options) + query.update(map_query_filters(options)) count = coll.find(query).count { ngram: ngram, count: count, query: query } end.sort! { |record1, record2| record1[:count] <=> record2[:count] } @@ -69,8 +69,7 @@ def fulltext_search(query_string, options = {}) scores = candidates.map do |candidate_id, data| { id: candidate_id, clazz: data[:clazz], - score: data[:score] + candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum - } + score: data[:score] + candidates_list.map { |others| (others.delete(candidate_id) || { score: 0 })[:score] }.sum } end all_scores.concat(scores) end @@ -100,25 +99,25 @@ def instantiate_mapreduce_results(results, options) def document_type_filters return {} unless fields['_type'].present? kls = ([self] + descendants).map(&:to_s) - { 'document_type' => { "$in" => kls } } + { 'document_type' => { '$in' => kls } } end # Take a list of filters to be mapped so they can update the query # used upon the fulltext search of the ngrams def map_query_filters(filters) - Hash[filters.map do|key, value| + Hash[filters.map do |key, value| case value when Hash then if value.key? :any then format_query_filter('$in', key, value[:any]) elsif value.key? :all then format_query_filter('$all', key, value[:all]) - else fail UnknownFilterQueryOperator, value.keys.join(','), caller end + else raise UnknownFilterQueryOperator, value.keys.join(','), caller end else format_query_filter('$all', key, value) end end] end def format_query_filter(operator, key, value) - ['filter_values.%s' % key, { operator => [value].flatten }] + [format('filter_values.%s', key), { operator => [value].flatten }] end end end diff --git a/lib/mongoid/full_text_search/version.rb b/lib/mongoid/full_text_search/version.rb index 7f8fa0f..aaa05f1 100644 --- a/lib/mongoid/full_text_search/version.rb +++ b/lib/mongoid/full_text_search/version.rb @@ -1,5 +1,5 @@ module Mongoid module FullTextSearch - VERSION = '0.8.1' + VERSION = '0.8.1'.freeze end end From ad88a0d938ca2ad2e0d63a5c9edcbbb49bfbb1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sun, 6 May 2018 10:39:20 +0200 Subject: [PATCH 16/24] cleanup index definition --- .../full_text_search/index_definition.rb | 52 ++++++++++++++++++ lib/mongoid/full_text_search/indexes.rb | 53 +++++-------------- 2 files changed, 66 insertions(+), 39 deletions(-) create mode 100644 lib/mongoid/full_text_search/index_definition.rb diff --git a/lib/mongoid/full_text_search/index_definition.rb b/lib/mongoid/full_text_search/index_definition.rb new file mode 100644 index 0000000..f8f4853 --- /dev/null +++ b/lib/mongoid/full_text_search/index_definition.rb @@ -0,0 +1,52 @@ +module Mongoid + module FullTextSearch + class IndexDefinition < Struct.new(:coll, :filters) + def self.call(*args) + new(*args).call + end + + def call + res = index_definition + all_filter_keys = filter_indexes.map(&:first) + + # Since the definition of the index could have changed, we'll clean up by + # removing any indexes that aren't on the exact. + coll.indexes.each do |idef| + keys = idef['key'].keys + next unless keys.member?('ngram') + all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } + next unless keys & correct_keys != correct_keys + Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger + coll.indexes.send drop_index_method_name, idef['key'] + end + + if all_filter_keys.length > filter_indexes.length + updated_filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by(&:first) + res = [['ngram', 1], ['score', -1]].concat(updated_filter_indexes) + end + + res + end + + private + + def drop_index_method_name + return :drop unless Mongoid::Compatibility::Version.mongoid5_or_newer? + :drop_one + end + + # The order of filters matters when the same index is used from two or more collections. + def filter_indexes + filters.map { |key, _| ["filter_values.#{key}", 1] }.sort_by(&:first) + end + + def index_definition + [['ngram', 1], ['score', -1]].concat(filter_indexes) + end + + def correct_keys + index_definition.map { |field_def| field_def[0] } + end + end + end +end diff --git a/lib/mongoid/full_text_search/indexes.rb b/lib/mongoid/full_text_search/indexes.rb index 54837a4..36cd04a 100644 --- a/lib/mongoid/full_text_search/indexes.rb +++ b/lib/mongoid/full_text_search/indexes.rb @@ -1,3 +1,5 @@ +require 'mongoid/full_text_search/index_definition' + module Mongoid module FullTextSearch module Indexes @@ -6,6 +8,7 @@ module Indexes module ClassMethods def create_fulltext_indexes return unless mongoid_fulltext_config + mongoid_fulltext_config.each_pair do |index_name, fulltext_config| ::I18n.available_locales.each do |locale| fulltext_search_ensure_indexes( @@ -24,49 +27,21 @@ def localized_index_name(index_name, locale) def fulltext_search_ensure_indexes(index_name, config) db = collection.database coll = db[index_name] + filters = config.fetch(:filters, []) + index_definition = IndexDefinition.call(coll, filters) - # The order of filters matters when the same index is used from two or more collections. - filter_indexes = (config[:filters] || []).map do |key, _value| - ["filter_values.#{key}", 1] - end.sort_by { |filter_index| filter_index[0] } - - index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes) - - # Since the definition of the index could have changed, we'll clean up by - # removing any indexes that aren't on the exact. - correct_keys = index_definition.map { |field_def| field_def[0] } - all_filter_keys = filter_indexes.map { |field_def| field_def[0] } - coll.indexes.each do |idef| - keys = idef['key'].keys - next unless keys.member?('ngram') - all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } - next unless keys & correct_keys != correct_keys - Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.indexes.drop_one(idef['key']) - else - coll.indexes.drop(idef['key']) - end - end + Mongoid.logger.info("Ensuring fts_index on #{coll.name}: #{index_definition}") if Mongoid.logger + coll.indexes.send create_index_method_name, Hash[index_definition], name: 'fts_index' - if all_filter_keys.length > filter_indexes.length - filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by { |filter_index| filter_index[0] } - index_definition = [['ngram', 1], ['score', -1]].concat(filter_indexes) - end + Mongoid.logger.info("Ensuring document_id index on #{coll.name}") if Mongoid.logger + coll.indexes.send create_index_method_name, { document_id: 1 } + end - Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.indexes.create_one(Hash[index_definition], name: 'fts_index') - else - coll.indexes.create(Hash[index_definition], name: 'fts_index') - end + private - Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.indexes.create_one('document_id' => 1) # to make removes fast - else - coll.indexes.create('document_id' => 1) # to make removes fast - end + def create_index_method_name + return :create unless Mongoid::Compatibility::Version.mongoid5_or_newer? + :create_one end end end From 27fc03561f8bc9786ab675fdb397817cc5f5cb0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sun, 6 May 2018 11:07:39 +0200 Subject: [PATCH 17/24] mappings --- lib/mongoid/full_text_search.rb | 8 ++ lib/mongoid/full_text_search/config.rb | 4 +- .../full_text_search/index_definition.rb | 9 +- lib/mongoid/full_text_search/indexes.rb | 11 +- lib/mongoid/full_text_search/mappings.rb | 128 +++++++++--------- lib/mongoid/full_text_search/searchable.rb | 5 +- 6 files changed, 80 insertions(+), 85 deletions(-) diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index 495a989..6c1af56 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -6,6 +6,14 @@ module Mongoid module FullTextSearch + class UnknownFilterQueryOperator < StandardError; end + class UnspecifiedIndexError < StandardError; end + + CREATE_INDEX_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :create_one : :create + DELETE_FROM_INDEX_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :delete_many : :remove_all + DROP_INDEX_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :drop_one : :drop + INSERT_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :insert_one : :insert + extend ActiveSupport::Concern include Config diff --git a/lib/mongoid/full_text_search/config.rb b/lib/mongoid/full_text_search/config.rb index 1bcd6ed..f12fed2 100644 --- a/lib/mongoid/full_text_search/config.rb +++ b/lib/mongoid/full_text_search/config.rb @@ -4,7 +4,9 @@ module Config extend ActiveSupport::Concern included do - cattr_accessor :mongoid_fulltext_config + cattr_accessor :mongoid_fulltext_config do + {} + end end end end diff --git a/lib/mongoid/full_text_search/index_definition.rb b/lib/mongoid/full_text_search/index_definition.rb index f8f4853..f3770a6 100644 --- a/lib/mongoid/full_text_search/index_definition.rb +++ b/lib/mongoid/full_text_search/index_definition.rb @@ -17,7 +17,7 @@ def call all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } next unless keys & correct_keys != correct_keys Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger - coll.indexes.send drop_index_method_name, idef['key'] + coll.indexes.send DROP_INDEX_METHOD_NAME, idef['key'] end if all_filter_keys.length > filter_indexes.length @@ -28,13 +28,6 @@ def call res end - private - - def drop_index_method_name - return :drop unless Mongoid::Compatibility::Version.mongoid5_or_newer? - :drop_one - end - # The order of filters matters when the same index is used from two or more collections. def filter_indexes filters.map { |key, _| ["filter_values.#{key}", 1] }.sort_by(&:first) diff --git a/lib/mongoid/full_text_search/indexes.rb b/lib/mongoid/full_text_search/indexes.rb index 36cd04a..01d9025 100644 --- a/lib/mongoid/full_text_search/indexes.rb +++ b/lib/mongoid/full_text_search/indexes.rb @@ -31,17 +31,10 @@ def fulltext_search_ensure_indexes(index_name, config) index_definition = IndexDefinition.call(coll, filters) Mongoid.logger.info("Ensuring fts_index on #{coll.name}: #{index_definition}") if Mongoid.logger - coll.indexes.send create_index_method_name, Hash[index_definition], name: 'fts_index' + coll.indexes.send CREATE_INDEX_METHOD_NAME, Hash[index_definition], name: 'fts_index' Mongoid.logger.info("Ensuring document_id index on #{coll.name}") if Mongoid.logger - coll.indexes.send create_index_method_name, { document_id: 1 } - end - - private - - def create_index_method_name - return :create unless Mongoid::Compatibility::Version.mongoid5_or_newer? - :create_one + coll.indexes.send CREATE_INDEX_METHOD_NAME, { document_id: 1 } end end end diff --git a/lib/mongoid/full_text_search/mappings.rb b/lib/mongoid/full_text_search/mappings.rb index 1e7c4cb..bf4b819 100644 --- a/lib/mongoid/full_text_search/mappings.rb +++ b/lib/mongoid/full_text_search/mappings.rb @@ -5,54 +5,58 @@ module Mappings module ClassMethods def fulltext_search_in(*args) - self.mongoid_fulltext_config = {} if mongoid_fulltext_config.nil? options = args.last.is_a?(Hash) ? args.pop : {} - index_name = if options.key?(:index_name) - options[:index_name] - else - format('mongoid_fulltext.index_%s_%s', name.downcase, mongoid_fulltext_config.count) - end - config = { - alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', - word_separators: "-_ \n\t", - ngram_width: 3, - max_ngrams_to_search: 6, - apply_prefix_scoring_to_all_words: true, - index_full_words: true, - index_short_prefixes: false, - max_candidate_set_size: 1000, - remove_accents: true, - reindex_immediately: true, - stop_words: Hash[%w[i a s t me my we he it am is be do an if - or as of at by to up in on no so our you him - his she her its who are was has had did the and - but for out off why how all any few nor not own - too can don now ours your hers they them what whom - this that were been have does with into from down over - then once here when both each more most some such only - same than very will just yours their which these those - being doing until while about after above below under - again there where other myself itself theirs having during - before should himself herself because against between through - further yourself ourselves yourselves themselves].map { |x| [x, true] }] - } + index_name = options.fetch(:index_name) do + "mongoid_fulltext.index_#{name.downcase}_#{mongoid_fulltext_config.count}" + end - config.update(options) + config = default_config.update(options) args = [:to_s] if args.empty? config[:ngram_fields] = args config[:alphabet] = Hash[config[:alphabet].split('').map { |ch| [ch, ch] }] config[:word_separators] = Hash[config[:word_separators].split('').map { |ch| [ch, ch] }] + mongoid_fulltext_config[index_name] = config before_save(:update_ngram_index) if config[:reindex_immediately] - before_destroy :remove_from_ngram_index + before_destroy(:remove_from_ngram_index) end def update_ngram_index all.each(&:update_ngram_index) end + + private + + def default_config + { + alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', + word_separators: "-_ \n\t", + ngram_width: 3, + max_ngrams_to_search: 6, + apply_prefix_scoring_to_all_words: true, + index_full_words: true, + index_short_prefixes: false, + max_candidate_set_size: 1000, + remove_accents: true, + reindex_immediately: true, + stop_words: Hash[ + %w[i a s t me my we he it am is be do an if + or as of at by to up in on no so our you him + his she her its who are was has had did the and + but for out off why how all any few nor not own + too can don now ours your hers they them what whom + this that were been have does with into from down over + then once here when both each more most some such only + same than very will just yours their which these those + being doing until while about after above below under + again there where other myself itself theirs having during + before should himself herself because against between through + further yourself ourselves yourselves themselves].map { |x| [x, true] }] + } + end end def update_ngram_index @@ -71,58 +75,56 @@ def update_ngram_index # remove existing ngrams from external index coll = collection.database[loc_index_name.to_sym] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('document_id' => _id).delete_many - else - coll.find('document_id' => _id).remove_all - end + coll.find(document_id: _id).send(DELETE_FROM_INDEX_METHOD_NAME) + # extract ngrams from fields field_values = fulltext_config[:ngram_fields].map do |field_name| next send(field_name) if field_name == :to_s next unless field = self.class.fields[field_name.to_s] - if field.localized? - send("#{field_name}_translations")[locale] - else - send(field_name) - end + field.localized? ? send("#{field_name}_translations")[locale] : send(field_name) + end + + ngrams = field_values.inject({}) do |accum, item| + accum.update(self.class.all_ngrams(item, fulltext_config, false)) end - ngrams = field_values.inject({}) { |accum, item| accum.update(self.class.all_ngrams(item, fulltext_config, false)) } return if ngrams.empty? + # apply filters, if necessary filter_values = nil if fulltext_config.key?(:filters) - filter_values = Hash[fulltext_config[:filters].map do |key, value| - begin - [key, value.call(self)] - rescue StandardError - # Suppress any exceptions caused by filters - end - end.compact] + filter_values = Hash[ + fulltext_config[:filters].map do |key, value| + begin + [key, value.call(self)] + rescue StandardError # Suppress any exceptions caused by filters + end + end.compact + ] end + # insert new ngrams in external index ngrams.each_pair do |ngram, score| - index_document = { 'ngram' => ngram, 'document_id' => _id, 'document_type' => model_name.to_s, 'score' => score, 'class' => self.class.name } - index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.insert_one(index_document) - else - coll.insert(index_document) - end + index_document = { + class: self.class.name, + document_id: _id, + ngram: ngram, + score: score + } + + index_document[:filter_values] = filter_values if fulltext_config.key?(:filters) + + coll.send INSERT_METHOD_NAME, index_document end end end end def remove_from_ngram_index - mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| + mongoid_fulltext_config.each_pair do |index_name, _| ::I18n.available_locales.each do |locale| coll = collection.database[self.class.localized_index_name(index_name, locale)] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('document_id' => _id).delete_many - else - coll.find('document_id' => _id).remove_all - end + coll.find(document_id: _id).send(DELETE_FROM_INDEX_METHOD_NAME) end end end diff --git a/lib/mongoid/full_text_search/searchable.rb b/lib/mongoid/full_text_search/searchable.rb index cb00985..7850a79 100644 --- a/lib/mongoid/full_text_search/searchable.rb +++ b/lib/mongoid/full_text_search/searchable.rb @@ -1,8 +1,5 @@ module Mongoid module FullTextSearch - class UnspecifiedIndexError < StandardError; end - class UnknownFilterQueryOperator < StandardError; end - module Searchable extend ActiveSupport::Concern @@ -99,7 +96,7 @@ def instantiate_mapreduce_results(results, options) def document_type_filters return {} unless fields['_type'].present? kls = ([self] + descendants).map(&:to_s) - { 'document_type' => { '$in' => kls } } + { 'class' => { '$in' => kls } } end # Take a list of filters to be mapped so they can update the query From 6459ff40b2f50a49ca0d62834f7e227bb6a58bfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sun, 6 May 2018 12:48:36 +0200 Subject: [PATCH 18/24] refactor --- lib/mongoid/full_text_search.rb | 36 +++- .../full_text_search/index_definition.rb | 45 ----- lib/mongoid/full_text_search/indexes.rb | 4 +- lib/mongoid/full_text_search/mappings.rb | 41 ++--- lib/mongoid/full_text_search/ngrams.rb | 98 ----------- lib/mongoid/full_text_search/searchable.rb | 35 ++-- .../services/calculate_ngrams.rb | 166 ++++++++++++++++++ .../services/index_definition.rb | 49 ++++++ lib/mongoid_fulltext.rb | 1 - 9 files changed, 278 insertions(+), 197 deletions(-) delete mode 100644 lib/mongoid/full_text_search/index_definition.rb delete mode 100644 lib/mongoid/full_text_search/ngrams.rb create mode 100644 lib/mongoid/full_text_search/services/calculate_ngrams.rb create mode 100644 lib/mongoid/full_text_search/services/index_definition.rb diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index 6c1af56..18947f6 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -1,7 +1,6 @@ require 'mongoid/full_text_search/config' require 'mongoid/full_text_search/indexes' require 'mongoid/full_text_search/mappings' -require 'mongoid/full_text_search/ngrams' require 'mongoid/full_text_search/searchable' module Mongoid @@ -9,17 +8,42 @@ module FullTextSearch class UnknownFilterQueryOperator < StandardError; end class UnspecifiedIndexError < StandardError; end - CREATE_INDEX_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :create_one : :create - DELETE_FROM_INDEX_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :delete_many : :remove_all - DROP_INDEX_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :drop_one : :drop - INSERT_METHOD_NAME = Mongoid::Compatibility::Version.mongoid5_or_newer? ? :insert_one : :insert + CREATE_INDEX_METHOD_NAME = Compatibility::Version.mongoid5_or_newer? ? :create_one : :create + DELETE_FROM_INDEX_METHOD_NAME = Compatibility::Version.mongoid5_or_newer? ? :delete_many : :remove_all + DROP_INDEX_METHOD_NAME = Compatibility::Version.mongoid5_or_newer? ? :drop_one : :drop + INSERT_METHOD_NAME = Compatibility::Version.mongoid5_or_newer? ? :insert_one : :insert + + DEFAULT_CONFIG = { + alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', + apply_prefix_scoring_to_all_words: true, + index_full_words: true, + index_short_prefixes: false, + max_candidate_set_size: 1000, + max_ngrams_to_search: 6, + ngram_width: 3, + reindex_immediately: true, + remove_accents: true, + word_separators: "-_ \n\t", + stop_words: Hash[ + %w[i a s t me my we he it am is be do an if + or as of at by to up in on no so our you him + his she her its who are was has had did the and + but for out off why how all any few nor not own + too can don now ours your hers they them what whom + this that were been have does with into from down over + then once here when both each more most some such only + same than very will just yours their which these those + being doing until while about after above below under + again there where other myself itself theirs having during + before should himself herself because against between through + further yourself ourselves yourselves themselves].map { |x| [x, true] }] + } extend ActiveSupport::Concern include Config include Indexes include Mappings - include Ngrams include Searchable end end diff --git a/lib/mongoid/full_text_search/index_definition.rb b/lib/mongoid/full_text_search/index_definition.rb deleted file mode 100644 index f3770a6..0000000 --- a/lib/mongoid/full_text_search/index_definition.rb +++ /dev/null @@ -1,45 +0,0 @@ -module Mongoid - module FullTextSearch - class IndexDefinition < Struct.new(:coll, :filters) - def self.call(*args) - new(*args).call - end - - def call - res = index_definition - all_filter_keys = filter_indexes.map(&:first) - - # Since the definition of the index could have changed, we'll clean up by - # removing any indexes that aren't on the exact. - coll.indexes.each do |idef| - keys = idef['key'].keys - next unless keys.member?('ngram') - all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } - next unless keys & correct_keys != correct_keys - Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger - coll.indexes.send DROP_INDEX_METHOD_NAME, idef['key'] - end - - if all_filter_keys.length > filter_indexes.length - updated_filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by(&:first) - res = [['ngram', 1], ['score', -1]].concat(updated_filter_indexes) - end - - res - end - - # The order of filters matters when the same index is used from two or more collections. - def filter_indexes - filters.map { |key, _| ["filter_values.#{key}", 1] }.sort_by(&:first) - end - - def index_definition - [['ngram', 1], ['score', -1]].concat(filter_indexes) - end - - def correct_keys - index_definition.map { |field_def| field_def[0] } - end - end - end -end diff --git a/lib/mongoid/full_text_search/indexes.rb b/lib/mongoid/full_text_search/indexes.rb index 01d9025..299a1eb 100644 --- a/lib/mongoid/full_text_search/indexes.rb +++ b/lib/mongoid/full_text_search/indexes.rb @@ -1,4 +1,4 @@ -require 'mongoid/full_text_search/index_definition' +require 'mongoid/full_text_search/services/index_definition' module Mongoid module FullTextSearch @@ -28,7 +28,7 @@ def fulltext_search_ensure_indexes(index_name, config) db = collection.database coll = db[index_name] filters = config.fetch(:filters, []) - index_definition = IndexDefinition.call(coll, filters) + index_definition = Services::IndexDefinition.call(coll, filters) Mongoid.logger.info("Ensuring fts_index on #{coll.name}: #{index_definition}") if Mongoid.logger coll.indexes.send CREATE_INDEX_METHOD_NAME, Hash[index_definition], name: 'fts_index' diff --git a/lib/mongoid/full_text_search/mappings.rb b/lib/mongoid/full_text_search/mappings.rb index bf4b819..a240c9e 100644 --- a/lib/mongoid/full_text_search/mappings.rb +++ b/lib/mongoid/full_text_search/mappings.rb @@ -1,3 +1,5 @@ +require 'mongoid/full_text_search/services/calculate_ngrams' + module Mongoid module FullTextSearch module Mappings @@ -11,7 +13,7 @@ def fulltext_search_in(*args) "mongoid_fulltext.index_#{name.downcase}_#{mongoid_fulltext_config.count}" end - config = default_config.update(options) + config = DEFAULT_CONFIG.dup.update(options) args = [:to_s] if args.empty? config[:ngram_fields] = args @@ -28,34 +30,13 @@ def update_ngram_index all.each(&:update_ngram_index) end - private - - def default_config - { - alphabet: 'abcdefghijklmnopqrstuvwxyz0123456789 ', - word_separators: "-_ \n\t", - ngram_width: 3, - max_ngrams_to_search: 6, - apply_prefix_scoring_to_all_words: true, - index_full_words: true, - index_short_prefixes: false, - max_candidate_set_size: 1000, - remove_accents: true, - reindex_immediately: true, - stop_words: Hash[ - %w[i a s t me my we he it am is be do an if - or as of at by to up in on no so our you him - his she her its who are was has had did the and - but for out off why how all any few nor not own - too can don now ours your hers they them what whom - this that were been have does with into from down over - then once here when both each more most some such only - same than very will just yours their which these those - being doing until while about after above below under - again there where other myself itself theirs having during - before should himself herself because against between through - further yourself ourselves yourselves themselves].map { |x| [x, true] }] - } + def remove_from_ngram_index + mongoid_fulltext_config.each_pair do |index_name, _| + ::I18n.available_locales.each do |locale| + coll = collection.database[localized_index_name(index_name, locale)] + coll.find(class: name).send(DELETE_FROM_INDEX_METHOD_NAME) + end + end end end @@ -85,7 +66,7 @@ def update_ngram_index end ngrams = field_values.inject({}) do |accum, item| - accum.update(self.class.all_ngrams(item, fulltext_config, false)) + accum.update(Services::CalculateNgrams.call(item, fulltext_config, false)) end return if ngrams.empty? diff --git a/lib/mongoid/full_text_search/ngrams.rb b/lib/mongoid/full_text_search/ngrams.rb deleted file mode 100644 index 88e5511..0000000 --- a/lib/mongoid/full_text_search/ngrams.rb +++ /dev/null @@ -1,98 +0,0 @@ -module Mongoid - module FullTextSearch - module Ngrams - extend ActiveSupport::Concern - - module ClassMethods - def all_ngrams(str, config, bound_number_returned = true) - return {} if str.nil? - - if config[:remove_accents] - if defined?(UnicodeUtils) - str = UnicodeUtils.nfkd(str) - elsif defined?(DiacriticsFu) - str = DiacriticsFu.escape(str) - end - end - - # Remove any characters that aren't in the alphabet and aren't word separators - filtered_str = str.mb_chars.downcase.to_s.split('').find_all { |ch| config[:alphabet][ch] || config[:word_separators][ch] }.join('') - - # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams, - # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter - # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'. - step_size = if bound_number_returned - [((filtered_str.length - config[:ngram_width]).to_f / config[:max_ngrams_to_search]).ceil, 1].max - else - 1 - end - - # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the - # input string using the step size that we just computed. Let score(x,y) be the score of string x - # compared with string y - assigning scores to ngrams with the square root-based scoring function - # below and multiplying scores of matching ngrams together yields a score function that has the - # property that score(x,y) > score(x,z) for any string z containing y and score(x,y) > score(x,z) - # for any string z contained in y. - ngram_array = (0..filtered_str.length - config[:ngram_width]).step(step_size).map do |i| - score = if i == 0 || (config[:apply_prefix_scoring_to_all_words] && \ - config[:word_separators].key?(filtered_str[i - 1].chr)) - Math.sqrt(1 + 1.0 / filtered_str.length) - else - Math.sqrt(2.0 / filtered_str.length) - end - { ngram: filtered_str[i..i + config[:ngram_width] - 1], score: score } - end - - # If an ngram appears multiple times in the query string, keep the max score - ngram_array = ngram_array.group_by { |h| h[:ngram] }.map { |key, values| { ngram: key, score: values.map { |v| v[:score] }.max } } - - if config[:index_short_prefixes] || config[:index_full_words] - split_regex_def = config[:word_separators].keys.map { |k| Regexp.escape(k) }.join - split_regex = Regexp.compile("[#{split_regex_def}]") - all_words = filtered_str.split(split_regex) - end - - # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1) - if config[:index_short_prefixes] - prefixes_seen = {} - all_words.each do |word| - next if word.length < config[:ngram_width] - 1 - prefix = word[0...config[:ngram_width] - 1] - if prefixes_seen[prefix].nil? && (config[:stop_words][word].nil? || word == filtered_str) - ngram_array << { ngram: prefix, score: 1 + 1.0 / filtered_str.length } - prefixes_seen[prefix] = true - end - end - end - - # Add records to the array of ngrams for each full word in the string that isn't a stop word - if config[:index_full_words] - full_words_seen = {} - all_words.each do |word| - if word.length > 1 && full_words_seen[word].nil? && (config[:stop_words][word].nil? || word == filtered_str) - ngram_array << { ngram: word, score: 1 + 1.0 / filtered_str.length } - full_words_seen[word] = true - end - end - end - - # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of the two scores - Hash[ngram_array.group_by { |h| h[:ngram] }.map { |key, values| [key, values.map { |v| v[:score] }.sum] }] - end - - def remove_from_ngram_index - mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| - ::I18n.available_locales.each do |locale| - coll = collection.database[localized_index_name(index_name, locale)] - if Mongoid::Compatibility::Version.mongoid5_or_newer? - coll.find('class' => name).delete_many - else - coll.find('class' => name).remove_all - end - end - end - end - end - end - end -end diff --git a/lib/mongoid/full_text_search/searchable.rb b/lib/mongoid/full_text_search/searchable.rb index 7850a79..3a1cb6f 100644 --- a/lib/mongoid/full_text_search/searchable.rb +++ b/lib/mongoid/full_text_search/searchable.rb @@ -1,3 +1,5 @@ +require 'mongoid/full_text_search/services/calculate_ngrams' + module Mongoid module FullTextSearch module Searchable @@ -7,16 +9,19 @@ module ClassMethods def fulltext_search(query_string, options = {}) max_results = options.key?(:max_results) ? options.delete(:max_results) : 10 return_scores = options.key?(:return_scores) ? options.delete(:return_scores) : false + if mongoid_fulltext_config.count > 1 && !options.key?(:index) error_message = '%s is indexed by multiple full-text indexes. You must specify one by passing an :index_name parameter' raise UnspecifiedIndexError, error_message % name, caller end + index_name = options.key?(:index) ? options.delete(:index) : mongoid_fulltext_config.keys.first loc_index_name = localized_index_name(index_name, ::I18n.locale) + # Options hash should only contain filters after this point - ngrams = all_ngrams(query_string, mongoid_fulltext_config[index_name]) + ngrams = Services::CalculateNgrams.call(query_string, mongoid_fulltext_config[index_name]) return [] if ngrams.empty? # For each ngram, construct the query we'll use to pull index documents and @@ -75,11 +80,8 @@ def fulltext_search(query_string, options = {}) end def instantiate_mapreduce_result(result) - if criteria.selector.empty? - result[:clazz].constantize.find(result[:id]) - else - criteria.where(_id: result[:id]).first - end + return result[:clazz].constantize.find(result[:id]) if criteria.selector.empty? + criteria.where(_id: result[:id]).first end def instantiate_mapreduce_results(results, options) @@ -96,21 +98,24 @@ def instantiate_mapreduce_results(results, options) def document_type_filters return {} unless fields['_type'].present? kls = ([self] + descendants).map(&:to_s) - { 'class' => { '$in' => kls } } + { class: { '$in' => kls } } end # Take a list of filters to be mapped so they can update the query # used upon the fulltext search of the ngrams def map_query_filters(filters) - Hash[filters.map do |key, value| - case value - when Hash then - if value.key? :any then format_query_filter('$in', key, value[:any]) - elsif value.key? :all then format_query_filter('$all', key, value[:all]) - else raise UnknownFilterQueryOperator, value.keys.join(','), caller end - else format_query_filter('$all', key, value) + Hash[ + filters.map do |key, value| + case value + when Hash then + if value.key? :any then format_query_filter('$in', key, value[:any]) + elsif value.key? :all then format_query_filter('$all', key, value[:all]) + else raise UnknownFilterQueryOperator, value.keys.join(','), caller + end + else format_query_filter('$all', key, value) + end end - end] + ] end def format_query_filter(operator, key, value) diff --git a/lib/mongoid/full_text_search/services/calculate_ngrams.rb b/lib/mongoid/full_text_search/services/calculate_ngrams.rb new file mode 100644 index 0000000..bb9b963 --- /dev/null +++ b/lib/mongoid/full_text_search/services/calculate_ngrams.rb @@ -0,0 +1,166 @@ +require 'unicode_utils' + +module Mongoid + module FullTextSearch + module Services + class CalculateNgrams < Struct.new(:str, :config, :bound_number_returned) + def self.call(*args) + new(*args).call + end + + def initialize(str, config, bound_number_returned = true) + if str && config[:remove_accents] + if defined?(UnicodeUtils) + str = UnicodeUtils.nfkd(str) + elsif defined?(DiacriticsFu) + str = DiacriticsFu.escape(super) + end + end + + super(str, config, bound_number_returned) + end + + def call + return {} unless str + + # Create an array of records of the form {:ngram => x, :score => y} for all ngrams that occur in the + # input string using the step size that we just computed. Let score(x,y) be the score of string x + # compared with string y - assigning scores to ngrams with the square root-based scoring function + # below and multiplying scores of matching ngrams together yields a score function that has the + # property that score(x,y) > score(x,z) for any string z containing y and score(x,y) > score(x,z) + # for any string z contained in y. + ngram_array = build_ngram_array + + # If an ngram appears multiple times in the query string, keep the max score + ngram_array = ngram_array.group_by { |h| h[:ngram] }.map do |key, values| + { ngram: key, score: values.map { |v| v[:score] }.max } + end + + # Add 'short prefix' records to the array: prefixes of the string that are length (ngram_width - 1) + ngram_array += short_prefixes if index_short_prefixes? + + # Add records to the array of ngrams for each full word in the string that isn't a stop word + ngram_array += full_words if index_full_words? + + # If an ngram appears as any combination of full word, short prefix, and ngram, keep the sum of the two scores + Hash[ + ngram_array + .group_by { |h| h[:ngram] } + .map do |key, values| + [key, values.map { |v| v[:score] }.sum] + end + ] + end + + private + + def build_ngram_array + (0..filtered_str.length - ngram_width).step(step_size).map do |i| + score = if i == 0 || (apply_prefix_scoring_to_all_words? && word_separators.key?(filtered_str[i - 1].chr)) + Math.sqrt(1 + 1.0 / filtered_str.length) + else + Math.sqrt(2.0 / filtered_str.length) + end + + { ngram: filtered_str[i..i + ngram_width - 1], score: score } + end + end + + def short_prefixes + prefixes_seen = {} + all_words.each_with_object([]) do |word, res| + next res if word.length < ngram_width - 1 + prefix = word[0...ngram_width - 1] + if prefixes_seen[prefix].nil? && (stop_word?(word) || filtered_str?(word)) + res << { ngram: prefix, score: 1 + 1.0 / filtered_str.length } + prefixes_seen[prefix] = true + end + end + end + + def full_words + full_words_seen = {} + all_words.each_with_object([]) do |word, res| + if word.length > 1 && full_words_seen[word].nil? && (stop_word?(word) || filtered_str?(word)) + res << { ngram: word, score: 1 + 1.0 / filtered_str.length } + full_words_seen[word] = true + end + end + end + + def filtered_str?(word) + word == filtered_str + end + + def stop_word?(word) + stop_words[word].nil? + end + + def index_short_prefixes? + config[:index_short_prefixes] + end + + def index_full_words? + config[:index_full_words] + end + + def alphabet + config[:alphabet] + end + + def word_separators + config[:word_separators] + end + + def ngram_width + config[:ngram_width] + end + + def max_ngrams_to_search + config[:max_ngrams_to_search] + end + + def remove_accents? + config[:remove_accents] + end + + def apply_prefix_scoring_to_all_words? + config[:apply_prefix_scoring_to_all_words] + end + + def stop_words + config[:stop_words] + end + + def all_words + filtered_str.split(split_regex) + end + + # Remove any characters that aren't in the alphabet and aren't word separators + def filtered_str + str.mb_chars + .downcase + .to_s.split('') + .find_all { |ch| alphabet[ch] || word_separators[ch] } + .join('') + end + + # Figure out how many ngrams to extract from the string. If we can't afford to extract all ngrams, + # step over the string in evenly spaced strides to extract ngrams. For example, to extract 3 3-letter + # ngrams from 'abcdefghijk', we'd want to extract 'abc', 'efg', and 'ijk'. + def step_size + return 1 unless bound_number_returned + [((filtered_str.length - ngram_width).to_f / max_ngrams_to_search).ceil, 1].max + end + + def split_regex_def + word_separators.keys.map { |k| Regexp.escape(k) }.join + end + + def split_regex + Regexp.compile("[#{split_regex_def}]") + end + end + end + end +end diff --git a/lib/mongoid/full_text_search/services/index_definition.rb b/lib/mongoid/full_text_search/services/index_definition.rb new file mode 100644 index 0000000..25800ac --- /dev/null +++ b/lib/mongoid/full_text_search/services/index_definition.rb @@ -0,0 +1,49 @@ +module Mongoid + module FullTextSearch + module Services + class IndexDefinition < Struct.new(:coll, :filters) + def self.call(*args) + new(*args).call + end + + def call + res = index_definition + all_filter_keys = filter_indexes.map(&:first) + + # Since the definition of the index could have changed, we'll clean up by + # removing any indexes that aren't on the exact. + coll.indexes.each do |idef| + keys = idef['key'].keys + next unless keys.member?('ngram') + all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } + next unless keys & correct_keys != correct_keys + Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger + coll.indexes.send DROP_INDEX_METHOD_NAME, idef['key'] + end + + if all_filter_keys.length > filter_indexes.length + updated_filter_indexes = all_filter_keys.map { |key| [key, 1] }.sort_by(&:first) + res = [['ngram', 1], ['score', -1]].concat(updated_filter_indexes) + end + + res + end + + private + + def index_definition + [['ngram', 1], ['score', -1]].concat(filter_indexes) + end + + # The order of filters matters when the same index is used from two or more collections. + def filter_indexes + filters.map { |key, _| ["filter_values.#{key}", 1] }.sort_by(&:first) + end + + def correct_keys + index_definition.map { |field_def| field_def[0] } + end + end + end + end +end diff --git a/lib/mongoid_fulltext.rb b/lib/mongoid_fulltext.rb index b741bb8..f307f60 100644 --- a/lib/mongoid_fulltext.rb +++ b/lib/mongoid_fulltext.rb @@ -3,7 +3,6 @@ require 'mongoid/full_text_search' -require 'unicode_utils' require 'cgi' module Mongoid From a6eeef914f715ac164343ba3205450cd4fbc9dad Mon Sep 17 00:00:00 2001 From: dblock Date: Sun, 6 May 2018 12:39:21 -0400 Subject: [PATCH 19/24] Mark failing specs pending. --- spec/mongoid/full_text_search_spec.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spec/mongoid/full_text_search_spec.rb b/spec/mongoid/full_text_search_spec.rb index a385122..3c024cc 100644 --- a/spec/mongoid/full_text_search_spec.rb +++ b/spec/mongoid/full_text_search_spec.rb @@ -355,6 +355,8 @@ # fields as well as the union of all the filter fields to allow for efficient lookups. it 'creates a proper index for searching efficiently' do + # see https://github.com/mongoid/mongoid_fulltext/pull/39 + pending unless Mongoid::Compatibility::Version.mongoid5_or_newer? [FilteredArtwork, FilteredArtist, FilteredOther].each(&:create_indexes) index_collection = FilteredArtwork.collection.database['mongoid_fulltext.artworks_and_artists'] ngram_indexes = [] @@ -609,6 +611,8 @@ context 'mongoid indexes' do it 'can re-create dropped indexes' do + # see https://github.com/mongoid/mongoid_fulltext/pull/39 + pending unless Mongoid::Compatibility::Version.mongoid5_or_newer? # there're no indexes by default as Mongoid.autocreate_indexes is set to false # but mongo will automatically attempt to index _id in the background expect(Mongoid.default_session['mongoid_fulltext.index_basicartwork_0'].indexes.count).to be <= 1 From 5dcd0cdb73d1d5ca40eebdc7319a0a9462f7e5f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 11:18:07 +0200 Subject: [PATCH 20/24] Mongoid 7 compatibility --- .travis.yml | 5 +++-- Gemfile | 4 +++- lib/mongoid/full_text_search.rb | 14 +++++++------- spec/mongoid/full_text_search_spec.rb | 2 +- spec/support/mongoid.rb | 2 +- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.travis.yml b/.travis.yml index 31acee2..37aa435 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,11 +2,12 @@ sudo: false matrix: include: + - rvm: 2.5.0 + env: + - MONGOID_VERSION=7.0 - rvm: 2.3.1 env: - MONGOID_VERSION=6.0 - before_script: - - bundle exec danger - rvm: 2.3.1 env: - MONGOID_VERSION=5.0 diff --git a/Gemfile b/Gemfile index 01dcc47..1a8278b 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,8 @@ source 'http://rubygems.org' -case version = ENV['MONGOID_VERSION'] || '6' +case version = ENV['MONGOID_VERSION'] || '7' +when /7/ + gem 'mongoid', '~> 7.0' when /6/ gem 'mongoid', '~> 6.0' when /5/ diff --git a/lib/mongoid/full_text_search.rb b/lib/mongoid/full_text_search.rb index b9a3fb4..6c3d84a 100644 --- a/lib/mongoid/full_text_search.rb +++ b/lib/mongoid/full_text_search.rb @@ -95,7 +95,7 @@ def fulltext_search_ensure_indexes(index_name, config) all_filter_keys |= keys.find_all { |key| key.starts_with?('filter_values.') } next unless keys & correct_keys != correct_keys Mongoid.logger.info "Dropping #{idef['name']} [#{keys & correct_keys} <=> #{correct_keys}]" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.indexes.drop_one(idef['key']) else coll.indexes.drop(idef['key']) @@ -108,14 +108,14 @@ def fulltext_search_ensure_indexes(index_name, config) end Mongoid.logger.info "Ensuring fts_index on #{coll.name}: #{index_definition}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.indexes.create_one(Hash[index_definition], name: 'fts_index') else coll.indexes.create(Hash[index_definition], name: 'fts_index') end Mongoid.logger.info "Ensuring document_id index on #{coll.name}" if Mongoid.logger - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.indexes.create_one('document_id' => 1) # to make removes fast else coll.indexes.create('document_id' => 1) # to make removes fast @@ -282,7 +282,7 @@ def all_ngrams(str, config, bound_number_returned = true) def remove_from_ngram_index mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| coll = collection.database[index_name] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('class' => name).delete_many else coll.find('class' => name).remove_all @@ -328,7 +328,7 @@ def update_ngram_index # remove existing ngrams from external index coll = collection.database[index_name.to_sym] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('document_id' => _id).delete_many else coll.find('document_id' => _id).remove_all @@ -352,7 +352,7 @@ def update_ngram_index ngrams.each_pair do |ngram, score| index_document = { 'ngram' => ngram, 'document_id' => _id, 'score' => score, 'class' => self.class.name } index_document['filter_values'] = filter_values if fulltext_config.key?(:filters) - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.insert_one(index_document) else coll.insert(index_document) @@ -364,7 +364,7 @@ def update_ngram_index def remove_from_ngram_index mongoid_fulltext_config.each_pair do |index_name, _fulltext_config| coll = collection.database[index_name] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('document_id' => _id).delete_many else coll.find('document_id' => _id).remove_all diff --git a/spec/mongoid/full_text_search_spec.rb b/spec/mongoid/full_text_search_spec.rb index 3c024cc..1d81994 100644 --- a/spec/mongoid/full_text_search_spec.rb +++ b/spec/mongoid/full_text_search_spec.rb @@ -599,7 +599,7 @@ context 'incremental' do it 'removes an existing record' do coll = Mongoid.default_session['mongoid_fulltext.index_basicartwork_0'] - if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? + if Mongoid::Compatibility::Version.mongoid5_or_newer? coll.find('document_id' => flowers1._id).delete_many else coll.find('document_id' => flowers1._id).remove_all diff --git a/spec/support/mongoid.rb b/spec/support/mongoid.rb index 788b045..d73d317 100644 --- a/spec/support/mongoid.rb +++ b/spec/support/mongoid.rb @@ -2,4 +2,4 @@ module Mongoid def self.default_session default_client end -end if Mongoid::Compatibility::Version.mongoid5? || Mongoid::Compatibility::Version.mongoid6? +end if Mongoid::Compatibility::Version.mongoid5_or_newer? From 20fbcdf813a274a485ffd67b3177b3035cd84d58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 11:18:45 +0200 Subject: [PATCH 21/24] readd danger --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 37aa435..d207a6d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,6 +5,8 @@ matrix: - rvm: 2.5.0 env: - MONGOID_VERSION=7.0 + before_script: + - bundle exec danger - rvm: 2.3.1 env: - MONGOID_VERSION=6.0 From 1ff06cc8ee0fefe5ba99b60fcfb4d3b03e05121c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 11:25:29 +0200 Subject: [PATCH 22/24] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68f51cf..6fd4122 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ### 0.8.1 (Next) +* [#35](https://github.com/mongoid/mongoid_fulltext/pull/35): Mongoid 7 compatibility - [@tomasc](https://github.com/tomasc). * Your contribution here. ### 0.8.0 (1/19/2017) From 6c8ba8c58d747613397aefd07151ab5acea4bf39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 17:07:11 +0200 Subject: [PATCH 23/24] mongoid-compatibility added _or_newer? in 0.5.1 --- mongoid_fulltext.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mongoid_fulltext.gemspec b/mongoid_fulltext.gemspec index 5b8b62a..681aa60 100644 --- a/mongoid_fulltext.gemspec +++ b/mongoid_fulltext.gemspec @@ -13,7 +13,7 @@ Gem::Specification.new do |s| s.homepage = 'https://github.com/mongoid/mongoid_fulltext' s.licenses = ['MIT'] s.summary = 'Full-text search for the Mongoid ORM, using n-grams extracted from text.' - s.add_dependency 'mongoid', '>= 3.0' + s.add_dependency 'mongoid', '>= 3.0', '< 8' s.add_dependency 'mongoid-compatibility', '>= 0.5.1' s.add_dependency 'unicode_utils' s.add_development_dependency 'database_cleaner' From 6ff4ce6ffbdeba86f1cf9dc7382825f7c3812e43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toma=CC=81s=CC=8C=20Celizna?= Date: Sat, 5 May 2018 17:11:35 +0200 Subject: [PATCH 24/24] add database_cleaner --- spec/spec_helper.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 849707d..c06b4dd 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -22,6 +22,12 @@ config.connect_to('mongoid_fulltext_test') end +Mongoid.logger.level = Logger::INFO +Mongo::Logger.logger.level = Logger::INFO if Mongoid::Compatibility::Version.mongoid5_or_newer? + +DatabaseCleaner.orm = :mongoid +DatabaseCleaner.strategy = :truncation + RSpec.configure do |c| c.before :each do DatabaseCleaner.clean