From ebf537e202c96b88dfb9cd11cd8cb48abf75e5ca Mon Sep 17 00:00:00 2001 From: Luca Ongaro Date: Sun, 27 Nov 2022 22:57:04 +0100 Subject: [PATCH 1/4] Add addFields method This method adds fields to an existing document. The fields should not exist already on the document, or an error is thrown. This is useful to patch an existing document with some additional field, without having to replace it. --- src/MiniSearch.test.js | 26 ++++++++++++++++ src/MiniSearch.ts | 70 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 88 insertions(+), 8 deletions(-) diff --git a/src/MiniSearch.test.js b/src/MiniSearch.test.js index 5482104c..054802b5 100644 --- a/src/MiniSearch.test.js +++ b/src/MiniSearch.test.js @@ -759,6 +759,32 @@ describe('MiniSearch', () => { }) }) + describe('addFields', () => { + it('add fields to an existing document', () => { + const options = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] } + const ms = new MiniSearch(options) + const other = new MiniSearch(options) + + ms.add({ id: 1, text: 'Some quite interesting stuff' }) + ms.addFields(1, { author: 'Al et. al.', n: 5 }) + + other.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. 
al.', n: 5 }) + + expect(ms).toEqual(other) + }) + + it('throws an error if the document did not exist', () => { + const ms = new MiniSearch({ fields: ['text'] }) + expect(() => { ms.addFields(1, { text: 'hello' }) }).toThrow('MiniSearch: no document with ID 1') + }) + + it('throws an error if adding a field that already exists', () => { + const ms = new MiniSearch({ fields: ['text'] }) + ms.add({ id: 1, text: 'Some interesting stuff' }) + expect(() => { ms.addFields(1, { text: 'hello' }) }).toThrow('MiniSearch: field text already exists on document with ID 1') + }) + }) + describe('vacuum', () => { it('cleans up discarded documents from the index', async () => { const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] }) diff --git a/src/MiniSearch.ts b/src/MiniSearch.ts index 7350b9c4..4e0c7b05 100644 --- a/src/MiniSearch.ts +++ b/src/MiniSearch.ts @@ -588,7 +588,7 @@ export default class MiniSearch { this._enqueuedVacuum = null this._enqueuedVacuumConditions = defaultVacuumConditions - this.addFields(this._options.fields) + this.addFieldIds(this._options.fields) } /** @@ -597,8 +597,9 @@ export default class MiniSearch { * @param document The document to be indexed */ add (document: T): void { - const { extractField, tokenize, processTerm, fields, idField } = this._options + const { extractField, idField } = this._options const id = extractField(document, idField) + if (id == null) { throw new Error(`MiniSearch: document does not have ID field "${idField}"`) } @@ -610,6 +611,52 @@ export default class MiniSearch { const shortDocumentId = this.addDocumentId(id) this.saveStoredFields(shortDocumentId, document) + this.addToIndex(shortDocumentId, document, true) + } + + /** + * Adds some fields to an existing document + * + * The added fields should not be already present on the document, or an error + * will be thrown. 
+ * + * ## Example: + * + * const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] }) + * + * miniSearch.add({ id: 1, title: 'Neuromancer' }) + * + * miniSearch.addFields(1, { + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * // The above is equivalent to: + * miniSearch.add({ + * id: 1, + * title: 'Neuromancer', + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * @param id The document ID + * @param toAdd The fields to add + */ + addFields (id: any, toAdd: T): void { + const shortDocumentId = this._idToShortId.get(id) + + if (shortDocumentId == null) { + throw new Error(`MiniSearch: no document with ID ${id}`) + } + + this.saveStoredFields(shortDocumentId, toAdd) + + this.addToIndex(shortDocumentId, toAdd, false) + } + + private addToIndex (shortDocumentId: number, document: T, added: boolean) { + const { extractField, tokenize, processTerm, fields } = this._options + for (const field of fields) { const fieldValue = extractField(document, field) if (fieldValue == null) continue @@ -617,8 +664,13 @@ export default class MiniSearch { const tokens = tokenize(fieldValue.toString(), field) const fieldId = this._fieldIds[field] - const uniqueTerms = new Set(tokens).size - this.addFieldLength(shortDocumentId, fieldId, this._documentCount - 1, uniqueTerms) + const uniqueTerms = new Set(tokens) + uniqueTerms.delete('') + + if (this._fieldLength.get(shortDocumentId)?.[fieldId] != null) { + throw new Error(`MiniSearch: field ${field} already exists on document with ID ${this._documentIds.get(shortDocumentId)}`) + } + this.addFieldLength(shortDocumentId, fieldId, this._documentCount, uniqueTerms.size, added) for (const term of tokens) { const processedTerm = processTerm(term, field) @@ -1706,7 +1758,7 @@ export default class MiniSearch { /** * @ignore */ - private addFields (fields: string[]): 
void { + private addFieldIds (fields: string[]): void { for (let i = 0; i < fields.length; i++) { this._fieldIds[fields[i]] = i } @@ -1715,14 +1767,16 @@ export default class MiniSearch { /** * @ignore */ - private addFieldLength (documentId: number, fieldId: number, count: number, length: number): void { + private addFieldLength (documentId: number, fieldId: number, count: number, length: number, added: boolean): void { let fieldLengths = this._fieldLength.get(documentId) if (fieldLengths == null) this._fieldLength.set(documentId, fieldLengths = []) + const n = added ? 1 : 0 + fieldLengths[fieldId] = length const averageFieldLength = this._avgFieldLength[fieldId] || 0 - const totalFieldLength = (averageFieldLength * count) + length - this._avgFieldLength[fieldId] = totalFieldLength / (count + 1) + const totalFieldLength = (averageFieldLength * (count - n)) + length + this._avgFieldLength[fieldId] = totalFieldLength / count } /** From 910c04361d2e80cde3e25ae6965d4ca065a4e992 Mon Sep 17 00:00:00 2001 From: Luca Ongaro Date: Mon, 28 Nov 2022 11:32:05 +0100 Subject: [PATCH 2/4] Add removeFields method Symmetric to addFields --- src/MiniSearch.test.js | 40 +++++++++++++++++- src/MiniSearch.ts | 94 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 121 insertions(+), 13 deletions(-) diff --git a/src/MiniSearch.test.js b/src/MiniSearch.test.js index 054802b5..6ccc1a4b 100644 --- a/src/MiniSearch.test.js +++ b/src/MiniSearch.test.js @@ -775,13 +775,49 @@ describe('MiniSearch', () => { it('throws an error if the document did not exist', () => { const ms = new MiniSearch({ fields: ['text'] }) - expect(() => { ms.addFields(1, { text: 'hello' }) }).toThrow('MiniSearch: no document with ID 1') + expect(() => { + ms.addFields(1, { text: 'hello' }) + }).toThrow('MiniSearch: no document with ID 1') }) it('throws an error if adding a field that already exists', () => { const ms = new MiniSearch({ fields: ['text'] }) ms.add({ id: 1, text: 'Some interesting stuff' }) - 
expect(() => { ms.addFields(1, { text: 'hello' }) }).toThrow('MiniSearch: field text already exists on document with ID 1') + expect(() => { + ms.addFields(1, { text: 'hello' }) + }).toThrow('MiniSearch: field text already exists on document with ID 1') + }) + }) + + describe('removeFields', () => { + it('removes fields to an existing document', () => { + const options = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] } + const ms = new MiniSearch(options) + const other = new MiniSearch(options) + + ms.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. al.', n: 5 }) + ms.add({ id: 2, text: 'Lalala', author: 'Someone', n: 3 }) + ms.removeFields(1, { text: 'Some quite interesting stuff', n: 5 }) + + other.add({ id: 1, author: 'Al et. al.' }) + other.add({ id: 2, text: 'Lalala', author: 'Someone', n: 3 }) + + expect(ms).toEqual(other) + }) + + it('throws an error if the document did not exist', () => { + const ms = new MiniSearch({ fields: ['text'] }) + expect(() => { + ms.removeFields(1, { text: 'hello' }) + }).toThrow('MiniSearch: no document with ID 1') + }) + + it('throws an error if removing a field that did not exist', () => { + const ms = new MiniSearch({ fields: ['text', 'author'] }) + ms.add({ id: 1, author: 'Al et. al.' 
}) + expect(() => { + ms.removeFields(1, { text: 'Some interesting stuff' }) + }).toThrow('MiniSearch: field text does not exist on document with ID 1') }) }) diff --git a/src/MiniSearch.ts b/src/MiniSearch.ts index 4e0c7b05..9dc3f95a 100644 --- a/src/MiniSearch.ts +++ b/src/MiniSearch.ts @@ -741,7 +741,7 @@ export default class MiniSearch { * @param document The document to be removed */ remove (document: T): void { - const { tokenize, processTerm, extractField, fields, idField } = this._options + const { extractField, idField } = this._options const id = extractField(document, idField) if (id == null) { @@ -754,6 +754,75 @@ export default class MiniSearch { throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`) } + this.removeFromIndex(shortId, document, true) + + this._storedFields.delete(shortId) + this._documentIds.delete(shortId) + this._idToShortId.delete(id) + this._fieldLength.delete(shortId) + this._documentCount -= 1 + } + + /** + * Removes some fields from an existing documeny + * + * The removed fields should be present on the document, or an error will be + * thrown. + * + * Note: removing _all_ the fields in a document with `removeFields` is + * different from removing the whole document with [[MiniSearch.remove]] or + * [[MiniSearch.discard]]. The difference in the first case is that the + * document is still counted in [[MiniSearch.documentCount]], even if it is + * practically not searchable anymore. 
+ * + * ## Example: + * + * const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] }) + * + * miniSearch.add({ + * id: 1, + * title: 'Neuromancer', + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * miniSearch.removeFields(1, { + * text: 'The sky above the port was the color of television, tuned to a dead channel.', + * author: 'William Gibson' + * }) + * + * // The above is equivalent to: + * miniSearch.add({ + * id: 1, + * title: 'Neuromancer' + * }) + * + * @param id The document ID + * @param toRemove The fields to remove + */ + removeFields (id: any, toRemove: T) { + const { storeFields, extractField } = this._options + const shortDocumentId = this._idToShortId.get(id) + + if (shortDocumentId == null) { + throw new Error(`MiniSearch: no document with ID ${id}`) + } + + this.removeFromIndex(shortDocumentId, toRemove, false) + + const storedFields = this._storedFields.get(shortDocumentId) + + for (const fieldName of storeFields) { + const fieldValue = extractField(toRemove, fieldName) + if (storedFields != null && fieldValue !== undefined) { + delete storedFields[fieldName] + } + } + } + + private removeFromIndex (shortId: number, document: T, removed: boolean) { + const { tokenize, processTerm, extractField, fields } = this._options + for (const field of fields) { const fieldValue = extractField(document, field) if (fieldValue == null) continue @@ -761,8 +830,13 @@ export default class MiniSearch { const tokens = tokenize(fieldValue.toString(), field) const fieldId = this._fieldIds[field] - const uniqueTerms = new Set(tokens).size - this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms) + const uniqueTerms = new Set(tokens) + uniqueTerms.delete('') + + if (this._fieldLength.get(shortId)?.[fieldId] == null) { + throw new Error(`MiniSearch: field ${field} does not exist on document with ID ${this._documentIds.get(shortId)}`) + } + 
this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms.size, removed) for (const term of tokens) { const processedTerm = processTerm(term, field) @@ -775,12 +849,6 @@ export default class MiniSearch { } } } - - this._storedFields.delete(shortId) - this._documentIds.delete(shortId) - this._idToShortId.delete(id) - this._fieldLength.delete(shortId) - this._documentCount -= 1 } /** @@ -1782,13 +1850,17 @@ export default class MiniSearch { /** * @ignore */ - private removeFieldLength (documentId: number, fieldId: number, count: number, length: number): void { + private removeFieldLength (documentId: number, fieldId: number, count: number, length: number, removed: boolean = true): void { + const fieldLengths = this._fieldLength.get(documentId) + delete fieldLengths?.[fieldId] + if (count === 1) { this._avgFieldLength[fieldId] = 0 return } + const n = removed ? 1 : 0 const totalFieldLength = (this._avgFieldLength[fieldId] * count) - length - this._avgFieldLength[fieldId] = totalFieldLength / (count - 1) + this._avgFieldLength[fieldId] = totalFieldLength / (count - n) } /** From 6d3f9003862d08189cb45c235789b931bd8fea41 Mon Sep 17 00:00:00 2001 From: Luca Ongaro Date: Mon, 28 Nov 2022 11:36:51 +0100 Subject: [PATCH 3/4] Clarify docs --- src/MiniSearch.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/MiniSearch.ts b/src/MiniSearch.ts index 9dc3f95a..71141680 100644 --- a/src/MiniSearch.ts +++ b/src/MiniSearch.ts @@ -766,6 +766,7 @@ export default class MiniSearch { /** * Removes some fields from an existing documeny * + * The original fields to be removed must be provided as the second argument. * The removed fields should be present on the document, or an error will be * thrown. 
* From 52233a143f654e773b053f4c037b2ee45d38dcda Mon Sep 17 00:00:00 2001 From: Luca Ongaro Date: Mon, 28 Nov 2022 11:43:37 +0100 Subject: [PATCH 4/4] Improve test --- src/MiniSearch.test.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/MiniSearch.test.js b/src/MiniSearch.test.js index 6ccc1a4b..f914f5b9 100644 --- a/src/MiniSearch.test.js +++ b/src/MiniSearch.test.js @@ -814,10 +814,18 @@ describe('MiniSearch', () => { it('throws an error if removing a field that did not exist', () => { const ms = new MiniSearch({ fields: ['text', 'author'] }) - ms.add({ id: 1, author: 'Al et. al.' }) + ms.addAll([ + { id: 1, author: 'Al et. al.' }, + { id: 2 } + ]) + expect(() => { ms.removeFields(1, { text: 'Some interesting stuff' }) }).toThrow('MiniSearch: field text does not exist on document with ID 1') + + expect(() => { + ms.removeFields(2, { author: 'Someone' }) + }).toThrow('MiniSearch: field author does not exist on document with ID 2') }) })