Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/MiniSearch.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,76 @@ describe('MiniSearch', () => {
})
})

describe('addFields', () => {
  // Adding the missing fields afterwards must yield exactly the same index as
  // adding the complete document in one go.
  it('add fields to an existing document', () => {
    const config = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] }
    const incremental = new MiniSearch(config)
    const reference = new MiniSearch(config)

    const initialFields = { id: 1, text: 'Some quite interesting stuff' }
    const extraFields = { author: 'Al et. al.', n: 5 }

    incremental.add(initialFields)
    incremental.addFields(1, extraFields)

    reference.add({ ...initialFields, ...extraFields })

    expect(incremental).toEqual(reference)
  })

  // An unknown document ID is rejected.
  it('throws an error if the document did not exist', () => {
    const index = new MiniSearch({ fields: ['text'] })
    const addToMissingDocument = () => index.addFields(1, { text: 'hello' })

    expect(addToMissingDocument).toThrow('MiniSearch: no document with ID 1')
  })

  // A field that the document already has cannot be added a second time.
  it('throws an error if adding a field that already exists', () => {
    const index = new MiniSearch({ fields: ['text'] })
    index.add({ id: 1, text: 'Some interesting stuff' })

    const addDuplicateField = () => index.addFields(1, { text: 'hello' })

    expect(addDuplicateField).toThrow('MiniSearch: field text already exists on document with ID 1')
  })
})

describe('removeFields', () => {
  // Removing fields from a document must yield exactly the same index as if
  // the document had been added without those fields in the first place.
  it('removes fields to an existing document', () => {
    const config = { fields: ['text', 'author'], storeFields: ['text', 'author', 'n'] }
    const pruned = new MiniSearch(config)
    const reference = new MiniSearch(config)

    const untouched = { id: 2, text: 'Lalala', author: 'Someone', n: 3 }

    pruned.add({ id: 1, text: 'Some quite interesting stuff', author: 'Al et. al.', n: 5 })
    pruned.add(untouched)
    pruned.removeFields(1, { text: 'Some quite interesting stuff', n: 5 })

    reference.add({ id: 1, author: 'Al et. al.' })
    reference.add(untouched)

    expect(pruned).toEqual(reference)
  })

  // An unknown document ID is rejected.
  it('throws an error if the document did not exist', () => {
    const index = new MiniSearch({ fields: ['text'] })
    const removeFromMissingDocument = () => index.removeFields(1, { text: 'hello' })

    expect(removeFromMissingDocument).toThrow('MiniSearch: no document with ID 1')
  })

  // Covers both a document that has other fields and one that has none.
  it('throws an error if removing a field that did not exist', () => {
    const index = new MiniSearch({ fields: ['text', 'author'] })
    index.addAll([
      { id: 1, author: 'Al et. al.' },
      { id: 2 }
    ])

    const cases = [
      [1, { text: 'Some interesting stuff' }, 'MiniSearch: field text does not exist on document with ID 1'],
      [2, { author: 'Someone' }, 'MiniSearch: field author does not exist on document with ID 2']
    ]

    for (const [id, toRemove, message] of cases) {
      expect(() => {
        index.removeFields(id, toRemove)
      }).toThrow(message)
    }
  })
})

describe('vacuum', () => {
it('cleans up discarded documents from the index', async () => {
const ms = new MiniSearch({ fields: ['text'], storeFields: ['text'] })
Expand Down
165 changes: 146 additions & 19 deletions src/MiniSearch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ export default class MiniSearch<T = any> {
this._enqueuedVacuum = null
this._enqueuedVacuumConditions = defaultVacuumConditions

this.addFields(this._options.fields)
this.addFieldIds(this._options.fields)
}

/**
Expand All @@ -597,8 +597,9 @@ export default class MiniSearch<T = any> {
* @param document The document to be indexed
*/
add (document: T): void {
const { extractField, tokenize, processTerm, fields, idField } = this._options
const { extractField, idField } = this._options
const id = extractField(document, idField)

if (id == null) {
throw new Error(`MiniSearch: document does not have ID field "${idField}"`)
}
Expand All @@ -610,15 +611,66 @@ export default class MiniSearch<T = any> {
const shortDocumentId = this.addDocumentId(id)
this.saveStoredFields(shortDocumentId, document)

this.addToIndex(shortDocumentId, document, true)
}

/**
 * Adds some fields to an existing document
 *
 * The added fields should not be already present on the document, or an error
 * will be thrown. Every indexed field in `toAdd` is checked before anything is
 * modified, so a call rejected with the "already exists" error changes neither
 * the index nor the stored fields.
 *
 * ## Example:
 *
 *     const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] })
 *
 *     miniSearch.add({ id: 1, title: 'Neuromancer' })
 *
 *     miniSearch.addFields(1, {
 *       text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *       author: 'William Gibson'
 *     })
 *
 *     // The above is equivalent to:
 *     miniSearch.add({
 *       id: 1,
 *       title: 'Neuromancer',
 *       text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *       author: 'William Gibson'
 *     })
 *
 * @param id  The document ID
 * @param toAdd  The fields to add
 * @throws Error if no document with the given ID is in the index, or if one
 * of the indexed fields in `toAdd` already exists on the document
 */
addFields (id: any, toAdd: T): void {
  const { extractField, fields } = this._options
  const shortDocumentId = this._idToShortId.get(id)

  if (shortDocumentId == null) {
    throw new Error(`MiniSearch: no document with ID ${id}`)
  }

  // Validate all fields up front. Without this, the "already exists" error
  // would only surface part-way through indexing, after the stored fields were
  // overwritten and after earlier fields of `toAdd` were already indexed.
  const fieldLengths = this._fieldLength.get(shortDocumentId)
  for (const field of fields) {
    // Fields absent from `toAdd` are not being added: nothing to check
    if (extractField(toAdd, field) == null) continue

    // A recorded field length means the field was already indexed
    if (fieldLengths?.[this._fieldIds[field]] != null) {
      throw new Error(`MiniSearch: field ${field} already exists on document with ID ${this._documentIds.get(shortDocumentId)}`)
    }
  }

  this.saveStoredFields(shortDocumentId, toAdd)

  // `false`: the document itself is not new, only some of its fields are
  this.addToIndex(shortDocumentId, toAdd, false)
}

private addToIndex (shortDocumentId: number, document: T, added: boolean) {
const { extractField, tokenize, processTerm, fields } = this._options

for (const field of fields) {
const fieldValue = extractField(document, field)
if (fieldValue == null) continue

const tokens = tokenize(fieldValue.toString(), field)
const fieldId = this._fieldIds[field]

const uniqueTerms = new Set(tokens).size
this.addFieldLength(shortDocumentId, fieldId, this._documentCount - 1, uniqueTerms)
const uniqueTerms = new Set(tokens)
uniqueTerms.delete('')

if (this._fieldLength.get(shortDocumentId)?.[fieldId] != null) {
throw new Error(`MiniSearch: field ${field} already exists on document with ID ${this._documentIds.get(shortDocumentId)}`)
}
this.addFieldLength(shortDocumentId, fieldId, this._documentCount, uniqueTerms.size, added)

for (const term of tokens) {
const processedTerm = processTerm(term, field)
Expand Down Expand Up @@ -689,7 +741,7 @@ export default class MiniSearch<T = any> {
* @param document The document to be removed
*/
remove (document: T): void {
const { tokenize, processTerm, extractField, fields, idField } = this._options
const { extractField, idField } = this._options
const id = extractField(document, idField)

if (id == null) {
Expand All @@ -702,15 +754,90 @@ export default class MiniSearch<T = any> {
throw new Error(`MiniSearch: cannot remove document with ID ${id}: it is not in the index`)
}

this.removeFromIndex(shortId, document, true)

this._storedFields.delete(shortId)
this._documentIds.delete(shortId)
this._idToShortId.delete(id)
this._fieldLength.delete(shortId)
this._documentCount -= 1
}

/**
 * Removes some fields from an existing document
 *
 * The original fields to be removed must be provided as the second argument.
 * The removed fields should be present on the document, or an error will be
 * thrown. Every indexed field in `toRemove` is checked before anything is
 * modified, so a call rejected with the "does not exist" error changes neither
 * the index nor the stored fields.
 *
 * Note: removing _all_ the fields in a document with `removeFields` is
 * different from removing the whole document with [[MiniSearch.remove]] or
 * [[MiniSearch.discard]]. The difference in the first case is that the
 * document is still counted in [[MiniSearch.documentCount]], even if it is
 * practically not searchable anymore.
 *
 * ## Example:
 *
 *     const miniSearch = new MiniSearch({ fields: ['title', 'text', 'author'] })
 *
 *     miniSearch.add({
 *       id: 1,
 *       title: 'Neuromancer',
 *       text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *       author: 'William Gibson'
 *     })
 *
 *     miniSearch.removeFields(1, {
 *       text: 'The sky above the port was the color of television, tuned to a dead channel.',
 *       author: 'William Gibson'
 *     })
 *
 *     // The index is now as if the document had been added like this:
 *     miniSearch.add({
 *       id: 1,
 *       title: 'Neuromancer'
 *     })
 *
 * @param id  The document ID
 * @param toRemove  The fields to remove
 * @throws Error if no document with the given ID is in the index, or if one
 * of the indexed fields in `toRemove` does not exist on the document
 */
removeFields (id: any, toRemove: T): void {
  const { storeFields, extractField, fields } = this._options
  const shortDocumentId = this._idToShortId.get(id)

  if (shortDocumentId == null) {
    throw new Error(`MiniSearch: no document with ID ${id}`)
  }

  // Validate all fields up front. Without this, the "does not exist" error
  // would only surface part-way through de-indexing, after earlier fields of
  // `toRemove` were already removed from the index.
  const fieldLengths = this._fieldLength.get(shortDocumentId)
  for (const field of fields) {
    // Fields absent from `toRemove` are not being removed: nothing to check
    if (extractField(toRemove, field) == null) continue

    // No recorded field length means the field was never indexed
    if (fieldLengths?.[this._fieldIds[field]] == null) {
      throw new Error(`MiniSearch: field ${field} does not exist on document with ID ${this._documentIds.get(shortDocumentId)}`)
    }
  }

  // `false`: the document itself stays in the index, only some fields go away
  this.removeFromIndex(shortDocumentId, toRemove, false)

  const storedFields = this._storedFields.get(shortDocumentId)

  // Drop the stored copy of every field mentioned in `toRemove`
  for (const fieldName of storeFields) {
    const fieldValue = extractField(toRemove, fieldName)
    if (storedFields != null && fieldValue !== undefined) {
      delete storedFields[fieldName]
    }
  }
}

private removeFromIndex (shortId: number, document: T, removed: boolean) {
const { tokenize, processTerm, extractField, fields } = this._options

for (const field of fields) {
const fieldValue = extractField(document, field)
if (fieldValue == null) continue

const tokens = tokenize(fieldValue.toString(), field)
const fieldId = this._fieldIds[field]

const uniqueTerms = new Set(tokens).size
this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms)
const uniqueTerms = new Set(tokens)
uniqueTerms.delete('')

if (this._fieldLength.get(shortId)?.[fieldId] == null) {
throw new Error(`MiniSearch: field ${field} does not exist on document with ID ${this._documentIds.get(shortId)}`)
}
this.removeFieldLength(shortId, fieldId, this._documentCount, uniqueTerms.size, removed)

for (const term of tokens) {
const processedTerm = processTerm(term, field)
Expand All @@ -723,12 +850,6 @@ export default class MiniSearch<T = any> {
}
}
}

this._storedFields.delete(shortId)
this._documentIds.delete(shortId)
this._idToShortId.delete(id)
this._fieldLength.delete(shortId)
this._documentCount -= 1
}

/**
Expand Down Expand Up @@ -1706,7 +1827,7 @@ export default class MiniSearch<T = any> {
/**
* @ignore
*/
private addFields (fields: string[]): void {
private addFieldIds (fields: string[]): void {
for (let i = 0; i < fields.length; i++) {
this._fieldIds[fields[i]] = i
}
Expand All @@ -1715,26 +1836,32 @@ export default class MiniSearch<T = any> {
/**
 * Records `length` as the length of field `fieldId` for document `documentId`
 * and folds it into the running average kept in `_avgFieldLength[fieldId]`.
 *
 * @ignore
 * @param documentId  Key of the document in `_fieldLength`
 * @param fieldId  Position of the field in the per-document lengths array and
 * in `_avgFieldLength`
 * @param count  Number of documents the updated average is divided by
 * @param length  Length to record for the field
 * @param added  Whether the previous average covers one document fewer than
 * `count` (true) or the same `count` documents (false)
 */
// NOTE(review): this span looks like a rendered diff in which the removed and
// the added version of a changed line both appear (two signatures, two
// `totalFieldLength` computations) — presumably the first of each pair is the
// removed line; confirm against the real file.
private addFieldLength (documentId: number, fieldId: number, count: number, length: number): void {
private addFieldLength (documentId: number, fieldId: number, count: number, length: number, added: boolean): void {
let fieldLengths = this._fieldLength.get(documentId)
// Lazily create the per-document array of field lengths
if (fieldLengths == null) this._fieldLength.set(documentId, fieldLengths = [])
// Number of documents counted in `count` but not yet in the stored average
const n = added ? 1 : 0

fieldLengths[fieldId] = length

// Falls back to 0 when no average has been stored for this field yet
const averageFieldLength = this._avgFieldLength[fieldId] || 0
const totalFieldLength = (averageFieldLength * count) + length
this._avgFieldLength[fieldId] = totalFieldLength / (count + 1)
// Rebuild the previous total from the previous average, add the new length,
// then average over `count` documents
const totalFieldLength = (averageFieldLength * (count - n)) + length
this._avgFieldLength[fieldId] = totalFieldLength / count
}

/**
 * Deletes the recorded length of field `fieldId` for document `documentId`
 * and subtracts `length` from the running average kept in
 * `_avgFieldLength[fieldId]`.
 *
 * @ignore
 * @param documentId  Key of the document in `_fieldLength`
 * @param fieldId  Position of the field in the per-document lengths array and
 * in `_avgFieldLength`
 * @param count  Number of documents the current average was computed over
 * @param length  Length to subtract for the field
 * @param removed  Whether the updated average is divided by `count - 1` (true,
 * the default) or still by `count` (false)
 */
// NOTE(review): this span looks like a rendered diff in which the removed and
// the added version of a changed line both appear (two signatures, two final
// assignments) — presumably the first of each pair is the removed line;
// confirm against the real file.
private removeFieldLength (documentId: number, fieldId: number, count: number, length: number): void {
private removeFieldLength (documentId: number, fieldId: number, count: number, length: number, removed: boolean = true): void {
const fieldLengths = this._fieldLength.get(documentId)
// No-op when the document has no recorded field lengths
delete fieldLengths?.[fieldId]

// With a single document the average is reset to 0 directly, which also
// avoids dividing by `count - 1 === 0` below
if (count === 1) {
this._avgFieldLength[fieldId] = 0
return
}
// Number of documents to drop from the divisor
const n = removed ? 1 : 0
// Rebuild the current total from the current average, then subtract the length
const totalFieldLength = (this._avgFieldLength[fieldId] * count) - length
this._avgFieldLength[fieldId] = totalFieldLength / (count - 1)
this._avgFieldLength[fieldId] = totalFieldLength / (count - n)
}

/**
Expand Down