mirror of
https://github.com/krisk/Fuse.git
synced 2026-01-25 16:43:11 +00:00
413 lines
12 KiB
JavaScript
413 lines
12 KiB
JavaScript
const Bitap = require('./bitap')
|
|
const deepValue = require('./helpers/deep_value')
|
|
const isArray = require('./helpers/is_array')
|
|
|
|
class Fuse {
|
|
constructor (list, {
|
|
// Approximately where in the text is the pattern expected to be found?
|
|
location = 0,
|
|
// Determines how close the match must be to the fuzzy location (specified above).
|
|
// An exact letter match which is 'distance' characters away from the fuzzy location
|
|
// would score as a complete mismatch. A distance of '0' requires the match be at
|
|
// the exact location specified, a threshold of '1000' would require a perfect match
|
|
// to be within 800 characters of the fuzzy location to be found using a 0.8 threshold.
|
|
distance = 100,
|
|
// At what point does the match algorithm give up. A threshold of '0.0' requires a perfect match
|
|
// (of both letters and location), a threshold of '1.0' would match anything.
|
|
threshold = 0.6,
|
|
// Machine word size
|
|
maxPatternLength = 32,
|
|
// Indicates whether comparisons should be case sensitive.
|
|
caseSensitive = false,
|
|
// Regex used to separate words when searching. Only applicable when `tokenize` is `true`.
|
|
tokenSeparator = / +/g,
|
|
// When true, the algorithm continues searching to the end of the input even if a perfect
|
|
// match is found before the end of the same input.
|
|
findAllMatches = false,
|
|
// Minimum number of characters that must be matched before a result is considered a match
|
|
minMatchCharLength = 1,
|
|
// The name of the identifier property. If specified, the returned result will be a list
|
|
// of the items' dentifiers, otherwise it will be a list of the items.
|
|
id = null,
|
|
// List of properties that will be searched. This also supports nested properties.
|
|
keys = [],
|
|
// Whether to sort the result list, by score
|
|
shouldSort = true,
|
|
// The get function to use when fetching an object's properties.
|
|
// The default will search nested paths *ie foo.bar.baz*
|
|
getFn = deepValue,
|
|
// Default sort function
|
|
sortFn = (a, b) => (a.score - b.score),
|
|
// When true, the search algorithm will search individual words **and** the full string,
|
|
// computing the final score as a function of both. Note that when `tokenize` is `true`,
|
|
// the `threshold`, `distance`, and `location` are inconsequential for individual tokens.
|
|
tokenize = false,
|
|
// When true, the result set will only include records that match all tokens. Will only work
|
|
// if `tokenize` is also true.
|
|
matchAllTokens = false,
|
|
|
|
includeMatches = false,
|
|
includeScore = false,
|
|
|
|
// Will print to the console. Useful for debugging.
|
|
verbose = false
|
|
}) {
|
|
this.options = {
|
|
location,
|
|
distance,
|
|
threshold,
|
|
maxPatternLength,
|
|
isCaseSensitive: caseSensitive,
|
|
tokenSeparator,
|
|
findAllMatches,
|
|
minMatchCharLength,
|
|
id,
|
|
keys,
|
|
includeMatches,
|
|
includeScore,
|
|
shouldSort,
|
|
getFn,
|
|
sortFn,
|
|
verbose,
|
|
tokenize,
|
|
matchAllTokens
|
|
}
|
|
|
|
this.setCollection(list)
|
|
}
|
|
|
|
setCollection (list) {
|
|
this.list = list
|
|
return list
|
|
}
|
|
|
|
search (pattern) {
|
|
this._log(`---------\nSearch pattern: "${pattern}"`)
|
|
|
|
const {
|
|
tokenSearchers,
|
|
fullSearcher
|
|
} = this._prepareSearchers(pattern)
|
|
|
|
let { weights, results } = this._search(tokenSearchers, fullSearcher)
|
|
|
|
this._computeScore(weights, results)
|
|
|
|
if (this.options.shouldSort) {
|
|
this._sort(results)
|
|
}
|
|
|
|
return this._format(results)
|
|
}
|
|
|
|
_prepareSearchers (pattern = '') {
|
|
const tokenSearchers = []
|
|
|
|
if (this.options.tokenize) {
|
|
// Tokenize on the separator
|
|
const tokens = pattern.split(this.options.tokenSeparator)
|
|
for (let i = 0, len = tokens.length; i < len; i += 1) {
|
|
tokenSearchers.push(new Bitap(tokens[i], this.options))
|
|
}
|
|
}
|
|
|
|
let fullSearcher = new Bitap(pattern, this.options)
|
|
|
|
return { tokenSearchers, fullSearcher }
|
|
}
|
|
|
|
_search (tokenSearchers = [], fullSearcher) {
|
|
const list = this.list
|
|
const resultMap = {}
|
|
const results = []
|
|
|
|
// Check the first item in the list, if it's a string, then we assume
|
|
// that every item in the list is also a string, and thus it's a flattened array.
|
|
if (typeof list[0] === 'string') {
|
|
// Iterate over every item
|
|
for (let i = 0, len = list.length; i < len; i += 1) {
|
|
this._analyze({
|
|
key: '',
|
|
value: list[i],
|
|
record: i,
|
|
index: i
|
|
}, {
|
|
resultMap,
|
|
results,
|
|
tokenSearchers,
|
|
fullSearcher
|
|
})
|
|
}
|
|
|
|
return { weights: null, results }
|
|
}
|
|
|
|
// Otherwise, the first item is an Object (hopefully), and thus the searching
|
|
// is done on the values of the keys of each item.
|
|
const weights = {}
|
|
for (let i = 0, len = list.length; i < len; i += 1) {
|
|
let item = list[i]
|
|
// Iterate over every key
|
|
for (let j = 0, keysLen = this.options.keys.length; j < keysLen; j += 1) {
|
|
let key = this.options.keys[j]
|
|
if (typeof key !== 'string') {
|
|
weights[key.name] = {
|
|
weight: (1 - key.weight) || 1
|
|
}
|
|
if (key.weight <= 0 || key.weight > 1) {
|
|
throw new Error('Key weight has to be > 0 and <= 1')
|
|
}
|
|
key = key.name
|
|
} else {
|
|
weights[key] = {
|
|
weight: 1
|
|
}
|
|
}
|
|
|
|
this._analyze({
|
|
key,
|
|
value: this.options.getFn(item, key),
|
|
record: item,
|
|
index: i
|
|
}, {
|
|
resultMap,
|
|
results,
|
|
tokenSearchers,
|
|
fullSearcher
|
|
})
|
|
}
|
|
}
|
|
|
|
return { weights, results }
|
|
}
|
|
|
|
_analyze ({ key, arrayIndex = -1, value, record, index }, { tokenSearchers = [], fullSearcher = [], resultMap = {}, results = [] }) {
|
|
// Check if the texvaluet can be searched
|
|
if (value === undefined || value === null) {
|
|
return
|
|
}
|
|
|
|
let exists = false
|
|
let averageScore = -1
|
|
let numTextMatches = 0
|
|
|
|
if (typeof value === 'string') {
|
|
this._log(`\nKey: ${key === '' ? '-' : key}`)
|
|
|
|
let mainSearchResult = fullSearcher.search(value)
|
|
this._log(`Full text: "${value}", score: ${mainSearchResult.score}`)
|
|
|
|
if (this.options.tokenize) {
|
|
let words = value.split(this.options.tokenSeparator)
|
|
let scores = []
|
|
|
|
for (let i = 0; i < tokenSearchers.length; i += 1) {
|
|
let tokenSearcher = tokenSearchers[i]
|
|
|
|
this._log(`\nPattern: "${tokenSearcher.pattern}"`)
|
|
|
|
// let tokenScores = []
|
|
let hasMatchInText = false
|
|
|
|
for (let j = 0; j < words.length; j += 1) {
|
|
let word = words[j]
|
|
let tokenSearchResult = tokenSearcher.search(word)
|
|
let obj = {}
|
|
if (tokenSearchResult.isMatch) {
|
|
obj[word] = tokenSearchResult.score
|
|
exists = true
|
|
hasMatchInText = true
|
|
scores.push(tokenSearchResult.score)
|
|
} else {
|
|
obj[word] = 1
|
|
if (!this.options.matchAllTokens) {
|
|
scores.push(1)
|
|
}
|
|
}
|
|
this._log(`Token: "${word}", score: ${obj[word]}`)
|
|
// tokenScores.push(obj)
|
|
}
|
|
|
|
if (hasMatchInText) {
|
|
numTextMatches += 1
|
|
}
|
|
}
|
|
|
|
averageScore = scores[0]
|
|
let scoresLen = scores.length
|
|
for (let i = 1; i < scoresLen; i += 1) {
|
|
averageScore += scores[i]
|
|
}
|
|
averageScore = averageScore / scoresLen
|
|
|
|
this._log('Token score average:', averageScore)
|
|
}
|
|
|
|
let finalScore = mainSearchResult.score
|
|
if (averageScore > -1) {
|
|
finalScore = (finalScore + averageScore) / 2
|
|
}
|
|
|
|
this._log('Score average:', finalScore)
|
|
|
|
let checkTextMatches = (this.options.tokenize && this.options.matchAllTokens) ? numTextMatches >= tokenSearchers.length : true
|
|
|
|
this._log(`\nCheck Matches: ${checkTextMatches}`)
|
|
|
|
// If a match is found, add the item to <rawResults>, including its score
|
|
if ((exists || mainSearchResult.isMatch) && checkTextMatches) {
|
|
// Check if the item already exists in our results
|
|
let existingResult = resultMap[index]
|
|
if (existingResult) {
|
|
// Use the lowest score
|
|
// existingResult.score, bitapResult.score
|
|
existingResult.output.push({
|
|
key,
|
|
arrayIndex,
|
|
value,
|
|
score: finalScore,
|
|
matchedIndices: mainSearchResult.matchedIndices
|
|
})
|
|
} else {
|
|
// Add it to the raw result list
|
|
resultMap[index] = {
|
|
item: record,
|
|
output: [{
|
|
key,
|
|
arrayIndex,
|
|
value,
|
|
score: finalScore,
|
|
matchedIndices: mainSearchResult.matchedIndices
|
|
}]
|
|
}
|
|
|
|
results.push(resultMap[index])
|
|
}
|
|
}
|
|
} else if (isArray(value)) {
|
|
for (let i = 0, len = value.length; i < len; i += 1) {
|
|
this._analyze({
|
|
key,
|
|
arrayIndex: i,
|
|
value: value[i],
|
|
record,
|
|
index
|
|
}, {
|
|
resultMap,
|
|
results,
|
|
tokenSearchers,
|
|
fullSearcher
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
_computeScore (weights, results) {
|
|
this._log('\n\nComputing score:\n')
|
|
|
|
for (let i = 0, len = results.length; i < len; i += 1) {
|
|
const output = results[i].output
|
|
const scoreLen = output.length
|
|
|
|
let totalScore = 0
|
|
let bestScore = 1
|
|
|
|
for (let j = 0; j < scoreLen; j += 1) {
|
|
let weight = weights ? weights[output[j].key].weight : 1
|
|
let score = weight === 1 ? output[j].score : (output[j].score || 0.001)
|
|
let nScore = score * weight
|
|
|
|
if (weight !== 1) {
|
|
bestScore = Math.min(bestScore, nScore)
|
|
} else {
|
|
output[j].nScore = nScore
|
|
totalScore += nScore
|
|
}
|
|
}
|
|
|
|
results[i].score = bestScore === 1 ? totalScore / scoreLen : bestScore
|
|
|
|
this._log(results[i])
|
|
}
|
|
}
|
|
|
|
_sort (results) {
|
|
this._log('\n\nSorting....')
|
|
results.sort(this.options.sortFn)
|
|
}
|
|
|
|
_format (results) {
|
|
const finalOutput = []
|
|
|
|
this._log('\n\nOutput:\n\n', JSON.stringify(results))
|
|
|
|
let transformers = []
|
|
|
|
if (this.options.includeMatches) {
|
|
transformers.push((result, data) => {
|
|
const output = result.output
|
|
data.matches = []
|
|
|
|
for (let i = 0, len = output.length; i < len; i += 1) {
|
|
let item = output[i]
|
|
|
|
if (item.matchedIndices.length === 0) {
|
|
continue
|
|
}
|
|
|
|
let obj = {
|
|
indices: item.matchedIndices,
|
|
value: item.value
|
|
}
|
|
if (item.key) {
|
|
obj.key = item.key
|
|
}
|
|
if (item.hasOwnProperty('arrayIndex') && item.arrayIndex > -1) {
|
|
obj.arrayIndex = item.arrayIndex
|
|
}
|
|
data.matches.push(obj)
|
|
}
|
|
})
|
|
}
|
|
|
|
if (this.options.includeScore) {
|
|
transformers.push((result, data) => {
|
|
data.score = result.score
|
|
})
|
|
}
|
|
|
|
for (let i = 0, len = results.length; i < len; i += 1) {
|
|
const result = results[i]
|
|
|
|
if (this.options.id) {
|
|
result.item = this.options.getFn(result.item, this.options.id)[0]
|
|
}
|
|
|
|
if (!transformers.length) {
|
|
finalOutput.push(result.item)
|
|
continue
|
|
}
|
|
|
|
const data = {
|
|
item: result.item
|
|
}
|
|
|
|
for (let j = 0, len = transformers.length; j < len; j += 1) {
|
|
transformers[j](result, data)
|
|
}
|
|
|
|
finalOutput.push(data)
|
|
}
|
|
|
|
return finalOutput
|
|
}
|
|
|
|
_log () {
|
|
if (this.options.verbose) {
|
|
console.log(...arguments)
|
|
}
|
|
}
|
|
}
|
|
|
|
module.exports = Fuse
|