diff --git a/bower.json b/bower.json index 1d11891..04e8d82 100644 --- a/bower.json +++ b/bower.json @@ -1,6 +1,6 @@ { "name": "fuse.js", - "version": "1.0.0", + "version": "1.0.1", "main": "./src/fuse.js", "ignore": [ "test/" diff --git a/package.json b/package.json index cca10e0..97fa3ad 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "fuse.js", "author": "Kirollos Risk", - "version": "1.0.0", + "version": "1.0.1", "description": "Lightweight fuzzy-search", "license": "Apache", "main": "./src/fuse.js", diff --git a/src/fuse.js b/src/fuse.js index 796c02b..bf12fc1 100644 --- a/src/fuse.js +++ b/src/fuse.js @@ -1,4 +1,5 @@ /** + * @license * Fuse - Lightweight fuzzy-search * * Copyright (c) 2012 Kirollos Risk . @@ -17,323 +18,328 @@ * limitations under the License. */ (function() { + /** + * Adapted from "Diff, Match and Patch", by Google + * + * http://code.google.com/p/google-diff-match-patch/ + * + * Modified by: Kirollos Risk + * ----------------------------------------------- + * Details: the algorithm and structure was modified to allow the creation of + * instances with a method inside which does the actual + * bitap search. The (the string that is searched for) is only defined + * once per instance and thus it eliminates redundant re-creation when searching + * over a list of strings. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + */ + + var defaultOptions = { + // Approximately where in the text is the pattern expected to be found? + location: 0, + + // Determines how close the match must be to the fuzzy location (specified above). + // An exact letter match which is 'distance' characters away from the fuzzy location + // would score as a complete mismatch. 
A distance of '0' requires the match be at + // the exact location specified, a distance of '1000' would require a perfect match + // to be within 800 characters of the fuzzy location to be found using a 0.8 threshold. + distance: 100, + + // At what point does the match algorithm give up. A threshold of '0.0' requires a perfect match + // (of both letters and location), a threshold of '1.0' would match anything. + threshold: 0.6, + + // Machine word size + maxPatternLength: 32 + }; + + function Searcher(pattern, options) { + options = options || {}; + + var MATCH_LOCATION = options.location || defaultOptions.location, + MATCH_DISTANCE = options.distance || defaultOptions.distance, + MATCH_THRESHOLD = options.threshold || defaultOptions.threshold, + MAX_PATTERN_LEN = options.maxPatternLength || defaultOptions.maxPatternLength, + + pattern = options.caseSensitive ? pattern : pattern.toLowerCase(), + patternLen = pattern.length; + + if (patternLen > MAX_PATTERN_LEN) { + throw new Error('Pattern length is too long'); + } + + var matchmask = 1 << (patternLen - 1); + /** - * Adapted from "Diff, Match and Patch", by Google - * - * http://code.google.com/p/google-diff-match-patch/ - * - * Modified by: Kirollos Risk - * ----------------------------------------------- - * Details: the algorithm and structure was modified to allow the creation of - * instances with a method inside which does the actual - * bitap search. The (the string that is searched for) is only defined - * once per instance and thus it eliminates redundant re-creation when searching - * over a list of strings. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. + * Initialize the alphabet for the Bitap algorithm. + * @return {Object} Hash of character locations.
+ * @private */ + var pattern_alphabet = (function() { + var mask = {}, + i = 0; - var defaultOptions = { - // Approximately where in the text is the pattern expected to be found? - location: 0, + for (i = 0; i < patternLen; i++) { + mask[pattern.charAt(i)] = 0; + } - // Determines how close the match must be to the fuzzy location (specified above). - // An exact letter match which is 'distance' characters away from the fuzzy location - // would score as a complete mismatch. A distance of '0' requires the match be at - // the exact location specified, a threshold of '1000' would require a perfect match - // to be within 800 characters of the fuzzy location to be found using a 0.8 threshold. - distance: 100, + for (i = 0; i < patternLen; i++) { + mask[pattern.charAt(i)] |= 1 << (pattern.length - i - 1); + } - // At what point does the match algorithm give up. A threshold of '0.0' requires a perfect match - // (of both letters and location), a threshold of '1.0' would match anything. - threshold: 0.6, + return mask; + })(); - // Machine word size - maxPatternLength: 32 - }; + /** + * Compute and return the score for a match with `e` errors and `x` location. + * @param {number} e Number of errors in match. + * @param {number} x Location of match. + * @return {number} Overall score for match (0.0 = good, 1.0 = bad). + * @private + */ + function match_bitapScore(e, x) { + var accuracy = e / patternLen, + proximity = Math.abs(MATCH_LOCATION - x); - function Searcher(pattern, options) { - options = options || {}; - - var MATCH_LOCATION = options.location || defaultOptions.location, - MATCH_DISTANCE = options.distance || defaultOptions.distance, - MATCH_THRESHOLD = options.threshold || defaultOptions.threshold, - MAX_PATTERN_LEN = options.maxPatternLength || defaultOptions.maxPatternLength, - - pattern = options.caseSensitive ? 
pattern : pattern.toLowerCase(), - patternLen = pattern.length; - - if (patternLen > MAX_PATTERN_LEN) { - throw new Error('Pattern length is too long'); - } - - var matchmask = 1 << (patternLen - 1); - - /** - * Initialize the alphabet for the Bitap algorithm. - * @return {Object} Hash of character locations. - * @private - */ - var pattern_alphabet = (function() { - var mask = {}, - i = 0; - - for (i = 0; i < patternLen; i++) { - mask[pattern.charAt(i)] = 0; - } - - for (i = 0; i < patternLen; i++) { - mask[pattern.charAt(i)] |= 1 << (pattern.length - i - 1); - } - - return mask; - })(); - - /** - * Compute and return the score for a match with `e` errors and `x` location. - * @param {number} e Number of errors in match. - * @param {number} x Location of match. - * @return {number} Overall score for match (0.0 = good, 1.0 = bad). - * @private - */ - function match_bitapScore(e, x) { - var accuracy = e / patternLen, - proximity = Math.abs(MATCH_LOCATION - x); - - if (!MATCH_DISTANCE) { - // Dodge divide by zero error. - return proximity ? 1.0 : accuracy; - } - return accuracy + (proximity / MATCH_DISTANCE); - } - - /** - * Compute and return the result of the search - * @param {String} text The text to search in - * @return - * {Object} Literal containing: - * {Boolean} isMatch Whether the text is a match or not - * {Decimal} score Overall score for the match - * @public - */ - this.search = function(text) { - text = options.caseSensitive ? text : text.toLowerCase(); - - if (pattern === text) { - // Exact match - return { - isMatch: true, - score: 0 - }; - } - - var i, j, - // Set starting location at beginning text and initialize the alphabet. - textLen = text.length, - // Highest score beyond which we give up. - scoreThreshold = MATCH_THRESHOLD, - // Is there a nearby exact match? 
(speedup) - bestLoc = text.indexOf(pattern, MATCH_LOCATION), - - binMin, binMid, - binMax = patternLen + textLen, - - lastRd, start, finish, rd, charMatch, - - score = 1, - - locations = []; - - if (bestLoc != -1) { - scoreThreshold = Math.min(match_bitapScore(0, bestLoc), scoreThreshold); - // What about in the other direction? (speedup) - bestLoc = text.lastIndexOf(pattern, MATCH_LOCATION + patternLen); - - if (bestLoc != -1) { - scoreThreshold = Math.min(match_bitapScore(0, bestLoc), scoreThreshold); - } - } - - bestLoc = -1; - - for (i = 0; i < patternLen; i++) { - // Scan for the best match; each iteration allows for one more error. - // Run a binary search to determine how far from 'MATCH_LOCATION' we can stray at this - // error level. - binMin = 0; - binMid = binMax; - while (binMin < binMid) { - if (match_bitapScore(i, MATCH_LOCATION + binMid) <= scoreThreshold) { - binMin = binMid; - } else { - binMax = binMid; - } - binMid = Math.floor((binMax - binMin) / 2 + binMin); - } - - // Use the result from this iteration as the maximum for the next. - binMax = binMid; - start = Math.max(1, MATCH_LOCATION - binMid + 1); - finish = Math.min(MATCH_LOCATION + binMid, textLen) + patternLen; - - // Initialize the bit array - rd = Array(finish + 2); - - rd[finish + 1] = (1 << i) - 1; - - for (j = finish; j >= start; j--) { - // The alphabet is a sparse hash, so the following line generates warnings. - charMatch = pattern_alphabet[text.charAt(j - 1)]; - - if (i === 0) { - // First pass: exact match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; - } else { - // Subsequent passes: fuzzy match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (((lastRd[j + 1] | lastRd[j]) << 1) | 1) | lastRd[j + 1]; - } - if (rd[j] & matchmask) { - score = match_bitapScore(i, j - 1); - // This match will almost certainly be better than any existing match. - // But check anyway. - if (score <= scoreThreshold) { - // Told you so. 
- scoreThreshold = score; - bestLoc = j - 1; - locations.push(bestLoc); - - if (bestLoc > MATCH_LOCATION) { - // When passing loc, don't exceed our current distance from loc. - start = Math.max(1, 2 * MATCH_LOCATION - bestLoc); - } else { - // Already passed loc, downhill from here on in. - break; - } - } - } - } - - // No hope for a (better) match at greater error levels. - if (match_bitapScore(i + 1, MATCH_LOCATION) > scoreThreshold) { - break; - } - lastRd = rd; - } - - return { - isMatch: bestLoc >= 0, - score: score - }; - } + if (!MATCH_DISTANCE) { + // Dodge divide by zero error. + return proximity ? 1.0 : accuracy; + } + return accuracy + (proximity / MATCH_DISTANCE); } /** - * @param {Array} list - * @param {Object} options + * Compute and return the result of the search + * @param {String} text The text to search in + * @return + * {Object} Literal containing: + * {Boolean} isMatch Whether the text is a match or not + * {Decimal} score Overall score for the match * @public */ - function Fuse(list, options) { - options = options || {}; - var keys = options.keys; + this.search = function(text) { + text = options.caseSensitive ? text : text.toLowerCase(); - /** - * Searches for all the items whose keys (fuzzy) match the pattern. - * @param {String} pattern The pattern string to fuzzy search on. - * @return {Array} A list of all serch matches. - * @public - */ - this.search = function(pattern) { - var searcher = new Searcher(pattern, options), - i, j, item, text, dataLen = list.length, - bitapResult, rawResults = [], - resultMap = {}, - rawResultsLen, existingResult, results = [], - compute = null; + if (pattern === text) { + // Exact match + return { + isMatch: true, + score: 0 + }; + } - /** - * Calls for bitap analysis. Builds the raw result list. - * @param {String} text The pattern string to fuzzy search on. - * @param {String|Int} entity If the is an Array, then entity will be an index, - * otherwise it's the item object. 
- * @param {Int} index - * @return {Object|Int} - * @private - */ - function analyzeText(text, entity, index) { - // Check if the text can be searched - if (text !== undefined && text !== null && typeof text === 'string') { + var i, j, + // Set starting location at beginning text and initialize the alphabet. + textLen = text.length, + // Highest score beyond which we give up. + scoreThreshold = MATCH_THRESHOLD, + // Is there a nearby exact match? (speedup) + bestLoc = text.indexOf(pattern, MATCH_LOCATION), - // Get the result - bitapResult = searcher.search(text); + binMin, binMid, + binMax = patternLen + textLen, - // If a match is found, add the item to , including its score - if (bitapResult.isMatch) { + lastRd, start, finish, rd, charMatch, - // Check if the item already exists in our results - existingResult = resultMap[index]; - if (existingResult) { - // Use the lowest score - existingResult.score = Math.min(existingResult.score, bitapResult.score); - } else { - // Add it to the raw result list - resultMap[index] = { - item: entity, - score: bitapResult.score - }; - rawResults.push(resultMap[index]); - } - } - } + score = 1, + + locations = []; + + if (bestLoc != -1) { + scoreThreshold = Math.min(match_bitapScore(0, bestLoc), scoreThreshold); + // What about in the other direction? (speedup) + bestLoc = text.lastIndexOf(pattern, MATCH_LOCATION + patternLen); + + if (bestLoc != -1) { + scoreThreshold = Math.min(match_bitapScore(0, bestLoc), scoreThreshold); + } + } + + bestLoc = -1; + + for (i = 0; i < patternLen; i++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'MATCH_LOCATION' we can stray at this + // error level. 
+ binMin = 0; + binMid = binMax; + while (binMin < binMid) { + if (match_bitapScore(i, MATCH_LOCATION + binMid) <= scoreThreshold) { + binMin = binMid; + } else { + binMax = binMid; + } + binMid = Math.floor((binMax - binMin) / 2 + binMin); + } + + // Use the result from this iteration as the maximum for the next. + binMax = binMid; + start = Math.max(1, MATCH_LOCATION - binMid + 1); + finish = Math.min(MATCH_LOCATION + binMid, textLen) + patternLen; + + // Initialize the bit array + rd = Array(finish + 2); + + rd[finish + 1] = (1 << i) - 1; + + for (j = finish; j >= start; j--) { + // The alphabet is a sparse hash, so the following line generates warnings. + charMatch = pattern_alphabet[text.charAt(j - 1)]; + + if (i === 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (((lastRd[j + 1] | lastRd[j]) << 1) | 1) | lastRd[j + 1]; + } + if (rd[j] & matchmask) { + score = match_bitapScore(i, j - 1); + // This match will almost certainly be better than any existing match. + // But check anyway. + if (score <= scoreThreshold) { + // Told you so. + scoreThreshold = score; + bestLoc = j - 1; + locations.push(bestLoc); + + if (bestLoc > MATCH_LOCATION) { + // When passing loc, don't exceed our current distance from loc. + start = Math.max(1, 2 * MATCH_LOCATION - bestLoc); + } else { + // Already passed loc, downhill from here on in. + break; + } } + } + } - // Check the first item in the list, if it's a string, then we assume - // that every item in the list is also a string, and thus it's a flattened array. - if (typeof list[0] === 'string') { - // Iterate over every item - for (i = 0; i < dataLen; i++) { - analyzeText(list[i], i, i); - } + // No hope for a (better) match at greater error levels. 
+ if (match_bitapScore(i + 1, MATCH_LOCATION) > scoreThreshold) { + break; + } + lastRd = rd; + } + + return { + isMatch: bestLoc >= 0, + score: score + }; + } + } + + /** + * @param {Array} list + * @param {Object} options + * @public + */ + function Fuse(list, options) { + options = options || {}; + var searchKeys = options.keys || []; + + /** + * Searches for all the items whose keys (fuzzy) match the pattern. + * @param {String} pattern The pattern string to fuzzy search on. + * @return {Array} A list of all search matches. + * @public + */ + this.search = function(pattern) { + var searcher = new Searcher(pattern, options), + i, j, item, text, + dataLen = list.length, + searchKeysLen = searchKeys.length, + bitapResult, rawResults = [], + index = 0, + resultMap = {}, + rawResultsLen, existingResult, results = [], + compute = null; + + /** + * Calls for bitap analysis. Builds the raw result list. + * @param {String} text The text to run the fuzzy search on. + * @param {String|Int} entity If the `list` is an Array of strings, then entity will be an index, + * otherwise it's the item object. + * @param {Int} index + * @return {Object|Int} + * @private + */ + function analyzeText(text, entity, index) { + // Check if the text can be searched + if (text !== undefined && text !== null && typeof text === 'string') { + + // Get the result + bitapResult = searcher.search(text); + + // If a match is found, add the item to `rawResults`, including its score + if (bitapResult.isMatch) { + + // Check if the item already exists in our results + existingResult = resultMap[index]; + if (existingResult) { + // Use the lowest score + existingResult.score = Math.min(existingResult.score, bitapResult.score); } else { - // Otherwise, the first item is an Object (hopefully), and thus the searching - // is done on the values of the keys of each item.
- - // Iterate over every item - for (i = 0; i < dataLen; i++) { - item = list[i]; - // Iterate over every key - for (j = 0; j < keys.length; j++) { - analyzeText(item[keys[j]], item, i); - } - } + // Add it to the raw result list + resultMap[index] = { + item: entity, + score: bitapResult.score + }; + rawResults.push(resultMap[index]); } - - // Sort the results, form lowest to highest score - rawResults.sort(function(a, b) { - return a.score - b.score; - }); - - // From the results, push into a new array only the item identifier (if specified) - // of the entire item. This is because we don't want to return the , - // since it contains other metadata; - rawResultsLen = rawResults.length; - for (i = 0; i < rawResultsLen; i++) { - results.push(options.id ? rawResults[i].item[options.id] : rawResults[i].item); - } - - return results; + } } + } + + // Check the first item in the list, if it's a string, then we assume + // that every item in the list is also a string, and thus it's a flattened array. + if (typeof list[0] === 'string') { + // Iterate over every item + for (i = 0; i < dataLen; i++) { + analyzeText(list[i], i, i); + } + for (; index < dataLen; index++) { + analyzeText(list[i], index, index); + } + } else { + // Otherwise, the first item is an Object (hopefully), and thus the searching + // is done on the values of the keys of each item. + + // Iterate over every item + for (; index < dataLen; index++) { + item = list[index]; + // Iterate over every key + for (j = 0; j < searchKeysLen; j++) { + analyzeText(item[searchKeys[j]], item, index); + } + } + } + + // Sort the results, from lowest to highest score + rawResults.sort(function(a, b) { + return a.score - b.score; + }); + + // From the results, push into a new array only the item identifier (if specified) + // or the entire item.
This is because we don't want to return the `rawResults`, + // since it contains other metadata; + rawResultsLen = rawResults.length; + for (i = 0; i < rawResultsLen; i++) { + results.push(options.id ? rawResults[i].item[options.id] : rawResults[i].item); + } + + return results; } + } - //Export to Common JS Loader - if (typeof module !== 'undefined' && typeof module.exports !== 'undefined') { - if (typeof module.setExports === 'function') { - module.setExports(Fuse); - } else { - module.exports = Fuse; - } + //Export to Common JS Loader + if (typeof module !== 'undefined' && typeof module.exports !== 'undefined') { + if (typeof module.setExports === 'function') { + module.setExports(Fuse); } else { - window.Fuse = Fuse; + module.exports = Fuse; } - + } else { + window.Fuse = Fuse; + } })(); \ No newline at end of file diff --git a/src/fuse.min.js b/src/fuse.min.js index 14af8bb..93e7974 100644 --- a/src/fuse.min.js +++ b/src/fuse.min.js @@ -1,9 +1,20 @@ /** + * @license * Fuse - Lightweight fuzzy-search * * Copyright (c) 2012 Kirollos Risk . * All Rights Reserved. Apache Software License 2.0 * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
*/ -!function(){function Searcher(pattern,options){options=options||{};var MATCH_LOCATION=options.location||0,MATCH_DISTANCE=options.distance||100,MATCH_THRESHOLD=options.threshold||.6,pattern=options.caseSensitive?pattern:pattern.toLowerCase(),patternLen=pattern.length;if(patternLen>32){throw new Error("Pattern length is too long")}var matchmask=1<=start;j--){charMatch=pattern_alphabet[text.charAt(j-1)];if(i===0){rd[j]=(rd[j+1]<<1|1)&charMatch}else{rd[j]=(rd[j+1]<<1|1)&charMatch|((lastRd[j+1]|lastRd[j])<<1|1)|lastRd[j+1]}if(rd[j]&matchmask){score=match_bitapScore(i,j-1);if(score<=scoreThreshold){scoreThreshold=score;bestLoc=j-1;locations.push(bestLoc);if(bestLoc>MATCH_LOCATION){start=Math.max(1,2*MATCH_LOCATION-bestLoc)}else{break}}}}if(match_bitapScore(i+1,MATCH_LOCATION)>scoreThreshold){break}lastRd=rd}return{isMatch:bestLoc>=0,score:score}}}function Fuse(list,options){options=options||{};var keys=options.keys;this.search=function(pattern){var searcher=new Searcher(pattern,options),i,j,item,text,dataLen=list.length,bitapResult,rawResults=[],resultMap={},rawResultsLen,existingResult,results=[],compute=null;function analyzeText(text,entity,index){if(text!==undefined&&text!==null&&typeof text==="string"){bitapResult=searcher.search(text);if(bitapResult.isMatch){existingResult=resultMap[index];if(existingResult){existingResult.score=Math.min(existingResult.score,bitapResult.score)}else{resultMap[index]={item:entity,score:bitapResult.score};rawResults.push(resultMap[index])}}}}if(typeof list[0]==="string"){for(i=0;is)throw new Error("Pattern length is too long");var 
c=1<r;r++)e[t.charAt(r)]=0;for(r=0;h>r;r++)e[t.charAt(r)]|=1<a;a++){for(u=0,l=y;l>u;)o(a,n+l)<=M?u=l:y=l,l=Math.floor((y-u)/2+u);for(y=l,m=Math.max(1,n-l+1),g=Math.min(n+l,x)+h,p=Array(g+2),p[g+1]=(1<=m;s--)if(v=f[r.charAt(s-1)],p[s]=0===a?(p[s+1]<<1|1)&v:(p[s+1]<<1|1)&v|((d[s+1]|d[s])<<1|1)|d[s+1],p[s]&c&&(L=o(a,s-1),M>=L)){if(M=L,w=s-1,A.push(w),!(w>n))break;m=Math.max(1,2*n-w)}if(o(a+1,n)>M)break;d=p}return{isMatch:w>=0,score:L}}}function e(e,r){r=r||{};var o=r.keys||[];this.search=function(n){function a(t,e,r){void 0!==t&&null!==t&&"string"==typeof t&&(c=l.search(t),c.isMatch&&(u=v[r],u?u.score=Math.min(u.score,c.score):(v[r]={item:e,score:c.score},g.push(v[r]))))}var i,s,h,c,f,u,l=new t(n,r),d=e.length,m=o.length,g=[],p=0,v={},x=[];if("string"==typeof e[0]){for(i=0;d>i;i++)a(e[i],i,i);for(;d>p;p++)a(e[i],p,p)}else for(;d>p;p++)for(h=e[p],s=0;m>s;s++)a(h[o[s]],h,p);for(g.sort(function(t,e){return t.score-e.score}),f=g.length,i=0;f>i;i++)x.push(r.id?g[i].item[r.id]:g[i].item);return x}}var r={location:0,distance:100,threshold:.6,maxPatternLength:32};"undefined"!=typeof module&&"undefined"!=typeof module.exports?"function"==typeof module.setExports?module.setExports(e):module.exports=e:window.Fuse=e}(); \ No newline at end of file