diff options
| author | Phil Crosby | 2012-04-29 21:32:11 -0700 |
|---|---|---|
| committer | Phil Crosby | 2012-04-29 21:32:11 -0700 |
| commit | a1af6bedf1daa378970e62ebaf55301e7539eb2e (patch) | |
| tree | 338ffa673d29fc35299d6a26545ca192b4ee05a5 /lib | |
| parent | 629e74c5271d25ef56cc91005954297afa0d3000 (diff) | |
| download | vimium-a1af6bedf1daa378970e62ebaf55301e7539eb2e.tar.bz2 | |
reorder source
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/completion.js | 601 |
1 files changed, 299 insertions, 302 deletions
diff --git a/lib/completion.js b/lib/completion.js index f62d3a58..3209a2f4 100644 --- a/lib/completion.js +++ b/lib/completion.js @@ -1,307 +1,5 @@ var completion = (function() { - //============ Helper functions and objects ============// - - /** Singleton object that provides helpers and caching for fuzzy completion. */ - var fuzzyMatcher = (function() { - var self = {}; - - self.timeToClean = 0; - self.cacheSize = 1000; - self.regexNonWord = /[\W_]/ig; - - // cache generated regular expressions - self.matcherCache = {}; - // cache filtered results from recent queries - self.filterCache = {}; - self.normalizationCache = {}; - - /** Normalizes the string specified in :query. Strips any non-word characters and converts - * to lower case. */ - self.normalize = function(query) { - if (!(query in self.normalizationCache)) - self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); - return self.normalizationCache[query]; - } - - /** Returns the non-matching and matching string parts, in alternating order (starting with a - * non-matching part) or null, if the string doesn't match the query. - * - * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] - * - * Note that this function matches the longest possible parts of a string and is therefore not very - * efficient. There it falls back to a more performant, but less accurate regex matching if the - * normalized query is longer than 10 characters. - * - * _Don't use this to check if a string matches a query_. Use `getMatcher(query).test(str)` instead. - */ - self.getMatchGroups = function(query, str) { - query = self.normalize(query); - if (query.length == 0) - return str.length ? [str] : []; - if (query.length > 15) { - // for long query strings, the method is much too inefficient, so fall - // back to the less accurate regex matching - return self.getMatcher(query).exec(str).slice(1); - } - - for (var i = query.length; i >= 1; --i) { - var part = query.slice(0, i); - var partOffset = str.toLowerCase().indexOf(part); - if (partOffset < 0) - continue; - - // we use recursive backtracking here, this is why it's slow. - rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); - if (!rest) continue; - - return [ - str.slice(0, partOffset), - part, - ].concat(rest); - } - return null; - } - - /** Calculates a very simple similarity value between a :query and a :string */ - self.calculateRelevancy = function(query, str) { - query = self.normalize(query); - str = self.normalize(str); - var sum = 0; - - // only iterate over slices of the query starting at an offset up to 10 to save resources - for (var start = 0; start < 20 && start < query.length; ++start) { - for (var i = query.length; i >= start; --i) { - if (str.indexOf(query.slice(start, i)) >= 0) { - var length = i - start; - sum += length * length; - break; - } - } - } - return sum * sum * sum; - } - - /** Trims the size of the caches to the configured size using a FIFO algorithm. */ - self.cleanCache = function() { - // remove old cached regexes - Object.keys(self.matcherCache).slice(self.cacheSize).forEach(function(query) { - delete self.matcherCache[query]; - }); - // remove old cached normalization results - Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { - delete self.normalizationCache[query]; - }); - } - - /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result - * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ - */ - self.getMatcher = function(query) { - query = self.normalize(query); - if (!(query in self.matcherCache)) { - // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: - // string building, splice, concat, multi-level join) - var regex = ['^']; - for (var i = 0; i < query.length; ++i) { - regex.push('([^'); - regex.push(query[i]); - regex.push(']*)('); - regex.push(query[i]); - regex.push(')'); - } - regex.push('(.*)$'); - self.matcherCache[query] = new RegExp(regex.join(''), 'i'); - } - return self.matcherCache[query]; - } - - /** Clear the cache for the given source, e.g. for refreshing */ - self.invalidateFilterCache = function(id) { - self.filterCache[id] = {}; - } - - /** Filters a collection :source using fuzzy matching against an input string :query. If a query with - * a less specific query was issued before (e.g. if the user added a letter to the query), the cached - * results of the last filtering are used as a starting point, instead of :source. - */ - self.filter = function(query, source, getValue, id) { - if (!(id in self.filterCache)) - self.filterCache[id] = {}; - - // find the most narrow list of results in the cache - var optSpecificity = source.length; - var specificity; - for (key in self.filterCache[id]) { - if (!self.filterCache[id].hasOwnProperty(key)) - continue; - - if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { - // cache entry no longer needed - delete self.filterCache[id][key]; - continue; - } - - // is this a plausible result set to use as a source? - if (query.indexOf(key) < 0) - continue; - - // is this cache entry the most specific so far? - specificity = self.filterCache[id][key].length; - if (specificity < optSpecificity) { - source = self.filterCache[id][key]; - optSpecificity = specificity; - } - } - - // don't clean up the caches every iteration - if (++self.timeToClean > 100) { - self.timeToClean = 0; - self.cleanCache(); - } - - var matcher = self.getMatcher(query); - var filtered = source.filter(function(x) { return matcher.test(getValue(x)) }); - self.filterCache[id][query] = filtered; - return filtered; - } - - return self; - })(); - - var htmlRegex = /<[^>]*>|&[a-z]+;/gi; - - /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped - * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ - function stripHtmlTags(str, positions) { - if (!positions) - return str.replace(htmlRegex, ''); - - var match = str.match(htmlRegex); - if (!match) return; - match.reverse(); - var split = str.split(htmlRegex); - var offset = 0; - var i = 0; - split.forEach(function(text) { - if (match.length > 0) - positions[offset += text.length] = match.pop(); - }); - - return split.join(''); - } - - /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */ - function createActionOpenUrl(url) { - var open = function(newTab, selected) { - return function() { - chrome.extension.sendRequest({ - handler: newTab ? "openUrlInNewTab" : "openUrlInCurrentTab", - url: url, - selected: selected - }); - } - } - - if (url.indexOf("javascript:") == 0) - return [ open(false), open(false), open(false) ]; - else - return [ open(false), open(true, true), open(true, false) ]; - } - - /** Returns an action that switches to the tab with the given :id. */ - function createActionSwitchToTab(id) { - var open = function() { - chrome.extension.sendRequest({ handler: 'selectSpecificTab', id: id }); - } - return [open, open, open]; - } - - /** Creates an file-internal representation of a URL match with the given paramters */ - function createCompletionHtml(type, str, title) { - title = title || ''; - // sanitize input, it could come from a malicious web site - title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : ''; - return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title; - } - - /** Renders a completion by marking fuzzy-matched parts. */ - function renderFuzzy(query, html) { - // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the - // tags and reinsert them after the matching process - var htmlTags = {}; - var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); - - html = []; - var htmlOffset = 0; - - // this helper function adds the HTML generated _for one single character_ to the HTML output - // and reinserts HTML tags stripped before, if they were at this position - function addToHtml(str) { - if (htmlOffset in htmlTags) - html.push(htmlTags[htmlOffset]); - html.push(str); - ++htmlOffset; - } - - function addCharsWithDecoration(str, before, after) { - before = before || ''; - after = after || ''; - for (var i = 0; i < str.length; ++i) - addToHtml(before + str[i] + after); - } - - // iterate over the match groups. They are non-matched and matched string parts, in alternating order - for (var i = 0; i < groups.length; ++i) { - if (i % 2 == 0) - // we have a non-matched part, it could have several characters. We need to insert them character - // by character, so that addToHtml can keep track of the position in the original string - addCharsWithDecoration(groups[i]); - else - // we have a matched part. In addition to the characters themselves, we add some decorating HTML. - addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>'); - }; - - // call it another time so that a tag at the very last position is reinserted - addToHtml(''); - return html.join(''); - } - - /** A completion class that only holds a relevancy value and a function to get HTML and action - * properties */ - var LazyCompletion = function(relevancy, builder) { - this.relevancy = relevancy; - this.build = builder; - } - - /** Singleton object that provides fast access to the Chrome history */ - var historyCache = (function() { - var size = 20000; - var cachedHistory = null; - - function use(callback) { - if (cachedHistory !== null) - return callback(cachedHistory); - - chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { - // sorting in ascending order, so we can push new items to the end later - history.sort(function(a, b) { - return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); - }); - cachedHistory = history; - callback(history); - }); - - chrome.history.onVisited.addListener(function(item) { - // only cache newly visited sites - if (item.visitCount === 1) - cachedHistory.push(item); - }); - } - - return { use: use }; - })() - /** Helper class to construct fuzzy completers for asynchronous data sources like history or bookmark * matchers. */ var AsyncCompletionSource = function() { @@ -623,6 +321,305 @@ var completion = (function() { }); } } + /** Singleton object that provides helpers and caching for fuzzy completion. */ + var fuzzyMatcher = (function() { + var self = {}; + + self.timeToClean = 0; + self.cacheSize = 1000; + self.regexNonWord = /[\W_]/ig; + + // cache generated regular expressions + self.matcherCache = {}; + // cache filtered results from recent queries + self.filterCache = {}; + self.normalizationCache = {}; + + /** Normalizes the string specified in :query. Strips any non-word characters and converts + * to lower case. */ + self.normalize = function(query) { + if (!(query in self.normalizationCache)) + self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); + return self.normalizationCache[query]; + } + + /** Returns the non-matching and matching string parts, in alternating order (starting with a + * non-matching part) or null, if the string doesn't match the query. + * + * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] + * + * Note that this function matches the longest possible parts of a string and is therefore not very + * efficient. There it falls back to a more performant, but less accurate regex matching if the + * normalized query is longer than 10 characters. + * + * _Don't use this to check if a string matches a query_. Use `getMatcher(query).test(str)` instead. + */ + self.getMatchGroups = function(query, str) { + query = self.normalize(query); + if (query.length == 0) + return str.length ? [str] : []; + if (query.length > 15) { + // for long query strings, the method is much too inefficient, so fall + // back to the less accurate regex matching + return self.getMatcher(query).exec(str).slice(1); + } + + for (var i = query.length; i >= 1; --i) { + var part = query.slice(0, i); + var partOffset = str.toLowerCase().indexOf(part); + if (partOffset < 0) + continue; + + // we use recursive backtracking here, this is why it's slow. + rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); + if (!rest) continue; + + return [ + str.slice(0, partOffset), + part, + ].concat(rest); + } + return null; + } + + /** Calculates a very simple similarity value between a :query and a :string */ + self.calculateRelevancy = function(query, str) { + query = self.normalize(query); + str = self.normalize(str); + var sum = 0; + + // only iterate over slices of the query starting at an offset up to 10 to save resources + for (var start = 0; start < 20 && start < query.length; ++start) { + for (var i = query.length; i >= start; --i) { + if (str.indexOf(query.slice(start, i)) >= 0) { + var length = i - start; + sum += length * length; + break; + } + } + } + return sum * sum * sum; + } + + /** Trims the size of the caches to the configured size using a FIFO algorithm. */ + self.cleanCache = function() { + // remove old cached regexes + Object.keys(self.matcherCache).slice(self.cacheSize).forEach(function(query) { + delete self.matcherCache[query]; + }); + // remove old cached normalization results + Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { + delete self.normalizationCache[query]; + }); + } + + /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result + * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ + */ + self.getMatcher = function(query) { + query = self.normalize(query); + if (!(query in self.matcherCache)) { + // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: + // string building, splice, concat, multi-level join) + var regex = ['^']; + for (var i = 0; i < query.length; ++i) { + regex.push('([^'); + regex.push(query[i]); + regex.push(']*)('); + regex.push(query[i]); + regex.push(')'); + } + regex.push('(.*)$'); + self.matcherCache[query] = new RegExp(regex.join(''), 'i'); + } + return self.matcherCache[query]; + } + + /** Clear the cache for the given source, e.g. for refreshing */ + self.invalidateFilterCache = function(id) { + self.filterCache[id] = {}; + } + + /** Filters a collection :source using fuzzy matching against an input string :query. If a query with + * a less specific query was issued before (e.g. if the user added a letter to the query), the cached + * results of the last filtering are used as a starting point, instead of :source. + */ + self.filter = function(query, source, getValue, id) { + if (!(id in self.filterCache)) + self.filterCache[id] = {}; + + // find the most narrow list of results in the cache + var optSpecificity = source.length; + var specificity; + for (key in self.filterCache[id]) { + if (!self.filterCache[id].hasOwnProperty(key)) + continue; + + if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { + // cache entry no longer needed + delete self.filterCache[id][key]; + continue; + } + + // is this a plausible result set to use as a source? + if (query.indexOf(key) < 0) + continue; + + // is this cache entry the most specific so far? + specificity = self.filterCache[id][key].length; + if (specificity < optSpecificity) { + source = self.filterCache[id][key]; + optSpecificity = specificity; + } + } + + // don't clean up the caches every iteration + if (++self.timeToClean > 100) { + self.timeToClean = 0; + self.cleanCache(); + } + + var matcher = self.getMatcher(query); + var filtered = source.filter(function(x) { return matcher.test(getValue(x)) }); + self.filterCache[id][query] = filtered; + return filtered; + } + + return self; + })(); + + var htmlRegex = /<[^>]*>|&[a-z]+;/gi; + + /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped + * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ + function stripHtmlTags(str, positions) { + if (!positions) + return str.replace(htmlRegex, ''); + + var match = str.match(htmlRegex); + if (!match) return; + match.reverse(); + var split = str.split(htmlRegex); + var offset = 0; + var i = 0; + split.forEach(function(text) { + if (match.length > 0) + positions[offset += text.length] = match.pop(); + }); + + return split.join(''); + } + + /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */ + function createActionOpenUrl(url) { + var open = function(newTab, selected) { + return function() { + chrome.extension.sendRequest({ + handler: newTab ? "openUrlInNewTab" : "openUrlInCurrentTab", + url: url, + selected: selected + }); + } + } + + if (url.indexOf("javascript:") == 0) + return [ open(false), open(false), open(false) ]; + else + return [ open(false), open(true, true), open(true, false) ]; + } + + /** Returns an action that switches to the tab with the given :id. */ + function createActionSwitchToTab(id) { + var open = function() { + chrome.extension.sendRequest({ handler: 'selectSpecificTab', id: id }); + } + return [open, open, open]; + } + + /** Creates an file-internal representation of a URL match with the given paramters */ + function createCompletionHtml(type, str, title) { + title = title || ''; + // sanitize input, it could come from a malicious web site + title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : ''; + return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title; + } + + /** Renders a completion by marking fuzzy-matched parts. */ + function renderFuzzy(query, html) { + // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the + // tags and reinsert them after the matching process + var htmlTags = {}; + var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); + + html = []; + var htmlOffset = 0; + + // this helper function adds the HTML generated _for one single character_ to the HTML output + // and reinserts HTML tags stripped before, if they were at this position + function addToHtml(str) { + if (htmlOffset in htmlTags) + html.push(htmlTags[htmlOffset]); + html.push(str); + ++htmlOffset; + } + + function addCharsWithDecoration(str, before, after) { + before = before || ''; + after = after || ''; + for (var i = 0; i < str.length; ++i) + addToHtml(before + str[i] + after); + } + + // iterate over the match groups. They are non-matched and matched string parts, in alternating order + for (var i = 0; i < groups.length; ++i) { + if (i % 2 == 0) + // we have a non-matched part, it could have several characters. We need to insert them character + // by character, so that addToHtml can keep track of the position in the original string + addCharsWithDecoration(groups[i]); + else + // we have a matched part. In addition to the characters themselves, we add some decorating HTML. + addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>'); + }; + + // call it another time so that a tag at the very last position is reinserted + addToHtml(''); + return html.join(''); + } + + /** A completion class that only holds a relevancy value and a function to get HTML and action + * properties */ + var LazyCompletion = function(relevancy, builder) { + this.relevancy = relevancy; + this.build = builder; + } + + /** Singleton object that provides fast access to the Chrome history */ + var historyCache = (function() { + var size = 20000; + var cachedHistory = null; + + function use(callback) { + if (cachedHistory !== null) + return callback(cachedHistory); + + chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { + // sorting in ascending order, so we can push new items to the end later + history.sort(function(a, b) { + return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); + }); + cachedHistory = history; + callback(history); + }); + + chrome.history.onVisited.addListener(function(item) { + // only cache newly visited sites + if (item.visitCount === 1) + cachedHistory.push(item); + }); + } + + return { use: use }; + })(); // public interface return { |
