diff options
Diffstat (limited to 'background_scripts/completion.js')
| -rw-r--r-- | background_scripts/completion.js | 626 |
1 files changed, 626 insertions, 0 deletions
diff --git a/background_scripts/completion.js b/background_scripts/completion.js new file mode 100644 index 00000000..b31c4918 --- /dev/null +++ b/background_scripts/completion.js @@ -0,0 +1,626 @@ +/* + * This contains the definition of the completers used for the Vomnibox's suggestion UI. A complter will take + * a query (whatever the user typed into the Vomnibox) and return a list of matches, e.g. bookmarks, domains, + * URLs from history. + * + * The Vomnibox frontend script makes a "filterCompleter" request to the background page, which in turn calls + * filter() on each these completers. + * + * A completer is a class which has two functions: + * - refresh(): refreshes the completer's data source (e.g. refetches the list of bookmarks from Chrome). + * - filter(query, callback): "query" will be whatever the user typed into the Vomnibox. "callback" is a + * function which will be invoked with a list of LazyCompletionResults as its first argument. + * + * A completer's filter() function returns a list of LazyCompletionResults. This contains a relevancy score + * for the result, as well as a function to build the full result (e.g. the HTML representing this result). + * + * The MultiCompleter collects a big list of LazyCompletionResults from many completers by calling each of + * their filter functions in turn, sorts the results by relevancy, and then calls build() on the top N + * results. This allows us to avoid generating HTML for all of the results we're not going to use. + * The objects returned from build() are sent to the Vomnibox frontend script to be shown in the UI. + */ +var completion = (function() { + + /* + * An object which contains a relevancy score for the given completion, and a function which can be + * invoked to build its HTML. + * Calling build() should return an object of the form: + * { html: "", action: { functionName: "", args: [] } } + * This object is eventually sent back to the Vomnibox frontend script. "action" contains the action to + * be performed by the frontend script if this result is chosen (user selects it and hits enter). + * "action" includes the function the frontendScript should execute (e.g. "navigateToUrl") along with any + * arguments (like the URL). + * "html" is the HTML representation of this result, with some characters emphasized to higlight the query. + * + * This is called "lazy" because it takes in a function to lazily compute a result's html. That operation + * can be kind of expensive, so you only want to do it to the top completion results, after you've sorted + * them by relevancy. + */ + var LazyCompletionResult = function(relevancy, buildFunction) { + this.relevancy = relevancy; + this.build = buildFunction; + } + + /* + * A completer which takes in a list of keyword commands (like "wiki" for "search wikipedia") and will + * decide if your query is a command, a URL that you want to visit, or a search term. + */ + var SmartKeywordCompleter = Class.extend({ + /* + * - commands: a list of commands of the form: { keyword: [title, url] }, e.g.: + * { "wiki ": ["Wikipedia (en)", "http://en.wikipedia.org/wiki/%s" ] + */ + init: function(commands) { + this.commands = commands || {}; + this.commandKeys = Object.keys(commands); + }, + + refresh: function() { }, + + /** Returns the suggestions matching the user-defined commands */ + getCommandSuggestions: function(query, suggestions) { + return this.commandKeys.filter(function(cmd) { return query.indexOf(cmd) == 0 }).map(function(cmd) { + var term = query.slice(cmd.length); + var desc = this.commands[cmd][0]; + var pattern = this.commands[cmd][1]; + var url = (typeof pattern == "function") ? pattern(term) : pattern.replace(/%s/g, term); + + // this will appear even before the URL/search suggestion + return new LazyCompletionResult(-2, function() { + return { + html: createCompletionHtml(desc, term), + action: { functionName: "navigateToUrl", args: [utils.createFullUrl(url)] }, + }}) + }.proxy(this)); + }, + + /** Checks if the input is a URL. If yes, returns a suggestion to visit it. If no, returns a suggestion + * to start a web search. */ + getUrlOrSearchSuggestion: function(query, suggestions) { + // trim query + query = query.replace(/^\s+|\s+$/g, ''); + var isUrl = utils.isUrl(query); + return new LazyCompletionResult(-1, function() { + return { + html: createCompletionHtml(isUrl ? "goto" : "search", query), + action: { functionName: "navigateToUrl", + args: isUrl ? [utils.createFullUrl(query)] : [utils.createSearchUrl(query)] } + }}); + }, + + filter: function(query, callback) { + suggestions = this.getCommandSuggestions(query); + suggestions.push(this.getUrlOrSearchSuggestion(query)); + callback(suggestions); + } + }); + + /* + * A generic asynchronous completer which is used by completers which have asynchronous data sources, + * like history or bookmarks. + */ + var AsyncCompleter = Class.extend({ + init: function() { + this.id = utils.createUniqueId(); + this.reset(); + this.resultsReady = this.fallbackReadyCallback = function(results) { this.completions = results; } + }, + + reset: function() { + fuzzyMatcher.invalidateFilterCache(this.id); + this.completions = null; + }, + + /* + * This creates an intermediate representation of a completion which will later be called with a specific + * query. + * - type: the type of item we're completing against, e.g. "bookmark", "history", "tab". + * - item: the item itself. This should include a url and title property (bookmark, history and tab + * objects include both of these). + * - action: the action to take in the Vomnibox frontend + * + * It's used to save us work -- we call this on every bookmark in your bookmarks list when we first fetch + * them, for instance, and we don't want to do some the same work again every time a new query is + * processed. + * + * TODO(philc): It would be nice if this could be removed; it's confusing. + * */ + createUnrankedCompletion: function(type, item, action) { + var url = item.url; + var parts = [type, url, item.title]; + var str = parts.join(" "); + action = action || { functionName: "navigateToUrl", args: [url] }; + + function createLazyCompletion(query) { + var relevancy = url.length / fuzzyMatcher.calculateRelevancy(query, str) + return new LazyCompletionResult(relevancy, function() { + return { + html: renderFuzzy(query, createCompletionHtml.apply(null, parts)), + action: action, + }}); + } + + // add one more layer of indirection: For filtering, we only need the string to match. + // Only after we reduced the number of possible results, we call :bind on them to get + // an actual completion object + return { + completionString: parts.join(" "), + createLazyCompletion: createLazyCompletion, + } + }, + + processResults: function(query, results) { + results = fuzzyMatcher.filter(query, results, + function(match) { return match.completionString }, this.id); + return results.map(function(result) { return result.createLazyCompletion(query); }); + }, + + filter: function(query, callback) { + var handler = function(results) { callback(this.processResults(query, results)); }.proxy(this); + + // are the results ready? + if (this.completions !== null) { + // yes: call the callback synchronously + handler(this.completions); + } else { + // no: register the handler as a callback + this.resultsReady = function(results) { + handler(results); + this.resultsReady = this.fallbackReadyCallback; + this.resultsReady(results); + }.proxy(this); + } + } + }); + + var FuzzyBookmarkCompleter = Class.extend({ + init: function() { this.asyncCompleter = new AsyncCompleter(); }, + filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, + + // Traverses the bookmark hierarhcy and retuns a list of all bookmarks in the tree. + traverseBookmarkTree: function(bookmarks) { + var results = []; + var toVisit = bookmarks; + while (toVisit.length > 0) { + var bookmark = toVisit.shift(); + results.push(bookmark); + if (bookmark.children) + toVisit.push.apply(toVisit, bookmark.children); + } + return results; + }, + + refresh: function() { + this.asyncCompleter.reset(); + chrome.bookmarks.getTree(function(bookmarks) { + var results = this.traverseBookmarkTree(bookmarks); + var validResults = results.filter(function(b) { return b.url !== undefined; }); + var matches = validResults.map(function(bookmark) { + return this.asyncCompleter.createUnrankedCompletion("bookmark", bookmark); + }.proxy(this)); + this.asyncCompleter.resultsReady(matches); + }.proxy(this)); + } + }); + + var FuzzyHistoryCompleter = Class.extend({ + init: function(maxResults) { + this.asyncCompleter = new AsyncCompleter(); + this.maxResults = maxResults; + }, + + filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, + + refresh: function() { + this.asyncCompleter.reset(); + + historyCache.use(function(history) { + this.asyncCompleter.resultsReady(history.slice(-this.maxResults).map(function(item) { + return this.asyncCompleter.createUnrankedCompletion("history", item); + }.proxy(this))); + }.proxy(this)); + } + }); + + var FuzzyTabCompleter = Class.extend({ + init: function() { this.asyncCompleter = new AsyncCompleter(); }, + + filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, + + refresh: function() { + this.asyncCompleter.reset(); + chrome.tabs.getAllInWindow(null, function(tabs) { + this.asyncCompleter.resultsReady(tabs.map(function(tab) { + return this.asyncCompleter.createUnrankedCompletion("tab", tab, + { functionName: "switchToTab", args: [tab.id] }); + }.proxy(this))); + }.proxy(this)); + } + }); + + /* + * A completer which matches only domains from sites in your history with the current query. + */ + var DomainCompleter = Class.extend({ + // A mapping of doamin => useHttps, where useHttps is a boolean. + domains: null, + + withDomains: function(callback) { + var self = this; + function buildResult() { + return Object.keys(self.domains).map(function(dom) { + return [dom, self.domains[dom]]; + }); + } + if (self.domains !== null) + return callback(buildResult()); + + self.domains = {}; + + function processDomain(domain, https) { + // non-www version is preferrable, so check if we have it already + if (domain.indexOf('www.') == 0 && self.domains.hasOwnProperty(domain.slice(4))) + domain = domain.slice(4); + + // HTTPS is preferrable + https = https || self.domains[domain] || self.domains['www.' + domain]; + + self.domains[domain] = !!https; + delete self.domains['www.' + domain]; + } + + function processUrl(url) { + parts = url.split('/'); + processDomain(parts[2], parts[0] == 'https:'); + } + + historyCache.use(function(history) { + history.forEach(function(item) { processUrl(item.url); }); + }); + + chrome.history.onVisited.addListener(function(item) { processUrl(item.url); }); + + callback(buildResult()); + }, + + refresh: function() { }, + + filter: function(query, callback) { + var best = null; + this.withDomains(function(domains) { + var bestOffset = 1000; + domains.forEach(function(result) { + var domain = result[0]; + var protocol = result[1] ? 'https' : 'http'; + + var offset = domain.indexOf(query); + if (offset < 0 || offset >= bestOffset) + return; + + // found a new optimum + bestOffset = offset; + best = new LazyCompletionResult(-1.5, function() { + return { + html: createCompletionHtml("site", domain), + action: { functionName: "navigateToUrl", args: [protocol + "://" + domain] }, + }}); + }); + }); + callback(best ? [best] : []); + } + }); + + /** A meta-completer that delegates queries and merges and sorts the results of a collection of other + * completer instances given in :sources. The optional argument :queryThreshold determines how long a + * query has to be to trigger a search. */ + var MultiCompleter = Class.extend({ + init: function(sources, queryThreshold) { + if (queryThreshold === undefined) + queryThreshold = 1; // default + this.sources = sources; + this.queryThreshold = queryThreshold; + }, + + refresh: function() { this.sources.forEach(function(source) { source.refresh(); }); }, + + filter: function(query, maxResults, callback) { + if (query.length < this.queryThreshold) { + callback([]); + return; + } + + var allResults = []; + var counter = this.sources.length; + + this.sources.forEach(function(source) { + source.filter(query, function(results) { + allResults = allResults.concat(results); + if (--counter > 0) + return; + + // all sources have provided results by now, so we can sort and return + allResults.sort(function(a,b) { return a.relevancy - b.relevancy; }); + // evalulate lazy completions for the top n results + callback(allResults.slice(0, maxResults).map(function(result) { return result.build(); })); + }); + }); + } + }); + + /** Singleton object that provides helpers and caching for fuzzy completion. */ + var fuzzyMatcher = (function() { + var self = {}; + + self.timeToClean = 0; + self.cacheSize = 1000; + self.regexNonWord = /[\W_]/ig; + + // cache generated regular expressions + self.regexpCache = {}; + // cache filtered results from recent queries + self.filterCache = {}; + self.normalizationCache = {}; + + /** Normalizes the string specified in :query. Strips any non-word characters and converts + * to lower case. */ + self.normalize = function(query) { + if (!(query in self.normalizationCache)) + self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); + return self.normalizationCache[query]; + } + + /** Returns the non-matching and matching string parts, in alternating order (starting with a + * non-matching part) or null, if the string doesn't match the query. + * + * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] + * + * Note that this function matches the longest possible parts of a string and is therefore not very + * efficient. There it falls back to a more performant, but less accurate regex matching if the + * normalized query is longer than 10 characters. + * + * _Don't use this to check if a string matches a query_. Use `getRegexp(query).test(str)` instead. + */ + self.getMatchGroups = function(query, str) { + query = self.normalize(query); + if (query.length == 0) + return str.length ? [str] : []; + if (query.length > 15) { + // for long query strings, the method is much too inefficient, so fall + // back to the less accurate regex matching + return self.getRegexp(query).exec(str).slice(1); + } + + for (var i = query.length; i >= 1; --i) { + var part = query.slice(0, i); + var partOffset = str.toLowerCase().indexOf(part); + if (partOffset < 0) + continue; + + // we use recursive backtracking here, this is why it's slow. + rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); + if (!rest) continue; + + return [ + str.slice(0, partOffset), + part, + ].concat(rest); + } + return null; + } + + /** Calculates a very simple similarity value between a :query and a :string */ + self.calculateRelevancy = function(query, str) { + query = self.normalize(query); + str = self.normalize(str); + var sum = 0; + + // only iterate over slices of the query starting at an offset up to 10 to save resources + for (var start = 0; start < 20 && start < query.length; ++start) { + for (var i = query.length; i >= start; --i) { + if (str.indexOf(query.slice(start, i)) >= 0) { + var length = i - start; + sum += length * length; + break; + } + } + } + return sum * sum * sum; + } + + /** Trims the size of the caches to the configured size using a FIFO algorithm. */ + self.cleanCache = function() { + // remove old cached regexes + Object.keys(self.regexpCache).slice(self.cacheSize).forEach(function(query) { + delete self.regexpCache[query]; + }); + // remove old cached normalization results + Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { + delete self.normalizationCache[query]; + }); + } + + /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result + * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ + */ + self.getRegexp = function(query) { + query = self.normalize(query); + if (!(query in self.regexpCache)) { + // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: + // string building, splice, concat, multi-level join) + var regex = ['^']; + for (var i = 0; i < query.length; ++i) { + regex.push('([^'); + regex.push(query[i]); + regex.push(']*)('); + regex.push(query[i]); + regex.push(')'); + } + regex.push('(.*)$'); + self.regexpCache[query] = new RegExp(regex.join(''), 'i'); + } + return self.regexpCache[query]; + } + + /** Clear the cache for the given source, e.g. for refreshing */ + self.invalidateFilterCache = function(id) { + self.filterCache[id] = {}; + } + + /** Filters a collection :source using fuzzy matching against an input string :query. If a query with + * a less specific query was issued before (e.g. if the user added a letter to the query), the cached + * results of the last filtering are used as a starting point, instead of :source. + */ + self.filter = function(query, source, getValue, id) { + if (!(id in self.filterCache)) + self.filterCache[id] = {}; + + // find the most narrow list of results in the cache + var optSpecificity = source.length; + var specificity; + for (key in self.filterCache[id]) { + if (!self.filterCache[id].hasOwnProperty(key)) + continue; + + if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { + // cache entry no longer needed + delete self.filterCache[id][key]; + continue; + } + + // is this a plausible result set to use as a source? + if (query.indexOf(key) < 0) + continue; + + // is this cache entry the most specific so far? + specificity = self.filterCache[id][key].length; + if (specificity < optSpecificity) { + source = self.filterCache[id][key]; + optSpecificity = specificity; + } + } + + // don't clean up the caches every iteration + if (++self.timeToClean > 100) { + self.timeToClean = 0; + self.cleanCache(); + } + + var regexp = self.getRegexp(query); + var filtered = source.filter(function(x) { return regexp.test(getValue(x)) }); + self.filterCache[id][query] = filtered; + return filtered; + } + + return self; + })(); + + var htmlRegex = /<[^>]*>|&[a-z]+;/gi; + + /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped + * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ + function stripHtmlTags(str, positions) { + if (!positions) + return str.replace(htmlRegex, ''); + + var match = str.match(htmlRegex); + if (!match) return; + match.reverse(); + var split = str.split(htmlRegex); + var offset = 0; + var i = 0; + split.forEach(function(text) { + if (match.length > 0) + positions[offset += text.length] = match.pop(); + }); + + return split.join(''); + } + + /** Creates an file-internal representation of a URL match with the given paramters */ + function createCompletionHtml(type, str, title) { + title = title || ''; + // sanitize input, it could come from a malicious web site + title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : ''; + return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title; + } + + /** Renders a completion by marking fuzzy-matched parts. */ + function renderFuzzy(query, html) { + // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the + // tags and reinsert them after the matching process + var htmlTags = {}; + var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); + + html = []; + var htmlOffset = 0; + + // this helper function adds the HTML generated _for one single character_ to the HTML output + // and reinserts HTML tags stripped before, if they were at this position + function addToHtml(str) { + if (htmlOffset in htmlTags) + html.push(htmlTags[htmlOffset]); + html.push(str); + ++htmlOffset; + } + + function addCharsWithDecoration(str, before, after) { + before = before || ''; + after = after || ''; + for (var i = 0; i < str.length; ++i) + addToHtml(before + str[i] + after); + } + + // iterate over the match groups. They are non-matched and matched string parts, in alternating order + for (var i = 0; i < groups.length; ++i) { + if (i % 2 == 0) + // we have a non-matched part, it could have several characters. We need to insert them character + // by character, so that addToHtml can keep track of the position in the original string + addCharsWithDecoration(groups[i]); + else + // we have a matched part. In addition to the characters themselves, we add some decorating HTML. + addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>'); + }; + + // call it another time so that a tag at the very last position is reinserted + addToHtml(''); + return html.join(''); + } + + /** Singleton object that provides fast access to the Chrome history */ + var historyCache = (function() { + var size = 20000; + var cachedHistory = null; + + function use(callback) { + if (cachedHistory !== null) + return callback(cachedHistory); + + chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { + // sorting in ascending order, so we can push new items to the end later + history.sort(function(a, b) { + return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); + }); + cachedHistory = history; + callback(history); + }); + + chrome.history.onVisited.addListener(function(item) { + // only cache newly visited sites + if (item.visitCount === 1) + cachedHistory.push(item); + }); + } + + return { use: use }; + })(); + + // public interface + return { + FuzzyBookmarkCompleter: FuzzyBookmarkCompleter, + FuzzyHistoryCompleter: FuzzyHistoryCompleter, + FuzzyTabCompleter: FuzzyTabCompleter, + SmartKeywordCompleter: SmartKeywordCompleter, + DomainCompleter: DomainCompleter, + MultiCompleter: MultiCompleter + }; +})() |
