/* * This contains the definition of the completers used for the Vomnibox's suggestion UI. A complter will take * a query (whatever the user typed into the Vomnibox) and return a list of matches, e.g. bookmarks, domains, * URLs from history. * * The Vomnibox frontend script makes a "filterCompleter" request to the background page, which in turn calls * filter() on each these completers. * * A completer is a class which has two functions: * - refresh(): refreshes the completer's data source (e.g. refetches the list of bookmarks from Chrome). * - filter(query, callback): "query" will be whatever the user typed into the Vomnibox. "callback" is a * function which will be invoked with a list of LazyCompletionResults as its first argument. * * A completer's filter() function returns a list of LazyCompletionResults. This contains a relevancy score * for the result, as well as a function to build the full result (e.g. the HTML representing this result). * * The MultiCompleter collects a big list of LazyCompletionResults from many completers by calling each of * their filter functions in turn, sorts the results by relevancy, and then calls build() on the top N * results. This allows us to avoid generating HTML for all of the results we're not going to use. * The objects returned from build() are sent to the Vomnibox frontend script to be shown in the UI. */ var completion = (function() { /* * An object which contains a relevancy score for the given completion, and a function which can be * invoked to build its HTML. * Calling build() should return an object of the form: * { html: "", action: { functionName: "", args: [] } } * This object is eventually sent back to the Vomnibox frontend script. "action" contains the action to * be performed by the frontend script if this result is chosen (user selects it and hits enter). * "action" includes the function the frontendScript should execute (e.g. "navigateToUrl") along with any * arguments (like the URL). * "html" is the HTML representation of this result, with some characters emphasized to higlight the query. * * This is called "lazy" because it takes in a function to lazily compute a result's html. That operation * can be kind of expensive, so you only want to do it to the top completion results, after you've sorted * them by relevancy. */ var LazyCompletionResult = function(relevancy, buildFunction) { this.relevancy = relevancy; this.build = buildFunction; } /* * A completer which takes in a list of keyword commands (like "wiki" for "search wikipedia") and will * decide if your query is a command, a URL that you want to visit, or a search term. */ var SmartKeywordCompleter = Class.extend({ /* * - commands: a list of commands of the form: { keyword: [title, url] }, e.g.: * { "wiki ": ["Wikipedia (en)", "http://en.wikipedia.org/wiki/%s" ] */ init: function(commands) { this.commands = commands || {}; this.commandKeys = Object.keys(commands); }, refresh: function() { }, /** Returns the suggestions matching the user-defined commands */ getCommandSuggestions: function(query, suggestions) { return this.commandKeys.filter(function(cmd) { return query.indexOf(cmd) == 0 }).map(function(cmd) { var term = query.slice(cmd.length); var desc = this.commands[cmd][0]; var pattern = this.commands[cmd][1]; var url = (typeof pattern == "function") ? pattern(term) : pattern.replace(/%s/g, term); // this will appear even before the URL/search suggestion return new LazyCompletionResult(-2, function() { return { html: createCompletionHtml(desc, term), action: { functionName: "navigateToUrl", args: [utils.createFullUrl(url)] }, }}) }.proxy(this)); }, /** Checks if the input is a URL. If yes, returns a suggestion to visit it. If no, returns a suggestion * to start a web search. */ getUrlOrSearchSuggestion: function(query, suggestions) { // trim query query = query.replace(/^\s+|\s+$/g, ''); var isUrl = utils.isUrl(query); return new LazyCompletionResult(-1, function() { return { html: createCompletionHtml(isUrl ? "goto" : "search", query), action: { functionName: "navigateToUrl", args: isUrl ? [utils.createFullUrl(query)] : [utils.createSearchUrl(query)] } }}); }, filter: function(query, callback) { suggestions = this.getCommandSuggestions(query); suggestions.push(this.getUrlOrSearchSuggestion(query)); callback(suggestions); } }); /* * A generic asynchronous completer which is used by completers which have asynchronous data sources, * like history or bookmarks. */ var AsyncCompleter = Class.extend({ init: function() { this.id = utils.createUniqueId(); this.reset(); this.resultsReady = this.fallbackReadyCallback = function(results) { this.completions = results; } }, reset: function() { fuzzyMatcher.invalidateFilterCache(this.id); this.completions = null; }, /* * This creates an intermediate representation of a completion which will later be called with a specific * query. * - type: the type of item we're completing against, e.g. "bookmark", "history", "tab". * - item: the item itself. This should include a url and title property (bookmark, history and tab * objects include both of these). * - action: the action to take in the Vomnibox frontend * * It's used to save us work -- we call this on every bookmark in your bookmarks list when we first fetch * them, for instance, and we don't want to do some the same work again every time a new query is * processed. * * TODO(philc): It would be nice if this could be removed; it's confusing. * */ createUnrankedCompletion: function(type, item, action) { var url = item.url; var parts = [type, url, item.title]; var str = parts.join(" "); action = action || { functionName: "navigateToUrl", args: [url] }; function createLazyCompletion(query) { var relevancy = url.length / fuzzyMatcher.calculateRelevancy(query, str) return new LazyCompletionResult(relevancy, function() { return { html: renderFuzzy(query, createCompletionHtml.apply(null, parts)), action: action, }}); } // add one more layer of indirection: For filtering, we only need the string to match. // Only after we reduced the number of possible results, we call :bind on them to get // an actual completion object return { completionString: parts.join(" "), createLazyCompletion: createLazyCompletion, } }, processResults: function(query, results) { results = fuzzyMatcher.filter(query, results, function(match) { return match.completionString }, this.id); return results.map(function(result) { return result.createLazyCompletion(query); }); }, filter: function(query, callback) { var handler = function(results) { callback(this.processResults(query, results)); }.proxy(this); // are the results ready? if (this.completions !== null) { // yes: call the callback synchronously handler(this.completions); } else { // no: register the handler as a callback this.resultsReady = function(results) { handler(results); this.resultsReady = this.fallbackReadyCallback; this.resultsReady(results); }.proxy(this); } } }); /** A fuzzy bookmark completer */ var FuzzyBookmarkCompletionSource = Class.extend({ init: function() { this.asyncCompleter = new AsyncCompleter(); }, filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, // Traverses the bookmark hierarhcy and retuns a list of all bookmarks in the tree. traverseBookmarkTree: function(bookmarks) { var results = []; var toVisit = bookmarks; while (toVisit.length > 0) { var bookmark = toVisit.shift(); results.push(bookmark); if (bookmark.children) toVisit.push.apply(toVisit, bookmark.children); } return results; }, refresh: function() { this.asyncCompleter.reset(); chrome.bookmarks.getTree(function(bookmarks) { var results = this.traverseBookmarkTree(bookmarks); var validResults = results.filter(function(b) { return b.url !== undefined; }); var matches = validResults.map(function(bookmark) { return this.asyncCompleter.createUnrankedCompletion("bookmark", bookmark); }.proxy(this)); this.asyncCompleter.resultsReady(matches); }.proxy(this)); } }); var FuzzyHistoryCompleter = Class.extend({ init: function(maxResults) { this.asyncCompleter = new AsyncCompleter(); this.maxResults = maxResults; }, filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, refresh: function() { this.asyncCompleter.reset(); historyCache.use(function(history) { this.asyncCompleter.resultsReady(history.slice(-this.maxResults).map(function(item) { return this.asyncCompleter.createUnrankedCompletion("history", item); }.proxy(this))); }.proxy(this)); } }); var FuzzyTabCompleter = Class.extend({ init: function() { this.asyncCompleter = new AsyncCompleter(); }, filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, refresh: function() { this.asyncCompleter.reset(); chrome.tabs.getAllInWindow(null, function(tabs) { this.asyncCompleter.resultsReady(tabs.map(function(tab) { return this.asyncCompleter.createUnrankedCompletion("tab", tab, { functionName: "switchToTab", args: [tab.id] }); }.proxy(this))); }.proxy(this)); } }); /* * A completer which matches only domains from sites in your history with the current query. */ var DomainCompletionSource = Class.extend({ // A mapping of doamin => useHttps, where useHttps is a boolean. domains: null, withDomains: function(callback) { var self = this; function buildResult() { return Object.keys(self.domains).map(function(dom) { return [dom, self.domains[dom]]; }); } if (self.domains !== null) return callback(buildResult()); self.domains = {}; function processDomain(domain, https) { // non-www version is preferrable, so check if we have it already if (domain.indexOf('www.') == 0 && self.domains.hasOwnProperty(domain.slice(4))) domain = domain.slice(4); // HTTPS is preferrable https = https || self.domains[domain] || self.domains['www.' + domain]; self.domains[domain] = !!https; delete self.domains['www.' + domain]; } function processUrl(url) { parts = url.split('/'); processDomain(parts[2], parts[0] == 'https:'); } historyCache.use(function(history) { history.forEach(function(item) { processUrl(item.url); }); }); chrome.history.onVisited.addListener(function(item) { processUrl(item.url); }); callback(buildResult()); }, refresh: function() { }, filter: function(query, callback) { var best = null; this.withDomains(function(domains) { var bestOffset = 1000; domains.forEach(function(result) { var domain = result[0]; var protocol = result[1] ? 'https' : 'http'; var offset = domain.indexOf(query); if (offset < 0 || offset >= bestOffset) return; // found a new optimum bestOffset = offset; best = new LazyCompletionResult(-1.5, function() { return { html: createCompletionHtml("site", domain), action: { functionName: "navigateToUrl", args: [protocol + "://" + domain] }, }}); }); }); callback(best ? [best] : []); } }); /** A meta-completer that delegates queries and merges and sorts the results of a collection of other * completer instances given in :sources. The optional argument :queryThreshold determines how long a * query has to be to trigger a search. */ var MultiCompleter = Class.extend({ init: function(sources, queryThreshold) { if (queryThreshold === undefined) queryThreshold = 1; // default this.sources = sources; this.queryThreshold = queryThreshold; }, refresh: function() { this.sources.forEach(function(source) { source.refresh(); }); }, filter: function(query, maxResults, callback) { if (query.length < this.queryThreshold) { callback([]); return; } var allResults = []; var counter = this.sources.length; this.sources.forEach(function(source) { source.filter(query, function(results) { allResults = allResults.concat(results); if (--counter > 0) return; // all sources have provided results by now, so we can sort and return allResults.sort(function(a,b) { return a.relevancy - b.relevancy; }); // evalulate lazy completions for the top n results callback(allResults.slice(0, maxResults).map(function(result) { return result.build(); })); }); }); } }); /** Singleton object that provides helpers and caching for fuzzy completion. */ var fuzzyMatcher = (function() { var self = {}; self.timeToClean = 0; self.cacheSize = 1000; self.regexNonWord = /[\W_]/ig; // cache generated regular expressions self.regexpCache = {}; // cache filtered results from recent queries self.filterCache = {}; self.normalizationCache = {}; /** Normalizes the string specified in :query. Strips any non-word characters and converts * to lower case. */ self.normalize = function(query) { if (!(query in self.normalizationCache)) self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); return self.normalizationCache[query]; } /** Returns the non-matching and matching string parts, in alternating order (starting with a * non-matching part) or null, if the string doesn't match the query. * * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] * * Note that this function matches the longest possible parts of a string and is therefore not very * efficient. There it falls back to a more performant, but less accurate regex matching if the * normalized query is longer than 10 characters. * * _Don't use this to check if a string matches a query_. Use `getRegexp(query).test(str)` instead. */ self.getMatchGroups = function(query, str) { query = self.normalize(query); if (query.length == 0) return str.length ? [str] : []; if (query.length > 15) { // for long query strings, the method is much too inefficient, so fall // back to the less accurate regex matching return self.getRegexp(query).exec(str).slice(1); } for (var i = query.length; i >= 1; --i) { var part = query.slice(0, i); var partOffset = str.toLowerCase().indexOf(part); if (partOffset < 0) continue; // we use recursive backtracking here, this is why it's slow. rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); if (!rest) continue; return [ str.slice(0, partOffset), part, ].concat(rest); } return null; } /** Calculates a very simple similarity value between a :query and a :string */ self.calculateRelevancy = function(query, str) { query = self.normalize(query); str = self.normalize(str); var sum = 0; // only iterate over slices of the query starting at an offset up to 10 to save resources for (var start = 0; start < 20 && start < query.length; ++start) { for (var i = query.length; i >= start; --i) { if (str.indexOf(query.slice(start, i)) >= 0) { var length = i - start; sum += length * length; break; } } } return sum * sum * sum; } /** Trims the size of the caches to the configured size using a FIFO algorithm. */ self.cleanCache = function() { // remove old cached regexes Object.keys(self.regexpCache).slice(self.cacheSize).forEach(function(query) { delete self.regexpCache[query]; }); // remove old cached normalization results Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { delete self.normalizationCache[query]; }); } /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ */ self.getRegexp = function(query) { query = self.normalize(query); if (!(query in self.regexpCache)) { // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: // string building, splice, concat, multi-level join) var regex = ['^']; for (var i = 0; i < query.length; ++i) { regex.push('([^'); regex.push(query[i]); regex.push(']*)('); regex.push(query[i]); regex.push(')'); } regex.push('(.*)$'); self.regexpCache[query] = new RegExp(regex.join(''), 'i'); } return self.regexpCache[query]; } /** Clear the cache for the given source, e.g. for refreshing */ self.invalidateFilterCache = function(id) { self.filterCache[id] = {}; } /** Filters a collection :source using fuzzy matching against an input string :query. If a query with * a less specific query was issued before (e.g. if the user added a letter to the query), the cached * results of the last filtering are used as a starting point, instead of :source. */ self.filter = function(query, source, getValue, id) { if (!(id in self.filterCache)) self.filterCache[id] = {}; // find the most narrow list of results in the cache var optSpecificity = source.length; var specificity; for (key in self.filterCache[id]) { if (!self.filterCache[id].hasOwnProperty(key)) continue; if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { // cache entry no longer needed delete self.filterCache[id][key]; continue; } // is this a plausible result set to use as a source? if (query.indexOf(key) < 0) continue; // is this cache entry the most specific so far? specificity = self.filterCache[id][key].length; if (specificity < optSpecificity) { source = self.filterCache[id][key]; optSpecificity = specificity; } } // don't clean up the caches every iteration if (++self.timeToClean > 100) { self.timeToClean = 0; self.cleanCache(); } var regexp = self.getRegexp(query); var filtered = source.filter(function(x) { return regexp.test(getValue(x)) }); self.filterCache[id][query] = filtered; return filtered; } return self; })(); var htmlRegex = /<[^>]*>|&[a-z]+;/gi; /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ function stripHtmlTags(str, positions) { if (!positions) return str.replace(htmlRegex, ''); var match = str.match(htmlRegex); if (!match) return; match.reverse(); var split = str.split(htmlRegex); var offset = 0; var i = 0; split.forEach(function(text) { if (match.length > 0) positions[offset += text.length] = match.pop(); }); return split.join(''); } /** Creates an file-internal representation of a URL match with the given paramters */ function createCompletionHtml(type, str, title) { title = title || ''; // sanitize input, it could come from a malicious web site title = title.length > 0 ? ' ' + utils.escapeHtml(title) + '' : ''; return '' + type + ' ' + utils.escapeHtml(str) + title; } /** Renders a completion by marking fuzzy-matched parts. */ function renderFuzzy(query, html) { // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the // tags and reinsert them after the matching process var htmlTags = {}; var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); html = []; var htmlOffset = 0; // this helper function adds the HTML generated _for one single character_ to the HTML output // and reinserts HTML tags stripped before, if they were at this position function addToHtml(str) { if (htmlOffset in htmlTags) html.push(htmlTags[htmlOffset]); html.push(str); ++htmlOffset; } function addCharsWithDecoration(str, before, after) { before = before || ''; after = after || ''; for (var i = 0; i < str.length; ++i) addToHtml(before + str[i] + after); } // iterate over the match groups. They are non-matched and matched string parts, in alternating order for (var i = 0; i < groups.length; ++i) { if (i % 2 == 0) // we have a non-matched part, it could have several characters. We need to insert them character // by character, so that addToHtml can keep track of the position in the original string addCharsWithDecoration(groups[i]); else // we have a matched part. In addition to the characters themselves, we add some decorating HTML. addCharsWithDecoration(groups[i], '', ''); }; // call it another time so that a tag at the very last position is reinserted addToHtml(''); return html.join(''); } /** Singleton object that provides fast access to the Chrome history */ var historyCache = (function() { var size = 20000; var cachedHistory = null; function use(callback) { if (cachedHistory !== null) return callback(cachedHistory); chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { // sorting in ascending order, so we can push new items to the end later history.sort(function(a, b) { return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); }); cachedHistory = history; callback(history); }); chrome.history.onVisited.addListener(function(item) { // only cache newly visited sites if (item.visitCount === 1) cachedHistory.push(item); }); } return { use: use }; })(); // public interface return { FuzzyBookmarkCompletionSource: FuzzyBookmarkCompletionSource, FuzzyHistoryCompletionSource: FuzzyHistoryCompletionSource, FuzzyTabCompletionSource: FuzzyTabCompletionSource, SmartCompletionSource: SmartCompletionSource, DomainCompletionSource: DomainCompletionSource, MultiCompleter: MultiCompleter }; })()