aboutsummaryrefslogtreecommitdiffstats
path: root/background_scripts/completion.js
diff options
context:
space:
mode:
Diffstat (limited to 'background_scripts/completion.js')
-rw-r--r--background_scripts/completion.js626
1 files changed, 626 insertions, 0 deletions
diff --git a/background_scripts/completion.js b/background_scripts/completion.js
new file mode 100644
index 00000000..b31c4918
--- /dev/null
+++ b/background_scripts/completion.js
@@ -0,0 +1,626 @@
+/*
+ * This contains the definition of the completers used for the Vomnibox's suggestion UI. A complter will take
+ * a query (whatever the user typed into the Vomnibox) and return a list of matches, e.g. bookmarks, domains,
+ * URLs from history.
+ *
+ * The Vomnibox frontend script makes a "filterCompleter" request to the background page, which in turn calls
+ * filter() on each these completers.
+ *
+ * A completer is a class which has two functions:
+ * - refresh(): refreshes the completer's data source (e.g. refetches the list of bookmarks from Chrome).
+ * - filter(query, callback): "query" will be whatever the user typed into the Vomnibox. "callback" is a
+ * function which will be invoked with a list of LazyCompletionResults as its first argument.
+ *
+ * A completer's filter() function returns a list of LazyCompletionResults. This contains a relevancy score
+ * for the result, as well as a function to build the full result (e.g. the HTML representing this result).
+ *
+ * The MultiCompleter collects a big list of LazyCompletionResults from many completers by calling each of
+ * their filter functions in turn, sorts the results by relevancy, and then calls build() on the top N
+ * results. This allows us to avoid generating HTML for all of the results we're not going to use.
+ * The objects returned from build() are sent to the Vomnibox frontend script to be shown in the UI.
+ */
+var completion = (function() {
+
+ /*
+ * An object which contains a relevancy score for the given completion, and a function which can be
+ * invoked to build its HTML.
+ * Calling build() should return an object of the form:
+ * { html: "", action: { functionName: "", args: [] } }
+ * This object is eventually sent back to the Vomnibox frontend script. "action" contains the action to
+ * be performed by the frontend script if this result is chosen (user selects it and hits enter).
+ * "action" includes the function the frontendScript should execute (e.g. "navigateToUrl") along with any
+ * arguments (like the URL).
+ * "html" is the HTML representation of this result, with some characters emphasized to higlight the query.
+ *
+ * This is called "lazy" because it takes in a function to lazily compute a result's html. That operation
+ * can be kind of expensive, so you only want to do it to the top completion results, after you've sorted
+ * them by relevancy.
+ */
+ var LazyCompletionResult = function(relevancy, buildFunction) {
+ this.relevancy = relevancy;
+ this.build = buildFunction;
+ }
+
+ /*
+ * A completer which takes in a list of keyword commands (like "wiki" for "search wikipedia") and will
+ * decide if your query is a command, a URL that you want to visit, or a search term.
+ */
+ var SmartKeywordCompleter = Class.extend({
+ /*
+ * - commands: a list of commands of the form: { keyword: [title, url] }, e.g.:
+ * { "wiki ": ["Wikipedia (en)", "http://en.wikipedia.org/wiki/%s" ]
+ */
+ init: function(commands) {
+ this.commands = commands || {};
+ this.commandKeys = Object.keys(commands);
+ },
+
+ refresh: function() { },
+
+ /** Returns the suggestions matching the user-defined commands */
+ getCommandSuggestions: function(query, suggestions) {
+ return this.commandKeys.filter(function(cmd) { return query.indexOf(cmd) == 0 }).map(function(cmd) {
+ var term = query.slice(cmd.length);
+ var desc = this.commands[cmd][0];
+ var pattern = this.commands[cmd][1];
+ var url = (typeof pattern == "function") ? pattern(term) : pattern.replace(/%s/g, term);
+
+ // this will appear even before the URL/search suggestion
+ return new LazyCompletionResult(-2, function() {
+ return {
+ html: createCompletionHtml(desc, term),
+ action: { functionName: "navigateToUrl", args: [utils.createFullUrl(url)] },
+ }})
+ }.proxy(this));
+ },
+
+ /** Checks if the input is a URL. If yes, returns a suggestion to visit it. If no, returns a suggestion
+ * to start a web search. */
+ getUrlOrSearchSuggestion: function(query, suggestions) {
+ // trim query
+ query = query.replace(/^\s+|\s+$/g, '');
+ var isUrl = utils.isUrl(query);
+ return new LazyCompletionResult(-1, function() {
+ return {
+ html: createCompletionHtml(isUrl ? "goto" : "search", query),
+ action: { functionName: "navigateToUrl",
+ args: isUrl ? [utils.createFullUrl(query)] : [utils.createSearchUrl(query)] }
+ }});
+ },
+
+ filter: function(query, callback) {
+ suggestions = this.getCommandSuggestions(query);
+ suggestions.push(this.getUrlOrSearchSuggestion(query));
+ callback(suggestions);
+ }
+ });
+
+ /*
+ * A generic asynchronous completer which is used by completers which have asynchronous data sources,
+ * like history or bookmarks.
+ */
+ var AsyncCompleter = Class.extend({
+ init: function() {
+ this.id = utils.createUniqueId();
+ this.reset();
+ this.resultsReady = this.fallbackReadyCallback = function(results) { this.completions = results; }
+ },
+
+ reset: function() {
+ fuzzyMatcher.invalidateFilterCache(this.id);
+ this.completions = null;
+ },
+
+ /*
+ * This creates an intermediate representation of a completion which will later be called with a specific
+ * query.
+ * - type: the type of item we're completing against, e.g. "bookmark", "history", "tab".
+ * - item: the item itself. This should include a url and title property (bookmark, history and tab
+ * objects include both of these).
+ * - action: the action to take in the Vomnibox frontend
+ *
+ * It's used to save us work -- we call this on every bookmark in your bookmarks list when we first fetch
+ * them, for instance, and we don't want to do some the same work again every time a new query is
+ * processed.
+ *
+ * TODO(philc): It would be nice if this could be removed; it's confusing.
+ * */
+ createUnrankedCompletion: function(type, item, action) {
+ var url = item.url;
+ var parts = [type, url, item.title];
+ var str = parts.join(" ");
+ action = action || { functionName: "navigateToUrl", args: [url] };
+
+ function createLazyCompletion(query) {
+ var relevancy = url.length / fuzzyMatcher.calculateRelevancy(query, str)
+ return new LazyCompletionResult(relevancy, function() {
+ return {
+ html: renderFuzzy(query, createCompletionHtml.apply(null, parts)),
+ action: action,
+ }});
+ }
+
+ // add one more layer of indirection: For filtering, we only need the string to match.
+ // Only after we reduced the number of possible results, we call :bind on them to get
+ // an actual completion object
+ return {
+ completionString: parts.join(" "),
+ createLazyCompletion: createLazyCompletion,
+ }
+ },
+
+ processResults: function(query, results) {
+ results = fuzzyMatcher.filter(query, results,
+ function(match) { return match.completionString }, this.id);
+ return results.map(function(result) { return result.createLazyCompletion(query); });
+ },
+
+ filter: function(query, callback) {
+ var handler = function(results) { callback(this.processResults(query, results)); }.proxy(this);
+
+ // are the results ready?
+ if (this.completions !== null) {
+ // yes: call the callback synchronously
+ handler(this.completions);
+ } else {
+ // no: register the handler as a callback
+ this.resultsReady = function(results) {
+ handler(results);
+ this.resultsReady = this.fallbackReadyCallback;
+ this.resultsReady(results);
+ }.proxy(this);
+ }
+ }
+ });
+
+ var FuzzyBookmarkCompleter = Class.extend({
+ init: function() { this.asyncCompleter = new AsyncCompleter(); },
+ filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); },
+
+ // Traverses the bookmark hierarhcy and retuns a list of all bookmarks in the tree.
+ traverseBookmarkTree: function(bookmarks) {
+ var results = [];
+ var toVisit = bookmarks;
+ while (toVisit.length > 0) {
+ var bookmark = toVisit.shift();
+ results.push(bookmark);
+ if (bookmark.children)
+ toVisit.push.apply(toVisit, bookmark.children);
+ }
+ return results;
+ },
+
+ refresh: function() {
+ this.asyncCompleter.reset();
+ chrome.bookmarks.getTree(function(bookmarks) {
+ var results = this.traverseBookmarkTree(bookmarks);
+ var validResults = results.filter(function(b) { return b.url !== undefined; });
+ var matches = validResults.map(function(bookmark) {
+ return this.asyncCompleter.createUnrankedCompletion("bookmark", bookmark);
+ }.proxy(this));
+ this.asyncCompleter.resultsReady(matches);
+ }.proxy(this));
+ }
+ });
+
+ var FuzzyHistoryCompleter = Class.extend({
+ init: function(maxResults) {
+ this.asyncCompleter = new AsyncCompleter();
+ this.maxResults = maxResults;
+ },
+
+ filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); },
+
+ refresh: function() {
+ this.asyncCompleter.reset();
+
+ historyCache.use(function(history) {
+ this.asyncCompleter.resultsReady(history.slice(-this.maxResults).map(function(item) {
+ return this.asyncCompleter.createUnrankedCompletion("history", item);
+ }.proxy(this)));
+ }.proxy(this));
+ }
+ });
+
+ var FuzzyTabCompleter = Class.extend({
+ init: function() { this.asyncCompleter = new AsyncCompleter(); },
+
+ filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); },
+
+ refresh: function() {
+ this.asyncCompleter.reset();
+ chrome.tabs.getAllInWindow(null, function(tabs) {
+ this.asyncCompleter.resultsReady(tabs.map(function(tab) {
+ return this.asyncCompleter.createUnrankedCompletion("tab", tab,
+ { functionName: "switchToTab", args: [tab.id] });
+ }.proxy(this)));
+ }.proxy(this));
+ }
+ });
+
+ /*
+ * A completer which matches only domains from sites in your history with the current query.
+ */
+ var DomainCompleter = Class.extend({
+ // A mapping of doamin => useHttps, where useHttps is a boolean.
+ domains: null,
+
+ withDomains: function(callback) {
+ var self = this;
+ function buildResult() {
+ return Object.keys(self.domains).map(function(dom) {
+ return [dom, self.domains[dom]];
+ });
+ }
+ if (self.domains !== null)
+ return callback(buildResult());
+
+ self.domains = {};
+
+ function processDomain(domain, https) {
+ // non-www version is preferrable, so check if we have it already
+ if (domain.indexOf('www.') == 0 && self.domains.hasOwnProperty(domain.slice(4)))
+ domain = domain.slice(4);
+
+ // HTTPS is preferrable
+ https = https || self.domains[domain] || self.domains['www.' + domain];
+
+ self.domains[domain] = !!https;
+ delete self.domains['www.' + domain];
+ }
+
+ function processUrl(url) {
+ parts = url.split('/');
+ processDomain(parts[2], parts[0] == 'https:');
+ }
+
+ historyCache.use(function(history) {
+ history.forEach(function(item) { processUrl(item.url); });
+ });
+
+ chrome.history.onVisited.addListener(function(item) { processUrl(item.url); });
+
+ callback(buildResult());
+ },
+
+ refresh: function() { },
+
+ filter: function(query, callback) {
+ var best = null;
+ this.withDomains(function(domains) {
+ var bestOffset = 1000;
+ domains.forEach(function(result) {
+ var domain = result[0];
+ var protocol = result[1] ? 'https' : 'http';
+
+ var offset = domain.indexOf(query);
+ if (offset < 0 || offset >= bestOffset)
+ return;
+
+ // found a new optimum
+ bestOffset = offset;
+ best = new LazyCompletionResult(-1.5, function() {
+ return {
+ html: createCompletionHtml("site", domain),
+ action: { functionName: "navigateToUrl", args: [protocol + "://" + domain] },
+ }});
+ });
+ });
+ callback(best ? [best] : []);
+ }
+ });
+
+ /** A meta-completer that delegates queries and merges and sorts the results of a collection of other
+ * completer instances given in :sources. The optional argument :queryThreshold determines how long a
+ * query has to be to trigger a search. */
+ var MultiCompleter = Class.extend({
+ init: function(sources, queryThreshold) {
+ if (queryThreshold === undefined)
+ queryThreshold = 1; // default
+ this.sources = sources;
+ this.queryThreshold = queryThreshold;
+ },
+
+ refresh: function() { this.sources.forEach(function(source) { source.refresh(); }); },
+
+ filter: function(query, maxResults, callback) {
+ if (query.length < this.queryThreshold) {
+ callback([]);
+ return;
+ }
+
+ var allResults = [];
+ var counter = this.sources.length;
+
+ this.sources.forEach(function(source) {
+ source.filter(query, function(results) {
+ allResults = allResults.concat(results);
+ if (--counter > 0)
+ return;
+
+ // all sources have provided results by now, so we can sort and return
+ allResults.sort(function(a,b) { return a.relevancy - b.relevancy; });
+ // evalulate lazy completions for the top n results
+ callback(allResults.slice(0, maxResults).map(function(result) { return result.build(); }));
+ });
+ });
+ }
+ });
+
+ /** Singleton object that provides helpers and caching for fuzzy completion. */
+ var fuzzyMatcher = (function() {
+ var self = {};
+
+ self.timeToClean = 0;
+ self.cacheSize = 1000;
+ self.regexNonWord = /[\W_]/ig;
+
+ // cache generated regular expressions
+ self.regexpCache = {};
+ // cache filtered results from recent queries
+ self.filterCache = {};
+ self.normalizationCache = {};
+
+ /** Normalizes the string specified in :query. Strips any non-word characters and converts
+ * to lower case. */
+ self.normalize = function(query) {
+ if (!(query in self.normalizationCache))
+ self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase();
+ return self.normalizationCache[query];
+ }
+
+ /** Returns the non-matching and matching string parts, in alternating order (starting with a
+ * non-matching part) or null, if the string doesn't match the query.
+ *
+ * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"]
+ *
+ * Note that this function matches the longest possible parts of a string and is therefore not very
+ * efficient. There it falls back to a more performant, but less accurate regex matching if the
+ * normalized query is longer than 10 characters.
+ *
+ * _Don't use this to check if a string matches a query_. Use `getRegexp(query).test(str)` instead.
+ */
+ self.getMatchGroups = function(query, str) {
+ query = self.normalize(query);
+ if (query.length == 0)
+ return str.length ? [str] : [];
+ if (query.length > 15) {
+ // for long query strings, the method is much too inefficient, so fall
+ // back to the less accurate regex matching
+ return self.getRegexp(query).exec(str).slice(1);
+ }
+
+ for (var i = query.length; i >= 1; --i) {
+ var part = query.slice(0, i);
+ var partOffset = str.toLowerCase().indexOf(part);
+ if (partOffset < 0)
+ continue;
+
+ // we use recursive backtracking here, this is why it's slow.
+ rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i));
+ if (!rest) continue;
+
+ return [
+ str.slice(0, partOffset),
+ part,
+ ].concat(rest);
+ }
+ return null;
+ }
+
+ /** Calculates a very simple similarity value between a :query and a :string */
+ self.calculateRelevancy = function(query, str) {
+ query = self.normalize(query);
+ str = self.normalize(str);
+ var sum = 0;
+
+ // only iterate over slices of the query starting at an offset up to 10 to save resources
+ for (var start = 0; start < 20 && start < query.length; ++start) {
+ for (var i = query.length; i >= start; --i) {
+ if (str.indexOf(query.slice(start, i)) >= 0) {
+ var length = i - start;
+ sum += length * length;
+ break;
+ }
+ }
+ }
+ return sum * sum * sum;
+ }
+
+ /** Trims the size of the caches to the configured size using a FIFO algorithm. */
+ self.cleanCache = function() {
+ // remove old cached regexes
+ Object.keys(self.regexpCache).slice(self.cacheSize).forEach(function(query) {
+ delete self.regexpCache[query];
+ });
+ // remove old cached normalization results
+ Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) {
+ delete self.normalizationCache[query];
+ });
+ }
+
+ /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result
+ * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/
+ */
+ self.getRegexp = function(query) {
+ query = self.normalize(query);
+ if (!(query in self.regexpCache)) {
+ // build up a regex for fuzzy matching. This is the fastest method I checked (faster than:
+ // string building, splice, concat, multi-level join)
+ var regex = ['^'];
+ for (var i = 0; i < query.length; ++i) {
+ regex.push('([^');
+ regex.push(query[i]);
+ regex.push(']*)(');
+ regex.push(query[i]);
+ regex.push(')');
+ }
+ regex.push('(.*)$');
+ self.regexpCache[query] = new RegExp(regex.join(''), 'i');
+ }
+ return self.regexpCache[query];
+ }
+
+ /** Clear the cache for the given source, e.g. for refreshing */
+ self.invalidateFilterCache = function(id) {
+ self.filterCache[id] = {};
+ }
+
+ /** Filters a collection :source using fuzzy matching against an input string :query. If a query with
+ * a less specific query was issued before (e.g. if the user added a letter to the query), the cached
+ * results of the last filtering are used as a starting point, instead of :source.
+ */
+ self.filter = function(query, source, getValue, id) {
+ if (!(id in self.filterCache))
+ self.filterCache[id] = {};
+
+ // find the most narrow list of results in the cache
+ var optSpecificity = source.length;
+ var specificity;
+ for (key in self.filterCache[id]) {
+ if (!self.filterCache[id].hasOwnProperty(key))
+ continue;
+
+ if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) {
+ // cache entry no longer needed
+ delete self.filterCache[id][key];
+ continue;
+ }
+
+ // is this a plausible result set to use as a source?
+ if (query.indexOf(key) < 0)
+ continue;
+
+ // is this cache entry the most specific so far?
+ specificity = self.filterCache[id][key].length;
+ if (specificity < optSpecificity) {
+ source = self.filterCache[id][key];
+ optSpecificity = specificity;
+ }
+ }
+
+ // don't clean up the caches every iteration
+ if (++self.timeToClean > 100) {
+ self.timeToClean = 0;
+ self.cleanCache();
+ }
+
+ var regexp = self.getRegexp(query);
+ var filtered = source.filter(function(x) { return regexp.test(getValue(x)) });
+ self.filterCache[id][query] = filtered;
+ return filtered;
+ }
+
+ return self;
+ })();
+
+ var htmlRegex = /<[^>]*>|&[a-z]+;/gi;
+
+ /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped
+ * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */
+ function stripHtmlTags(str, positions) {
+ if (!positions)
+ return str.replace(htmlRegex, '');
+
+ var match = str.match(htmlRegex);
+ if (!match) return;
+ match.reverse();
+ var split = str.split(htmlRegex);
+ var offset = 0;
+ var i = 0;
+ split.forEach(function(text) {
+ if (match.length > 0)
+ positions[offset += text.length] = match.pop();
+ });
+
+ return split.join('');
+ }
+
+ /** Creates an file-internal representation of a URL match with the given paramters */
+ function createCompletionHtml(type, str, title) {
+ title = title || '';
+ // sanitize input, it could come from a malicious web site
+ title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : '';
+ return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title;
+ }
+
+ /** Renders a completion by marking fuzzy-matched parts. */
+ function renderFuzzy(query, html) {
+ // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the
+ // tags and reinsert them after the matching process
+ var htmlTags = {};
+ var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags));
+
+ html = [];
+ var htmlOffset = 0;
+
+ // this helper function adds the HTML generated _for one single character_ to the HTML output
+ // and reinserts HTML tags stripped before, if they were at this position
+ function addToHtml(str) {
+ if (htmlOffset in htmlTags)
+ html.push(htmlTags[htmlOffset]);
+ html.push(str);
+ ++htmlOffset;
+ }
+
+ function addCharsWithDecoration(str, before, after) {
+ before = before || '';
+ after = after || '';
+ for (var i = 0; i < str.length; ++i)
+ addToHtml(before + str[i] + after);
+ }
+
+ // iterate over the match groups. They are non-matched and matched string parts, in alternating order
+ for (var i = 0; i < groups.length; ++i) {
+ if (i % 2 == 0)
+ // we have a non-matched part, it could have several characters. We need to insert them character
+ // by character, so that addToHtml can keep track of the position in the original string
+ addCharsWithDecoration(groups[i]);
+ else
+ // we have a matched part. In addition to the characters themselves, we add some decorating HTML.
+ addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>');
+ };
+
+ // call it another time so that a tag at the very last position is reinserted
+ addToHtml('');
+ return html.join('');
+ }
+
+ /** Singleton object that provides fast access to the Chrome history */
+ var historyCache = (function() {
+ var size = 20000;
+ var cachedHistory = null;
+
+ function use(callback) {
+ if (cachedHistory !== null)
+ return callback(cachedHistory);
+
+ chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) {
+ // sorting in ascending order, so we can push new items to the end later
+ history.sort(function(a, b) {
+ return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0);
+ });
+ cachedHistory = history;
+ callback(history);
+ });
+
+ chrome.history.onVisited.addListener(function(item) {
+ // only cache newly visited sites
+ if (item.visitCount === 1)
+ cachedHistory.push(item);
+ });
+ }
+
+ return { use: use };
+ })();
+
+ // public interface
+ return {
+ FuzzyBookmarkCompleter: FuzzyBookmarkCompleter,
+ FuzzyHistoryCompleter: FuzzyHistoryCompleter,
+ FuzzyTabCompleter: FuzzyTabCompleter,
+ SmartKeywordCompleter: SmartKeywordCompleter,
+ DomainCompleter: DomainCompleter,
+ MultiCompleter: MultiCompleter
+ };
+})()