/*
* This contains the definition of the completers used for the Vomnibox's suggestion UI. A complter will take
* a query (whatever the user typed into the Vomnibox) and return a list of matches, e.g. bookmarks, domains,
* URLs from history.
*
* The Vomnibox frontend script makes a "filterCompleter" request to the background page, which in turn calls
* filter() on each these completers.
*
* A completer is a class which has two functions:
* - refresh(): refreshes the completer's data source (e.g. refetches the list of bookmarks from Chrome).
* - filter(query, callback): "query" will be whatever the user typed into the Vomnibox. "callback" is a
* function which will be invoked with a list of LazyCompletionResults as its first argument.
*
* A completer's filter() function returns a list of LazyCompletionResults. This contains a relevancy score
* for the result, as well as a function to build the full result (e.g. the HTML representing this result).
*
* The MultiCompleter collects a big list of LazyCompletionResults from many completers by calling each of
* their filter functions in turn, sorts the results by relevancy, and then calls build() on the top N
* results. This allows us to avoid generating HTML for all of the results we're not going to use.
* The objects returned from build() are sent to the Vomnibox frontend script to be shown in the UI.
*/
var completion = (function() {
/*
* An object which contains a relevancy score for the given completion, and a function which can be
* invoked to build its HTML.
* Calling build() should return an object of the form:
* { html: "", action: { functionName: "", args: [] } }
* This object is eventually sent back to the Vomnibox frontend script. "action" contains the action to
* be performed by the frontend script if this result is chosen (user selects it and hits enter).
* "action" includes the function the frontendScript should execute (e.g. "navigateToUrl") along with any
* arguments (like the URL).
* "html" is the HTML representation of this result, with some characters emphasized to higlight the query.
*
* This is called "lazy" because it takes in a function to lazily compute a result's html. That operation
* can be kind of expensive, so you only want to do it to the top completion results, after you've sorted
* them by relevancy.
*/
var LazyCompletionResult = function(relevancy, buildFunction) {
this.relevancy = relevancy;
this.build = buildFunction;
}
/*
* A completer which takes in a list of keyword commands (like "wiki" for "search wikipedia") and will
* decide if your query is a command, a URL that you want to visit, or a search term.
*/
var SmartKeywordCompleter = Class.extend({
/*
* - commands: a list of commands of the form: { keyword: [title, url] }, e.g.:
* { "wiki ": ["Wikipedia (en)", "http://en.wikipedia.org/wiki/%s" ]
*/
init: function(commands) {
this.commands = commands || {};
this.commandKeys = Object.keys(commands);
},
refresh: function() { },
/** Returns the suggestions matching the user-defined commands */
getCommandSuggestions: function(query, suggestions) {
return this.commandKeys.filter(function(cmd) { return query.indexOf(cmd) == 0 }).map(function(cmd) {
var term = query.slice(cmd.length);
var desc = this.commands[cmd][0];
var pattern = this.commands[cmd][1];
var url = (typeof pattern == "function") ? pattern(term) : pattern.replace(/%s/g, term);
// this will appear even before the URL/search suggestion
return new LazyCompletionResult(-2, function() {
return {
html: createCompletionHtml(desc, term),
action: { functionName: "navigateToUrl", args: [utils.createFullUrl(url)] },
}})
}.proxy(this));
},
/** Checks if the input is a URL. If yes, returns a suggestion to visit it. If no, returns a suggestion
* to start a web search. */
getUrlOrSearchSuggestion: function(query, suggestions) {
// trim query
query = query.replace(/^\s+|\s+$/g, '');
var isUrl = utils.isUrl(query);
return new LazyCompletionResult(-1, function() {
return {
html: createCompletionHtml(isUrl ? "goto" : "search", query),
action: { functionName: "navigateToUrl",
args: isUrl ? [utils.createFullUrl(query)] : [utils.createSearchUrl(query)] }
}});
},
filter: function(query, callback) {
suggestions = this.getCommandSuggestions(query);
suggestions.push(this.getUrlOrSearchSuggestion(query));
callback(suggestions);
}
});
/*
* A generic asynchronous completer which is used by completers which have asynchronous data sources,
* like history or bookmarks.
*/
var AsyncCompleter = Class.extend({
init: function() {
this.id = utils.createUniqueId();
this.reset();
this.resultsReady = this.fallbackReadyCallback = function(results) { this.completions = results; }
},
reset: function() {
fuzzyMatcher.invalidateFilterCache(this.id);
this.completions = null;
},
/*
* This creates an intermediate representation of a completion which will later be called with a specific
* query.
* - type: the type of item we're completing against, e.g. "bookmark", "history", "tab".
* - item: the item itself. This should include a url and title property (bookmark, history and tab
* objects include both of these).
* - action: the action to take in the Vomnibox frontend
*
* It's used to save us work -- we call this on every bookmark in your bookmarks list when we first fetch
* them, for instance, and we don't want to do some the same work again every time a new query is
* processed.
*
* TODO(philc): It would be nice if this could be removed; it's confusing.
* */
createUnrankedCompletion: function(type, item, action) {
var url = item.url;
var parts = [type, url, item.title];
var str = parts.join(" ");
action = action || { functionName: "navigateToUrl", args: [url] };
function createLazyCompletion(query) {
var relevancy = url.length / fuzzyMatcher.calculateRelevancy(query, str)
return new LazyCompletionResult(relevancy, function() {
return {
html: renderFuzzy(query, createCompletionHtml.apply(null, parts)),
action: action,
}});
}
// add one more layer of indirection: For filtering, we only need the string to match.
// Only after we reduced the number of possible results, we call :bind on them to get
// an actual completion object
return {
completionString: parts.join(" "),
createLazyCompletion: createLazyCompletion,
}
},
processResults: function(query, results) {
results = fuzzyMatcher.filter(query, results,
function(match) { return match.completionString }, this.id);
return results.map(function(result) { return result.createLazyCompletion(query); });
},
filter: function(query, callback) {
var handler = function(results) { callback(this.processResults(query, results)); }.proxy(this);
// are the results ready?
if (this.completions !== null) {
// yes: call the callback synchronously
handler(this.completions);
} else {
// no: register the handler as a callback
this.resultsReady = function(results) {
handler(results);
this.resultsReady = this.fallbackReadyCallback;
this.resultsReady(results);
}.proxy(this);
}
}
});
var FuzzyBookmarkCompleter = Class.extend({
init: function() { this.asyncCompleter = new AsyncCompleter(); },
filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); },
// Traverses the bookmark hierarhcy and retuns a list of all bookmarks in the tree.
traverseBookmarkTree: function(bookmarks) {
var results = [];
var toVisit = bookmarks;
while (toVisit.length > 0) {
var bookmark = toVisit.shift();
results.push(bookmark);
if (bookmark.children)
toVisit.push.apply(toVisit, bookmark.children);
}
return results;
},
refresh: function() {
this.asyncCompleter.reset();
chrome.bookmarks.getTree(function(bookmarks) {
var results = this.traverseBookmarkTree(bookmarks);
var validResults = results.filter(function(b) { return b.url !== undefined; });
var matches = validResults.map(function(bookmark) {
return this.asyncCompleter.createUnrankedCompletion("bookmark", bookmark);
}.proxy(this));
this.asyncCompleter.resultsReady(matches);
}.proxy(this));
}
});
var FuzzyHistoryCompleter = Class.extend({
init: function(maxResults) {
this.asyncCompleter = new AsyncCompleter();
this.maxResults = maxResults;
},
filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); },
refresh: function() {
this.asyncCompleter.reset();
historyCache.use(function(history) {
this.asyncCompleter.resultsReady(history.slice(-this.maxResults).map(function(item) {
return this.asyncCompleter.createUnrankedCompletion("history", item);
}.proxy(this)));
}.proxy(this));
}
});
var FuzzyTabCompleter = Class.extend({
init: function() { this.asyncCompleter = new AsyncCompleter(); },
filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); },
refresh: function() {
this.asyncCompleter.reset();
chrome.tabs.getAllInWindow(null, function(tabs) {
this.asyncCompleter.resultsReady(tabs.map(function(tab) {
return this.asyncCompleter.createUnrankedCompletion("tab", tab,
{ functionName: "switchToTab", args: [tab.id] });
}.proxy(this)));
}.proxy(this));
}
});
/*
* A completer which matches only domains from sites in your history with the current query.
*/
var DomainCompleter = Class.extend({
// A mapping of doamin => useHttps, where useHttps is a boolean.
domains: null,
withDomains: function(callback) {
var self = this;
function buildResult() {
return Object.keys(self.domains).map(function(dom) {
return [dom, self.domains[dom]];
});
}
if (self.domains !== null)
return callback(buildResult());
self.domains = {};
function processDomain(domain, https) {
// non-www version is preferrable, so check if we have it already
if (domain.indexOf('www.') == 0 && self.domains.hasOwnProperty(domain.slice(4)))
domain = domain.slice(4);
// HTTPS is preferrable
https = https || self.domains[domain] || self.domains['www.' + domain];
self.domains[domain] = !!https;
delete self.domains['www.' + domain];
}
function processUrl(url) {
parts = url.split('/');
processDomain(parts[2], parts[0] == 'https:');
}
historyCache.use(function(history) {
history.forEach(function(item) { processUrl(item.url); });
});
chrome.history.onVisited.addListener(function(item) { processUrl(item.url); });
callback(buildResult());
},
refresh: function() { },
filter: function(query, callback) {
var best = null;
this.withDomains(function(domains) {
var bestOffset = 1000;
domains.forEach(function(result) {
var domain = result[0];
var protocol = result[1] ? 'https' : 'http';
var offset = domain.indexOf(query);
if (offset < 0 || offset >= bestOffset)
return;
// found a new optimum
bestOffset = offset;
best = new LazyCompletionResult(-1.5, function() {
return {
html: createCompletionHtml("site", domain),
action: { functionName: "navigateToUrl", args: [protocol + "://" + domain] },
}});
});
});
callback(best ? [best] : []);
}
});
/** A meta-completer that delegates queries and merges and sorts the results of a collection of other
* completer instances given in :sources. The optional argument :queryThreshold determines how long a
* query has to be to trigger a search. */
var MultiCompleter = Class.extend({
init: function(sources, queryThreshold) {
if (queryThreshold === undefined)
queryThreshold = 1; // default
this.sources = sources;
this.queryThreshold = queryThreshold;
},
refresh: function() { this.sources.forEach(function(source) { source.refresh(); }); },
filter: function(query, maxResults, callback) {
if (query.length < this.queryThreshold) {
callback([]);
return;
}
var allResults = [];
var counter = this.sources.length;
this.sources.forEach(function(source) {
source.filter(query, function(results) {
allResults = allResults.concat(results);
if (--counter > 0)
return;
// all sources have provided results by now, so we can sort and return
allResults.sort(function(a,b) { return a.relevancy - b.relevancy; });
// evalulate lazy completions for the top n results
callback(allResults.slice(0, maxResults).map(function(result) { return result.build(); }));
});
});
}
});
/** Singleton object that provides helpers and caching for fuzzy completion. */
var fuzzyMatcher = (function() {
var self = {};
self.timeToClean = 0;
self.cacheSize = 1000;
self.regexNonWord = /[\W_]/ig;
// cache generated regular expressions
self.regexpCache = {};
// cache filtered results from recent queries
self.filterCache = {};
self.normalizationCache = {};
/** Normalizes the string specified in :query. Strips any non-word characters and converts
* to lower case. */
self.normalize = function(query) {
if (!(query in self.normalizationCache))
self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase();
return self.normalizationCache[query];
}
/** Returns the non-matching and matching string parts, in alternating order (starting with a
* non-matching part) or null, if the string doesn't match the query.
*
* Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"]
*
* Note that this function matches the longest possible parts of a string and is therefore not very
* efficient. There it falls back to a more performant, but less accurate regex matching if the
* normalized query is longer than 10 characters.
*
* _Don't use this to check if a string matches a query_. Use `getRegexp(query).test(str)` instead.
*/
self.getMatchGroups = function(query, str) {
query = self.normalize(query);
if (query.length == 0)
return str.length ? [str] : [];
if (query.length > 15) {
// for long query strings, the method is much too inefficient, so fall
// back to the less accurate regex matching
return self.getRegexp(query).exec(str).slice(1);
}
for (var i = query.length; i >= 1; --i) {
var part = query.slice(0, i);
var partOffset = str.toLowerCase().indexOf(part);
if (partOffset < 0)
continue;
// we use recursive backtracking here, this is why it's slow.
rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i));
if (!rest) continue;
return [
str.slice(0, partOffset),
part,
].concat(rest);
}
return null;
}
/** Calculates a very simple similarity value between a :query and a :string */
self.calculateRelevancy = function(query, str) {
query = self.normalize(query);
str = self.normalize(str);
var sum = 0;
// only iterate over slices of the query starting at an offset up to 10 to save resources
for (var start = 0; start < 20 && start < query.length; ++start) {
for (var i = query.length; i >= start; --i) {
if (str.indexOf(query.slice(start, i)) >= 0) {
var length = i - start;
sum += length * length;
break;
}
}
}
return sum * sum * sum;
}
/** Trims the size of the caches to the configured size using a FIFO algorithm. */
self.cleanCache = function() {
// remove old cached regexes
Object.keys(self.regexpCache).slice(self.cacheSize).forEach(function(query) {
delete self.regexpCache[query];
});
// remove old cached normalization results
Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) {
delete self.normalizationCache[query];
});
}
/** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result
* in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/
*/
self.getRegexp = function(query) {
query = self.normalize(query);
if (!(query in self.regexpCache)) {
// build up a regex for fuzzy matching. This is the fastest method I checked (faster than:
// string building, splice, concat, multi-level join)
var regex = ['^'];
for (var i = 0; i < query.length; ++i) {
regex.push('([^');
regex.push(query[i]);
regex.push(']*)(');
regex.push(query[i]);
regex.push(')');
}
regex.push('(.*)$');
self.regexpCache[query] = new RegExp(regex.join(''), 'i');
}
return self.regexpCache[query];
}
/** Clear the cache for the given source, e.g. for refreshing */
self.invalidateFilterCache = function(id) {
self.filterCache[id] = {};
}
/** Filters a collection :source using fuzzy matching against an input string :query. If a query with
* a less specific query was issued before (e.g. if the user added a letter to the query), the cached
* results of the last filtering are used as a starting point, instead of :source.
*/
self.filter = function(query, source, getValue, id) {
if (!(id in self.filterCache))
self.filterCache[id] = {};
// find the most narrow list of results in the cache
var optSpecificity = source.length;
var specificity;
for (key in self.filterCache[id]) {
if (!self.filterCache[id].hasOwnProperty(key))
continue;
if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) {
// cache entry no longer needed
delete self.filterCache[id][key];
continue;
}
// is this a plausible result set to use as a source?
if (query.indexOf(key) < 0)
continue;
// is this cache entry the most specific so far?
specificity = self.filterCache[id][key].length;
if (specificity < optSpecificity) {
source = self.filterCache[id][key];
optSpecificity = specificity;
}
}
// don't clean up the caches every iteration
if (++self.timeToClean > 100) {
self.timeToClean = 0;
self.cleanCache();
}
var regexp = self.getRegexp(query);
var filtered = source.filter(function(x) { return regexp.test(getValue(x)) });
self.filterCache[id][query] = filtered;
return filtered;
}
return self;
})();
var htmlRegex = /<[^>]*>|&[a-z]+;/gi;
/** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped
* HTML tags in a dictionary indexed by the position where the tag should be reinserted. */
function stripHtmlTags(str, positions) {
if (!positions)
return str.replace(htmlRegex, '');
var match = str.match(htmlRegex);
if (!match) return;
match.reverse();
var split = str.split(htmlRegex);
var offset = 0;
var i = 0;
split.forEach(function(text) {
if (match.length > 0)
positions[offset += text.length] = match.pop();
});
return split.join('');
}
/** Creates an file-internal representation of a URL match with the given paramters */
function createCompletionHtml(type, str, title) {
title = title || '';
// sanitize input, it could come from a malicious web site
title = title.length > 0 ? ' ' + utils.escapeHtml(title) + '' : '';
return '' + type + ' ' + utils.escapeHtml(str) + title;
}
/** Renders a completion by marking fuzzy-matched parts. */
function renderFuzzy(query, html) {
// we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the
// tags and reinsert them after the matching process
var htmlTags = {};
var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags));
html = [];
var htmlOffset = 0;
// this helper function adds the HTML generated _for one single character_ to the HTML output
// and reinserts HTML tags stripped before, if they were at this position
function addToHtml(str) {
if (htmlOffset in htmlTags)
html.push(htmlTags[htmlOffset]);
html.push(str);
++htmlOffset;
}
function addCharsWithDecoration(str, before, after) {
before = before || '';
after = after || '';
for (var i = 0; i < str.length; ++i)
addToHtml(before + str[i] + after);
}
// iterate over the match groups. They are non-matched and matched string parts, in alternating order
for (var i = 0; i < groups.length; ++i) {
if (i % 2 == 0)
// we have a non-matched part, it could have several characters. We need to insert them character
// by character, so that addToHtml can keep track of the position in the original string
addCharsWithDecoration(groups[i]);
else
// we have a matched part. In addition to the characters themselves, we add some decorating HTML.
addCharsWithDecoration(groups[i], '', '');
};
// call it another time so that a tag at the very last position is reinserted
addToHtml('');
return html.join('');
}
/** Singleton object that provides fast access to the Chrome history */
var historyCache = (function() {
var size = 20000;
var cachedHistory = null;
function use(callback) {
if (cachedHistory !== null)
return callback(cachedHistory);
chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) {
// sorting in ascending order, so we can push new items to the end later
history.sort(function(a, b) {
return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0);
});
cachedHistory = history;
callback(history);
});
chrome.history.onVisited.addListener(function(item) {
// only cache newly visited sites
if (item.visitCount === 1)
cachedHistory.push(item);
});
}
return { use: use };
})();
// public interface
return {
FuzzyBookmarkCompleter: FuzzyBookmarkCompleter,
FuzzyHistoryCompleter: FuzzyHistoryCompleter,
FuzzyTabCompleter: FuzzyTabCompleter,
SmartKeywordCompleter: SmartKeywordCompleter,
DomainCompleter: DomainCompleter,
MultiCompleter: MultiCompleter
};
})()