diff options
Diffstat (limited to 'background_scripts/completion.js')
| -rw-r--r-- | background_scripts/completion.js | 626 | 
1 files changed, 626 insertions, 0 deletions
| diff --git a/background_scripts/completion.js b/background_scripts/completion.js new file mode 100644 index 00000000..b31c4918 --- /dev/null +++ b/background_scripts/completion.js @@ -0,0 +1,626 @@ +/* + * This contains the definition of the completers used for the Vomnibox's suggestion UI. A complter will take + * a query (whatever the user typed into the Vomnibox) and return a list of matches, e.g. bookmarks, domains, + * URLs from history. + * + * The Vomnibox frontend script makes a "filterCompleter" request to the background page, which in turn calls + * filter() on each these completers. + * + * A completer is a class which has two functions: + * - refresh(): refreshes the completer's data source (e.g. refetches the list of bookmarks from Chrome). + * - filter(query, callback): "query" will be whatever the user typed into the Vomnibox. "callback" is a + *   function which will be invoked with a list of LazyCompletionResults as its first argument. + * + * A completer's filter() function returns a list of LazyCompletionResults. This contains a relevancy score + * for the result, as well as a function to build the full result (e.g. the HTML representing this result). + * + * The MultiCompleter collects a big list of LazyCompletionResults from many completers by calling each of + * their filter functions in turn, sorts the results by relevancy, and then calls build() on the top N + * results. This allows us to avoid generating HTML for all of the results we're not going to use. + * The objects returned from build() are sent to the Vomnibox frontend script to be shown in the UI. + */ +var completion = (function() { + +  /* +   * An object which contains a relevancy score for the given completion, and a function which can be +   * invoked to build its HTML. +   * Calling build() should return an object of the form: +   *   { html: "", action: { functionName: "", args: [] } } +   * This object is eventually sent back to the Vomnibox frontend script. "action" contains the action to +   * be performed by the frontend script if this result is chosen (user selects it and hits enter). +   * "action" includes the function the frontendScript should execute (e.g. "navigateToUrl") along with any +   * arguments (like the URL). +   * "html" is the HTML representation of this result, with some characters emphasized to higlight the query. +   * +   * This is called "lazy" because it takes in a function to lazily compute a result's html. That operation +   * can be kind of expensive, so you only want to do it to the top completion results, after you've sorted +   * them by relevancy. +   */ +  var LazyCompletionResult = function(relevancy, buildFunction) { +    this.relevancy = relevancy; +    this.build = buildFunction; +  } + +  /* +   * A completer which takes in a list of keyword commands (like "wiki" for "search wikipedia") and will +   * decide if your query is a command, a URL that you want to visit, or a search term. +   */ +  var SmartKeywordCompleter = Class.extend({ +    /* +     * - commands: a list of commands of the form: { keyword: [title, url] }, e.g.: +     *   { "wiki ": ["Wikipedia (en)", "http://en.wikipedia.org/wiki/%s" ] +     */ +    init: function(commands) { +      this.commands = commands || {}; +      this.commandKeys = Object.keys(commands); +    }, + +    refresh: function() { }, + +      /** Returns the suggestions matching the user-defined commands */ +    getCommandSuggestions: function(query, suggestions) { +      return this.commandKeys.filter(function(cmd) { return query.indexOf(cmd) == 0 }).map(function(cmd) { +        var term = query.slice(cmd.length); +        var desc = this.commands[cmd][0]; +        var pattern = this.commands[cmd][1]; +        var url = (typeof pattern == "function") ? pattern(term) : pattern.replace(/%s/g, term); + +        // this will appear even before the URL/search suggestion +        return new LazyCompletionResult(-2, function() { +          return { +            html:   createCompletionHtml(desc, term), +            action: { functionName: "navigateToUrl", args: [utils.createFullUrl(url)] }, +          }}) +      }.proxy(this)); +    }, + +    /** Checks if the input is a URL. If yes, returns a suggestion to visit it. If no, returns a suggestion +     * to start a web search. */ +    getUrlOrSearchSuggestion: function(query, suggestions) { +      // trim query +      query = query.replace(/^\s+|\s+$/g, ''); +      var isUrl = utils.isUrl(query); +      return new LazyCompletionResult(-1, function() { +        return { +            html: createCompletionHtml(isUrl ? "goto" : "search", query), +            action: { functionName: "navigateToUrl", +                args: isUrl ? [utils.createFullUrl(query)] : [utils.createSearchUrl(query)] } +        }}); +    }, + +    filter: function(query, callback) { +      suggestions = this.getCommandSuggestions(query); +      suggestions.push(this.getUrlOrSearchSuggestion(query)); +      callback(suggestions); +    } +  }); + +  /* +   * A generic asynchronous completer which is used by completers which have asynchronous data sources, +   * like history or bookmarks. +   */ +  var AsyncCompleter = Class.extend({ +    init: function() { +      this.id = utils.createUniqueId(); +      this.reset(); +      this.resultsReady = this.fallbackReadyCallback = function(results) { this.completions = results; } +    }, + +    reset: function() { +      fuzzyMatcher.invalidateFilterCache(this.id); +      this.completions = null; +    }, + +    /* +     * This creates an intermediate representation of a completion which will later be called with a specific +     * query. +     * - type: the type of item we're completing against, e.g. "bookmark", "history", "tab". +     * - item: the item itself. This should include a url and title property (bookmark, history and tab +     *   objects include both of these). +     * - action: the action to take in the Vomnibox frontend  +     * +     * It's used to save us work -- we call this on every bookmark in your bookmarks list when we first fetch +     * them, for instance, and we don't want to do some the same work again every time a new query is +     * processed. +     * +     * TODO(philc): It would be nice if this could be removed; it's confusing. +     * */ +    createUnrankedCompletion: function(type, item, action) { +      var url = item.url; +      var parts = [type, url, item.title]; +      var str = parts.join(" "); +      action = action || { functionName: "navigateToUrl", args: [url] }; + +      function createLazyCompletion(query) { +        var relevancy = url.length / fuzzyMatcher.calculateRelevancy(query, str) +        return new LazyCompletionResult(relevancy, function() { +          return { +            html:   renderFuzzy(query, createCompletionHtml.apply(null, parts)), +            action: action, +          }}); +      } + +      // add one more layer of indirection: For filtering, we only need the string to match. +      // Only after we reduced the number of possible results, we call :bind on them to get +      // an actual completion object +      return { +        completionString: parts.join(" "), +        createLazyCompletion: createLazyCompletion, +      } +    }, + +    processResults: function(query, results) { +      results = fuzzyMatcher.filter(query, results, +          function(match) { return match.completionString }, this.id); +      return results.map(function(result) { return result.createLazyCompletion(query); }); +    }, + +    filter: function(query, callback) { +      var handler = function(results) { callback(this.processResults(query, results)); }.proxy(this); + +      // are the results ready? +      if (this.completions !== null) { +        // yes: call the callback synchronously +        handler(this.completions); +      } else { +        // no: register the handler as a callback +        this.resultsReady = function(results) { +          handler(results); +          this.resultsReady = this.fallbackReadyCallback; +          this.resultsReady(results); +        }.proxy(this); +      } +    } +  }); + +  var FuzzyBookmarkCompleter = Class.extend({ +    init: function() { this.asyncCompleter = new AsyncCompleter(); }, +    filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, + +    // Traverses the bookmark hierarhcy and retuns a list of all bookmarks in the tree. +    traverseBookmarkTree: function(bookmarks) { +      var results = []; +      var toVisit = bookmarks; +      while (toVisit.length > 0) { +        var bookmark = toVisit.shift(); +        results.push(bookmark); +        if (bookmark.children) +          toVisit.push.apply(toVisit, bookmark.children); +      } +      return results; +    }, + +    refresh: function() { +      this.asyncCompleter.reset(); +      chrome.bookmarks.getTree(function(bookmarks) { +        var results = this.traverseBookmarkTree(bookmarks); +        var validResults = results.filter(function(b) { return b.url !== undefined; }); +        var matches = validResults.map(function(bookmark) { +          return this.asyncCompleter.createUnrankedCompletion("bookmark", bookmark); +        }.proxy(this)); +        this.asyncCompleter.resultsReady(matches); +      }.proxy(this)); +    } +  }); + +  var FuzzyHistoryCompleter = Class.extend({ +    init: function(maxResults) { +      this.asyncCompleter = new AsyncCompleter(); +      this.maxResults = maxResults; +    }, + +    filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, + +    refresh: function() { +      this.asyncCompleter.reset(); + +      historyCache.use(function(history) { +        this.asyncCompleter.resultsReady(history.slice(-this.maxResults).map(function(item) { +          return this.asyncCompleter.createUnrankedCompletion("history", item); +        }.proxy(this))); +      }.proxy(this)); +    } +  }); + +  var FuzzyTabCompleter = Class.extend({ +    init: function() { this.asyncCompleter = new AsyncCompleter(); }, + +    filter: function(query, callback) { return this.asyncCompleter.filter(query, callback); }, + +    refresh: function() { +      this.asyncCompleter.reset(); +      chrome.tabs.getAllInWindow(null, function(tabs) { +        this.asyncCompleter.resultsReady(tabs.map(function(tab) { +          return this.asyncCompleter.createUnrankedCompletion("tab", tab, +              { functionName: "switchToTab", args: [tab.id] }); +        }.proxy(this))); +      }.proxy(this)); +    } +  }); + +  /* +   * A completer which matches only domains from sites in your history with the current query. +   */ +  var DomainCompleter = Class.extend({ +    // A mapping of doamin => useHttps, where useHttps is a boolean. +    domains: null, + +    withDomains: function(callback) { +      var self = this; +      function buildResult() { +        return Object.keys(self.domains).map(function(dom) { +          return [dom, self.domains[dom]]; +        }); +      } +      if (self.domains !== null) +        return callback(buildResult()); + +      self.domains = {}; + +      function processDomain(domain, https) { +        // non-www version is preferrable, so check if we have it already +        if (domain.indexOf('www.') == 0 && self.domains.hasOwnProperty(domain.slice(4))) +          domain = domain.slice(4); + +        // HTTPS is preferrable +        https = https || self.domains[domain] || self.domains['www.' + domain]; + +        self.domains[domain] = !!https; +        delete self.domains['www.' + domain]; +      } + +      function processUrl(url) { +        parts = url.split('/'); +        processDomain(parts[2], parts[0] == 'https:'); +      } + +      historyCache.use(function(history) { +        history.forEach(function(item) { processUrl(item.url); }); +      }); + +      chrome.history.onVisited.addListener(function(item) { processUrl(item.url); }); + +      callback(buildResult()); +    }, + +    refresh: function() { }, + +    filter: function(query, callback) { +      var best = null; +      this.withDomains(function(domains) { +        var bestOffset = 1000; +        domains.forEach(function(result) { +          var domain = result[0]; +          var protocol = result[1] ? 'https' : 'http'; + +          var offset = domain.indexOf(query); +          if (offset < 0 || offset >= bestOffset) +            return; + +          // found a new optimum +          bestOffset = offset; +          best = new LazyCompletionResult(-1.5, function() { +            return { +              html:   createCompletionHtml("site", domain), +              action: { functionName: "navigateToUrl", args: [protocol + "://" + domain] }, +            }}); +        }); +      }); +      callback(best ? [best] : []); +    } +  }); + +  /** A meta-completer that delegates queries and merges and sorts the results of a collection of other +   * completer instances given in :sources. The optional argument :queryThreshold determines how long a +   * query has to be to trigger a search. */ +  var MultiCompleter = Class.extend({ +    init: function(sources, queryThreshold) { +      if (queryThreshold === undefined) +        queryThreshold = 1; // default +      this.sources = sources; +      this.queryThreshold = queryThreshold; +    }, + +    refresh: function() { this.sources.forEach(function(source) { source.refresh(); }); }, + +    filter: function(query, maxResults, callback) { +      if (query.length < this.queryThreshold) { +        callback([]); +        return; +      } + +      var allResults = []; +      var counter = this.sources.length; + +      this.sources.forEach(function(source) { +        source.filter(query, function(results) { +          allResults = allResults.concat(results); +          if (--counter > 0) +            return; + +          // all sources have provided results by now, so we can sort and return +          allResults.sort(function(a,b) { return a.relevancy - b.relevancy; }); +          // evalulate lazy completions for the top n results +          callback(allResults.slice(0, maxResults).map(function(result) { return result.build(); })); +        }); +      }); +    } +  }); + +  /** Singleton object that provides helpers and caching for fuzzy completion. */ +  var fuzzyMatcher = (function() { +    var self = {}; + +    self.timeToClean = 0; +    self.cacheSize = 1000; +    self.regexNonWord = /[\W_]/ig; + +    // cache generated regular expressions +    self.regexpCache = {}; +    // cache filtered results from recent queries +    self.filterCache = {}; +    self.normalizationCache = {}; + +    /** Normalizes the string specified in :query. Strips any non-word characters and converts +     * to lower case. */ +    self.normalize = function(query) { +      if (!(query in self.normalizationCache)) +        self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); +      return self.normalizationCache[query]; +    } + +    /** Returns the non-matching and matching string parts, in alternating order (starting with a +     * non-matching part) or null, if the string doesn't match the query. +     * +     * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] +     * +     * Note that this function matches the longest possible parts of a string and is therefore not very +     * efficient. There it falls back to a more performant, but less accurate regex matching if the +     * normalized query is longer than 10 characters. +     * +     * _Don't use this to check if a string matches a query_. Use `getRegexp(query).test(str)` instead. +     */ +    self.getMatchGroups = function(query, str) { +      query = self.normalize(query); +      if (query.length == 0) +        return str.length ? [str] : []; +      if (query.length > 15) { +        // for long query strings, the method is much too inefficient, so fall +        // back to the less accurate regex matching +        return self.getRegexp(query).exec(str).slice(1); +      } + +      for (var i = query.length; i >= 1; --i) { +        var part = query.slice(0, i); +        var partOffset = str.toLowerCase().indexOf(part); +        if (partOffset < 0) +          continue; + +        // we use recursive backtracking here, this is why it's slow. +        rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); +        if (!rest) continue; + +        return [ +          str.slice(0, partOffset), +          part, +        ].concat(rest); +      } +      return null; +    } + +    /** Calculates a very simple similarity value between a :query and a :string */ +    self.calculateRelevancy = function(query, str) { +      query = self.normalize(query); +      str   = self.normalize(str); +      var sum = 0; + +      // only iterate over slices of the query starting at an offset up to 10 to save resources +      for (var start = 0; start < 20 && start < query.length; ++start) { +        for (var i = query.length; i >= start; --i) { +          if (str.indexOf(query.slice(start, i)) >= 0) { +            var length = i - start; +            sum += length * length; +            break; +          } +        } +      } +      return sum * sum * sum; +    } + +    /** Trims the size of the caches to the configured size using a FIFO algorithm. */ +    self.cleanCache = function() { +      // remove old cached regexes +      Object.keys(self.regexpCache).slice(self.cacheSize).forEach(function(query) { +        delete self.regexpCache[query]; +      }); +      // remove old cached normalization results +      Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { +        delete self.normalizationCache[query]; +      }); +    } + +    /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result +     * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ +     */ +    self.getRegexp = function(query) { +      query = self.normalize(query); +      if (!(query in self.regexpCache)) { +        // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: +        // string building, splice, concat, multi-level join) +        var regex = ['^']; +        for (var i = 0; i < query.length; ++i) { +          regex.push('([^'); +          regex.push(query[i]); +          regex.push(']*)('); +          regex.push(query[i]); +          regex.push(')'); +        } +        regex.push('(.*)$'); +        self.regexpCache[query] = new RegExp(regex.join(''), 'i'); +      } +      return self.regexpCache[query]; +    } + +    /** Clear the cache for the given source, e.g. for refreshing */ +    self.invalidateFilterCache = function(id) { +      self.filterCache[id] = {}; +    } + +    /** Filters a collection :source using fuzzy matching against an input string :query. If a query with +     * a less specific query was issued before (e.g. if the user added a letter to the query), the cached +     * results of the last filtering are used as a starting point, instead of :source. +     */ +    self.filter = function(query, source, getValue, id) { +      if (!(id in self.filterCache)) +        self.filterCache[id] = {}; + +      // find the most narrow list of results in the cache +      var optSpecificity = source.length; +      var specificity; +      for (key in self.filterCache[id]) { +        if (!self.filterCache[id].hasOwnProperty(key)) +          continue; + +        if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { +          // cache entry no longer needed +          delete self.filterCache[id][key]; +          continue; +        } + +        // is this a plausible result set to use as a source? +        if (query.indexOf(key) < 0) +          continue; + +        // is this cache entry the most specific so far? +        specificity = self.filterCache[id][key].length; +        if (specificity < optSpecificity) { +          source = self.filterCache[id][key]; +          optSpecificity = specificity; +        } +      } + +      // don't clean up the caches every iteration +      if (++self.timeToClean > 100) { +        self.timeToClean = 0; +        self.cleanCache(); +      } + +      var regexp = self.getRegexp(query); +      var filtered = source.filter(function(x) { return regexp.test(getValue(x)) }); +      self.filterCache[id][query] = filtered; +      return filtered; +    } + +    return self; +  })(); + +  var htmlRegex = /<[^>]*>|&[a-z]+;/gi; + +  /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped +   * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ +  function stripHtmlTags(str, positions) { +    if (!positions) +      return str.replace(htmlRegex, ''); + +    var match = str.match(htmlRegex); +    if (!match) return; +    match.reverse(); +    var split = str.split(htmlRegex); +    var offset = 0; +    var i = 0; +    split.forEach(function(text) { +      if (match.length > 0) +        positions[offset += text.length] = match.pop(); +    }); + +    return split.join(''); +  } + +  /** Creates an file-internal representation of a URL match with the given paramters */ +  function createCompletionHtml(type, str, title) { +    title = title || ''; +    // sanitize input, it could come from a malicious web site +    title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : ''; +    return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title; +  } + +  /** Renders a completion by marking fuzzy-matched parts. */ +  function renderFuzzy(query, html) { +    // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the +    // tags and reinsert them after the matching process +    var htmlTags = {}; +    var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); + +    html = []; +    var htmlOffset = 0; + +    // this helper function adds the HTML generated _for one single character_ to the HTML output +    // and reinserts HTML tags stripped before, if they were at this position +    function addToHtml(str) { +      if (htmlOffset in htmlTags) +        html.push(htmlTags[htmlOffset]); +      html.push(str); +      ++htmlOffset; +    } + +    function addCharsWithDecoration(str, before, after) { +      before = before || ''; +      after = after || ''; +      for (var i = 0; i < str.length; ++i) +        addToHtml(before + str[i] + after); +    } + +    // iterate over the match groups. They are non-matched and matched string parts, in alternating order +    for (var i = 0; i < groups.length; ++i) { +      if (i % 2 == 0) +        // we have a non-matched part, it could have several characters. We need to insert them character +        // by character, so that addToHtml can keep track of the position in the original string +        addCharsWithDecoration(groups[i]); +      else +        // we have a matched part. In addition to the characters themselves, we add some decorating HTML. +        addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>'); +    }; + +    // call it another time so that a tag at the very last position is reinserted +    addToHtml(''); +    return html.join(''); +  } + +  /** Singleton object that provides fast access to the Chrome history */ +  var historyCache = (function() { +    var size = 20000; +    var cachedHistory = null; + +    function use(callback) { +      if (cachedHistory !== null) +        return callback(cachedHistory); + +      chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { +        // sorting in ascending order, so we can push new items to the end later +        history.sort(function(a, b) { +          return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); +        }); +        cachedHistory = history; +        callback(history); +      }); + +      chrome.history.onVisited.addListener(function(item) { +        // only cache newly visited sites +        if (item.visitCount === 1) +          cachedHistory.push(item); +      }); +    } + +    return { use: use }; +  })(); + +  // public interface +  return { +    FuzzyBookmarkCompleter: FuzzyBookmarkCompleter, +    FuzzyHistoryCompleter: FuzzyHistoryCompleter, +    FuzzyTabCompleter: FuzzyTabCompleter, +    SmartKeywordCompleter: SmartKeywordCompleter, +    DomainCompleter: DomainCompleter, +    MultiCompleter: MultiCompleter +  }; +})() | 
