diff options
| author | Phil Crosby | 2012-04-29 21:32:11 -0700 | 
|---|---|---|
| committer | Phil Crosby | 2012-04-29 21:32:11 -0700 | 
| commit | a1af6bedf1daa378970e62ebaf55301e7539eb2e (patch) | |
| tree | 338ffa673d29fc35299d6a26545ca192b4ee05a5 /lib/completion.js | |
| parent | 629e74c5271d25ef56cc91005954297afa0d3000 (diff) | |
| download | vimium-a1af6bedf1daa378970e62ebaf55301e7539eb2e.tar.bz2 | |
reorder source
Diffstat (limited to 'lib/completion.js')
| -rw-r--r-- | lib/completion.js | 601 | 
1 files changed, 299 insertions, 302 deletions
| diff --git a/lib/completion.js b/lib/completion.js index f62d3a58..3209a2f4 100644 --- a/lib/completion.js +++ b/lib/completion.js @@ -1,307 +1,5 @@  var completion = (function() { -  //============ Helper functions and objects ============// - -  /** Singleton object that provides helpers and caching for fuzzy completion. */ -  var fuzzyMatcher = (function() { -    var self = {}; - -    self.timeToClean = 0; -    self.cacheSize = 1000; -    self.regexNonWord = /[\W_]/ig; - -    // cache generated regular expressions -    self.matcherCache = {}; -    // cache filtered results from recent queries -    self.filterCache = {}; -    self.normalizationCache = {}; - -    /** Normalizes the string specified in :query. Strips any non-word characters and converts -     * to lower case. */ -    self.normalize = function(query) { -      if (!(query in self.normalizationCache)) -        self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); -      return self.normalizationCache[query]; -    } - -    /** Returns the non-matching and matching string parts, in alternating order (starting with a -     * non-matching part) or null, if the string doesn't match the query. -     * -     * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] -     * -     * Note that this function matches the longest possible parts of a string and is therefore not very -     * efficient. There it falls back to a more performant, but less accurate regex matching if the -     * normalized query is longer than 10 characters. -     * -     * _Don't use this to check if a string matches a query_. Use `getMatcher(query).test(str)` instead. -     */ -    self.getMatchGroups = function(query, str) { -      query = self.normalize(query); -      if (query.length == 0) -        return str.length ? [str] : []; -      if (query.length > 15) { -        // for long query strings, the method is much too inefficient, so fall -        // back to the less accurate regex matching -        return self.getMatcher(query).exec(str).slice(1); -      } - -      for (var i = query.length; i >= 1; --i) { -        var part = query.slice(0, i); -        var partOffset = str.toLowerCase().indexOf(part); -        if (partOffset < 0) -          continue; - -        // we use recursive backtracking here, this is why it's slow. -        rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); -        if (!rest) continue; - -        return [ -          str.slice(0, partOffset), -          part, -        ].concat(rest); -      } -      return null; -    } - -    /** Calculates a very simple similarity value between a :query and a :string */ -    self.calculateRelevancy = function(query, str) { -      query = self.normalize(query); -      str   = self.normalize(str); -      var sum = 0; - -      // only iterate over slices of the query starting at an offset up to 10 to save resources -      for (var start = 0; start < 20 && start < query.length; ++start) { -        for (var i = query.length; i >= start; --i) { -          if (str.indexOf(query.slice(start, i)) >= 0) { -            var length = i - start; -            sum += length * length; -            break; -          } -        } -      } -      return sum * sum * sum; -    } - -    /** Trims the size of the caches to the configured size using a FIFO algorithm. */ -    self.cleanCache = function() { -      // remove old cached regexes -      Object.keys(self.matcherCache).slice(self.cacheSize).forEach(function(query) { -        delete self.matcherCache[query]; -      }); -      // remove old cached normalization results -      Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { -        delete self.normalizationCache[query]; -      }); -    } - -    /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result -     * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ -     */ -    self.getMatcher = function(query) { -      query = self.normalize(query); -      if (!(query in self.matcherCache)) { -        // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: -        // string building, splice, concat, multi-level join) -        var regex = ['^']; -        for (var i = 0; i < query.length; ++i) { -          regex.push('([^'); -          regex.push(query[i]); -          regex.push(']*)('); -          regex.push(query[i]); -          regex.push(')'); -        } -        regex.push('(.*)$'); -        self.matcherCache[query] = new RegExp(regex.join(''), 'i'); -      } -      return self.matcherCache[query]; -    } - -    /** Clear the cache for the given source, e.g. for refreshing */ -    self.invalidateFilterCache = function(id) { -      self.filterCache[id] = {}; -    } - -    /** Filters a collection :source using fuzzy matching against an input string :query. If a query with -     * a less specific query was issued before (e.g. if the user added a letter to the query), the cached -     * results of the last filtering are used as a starting point, instead of :source. -     */ -    self.filter = function(query, source, getValue, id) { -      if (!(id in self.filterCache)) -        self.filterCache[id] = {}; - -      // find the most narrow list of results in the cache -      var optSpecificity = source.length; -      var specificity; -      for (key in self.filterCache[id]) { -        if (!self.filterCache[id].hasOwnProperty(key)) -          continue; - -        if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { -          // cache entry no longer needed -          delete self.filterCache[id][key]; -          continue; -        } - -        // is this a plausible result set to use as a source? -        if (query.indexOf(key) < 0) -          continue; - -        // is this cache entry the most specific so far? -        specificity = self.filterCache[id][key].length; -        if (specificity < optSpecificity) { -          source = self.filterCache[id][key]; -          optSpecificity = specificity; -        } -      } - -      // don't clean up the caches every iteration -      if (++self.timeToClean > 100) { -        self.timeToClean = 0; -        self.cleanCache(); -      } - -      var matcher = self.getMatcher(query); -      var filtered = source.filter(function(x) { return matcher.test(getValue(x)) }); -      self.filterCache[id][query] = filtered; -      return filtered; -    } - -    return self; -  })(); - -  var htmlRegex = /<[^>]*>|&[a-z]+;/gi; - -  /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped -   * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ -  function stripHtmlTags(str, positions) { -    if (!positions) -      return str.replace(htmlRegex, ''); - -    var match = str.match(htmlRegex); -    if (!match) return; -    match.reverse(); -    var split = str.split(htmlRegex); -    var offset = 0; -    var i = 0; -    split.forEach(function(text) { -      if (match.length > 0) -        positions[offset += text.length] = match.pop(); -    }); - -    return split.join(''); -  } - -  /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */ -  function createActionOpenUrl(url) { -    var open = function(newTab, selected) { -      return function() { -        chrome.extension.sendRequest({ -          handler:  newTab ? "openUrlInNewTab" : "openUrlInCurrentTab", -          url:      url, -          selected: selected -        }); -      } -    } - -    if (url.indexOf("javascript:") == 0) -      return [ open(false), open(false), open(false) ]; -    else -      return [ open(false), open(true, true), open(true, false) ]; -  } - -  /** Returns an action that switches to the tab with the given :id. */ -  function createActionSwitchToTab(id) { -    var open = function() { -      chrome.extension.sendRequest({ handler: 'selectSpecificTab', id: id }); -    } -    return [open, open, open]; -  } - -  /** Creates an file-internal representation of a URL match with the given paramters */ -  function createCompletionHtml(type, str, title) { -    title = title || ''; -    // sanitize input, it could come from a malicious web site -    title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : ''; -    return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title; -  } - -  /** Renders a completion by marking fuzzy-matched parts. */ -  function renderFuzzy(query, html) { -    // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the -    // tags and reinsert them after the matching process -    var htmlTags = {}; -    var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); - -    html = []; -    var htmlOffset = 0; - -    // this helper function adds the HTML generated _for one single character_ to the HTML output -    // and reinserts HTML tags stripped before, if they were at this position -    function addToHtml(str) { -      if (htmlOffset in htmlTags) -        html.push(htmlTags[htmlOffset]); -      html.push(str); -      ++htmlOffset; -    } - -    function addCharsWithDecoration(str, before, after) { -      before = before || ''; -      after = after || ''; -      for (var i = 0; i < str.length; ++i) -        addToHtml(before + str[i] + after); -    } - -    // iterate over the match groups. They are non-matched and matched string parts, in alternating order -    for (var i = 0; i < groups.length; ++i) { -      if (i % 2 == 0) -        // we have a non-matched part, it could have several characters. We need to insert them character -        // by character, so that addToHtml can keep track of the position in the original string -        addCharsWithDecoration(groups[i]); -      else -        // we have a matched part. In addition to the characters themselves, we add some decorating HTML. -        addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>'); -    }; - -    // call it another time so that a tag at the very last position is reinserted -    addToHtml(''); -    return html.join(''); -  } - -  /** A completion class that only holds a relevancy value and a function to get HTML and action -   * properties */ -  var LazyCompletion = function(relevancy, builder) { -    this.relevancy = relevancy; -    this.build = builder; -  } - -  /** Singleton object that provides fast access to the Chrome history */ -  var historyCache = (function() { -    var size = 20000; -    var cachedHistory = null; - -    function use(callback) { -      if (cachedHistory !== null) -        return callback(cachedHistory); - -      chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { -        // sorting in ascending order, so we can push new items to the end later -        history.sort(function(a, b) { -          return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); -        }); -        cachedHistory = history; -        callback(history); -      }); - -      chrome.history.onVisited.addListener(function(item) { -        // only cache newly visited sites -        if (item.visitCount === 1) -          cachedHistory.push(item); -      }); -    } - -    return { use: use }; -  })() -    /** Helper class to construct fuzzy completers for asynchronous data sources like history or bookmark     * matchers. */    var AsyncCompletionSource = function() { @@ -623,6 +321,305 @@ var completion = (function() {        });      }    } +  /** Singleton object that provides helpers and caching for fuzzy completion. */ +  var fuzzyMatcher = (function() { +    var self = {}; + +    self.timeToClean = 0; +    self.cacheSize = 1000; +    self.regexNonWord = /[\W_]/ig; + +    // cache generated regular expressions +    self.matcherCache = {}; +    // cache filtered results from recent queries +    self.filterCache = {}; +    self.normalizationCache = {}; + +    /** Normalizes the string specified in :query. Strips any non-word characters and converts +     * to lower case. */ +    self.normalize = function(query) { +      if (!(query in self.normalizationCache)) +        self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase(); +      return self.normalizationCache[query]; +    } + +    /** Returns the non-matching and matching string parts, in alternating order (starting with a +     * non-matching part) or null, if the string doesn't match the query. +     * +     * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"] +     * +     * Note that this function matches the longest possible parts of a string and is therefore not very +     * efficient. There it falls back to a more performant, but less accurate regex matching if the +     * normalized query is longer than 10 characters. +     * +     * _Don't use this to check if a string matches a query_. Use `getMatcher(query).test(str)` instead. +     */ +    self.getMatchGroups = function(query, str) { +      query = self.normalize(query); +      if (query.length == 0) +        return str.length ? [str] : []; +      if (query.length > 15) { +        // for long query strings, the method is much too inefficient, so fall +        // back to the less accurate regex matching +        return self.getMatcher(query).exec(str).slice(1); +      } + +      for (var i = query.length; i >= 1; --i) { +        var part = query.slice(0, i); +        var partOffset = str.toLowerCase().indexOf(part); +        if (partOffset < 0) +          continue; + +        // we use recursive backtracking here, this is why it's slow. +        rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i)); +        if (!rest) continue; + +        return [ +          str.slice(0, partOffset), +          part, +        ].concat(rest); +      } +      return null; +    } + +    /** Calculates a very simple similarity value between a :query and a :string */ +    self.calculateRelevancy = function(query, str) { +      query = self.normalize(query); +      str   = self.normalize(str); +      var sum = 0; + +      // only iterate over slices of the query starting at an offset up to 10 to save resources +      for (var start = 0; start < 20 && start < query.length; ++start) { +        for (var i = query.length; i >= start; --i) { +          if (str.indexOf(query.slice(start, i)) >= 0) { +            var length = i - start; +            sum += length * length; +            break; +          } +        } +      } +      return sum * sum * sum; +    } + +    /** Trims the size of the caches to the configured size using a FIFO algorithm. */ +    self.cleanCache = function() { +      // remove old cached regexes +      Object.keys(self.matcherCache).slice(self.cacheSize).forEach(function(query) { +        delete self.matcherCache[query]; +      }); +      // remove old cached normalization results +      Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) { +        delete self.normalizationCache[query]; +      }); +    } + +    /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result +     * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/ +     */ +    self.getMatcher = function(query) { +      query = self.normalize(query); +      if (!(query in self.matcherCache)) { +        // build up a regex for fuzzy matching. This is the fastest method I checked (faster than: +        // string building, splice, concat, multi-level join) +        var regex = ['^']; +        for (var i = 0; i < query.length; ++i) { +          regex.push('([^'); +          regex.push(query[i]); +          regex.push(']*)('); +          regex.push(query[i]); +          regex.push(')'); +        } +        regex.push('(.*)$'); +        self.matcherCache[query] = new RegExp(regex.join(''), 'i'); +      } +      return self.matcherCache[query]; +    } + +    /** Clear the cache for the given source, e.g. for refreshing */ +    self.invalidateFilterCache = function(id) { +      self.filterCache[id] = {}; +    } + +    /** Filters a collection :source using fuzzy matching against an input string :query. If a query with +     * a less specific query was issued before (e.g. if the user added a letter to the query), the cached +     * results of the last filtering are used as a starting point, instead of :source. +     */ +    self.filter = function(query, source, getValue, id) { +      if (!(id in self.filterCache)) +        self.filterCache[id] = {}; + +      // find the most narrow list of results in the cache +      var optSpecificity = source.length; +      var specificity; +      for (key in self.filterCache[id]) { +        if (!self.filterCache[id].hasOwnProperty(key)) +          continue; + +        if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) { +          // cache entry no longer needed +          delete self.filterCache[id][key]; +          continue; +        } + +        // is this a plausible result set to use as a source? +        if (query.indexOf(key) < 0) +          continue; + +        // is this cache entry the most specific so far? +        specificity = self.filterCache[id][key].length; +        if (specificity < optSpecificity) { +          source = self.filterCache[id][key]; +          optSpecificity = specificity; +        } +      } + +      // don't clean up the caches every iteration +      if (++self.timeToClean > 100) { +        self.timeToClean = 0; +        self.cleanCache(); +      } + +      var matcher = self.getMatcher(query); +      var filtered = source.filter(function(x) { return matcher.test(getValue(x)) }); +      self.filterCache[id][query] = filtered; +      return filtered; +    } + +    return self; +  })(); + +  var htmlRegex = /<[^>]*>|&[a-z]+;/gi; + +  /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped +   * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */ +  function stripHtmlTags(str, positions) { +    if (!positions) +      return str.replace(htmlRegex, ''); + +    var match = str.match(htmlRegex); +    if (!match) return; +    match.reverse(); +    var split = str.split(htmlRegex); +    var offset = 0; +    var i = 0; +    split.forEach(function(text) { +      if (match.length > 0) +        positions[offset += text.length] = match.pop(); +    }); + +    return split.join(''); +  } + +  /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */ +  function createActionOpenUrl(url) { +    var open = function(newTab, selected) { +      return function() { +        chrome.extension.sendRequest({ +          handler:  newTab ? "openUrlInNewTab" : "openUrlInCurrentTab", +          url:      url, +          selected: selected +        }); +      } +    } + +    if (url.indexOf("javascript:") == 0) +      return [ open(false), open(false), open(false) ]; +    else +      return [ open(false), open(true, true), open(true, false) ]; +  } + +  /** Returns an action that switches to the tab with the given :id. */ +  function createActionSwitchToTab(id) { +    var open = function() { +      chrome.extension.sendRequest({ handler: 'selectSpecificTab', id: id }); +    } +    return [open, open, open]; +  } + +  /** Creates an file-internal representation of a URL match with the given paramters */ +  function createCompletionHtml(type, str, title) { +    title = title || ''; +    // sanitize input, it could come from a malicious web site +    title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : ''; +    return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title; +  } + +  /** Renders a completion by marking fuzzy-matched parts. */ +  function renderFuzzy(query, html) { +    // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the +    // tags and reinsert them after the matching process +    var htmlTags = {}; +    var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags)); + +    html = []; +    var htmlOffset = 0; + +    // this helper function adds the HTML generated _for one single character_ to the HTML output +    // and reinserts HTML tags stripped before, if they were at this position +    function addToHtml(str) { +      if (htmlOffset in htmlTags) +        html.push(htmlTags[htmlOffset]); +      html.push(str); +      ++htmlOffset; +    } + +    function addCharsWithDecoration(str, before, after) { +      before = before || ''; +      after = after || ''; +      for (var i = 0; i < str.length; ++i) +        addToHtml(before + str[i] + after); +    } + +    // iterate over the match groups. They are non-matched and matched string parts, in alternating order +    for (var i = 0; i < groups.length; ++i) { +      if (i % 2 == 0) +        // we have a non-matched part, it could have several characters. We need to insert them character +        // by character, so that addToHtml can keep track of the position in the original string +        addCharsWithDecoration(groups[i]); +      else +        // we have a matched part. In addition to the characters themselves, we add some decorating HTML. +        addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>'); +    }; + +    // call it another time so that a tag at the very last position is reinserted +    addToHtml(''); +    return html.join(''); +  } + +  /** A completion class that only holds a relevancy value and a function to get HTML and action +   * properties */ +  var LazyCompletion = function(relevancy, builder) { +    this.relevancy = relevancy; +    this.build = builder; +  } + +  /** Singleton object that provides fast access to the Chrome history */ +  var historyCache = (function() { +    var size = 20000; +    var cachedHistory = null; + +    function use(callback) { +      if (cachedHistory !== null) +        return callback(cachedHistory); + +      chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) { +        // sorting in ascending order, so we can push new items to the end later +        history.sort(function(a, b) { +          return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0); +        }); +        cachedHistory = history; +        callback(history); +      }); + +      chrome.history.onVisited.addListener(function(item) { +        // only cache newly visited sites +        if (item.visitCount === 1) +          cachedHistory.push(item); +      }); +    } + +    return { use: use }; +  })();    // public interface    return { | 
