reorder source

author: Phil Crosby 2012-04-29 21:32:11 -0700
committer: Phil Crosby 2012-04-29 21:32:11 -0700
commit: a1af6bedf1daa378970e62ebaf55301e7539eb2e (patch)
tree: 338ffa673d29fc35299d6a26545ca192b4ee05a5 /lib
parent: 629e74c5271d25ef56cc91005954297afa0d3000 (diff)
download: vimium-a1af6bedf1daa378970e62ebaf55301e7539eb2e.tar.bz2
1 files changed, 299 insertions, 302 deletions
diff --git a/lib/completion.js b/lib/completion.js
index f62d3a58..3209a2f4 100644
--- a/lib/completion.js
+++ b/lib/completion.js
@@ -1,307 +1,5 @@
 var completion = (function() {
 
-  //============ Helper functions and objects ============//
-
-  /** Singleton object that provides helpers and caching for fuzzy completion. */
-  var fuzzyMatcher = (function() {
-    var self = {};
-
-    self.timeToClean = 0;
-    self.cacheSize = 1000;
-    self.regexNonWord = /[\W_]/ig;
-
-    // cache generated regular expressions
-    self.matcherCache = {};
-    // cache filtered results from recent queries
-    self.filterCache = {};
-    self.normalizationCache = {};
-
-    /** Normalizes the string specified in :query. Strips any non-word characters and converts
-     * to lower case. */
-    self.normalize = function(query) {
-      if (!(query in self.normalizationCache))
-        self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase();
-      return self.normalizationCache[query];
-    }
-
-    /** Returns the non-matching and matching string parts, in alternating order (starting with a
-     * non-matching part) or null, if the string doesn't match the query.
-     *
-     * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"]
-     *
-     * Note that this function matches the longest possible parts of a string and is therefore not very
-     * efficient. There it falls back to a more performant, but less accurate regex matching if the
-     * normalized query is longer than 10 characters.
-     *
-     * _Don't use this to check if a string matches a query_. Use `getMatcher(query).test(str)` instead.
-     */
-    self.getMatchGroups = function(query, str) {
-      query = self.normalize(query);
-      if (query.length == 0)
-        return str.length ? [str] : [];
-      if (query.length > 15) {
-        // for long query strings, the method is much too inefficient, so fall
-        // back to the less accurate regex matching
-        return self.getMatcher(query).exec(str).slice(1);
-      }
-
-      for (var i = query.length; i >= 1; --i) {
-        var part = query.slice(0, i);
-        var partOffset = str.toLowerCase().indexOf(part);
-        if (partOffset < 0)
-          continue;
-
-        // we use recursive backtracking here, this is why it's slow.
-        rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i));
-        if (!rest) continue;
-
-        return [
-          str.slice(0, partOffset),
-          part,
-        ].concat(rest);
-      }
-      return null;
-    }
-
-    /** Calculates a very simple similarity value between a :query and a :string */
-    self.calculateRelevancy = function(query, str) {
-      query = self.normalize(query);
-      str   = self.normalize(str);
-      var sum = 0;
-
-      // only iterate over slices of the query starting at an offset up to 10 to save resources
-      for (var start = 0; start < 20 && start < query.length; ++start) {
-        for (var i = query.length; i >= start; --i) {
-          if (str.indexOf(query.slice(start, i)) >= 0) {
-            var length = i - start;
-            sum += length * length;
-            break;
-          }
-        }
-      }
-      return sum * sum * sum;
-    }
-
-    /** Trims the size of the caches to the configured size using a FIFO algorithm. */
-    self.cleanCache = function() {
-      // remove old cached regexes
-      Object.keys(self.matcherCache).slice(self.cacheSize).forEach(function(query) {
-        delete self.matcherCache[query];
-      });
-      // remove old cached normalization results
-      Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) {
-        delete self.normalizationCache[query];
-      });
-    }
-
-    /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result
-     * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/
-     */
-    self.getMatcher = function(query) {
-      query = self.normalize(query);
-      if (!(query in self.matcherCache)) {
-        // build up a regex for fuzzy matching. This is the fastest method I checked (faster than:
-        // string building, splice, concat, multi-level join)
-        var regex = ['^'];
-        for (var i = 0; i < query.length; ++i) {
-          regex.push('([^');
-          regex.push(query[i]);
-          regex.push(']*)(');
-          regex.push(query[i]);
-          regex.push(')');
-        }
-        regex.push('(.*)$');
-        self.matcherCache[query] = new RegExp(regex.join(''), 'i');
-      }
-      return self.matcherCache[query];
-    }
-
-    /** Clear the cache for the given source, e.g. for refreshing */
-    self.invalidateFilterCache = function(id) {
-      self.filterCache[id] = {};
-    }
-
-    /** Filters a collection :source using fuzzy matching against an input string :query. If a query with
-     * a less specific query was issued before (e.g. if the user added a letter to the query), the cached
-     * results of the last filtering are used as a starting point, instead of :source.
-     */
-    self.filter = function(query, source, getValue, id) {
-      if (!(id in self.filterCache))
-        self.filterCache[id] = {};
-
-      // find the most narrow list of results in the cache
-      var optSpecificity = source.length;
-      var specificity;
-      for (key in self.filterCache[id]) {
-        if (!self.filterCache[id].hasOwnProperty(key))
-          continue;
-
-        if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) {
-          // cache entry no longer needed
-          delete self.filterCache[id][key];
-          continue;
-        }
-
-        // is this a plausible result set to use as a source?
-        if (query.indexOf(key) < 0)
-          continue;
-
-        // is this cache entry the most specific so far?
-        specificity = self.filterCache[id][key].length;
-        if (specificity < optSpecificity) {
-          source = self.filterCache[id][key];
-          optSpecificity = specificity;
-        }
-      }
-
-      // don't clean up the caches every iteration
-      if (++self.timeToClean > 100) {
-        self.timeToClean = 0;
-        self.cleanCache();
-      }
-
-      var matcher = self.getMatcher(query);
-      var filtered = source.filter(function(x) { return matcher.test(getValue(x)) });
-      self.filterCache[id][query] = filtered;
-      return filtered;
-    }
-
-    return self;
-  })();
-
-  var htmlRegex = /<[^>]*>|&[a-z]+;/gi;
-
-  /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped
-   * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */
-  function stripHtmlTags(str, positions) {
-    if (!positions)
-      return str.replace(htmlRegex, '');
-
-    var match = str.match(htmlRegex);
-    if (!match) return;
-    match.reverse();
-    var split = str.split(htmlRegex);
-    var offset = 0;
-    var i = 0;
-    split.forEach(function(text) {
-      if (match.length > 0)
-        positions[offset += text.length] = match.pop();
-    });
-
-    return split.join('');
-  }
-
-  /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */
-  function createActionOpenUrl(url) {
-    var open = function(newTab, selected) {
-      return function() {
-        chrome.extension.sendRequest({
-          handler:  newTab ? "openUrlInNewTab" : "openUrlInCurrentTab",
-          url:      url,
-          selected: selected
-        });
-      }
-    }
-
-    if (url.indexOf("javascript:") == 0)
-      return [ open(false), open(false), open(false) ];
-    else
-      return [ open(false), open(true, true), open(true, false) ];
-  }
-
-  /** Returns an action that switches to the tab with the given :id. */
-  function createActionSwitchToTab(id) {
-    var open = function() {
-      chrome.extension.sendRequest({ handler: 'selectSpecificTab', id: id });
-    }
-    return [open, open, open];
-  }
-
-  /** Creates an file-internal representation of a URL match with the given paramters */
-  function createCompletionHtml(type, str, title) {
-    title = title || '';
-    // sanitize input, it could come from a malicious web site
-    title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : '';
-    return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title;
-  }
-
-  /** Renders a completion by marking fuzzy-matched parts. */
-  function renderFuzzy(query, html) {
-    // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the
-    // tags and reinsert them after the matching process
-    var htmlTags = {};
-    var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags));
-
-    html = [];
-    var htmlOffset = 0;
-
-    // this helper function adds the HTML generated _for one single character_ to the HTML output
-    // and reinserts HTML tags stripped before, if they were at this position
-    function addToHtml(str) {
-      if (htmlOffset in htmlTags)
-        html.push(htmlTags[htmlOffset]);
-      html.push(str);
-      ++htmlOffset;
-    }
-
-    function addCharsWithDecoration(str, before, after) {
-      before = before || '';
-      after = after || '';
-      for (var i = 0; i < str.length; ++i)
-        addToHtml(before + str[i] + after);
-    }
-
-    // iterate over the match groups. They are non-matched and matched string parts, in alternating order
-    for (var i = 0; i < groups.length; ++i) {
-      if (i % 2 == 0)
-        // we have a non-matched part, it could have several characters. We need to insert them character
-        // by character, so that addToHtml can keep track of the position in the original string
-        addCharsWithDecoration(groups[i]);
-      else
-        // we have a matched part. In addition to the characters themselves, we add some decorating HTML.
-        addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>');
-    };
-
-    // call it another time so that a tag at the very last position is reinserted
-    addToHtml('');
-    return html.join('');
-  }
-
-  /** A completion class that only holds a relevancy value and a function to get HTML and action
-   * properties */
-  var LazyCompletion = function(relevancy, builder) {
-    this.relevancy = relevancy;
-    this.build = builder;
-  }
-
-  /** Singleton object that provides fast access to the Chrome history */
-  var historyCache = (function() {
-    var size = 20000;
-    var cachedHistory = null;
-
-    function use(callback) {
-      if (cachedHistory !== null)
-        return callback(cachedHistory);
-
-      chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) {
-        // sorting in ascending order, so we can push new items to the end later
-        history.sort(function(a, b) {
-          return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0);
-        });
-        cachedHistory = history;
-        callback(history);
-      });
-
-      chrome.history.onVisited.addListener(function(item) {
-        // only cache newly visited sites
-        if (item.visitCount === 1)
-          cachedHistory.push(item);
-      });
-    }
-
-    return { use: use };
-  })()
-
   /** Helper class to construct fuzzy completers for asynchronous data sources like history or bookmark
    * matchers. */
   var AsyncCompletionSource = function() {
@@ -623,6 +321,305 @@ var completion = (function() {
       });
     }
   }
+  /** Singleton object that provides helpers and caching for fuzzy completion. */
+  var fuzzyMatcher = (function() {
+    var self = {};
+
+    self.timeToClean = 0;
+    self.cacheSize = 1000;
+    self.regexNonWord = /[\W_]/ig;
+
+    // cache generated regular expressions
+    self.matcherCache = {};
+    // cache filtered results from recent queries
+    self.filterCache = {};
+    self.normalizationCache = {};
+
+    /** Normalizes the string specified in :query. Strips any non-word characters and converts
+     * to lower case. */
+    self.normalize = function(query) {
+      if (!(query in self.normalizationCache))
+        self.normalizationCache[query] = query.replace(self.regexNonWord, '').toLowerCase();
+      return self.normalizationCache[query];
+    }
+
+    /** Returns the non-matching and matching string parts, in alternating order (starting with a
+     * non-matching part) or null, if the string doesn't match the query.
+     *
+     * Sample: match("codecodec","code.google.com/codec") would yield ["", "code", ".google.com/", "codec"]
+     *
+     * Note that this function matches the longest possible parts of a string and is therefore not very
+     * efficient. There it falls back to a more performant, but less accurate regex matching if the
+     * normalized query is longer than 10 characters.
+     *
+     * _Don't use this to check if a string matches a query_. Use `getMatcher(query).test(str)` instead.
+     */
+    self.getMatchGroups = function(query, str) {
+      query = self.normalize(query);
+      if (query.length == 0)
+        return str.length ? [str] : [];
+      if (query.length > 15) {
+        // for long query strings, the method is much too inefficient, so fall
+        // back to the less accurate regex matching
+        return self.getMatcher(query).exec(str).slice(1);
+      }
+
+      for (var i = query.length; i >= 1; --i) {
+        var part = query.slice(0, i);
+        var partOffset = str.toLowerCase().indexOf(part);
+        if (partOffset < 0)
+          continue;
+
+        // we use recursive backtracking here, this is why it's slow.
+        rest = self.getMatchGroups(query.slice(i), str.slice(partOffset + i));
+        if (!rest) continue;
+
+        return [
+          str.slice(0, partOffset),
+          part,
+        ].concat(rest);
+      }
+      return null;
+    }
+
+    /** Calculates a very simple similarity value between a :query and a :string */
+    self.calculateRelevancy = function(query, str) {
+      query = self.normalize(query);
+      str   = self.normalize(str);
+      var sum = 0;
+
+      // only iterate over slices of the query starting at an offset up to 10 to save resources
+      for (var start = 0; start < 20 && start < query.length; ++start) {
+        for (var i = query.length; i >= start; --i) {
+          if (str.indexOf(query.slice(start, i)) >= 0) {
+            var length = i - start;
+            sum += length * length;
+            break;
+          }
+        }
+      }
+      return sum * sum * sum;
+    }
+
+    /** Trims the size of the caches to the configured size using a FIFO algorithm. */
+    self.cleanCache = function() {
+      // remove old cached regexes
+      Object.keys(self.matcherCache).slice(self.cacheSize).forEach(function(query) {
+        delete self.matcherCache[query];
+      });
+      // remove old cached normalization results
+      Object.keys(self.normalizationCache).slice(self.cacheSize).forEach(function(query) {
+        delete self.normalizationCache[query];
+      });
+    }
+
+    /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result
+     * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/
+     */
+    self.getMatcher = function(query) {
+      query = self.normalize(query);
+      if (!(query in self.matcherCache)) {
+        // build up a regex for fuzzy matching. This is the fastest method I checked (faster than:
+        // string building, splice, concat, multi-level join)
+        var regex = ['^'];
+        for (var i = 0; i < query.length; ++i) {
+          regex.push('([^');
+          regex.push(query[i]);
+          regex.push(']*)(');
+          regex.push(query[i]);
+          regex.push(')');
+        }
+        regex.push('(.*)$');
+        self.matcherCache[query] = new RegExp(regex.join(''), 'i');
+      }
+      return self.matcherCache[query];
+    }
+
+    /** Clear the cache for the given source, e.g. for refreshing */
+    self.invalidateFilterCache = function(id) {
+      self.filterCache[id] = {};
+    }
+
+    /** Filters a collection :source using fuzzy matching against an input string :query. If a query with
+     * a less specific query was issued before (e.g. if the user added a letter to the query), the cached
+     * results of the last filtering are used as a starting point, instead of :source.
+     */
+    self.filter = function(query, source, getValue, id) {
+      if (!(id in self.filterCache))
+        self.filterCache[id] = {};
+
+      // find the most narrow list of results in the cache
+      var optSpecificity = source.length;
+      var specificity;
+      for (key in self.filterCache[id]) {
+        if (!self.filterCache[id].hasOwnProperty(key))
+          continue;
+
+        if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) {
+          // cache entry no longer needed
+          delete self.filterCache[id][key];
+          continue;
+        }
+
+        // is this a plausible result set to use as a source?
+        if (query.indexOf(key) < 0)
+          continue;
+
+        // is this cache entry the most specific so far?
+        specificity = self.filterCache[id][key].length;
+        if (specificity < optSpecificity) {
+          source = self.filterCache[id][key];
+          optSpecificity = specificity;
+        }
+      }
+
+      // don't clean up the caches every iteration
+      if (++self.timeToClean > 100) {
+        self.timeToClean = 0;
+        self.cleanCache();
+      }
+
+      var matcher = self.getMatcher(query);
+      var filtered = source.filter(function(x) { return matcher.test(getValue(x)) });
+      self.filterCache[id][query] = filtered;
+      return filtered;
+    }
+
+    return self;
+  })();
+
+  var htmlRegex = /<[^>]*>|&[a-z]+;/gi;
+
+  /** Strips HTML tags and entities using a naive regex replacement. Optionally, saves the stripped
+   * HTML tags in a dictionary indexed by the position where the tag should be reinserted. */
+  function stripHtmlTags(str, positions) {
+    if (!positions)
+      return str.replace(htmlRegex, '');
+
+    var match = str.match(htmlRegex);
+    if (!match) return;
+    match.reverse();
+    var split = str.split(htmlRegex);
+    var offset = 0;
+    var i = 0;
+    split.forEach(function(text) {
+      if (match.length > 0)
+        positions[offset += text.length] = match.pop();
+    });
+
+    return split.join('');
+  }
+
+  /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */
+  function createActionOpenUrl(url) {
+    var open = function(newTab, selected) {
+      return function() {
+        chrome.extension.sendRequest({
+          handler:  newTab ? "openUrlInNewTab" : "openUrlInCurrentTab",
+          url:      url,
+          selected: selected
+        });
+      }
+    }
+
+    if (url.indexOf("javascript:") == 0)
+      return [ open(false), open(false), open(false) ];
+    else
+      return [ open(false), open(true, true), open(true, false) ];
+  }
+
+  /** Returns an action that switches to the tab with the given :id. */
+  function createActionSwitchToTab(id) {
+    var open = function() {
+      chrome.extension.sendRequest({ handler: 'selectSpecificTab', id: id });
+    }
+    return [open, open, open];
+  }
+
+  /** Creates an file-internal representation of a URL match with the given paramters */
+  function createCompletionHtml(type, str, title) {
+    title = title || '';
+    // sanitize input, it could come from a malicious web site
+    title = title.length > 0 ? ' <span class="title">' + utils.escapeHtml(title) + '</span>' : '';
+    return '<em>' + type + '</em> ' + utils.escapeHtml(str) + title;
+  }
+
+  /** Renders a completion by marking fuzzy-matched parts. */
+  function renderFuzzy(query, html) {
+    // we want to match the content in HTML tags, but not the HTML tags themselves, so we remove the
+    // tags and reinsert them after the matching process
+    var htmlTags = {};
+    var groups = fuzzyMatcher.getMatchGroups(query, stripHtmlTags(html, htmlTags));
+
+    html = [];
+    var htmlOffset = 0;
+
+    // this helper function adds the HTML generated _for one single character_ to the HTML output
+    // and reinserts HTML tags stripped before, if they were at this position
+    function addToHtml(str) {
+      if (htmlOffset in htmlTags)
+        html.push(htmlTags[htmlOffset]);
+      html.push(str);
+      ++htmlOffset;
+    }
+
+    function addCharsWithDecoration(str, before, after) {
+      before = before || '';
+      after = after || '';
+      for (var i = 0; i < str.length; ++i)
+        addToHtml(before + str[i] + after);
+    }
+
+    // iterate over the match groups. They are non-matched and matched string parts, in alternating order
+    for (var i = 0; i < groups.length; ++i) {
+      if (i % 2 == 0)
+        // we have a non-matched part, it could have several characters. We need to insert them character
+        // by character, so that addToHtml can keep track of the position in the original string
+        addCharsWithDecoration(groups[i]);
+      else
+        // we have a matched part. In addition to the characters themselves, we add some decorating HTML.
+        addCharsWithDecoration(groups[i], '<span class="fuzzyMatch">', '</span>');
+    };
+
+    // call it another time so that a tag at the very last position is reinserted
+    addToHtml('');
+    return html.join('');
+  }
+
+  /** A completion class that only holds a relevancy value and a function to get HTML and action
+   * properties */
+  var LazyCompletion = function(relevancy, builder) {
+    this.relevancy = relevancy;
+    this.build = builder;
+  }
+
+  /** Singleton object that provides fast access to the Chrome history */
+  var historyCache = (function() {
+    var size = 20000;
+    var cachedHistory = null;
+
+    function use(callback) {
+      if (cachedHistory !== null)
+        return callback(cachedHistory);
+
+      chrome.history.search({ text: '', maxResults: size, startTime: 0 }, function(history) {
+        // sorting in ascending order, so we can push new items to the end later
+        history.sort(function(a, b) {
+          return (a.lastVisitTime|| 0) - (b.lastVisitTime || 0);
+        });
+        cachedHistory = history;
+        callback(history);
+      });
+
+      chrome.history.onVisited.addListener(function(item) {
+        // only cache newly visited sites
+        if (item.visitCount === 1)
+          cachedHistory.push(item);
+      });
+    }
+
+    return { use: use };
+  })();
 
   // public interface
   return {
author	Phil Crosby	2012-04-29 21:32:11 -0700
committer	Phil Crosby	2012-04-29 21:32:11 -0700
commit	a1af6bedf1daa378970e62ebaf55301e7539eb2e (patch)
tree	338ffa673d29fc35299d6a26545ca192b4ee05a5 /lib
parent	629e74c5271d25ef56cc91005954297afa0d3000 (diff)
download	vimium-a1af6bedf1daa378970e62ebaf55301e7539eb2e.tar.bz2