add fuzzy mode

author: Niklas Baumstark 2012-01-21 00:23:59 +0100
committer: Niklas Baumstark 2012-04-10 23:54:35 +0200
commit: 6f589fedcc826b125884e3a5884c9791802afb7f (patch)
tree: eb117a99558b5456b0367157a09a12f887db8630 /lib
parent: 269042a28230bb35406d1447fac8955ca1a5c0b3 (diff)
download: vimium-6f589fedcc826b125884e3a5884c9791802afb7f.tar.bz2
3 files changed, 375 insertions, 19 deletions
diff --git a/lib/completion.js b/lib/completion.js
new file mode 100644
index 00000000..cad7d455
--- /dev/null
+++ b/lib/completion.js
@@ -0,0 +1,324 @@
+var completion = (function() {
+
+  //============ Helper functions and objects ============//
+
+  /** Singleton object that provides helpers and caching for fuzzy completion. */
+  var fuzzyMatcher = (function() {
+    var self = {};
+
+    self.matcherCacheSize = 300;
+    self.regexNonWord = /\W*/g;
+
+    // cache generated regular expressions
+    self.matcherCache = {};
+    // cache filtered results from recent queries
+    self.filterCache = {};
+
+    /** Normalizes the string specified in :query. Strips any non-word characters and converts
+      * to lower case. */
+    self.normalize = function(query) {
+      return query.replace(self.regexNonWord, '').toLowerCase();
+    }
+
+    /** Calculates a very simple similarity value between a :query and a :string. The current
+      * implementation simply returns the length of the longest prefix of :query that is found within :str.
+      */
+    self.calculateRelevancy = function(query, str) {
+      query = self.normalize(query);
+      str   = self.normalize(str);
+      for (var i = query.length; i >= 0; --i) {
+        if (str.indexOf(query.slice(0, i)) >= 0)
+          return i;
+      }
+      return 0;
+    }
+
+    /** Trims the size of the regex cache to the configured size using a FIFO algorithm. */
+    self.cleanMatcherCache = function() {
+      // remove old matchers
+      queries = Object.keys(self.matcherCache);
+      for (var i = 0; i < queries.length - self.matcherCacheSize; ++i)
+        delete self.matcherCache(queries[i]);
+    }
+
+    /** Returns a regex that matches a string using a fuzzy :query. Example: The :query "abc" would result
+      * in a regex like /^([^a])*(a)([^b])*(b)([^c])*(c)(.*)$/
+      */
+    self.getMatcher = function(query) {
+      query = self.normalize(query);
+      if (!(query in self.matcherCache)) {
+        // build up a regex for fuzzy matching
+        var regex = '^';
+        for (var i = 0; i < query.length; ++i)
+          regex += '([^' + query[i] + ']*)(' + query[i] + ')';
+        self.matcherCache[query] = new RegExp(regex + '(.*)$', 'i');
+      }
+      return self.matcherCache[query];
+    }
+
+    /** Clears the filter cache with the given ID. */
+    self.clearFilterCache = function(id) {
+      if (id in self.filterCache)
+        delete self.filterCache[id];
+    }
+
+    /** Filters a list :ary using fuzzy matching against an input string :query. If a query with a less
+      * specific query was issued before (e.g. if the user added a letter to the query), the cached results
+      * of the last filtering are used as a starting point, instead of :ary.
+      */
+    self.filter = function(query, ary, getValue, id, callback) {
+      var filtered = [];
+      var source = ary;
+
+      if (!(id in self.filterCache))
+        self.filterCache[id] = {};
+
+      // find the most specific list of sources in the cache
+      var maxSpecificity = 0;
+      for (key in self.filterCache[id]) {
+        if (!self.filterCache[id].hasOwnProperty(key))
+          continue;
+
+        if ((query.indexOf(key) != 0 && key.indexOf(query) != 0) || key.length > query.length) {
+          // cache entry no longer needed
+          delete self.filterCache[id][key];
+          continue;
+        }
+
+        // is this cache entry the most specific so far?
+        var specificity = self.filterCache[id][key].length;
+        if (query.indexOf(key) == 0 && specificity > maxSpecificity) {
+          source = self.filterCache[id][key];
+          maxSpecificity = specificity;
+        }
+      }
+
+      // clean up
+      self.cleanMatcherCache();
+
+      var matcher = self.getMatcher(query);
+      for (var i = 0; i < source.length; ++i) {
+        if (!matcher.test(getValue(source[i])))
+          continue;
+        filtered.push(source[i]);
+        callback(source[i]);
+      }
+      self.filterCache[id][query] = filtered;
+    }
+
+    return self;
+  })();
+
+  /** Creates an action that opens :url in the current tab by default or in a new tab as an alternative. */
+  function createActionOpenUrl(url) {
+    return [
+      function() { window.location = url; },
+      function() { window.open(url); },
+    ]
+  }
+
+  /** Creates a completion that renders by marking fuzzy-matched parts. */
+  function createHighlightingCompletion(query, str, action, relevancy) {
+    return {
+      render: function() {
+        var match = fuzzyMatcher.getMatcher(query).exec(str);
+        var html = '';
+        for (var i = 1; i < match.length; ++i) {
+          if (i % 2 == 1)
+            html += match[i];
+          else
+            html += '<strong>' + match[i] + '</strong>';
+        };
+        return html;
+      },
+      action: action,
+      relevancy: relevancy,
+    }
+  }
+
+  /** Helper class to construct fuzzy completers for asynchronous data sources like history or bookmark
+   * matchers. */
+  var AsyncFuzzyUrlCompleter = function() {
+    this.completions = null;
+    this.id = utils.createUniqueId();
+    this.readyCallback = function(results) {
+      this.completions = results;
+    }
+  }
+  AsyncFuzzyUrlCompleter.prototype = {
+    // to be implemented by subclasses
+    refresh: function() { },
+
+    calculateRelevancy: function(query, match) {
+      return match.url.length * 10 / (fuzzyMatcher.calculateRelevancy(query, match.str)+1);
+    },
+
+    filter: function(query, callback) {
+      var self = this;
+
+      var handler = function(results) {
+        var filtered = [];
+        fuzzyMatcher.filter(query,
+                            self.completions, function(comp) { return comp.str },
+                            self.id,
+                            function(match) {
+          filtered.push(createHighlightingCompletion(
+                query, match.str,
+                createActionOpenUrl(match.url),
+                self.calculateRelevancy(query, match)));
+        });
+        callback(filtered);
+      }
+
+      // are the results ready?
+      if (this.completions !== null) {
+        // yes: call the callback synchronously
+        handler(this.completions);
+      } else {
+        // no: push our handler to the handling chain
+        var oldReadyCallback = this.readyCallback;
+        this.readyCallback = function(results) {
+          handler(results);
+          this.readyCallback = oldReadyCallback;
+          oldReadyCallback(results);
+        }
+      }
+    },
+  }
+
+  //========== Completer implementations ===========//
+
+  /** A simple completer that suggests to open the input string as an URL or to trigger a web search for the
+   * given term, depending on whether it thinks the input is an URL or not. */
+  var SmartCompleter = function() {
+    this.refresh = function() { };
+
+    this.filter = function(query, callback) {
+      var url;
+      var str;
+
+      if (utils.isUrl(query)) {
+        url = utils.createFullUrl(query);
+        str = '<em>goto</em> ' + url;
+      } else {
+        url = utils.createSearchUrl(query);
+        str = '<em>search</em> ' + query;
+      }
+
+      // call back with exactly one suggestion
+      callback([{
+        render: function() { return str; },
+        action: createActionOpenUrl(url),
+        // relevancy will always be the lowest one, so the suggestion is at the top
+        relevancy: -1,
+      }]);
+    };
+  }
+
+  /** A fuzzy history completer */
+  var FuzzyHistoryCompleter = function(maxEntries) {
+    AsyncFuzzyUrlCompleter.call(this);
+    this.maxEntries = maxEntries || 1000;
+  }
+  FuzzyHistoryCompleter.prototype = new AsyncFuzzyUrlCompleter;
+  FuzzyHistoryCompleter.prototype.refresh = function() {
+    this.completions = null; // reset completions
+
+    // asynchronously fetch history items
+    var port = chrome.extension.connect({ name: "getHistory" }) ;
+    var self = this;
+    port.onMessage.addListener(function(msg) {
+      var results = [];
+
+      for (var i = 0; i < msg.history.length; ++i) {
+        var historyItem = msg.history[i];
+        var title = '';
+
+        if (historyItem.title.length > 0)
+          title = ' (' + historyItem.title + ')';
+
+        results.push({
+          str: 'history: ' + historyItem.url + title,
+          url: historyItem.url,
+        });
+      }
+      port = null;
+      self.readyCallback(results);
+    });
+    port.postMessage({ maxResults: this.maxEntries });
+  }
+
+  /** A fuzzy bookmark completer */
+  var FuzzyBookmarkCompleter = function() {
+    AsyncFuzzyUrlCompleter.call(this);
+  }
+  FuzzyBookmarkCompleter.prototype = new AsyncFuzzyUrlCompleter;
+  FuzzyBookmarkCompleter.prototype.refresh = function() {
+    this.completions = null; // reset completions
+
+    var port = chrome.extension.connect({ name: "getAllBookmarks" }) ;
+    var self = this;
+    port.onMessage.addListener(function(msg) {
+      var results = [];
+
+      for (var i = 0; i < msg.bookmarks.length; ++i) {
+        var bookmark = msg.bookmarks[i];
+
+        var title = '';
+        if (bookmark.title.length > 0)
+          title = ' (' + bookmark.title + ')';
+
+        results.push({
+          str: 'bookmark: ' + bookmark.url + title,
+          url: bookmark.url,
+        });
+      }
+      port = null;
+      self.readyCallback(results);
+    });
+    port.postMessage();
+  }
+
+  /** A meta-completer that delegates queries and merges and sorts the results of a collection of other
+   * completer instances. */
+  var MergingCompleter = function(sources) {
+    this.sources = sources;
+  }
+  MergingCompleter.prototype = {
+    refresh: function() {
+      for (var i = 0; i < this.sources.length; ++i)
+        this.sources[i].refresh();
+    },
+
+    filter: function(query, callback) {
+      var all = [];
+      var counter = this.sources.length;
+
+      for (var i = 0; i < this.sources.length; ++i) {
+        this.sources[i].filter(query, function(results) {
+          all = all.concat(results);
+          if (--counter > 0)
+            return;
+
+          // all sources have provided results by now, so we can sort and return
+          all.sort(function(a,b) {
+            return a.relevancy - b.relevancy;
+          });
+          for (var i = 0; i < all.length; ++i) {
+            if (!callback(all[i]))
+              // the caller doesn't want any more results
+              return;
+          }
+        });
+      }
+    }
+  }
+
+  // public interface
+  return {
+    FuzzyHistoryCompleter: FuzzyHistoryCompleter,
+    FuzzyBookmarkCompleter: FuzzyBookmarkCompleter,
+    SmartCompleter: SmartCompleter,
+    MergingCompleter: MergingCompleter,
+  };
+})();
diff --git a/lib/keyboardUtils.js b/lib/keyboardUtils.js
index 98725d95..84e860ba 100644
--- a/lib/keyboardUtils.js
+++ b/lib/keyboardUtils.js
@@ -47,7 +47,8 @@ function getKeyChar(event) {
       keyIdentifier = event.shiftKey ? correctedIdentifiers[1] : correctedIdentifiers[0];
     }
     var unicodeKeyInHex = "0x" + keyIdentifier.substring(2);
-    return String.fromCharCode(parseInt(unicodeKeyInHex)).toLowerCase();
+    var character = String.fromCharCode(parseInt(unicodeKeyInHex)).toLowerCase();
+    return event.shiftKey ? character.toUpperCase() : character;
 }
 
 function isPrimaryModifierKey(event) {
diff --git a/lib/utils.js b/lib/utils.js
index 8aada3a1..922e7db3 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -21,17 +21,26 @@ var utils = {
   },
 
   /**
-   * Creates a search URL from the given :query.
+   * Generates a unique ID
    */
-  createSearchUrl: function(query) {
-    return "http://www.google.com/search?q=" + query;
+  createUniqueId: (function() {
+    id = 0;
+    return function() { return ++id; };
+  })(),
+
+  /**
+   * Completes a partial URL (without scheme)
+   */
+  createFullUrl: function(partialUrl) {
+    if (!/^[a-z]{3,}:\/\//.test(partialUrl))
+      partialUrl = 'http://' + partialUrl;
+    return partialUrl
   },
 
   /**
-   * Tries to convert :str into a valid URL.
-   * We don't bother with escaping characters, however, as Chrome will do that for us.
+   * Tries to detect, whether :str is a valid URL.
    */
-  ensureUrl: function(str) {
+  isUrl: function(str) {
     // more or less RFC compliant URL host part parsing. This should be sufficient
     // for our needs
     var urlRegex = new RegExp(
@@ -55,38 +64,60 @@ var utils = {
 
     // it starts with a scheme, so it's definitely an URL
     if (/^[a-z]{3,}:\/\//.test(str))
-      return str;
-    var strWithScheme = 'http://' + str;
+      return true;
 
-    // definitely not a valid URL; treat as search query
+    // spaces => definitely not a valid URL
     if (str.indexOf(' ') >= 0)
-      return utils.createSearchUrl(str);
+      return false;
 
     // assuming that this is an URL, try to parse it into its meaningful parts. If matching fails, we're
     // pretty sure that we don't have some kind of URL here.
     var match = urlRegex.exec(str.split('/')[0]);
     if (!match)
-      return utils.createSearchUrl(str);
+      return false;
     var hostname = match[3];
 
     // allow known special host names
     if (specialHostNames.indexOf(hostname) >= 0)
-      return strWithScheme;
+      return true;
 
     // allow IPv6 addresses (need to be wrapped in brackets, as required by RFC).  It is sufficient to check
     // for a colon here, as the regex wouldn't match colons in the host name unless it's an v6 address
     if (hostname.indexOf(':') >= 0)
-      return strWithScheme;
+      return true;
 
     // at this point we have to make a decision. As a heuristic, we check if the input has dots in it. If
-    // yes, and if the last part could be a TLD, treat it as an URL
+    // yes, and if the last part could be a TLD, treat it as an URL.
     var dottedParts = hostname.split('.');
     var lastPart = dottedParts[dottedParts.length-1];
-    if (dottedParts.length > 1 && (lastPart.length <= 3 || longTlds.indexOf(lastPart) >= 0))
-      return strWithScheme;
+    if (dottedParts.length > 1 && ((lastPart.length >= 2 && lastPart.length <= 3)
+                                   || longTlds.indexOf(lastPart) >= 0))
+      return true;
+
+    // also allow IPv4 addresses
+    if (/^(\d{1,3}\.){3}\d{1,3}$/.test(hostname))
+      return true;
+
+    // fallback: no URL
+    return false
+  },
 
-    // fallback: use as search query
-    return utils.createSearchUrl(str);
+  /**
+   * Creates a search URL from the given :query.
+   */
+  createSearchUrl: function(query) {
+    return "http://www.google.com/search?q=" + query;
+  },
+
+  /**
+   * Tries to convert :str into a valid URL.
+   * We don't bother with escaping characters, however, as Chrome will do that for us.
+   */
+  ensureUrl: function(str) {
+    if (utils.isUrl(str))
+      return utils.createFullUrl(str);
+    else
+      return utils.createSearchUrl(str);
   },
 
 };
author	Niklas Baumstark	2012-01-21 00:23:59 +0100
committer	Niklas Baumstark	2012-04-10 23:54:35 +0200
commit	6f589fedcc826b125884e3a5884c9791802afb7f (patch)
tree	eb117a99558b5456b0367157a09a12f887db8630 /lib
parent	269042a28230bb35406d1447fac8955ca1a5c0b3 (diff)
download	vimium-6f589fedcc826b125884e3a5884c9791802afb7f.tar.bz2