Search completion; tweaks and refactoring.

author: Stephen Blott 2015-05-02 17:32:28 +0100
committer: Stephen Blott 2015-05-03 08:41:55 +0100
commit: ba4e8018e3d8cd80e0fa9ac541e37e7eee37028f (patch)
tree: dda565cc9f2d55009329d81405490d467764680b
parent: 8329f3cbe95e6a39e500aa15e54c6c44fad9cb7e (diff)
download: vimium-ba4e8018e3d8cd80e0fa9ac541e37e7eee37028f.tar.bz2
4 files changed, 93 insertions, 79 deletions
diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee
index cb5f64b0..5b5dc191 100644
--- a/background_scripts/completion.coffee
+++ b/background_scripts/completion.coffee
@@ -337,22 +337,32 @@ class SearchEngineCompleter
 
     type = if description? then description else "search"
     searchUrl = if custom then url else Settings.get "searchUrl"
-    query = queryTerms[1..].join " "
+    query = queryTerms[(if custom then 1 else 0)..].join " "
 
     # For custom search engines, we add an auto-selected suggestion.
     if custom
       title = if description? then query else queryTerms[0] + ": " + query
-      suggestions.push @mkSuggestion false, queryTerms, type, mkUrl(query), description, @computeRelevancy
+      suggestions.push @mkSuggestion false, queryTerms, type, mkUrl(query), description, @computeRelevancy, 1
       suggestions[0].autoSelect = true
-      suggestions[0].relevancyScore = 1
       queryTerms = queryTerms[1..]
 
     # For custom search-engine queries, this adds suggestions only if we have a completer.  For other queries,
     # this adds suggestions for the default search engine (if we have a completer for that).
-    SearchEngines.complete searchUrl, queryTerms, (newSuggestions = []) =>
+    SearchEngines.complete searchUrl, queryTerms, (searchSuggestions = []) =>
+
+      # Scoring:
+      #   - The score does not depend upon the actual suggestion (so, it does not depend upon word relevancy).
+      #     We assume that the completion engine has already factored that in.
+      #   - The score is higher if the query is longer.  The idea is that search suggestions are more likely
+      #     to be relevant if, after typing a fair number of characters, the user hasn't yet found a
+      #     useful suggestion from another completer.
+      #   - Scores are weighted such that they retain the ordering provided by the completion engine.
       characterCount = query.length - queryTerms.length + 1
-      for suggestion in newSuggestions
-        suggestions.push @mkSuggestion true, queryTerms, type, mkUrl(suggestion), suggestion, @computeRelevancy, characterCount
+      score = 0.8 * (Math.min(characterCount, 12.0)/12.0)
+
+      for suggestion in searchSuggestions
+        suggestions.push @mkSuggestion true, queryTerms, type, mkUrl(suggestion), suggestion, @computeRelevancy, score
+        score *= 0.9
 
       if custom
         for suggestion in suggestions
@@ -365,15 +375,8 @@ class SearchEngineCompleter
     suggestion.insertText = insertText
     suggestion
 
-  computeRelevancy: (suggestion) ->
-    suggestion.relevancyScore ?
-      # We score search-engine completions by word relevancy, but weight the score increasingly as the number of
-      # characters in the query terms increases.  The idea is that, the more the user has had to type, the less
-      # likely it is that one of the other suggestion types has proven useful, so the more likely it is that
-      # this suggestion will be useful.
-      # NOTE(smblott) This will require tweaking.
-      (Math.min(suggestion.extraRelevancyData, 12)/12) *
-        RankingUtils.wordRelevancy suggestion.queryTerms, suggestion.title, suggestion.title
+  # The score is computed in filter() and provided here via suggestion.extraRelevancyData.
+  computeRelevancy: (suggestion) -> suggestion.extraRelevancyData
 
   refresh: ->
     @searchEngines = SearchEngineCompleter.getSearchEngines()
diff --git a/background_scripts/search_engines.coffee b/background_scripts/search_engines.coffee
index e68cf85d..3c6654e3 100644
--- a/background_scripts/search_engines.coffee
+++ b/background_scripts/search_engines.coffee
@@ -1,49 +1,54 @@
 
-matchesAnyRegexp = (regexps, string) ->
-  for re in regexps
-    return true if re.test string
-  false
-
-# Each completer implements three functions:
+# A completion engine provides search suggestions for a search engine.  A search engine is identified by a
+# "searchUrl", e.g. Settings.get("searchUrl"), or a custom search engine.
+#
+# Each completion engine defines three functions:
+#
+#   1. "match" - This takes a searchUrl, and returns a boolean indicating whether this completion engine can
+#      perform completion for the given search engine.
 #
-#   match:  can this completer be used for this search URL?
-#   getUrl: map these query terms to a completion URL.
-#   parse:  extract suggestions from the resulting (successful) XMLHttpRequest.
+#   2. "getUrl" - This takes a list of query terms (queryTerms) and generates a completion URL, that is, a URL
+#      which will provide completions for this completion engine.
 #
-class Google
+#   3. "parse" - This takes a successful XMLHttpRequest object (the request has completed successfully), and
+#      returns a list of suggestions (a list of strings).
+#
+# The main (only) completion entry point is SearchEngines.complete().  This implements all lookup and caching
+# logic.  It is possible to add new completion engines without changing the SearchEngines infrastructure
+# itself.
+
+# A base class for common regexp-based matching engines.
+class RegexpEngine
+  constructor: (@regexps) ->
+  match: (searchUrl) -> Utils.matchesAnyRegexp @regexps, searchUrl
+
+# Completion engine for English-language Google search.
+class Google extends RegexpEngine
   constructor: ->
-    @regexps = [
-      # We include matches for the major English-speaking TLDs.
+    super [
+      # We match the major English-speaking TLDs.
       new RegExp "^https?://[a-z]+\.google\.(com|ie|co.uk|ca|com.au)/"
-      # NOTE(smblott). A  temporary hack, just for me, and just for development. Will be removed.
-      new RegExp "localhost/.*/booky"
+      new RegExp "localhost/cgi-bin/booky" # Only for smblott.
       ]
 
-  name: "Google"
-  match: (searchUrl) -> matchesAnyRegexp @regexps, searchUrl
-
   getUrl: (queryTerms) ->
     "http://suggestqueries.google.com/complete/search?ss_protocol=legace&client=toolbar&q=#{Utils.createSearchQuery queryTerms}"
 
-  # Returns a list of suggestions (strings).
   parse: (xhr) ->
-    suggestions = xhr?.responseXML?.getElementsByTagName "suggestion"
-    return [] unless suggestions
-    for suggestion in suggestions
-      continue unless suggestion = suggestion.getAttribute "data"
-      suggestion
+    try
+      for suggestion in xhr.responseXML.getElementsByTagName "suggestion"
+        continue unless suggestion = suggestion.getAttribute "data"
+        suggestion
+    catch
+      []
 
-class Youtube
+class Youtube extends RegexpEngine
   constructor: ->
-    @regexps = [ new RegExp "https?://[a-z]+\.youtube\.com/results" ]
-
-  name: "YouTube"
-  match: (searchUrl) -> matchesAnyRegexp @regexps, searchUrl
+    super [ new RegExp "https?://[a-z]+\.youtube\.com/results" ]
 
   getUrl: (queryTerms) ->
     "http://suggestqueries.google.com/complete/search?client=youtube&ds=yt&q=#{Utils.createSearchQuery queryTerms}"
 
-  # Returns a list of suggestions (strings).
   parse: (xhr) ->
     try
       text = xhr.responseText
@@ -56,8 +61,6 @@ class Youtube
 # A dummy search engine which is guaranteed to match any search URL, but never produces completions.  This
 # allows the rest of the logic to be written knowing that there will be a search engine match.
 class DummySearchEngine
-  constructor: ->
-  name: "Dummy"
   match: -> true
   # We return a useless URL which we know will succeed, but which won't generate any network traffic.
   getUrl: -> chrome.runtime.getURL "content_scripts/vimium.css"
@@ -72,15 +75,13 @@ SearchEngines =
     callback? null
 
   # Perform an HTTP GET.
-  #   searchUrl is the search engine's URL, e.g. Settings.get("searchUrl").
-  #   url is the URL to fetch.
-  #   callback will be called with a successful XMLHttpRequest object, or null.
   get: (searchUrl, url, callback) ->
-    @requests ?= {} # Maps searchUrls to any outstanding HTTP request for that search engine.
+    @requests ?= {} # Maps a searchUrl to any outstanding HTTP request for that search engine.
     @cancel searchUrl
 
-    # We cache the results of the most-recent 1000 requests with a one-minute expiry.
-    @requestCache ?= new SimpleCache 1 * 60 * 1000, 1000
+    # We cache the results of the most-recent 100 successful XMLHttpRequests with a ten-second (i.e. very
+    # short) expiry.
+    @requestCache ?= new SimpleCache 10 * 1000, 100
 
     if @requestCache.has url
       callback @requestCache.get url
@@ -88,22 +89,23 @@ SearchEngines =
 
     @requests[searchUrl] = xhr = new XMLHttpRequest()
     xhr.open "GET", url, true
-    xhr.timeout = 500
+    # We set a fairly short timeout.  If we block for too long, then we block *all* completers.
+    xhr.timeout = 300
     xhr.ontimeout = => @cancel searchUrl, callback
     xhr.onerror = => @cancel searchUrl, callback
     xhr.send()
 
     xhr.onreadystatechange = =>
       if xhr.readyState == 4
+        @requests[searchUrl] = null
         if xhr.status == 200
-          @requests[searchUrl] = null
           callback @requestCache.set url, xhr
         else
           callback null
 
-  # Look up the search-completion engine for this search URL.  Because of DummySearchEngine, above, we know
-  # there will always be a match.  Imagining that there may be many search engines, and knowing that this is
-  # called for every character entered, we cache the result.
+  # Look up the search-completion engine for this searchUrl.  Because of DummySearchEngine, above, we know
+  # there will always be a match.  Imagining that there may be many completion engines, and knowing that this
+  # is called for every input event in the vomnibar, we cache the result.
   lookupEngine: (searchUrl) ->
     @engineCache ?= new SimpleCache 30 * 60 * 60 * 1000 # 30 hours (these are small, we can keep them longer).
     if @engineCache.has searchUrl
@@ -114,22 +116,24 @@ SearchEngines =
         return @engineCache.set searchUrl, engine if engine.match searchUrl
 
   # This is the main (actually, the only) entry point.
-  #   searchUrl is the search engine's URL, e.g. Settings.get("searchUrl").
-  #   queryTerms are the queryTerms.
-  #   callback will be applied to a list of suggestion strings (which may be an empty list, if anything goes
-  #   wrong).
+  #  - searchUrl is the search engine's URL, e.g. Settings.get("searchUrl"), or a custom search engine's URL.
+  #    This is only used as a key for determining the relevant completion engine.
+  #  - queryTerms are the queryTerms.
+  #  - callback will be applied to a list of suggestion strings (which may be an empty list, if anything goes
+  #    wrong).
   complete: (searchUrl, queryTerms, callback) ->
+    # We can't complete empty queries.
     return callback [] unless 0 < queryTerms.length
 
-    # Don't try to complete general URLs.
+    # We don't complete URLs.
     return callback [] if 1 == queryTerms.length and Utils.isUrl queryTerms[0]
 
-    # Don't try to complete Javascrip URLs.
-    return callback [] if 0 < queryTerms.length and Utils.hasJavascriptPrefix queryTerms[0]
+    # We don't complete Javascript URLs.
+    return callback [] if Utils.hasJavascriptPrefix queryTerms[0]
 
     # Cache completions.  However, completions depend upon both the searchUrl and the query terms.  So we need
-    # to generate a key.  We mix in some nonsense generated by pwgen. There is the possibility of a key clash,
-    # but it's vanishingly small.
+    # to generate a key.  We mix in some nonsense generated by pwgen. A key clash is possible, but vanishingly
+    # unlikely.
     junk = "//Zi?ei5;o//"
     completionCacheKey = searchUrl + junk + queryTerms.join junk
     @completionCache ?= new SimpleCache 6 * 60 * 60 * 1000, 2000 # Six hours, 2000 entries.
@@ -140,9 +144,14 @@ SearchEngines =
     url = engine.getUrl queryTerms
     @get searchUrl, url, (xhr = null) =>
       if xhr?
-        callback @completionCache.set completionCacheKey, engine.parse xhr
+        # We keep at most three suggestions, the top three.  These are most likely to be useful.
+        callback @completionCache.set completionCacheKey, engine.parse(xhr)[...3]
       else
-        callback []
+        callback @completionCache.set completionCacheKey, []
+        # We cache failures as an empty list (invoking callback exactly once), but reset the entry to null
+        # after ten minutes.  This (it is hoped) avoids repeated XMLHttpRequest failures over a short period.
+        removeCompletionCacheKey = => @completionCache.set completionCacheKey, null
+        setTimeout removeCompletionCacheKey, 10 * 60 * 1000 # Ten minutes.
 
 root = exports ? window
 root.SearchEngines = SearchEngines
diff --git a/lib/utils.coffee b/lib/utils.coffee
index 88fe9e2c..5d9696e1 100644
--- a/lib/utils.coffee
+++ b/lib/utils.coffee
@@ -177,6 +177,13 @@ Utils =
     delete obj[property] for property in properties
     obj
 
+  # Does string match any of these regexps?
+  matchesAnyRegexp: (regexps, string) ->
+    for re in regexps
+      return true if re.test string
+    false
+
+
 # This creates a new function out of an existing function, where the new function takes fewer arguments. This
 # allows us to pass around functions instead of functions + a partial list of arguments.
 Function::curry = ->
diff --git a/pages/vomnibar.coffee b/pages/vomnibar.coffee
index c519ce3a..3520537e 100644
--- a/pages/vomnibar.coffee
+++ b/pages/vomnibar.coffee
@@ -81,6 +81,7 @@ class VomnibarUI
     @input.value = ""
     @updateTimer = null
     @completions = []
+    @previousText = null
     @selection = @initialSelectionValue
 
   updateSelection: ->
@@ -94,18 +95,12 @@ class VomnibarUI
       @completionList.children[i].className = (if i == @selection then "vomnibarSelected" else "")
 
     # For suggestions from search-engine completion, we copy the suggested text into the input when selected,
-    # and revert when not.
-    if 0 <= @selection
+    # and revert when not.  This allows the user to select a suggestion and then continue typing.
+    if 0 <= @selection and @completions[@selection].insertText
+      @previousText ?= @input.value
       suggestion = @completions[@selection]
-      if suggestion.insertText
-        @previousText ?= @input.value
-        @input.value = (suggestion.reinsertPrefix ? "") + suggestion.title
-      else
-        if @previousText?
-          @input.value = @previousText
-          @previousText = null
-    else
-      if @previousText?
+      @input.value = (suggestion.reinsertPrefix ? "") + suggestion.title
+    else if @previousText?
         @input.value = @previousText
         @previousText = null
author	Stephen Blott	2015-05-02 17:32:28 +0100
committer	Stephen Blott	2015-05-03 08:41:55 +0100
commit	ba4e8018e3d8cd80e0fa9ac541e37e7eee37028f (patch)
tree	dda565cc9f2d55009329d81405490d467764680b
parent	8329f3cbe95e6a39e500aa15e54c6c44fad9cb7e (diff)
download	vimium-ba4e8018e3d8cd80e0fa9ac541e37e7eee37028f.tar.bz2