From eb0e2964fca5ef2eccc06607944df6b208b2b99f Mon Sep 17 00:00:00 2001
From: Stephen Blott
Date: Sat, 16 May 2015 07:26:44 +0100
Subject: Search completion; alternative relevancy scoring.

This is an alternative relevancy-scoring scheme for search completion.
It attempts to address the "clumping" effect described in #1651 by:

  - Using the regular relevancy scoring scheme (but based only on the
    title (so, not the URL).

  - Weighting relevancy scores (exponentially) by the length the query
    (so, search suggestions score higher as the length of the query gets
    longer).

  - Weighting suggestions (fairly moderately) by their position in the
    suggestion list as returned by the completion engine.  We generally
    want to retain this ordering.

  - Applying a calibration fudge factor to roughly calibrate (boost)
    search-completion suggestions with those from other search engines.
---
 background_scripts/completion.coffee | 45 ++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 22 deletions(-)

(limited to 'background_scripts')

diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee
index 5755bfaf..c210878b 100644
--- a/background_scripts/completion.coffee
+++ b/background_scripts/completion.coffee
@@ -155,6 +155,11 @@ class Suggestion
     [ '.', [ "^https?://", "\\W+$" ].map (re) -> new RegExp re ]
   ]
 
+  # Boost a score by a factor (in the range (0,1.0)), while keeping the score in the range [0,1].  This makes
+  # greater adjustments to scores near the middle of the range (so, very poor relevancy scores remain poor).
+  @boostRelevancyScore: (factor, score) ->
+    score + if score < 0.5 then score * factor else (1.0 - score) * factor
+
 class BookmarkCompleter
   folderSeparator: "/"
   currentSearch: null
@@ -448,18 +453,10 @@ class SearchEngineCompleter
     factor = Math.max 0.0, Math.min 1.0, Settings.get "omniSearchWeight"
     haveCompletionEngine = (0.0 < factor or custom) and CompletionSearch.haveCompletionEngine searchUrl
 
-    # Relevancy:
-    #   - Relevancy does not depend upon the actual suggestion (so, it does not depend upon word
-    #     relevancy, say).  We assume that the completion engine has already factored that in.  Also,
-    #     completion engines sometimes handle spelling mistakes, in which case we wouldn't find the query
-    #     terms in the suggestion anyway.
-    #   - Scores are weighted such that they retain the order provided by the completion engine.
-    #   - The relavancy is higher if the query term is longer.  The idea is that search suggestions are more
-    #     likely to be relevant if, after typing some number of characters, the user hasn't yet found
-    #     a useful suggestion from another completer.
-    #
-    characterCount = query.length - queryTerms.length + 1
-    relevancy = (if custom then 0.5 else factor) * 12.0 / Math.max 12.0, characterCount
+    # We weight the relevancy-score factor by the length of the query (exponentially).  The idea is that, the
+    # more the user has typed, the less likely it is that what the user is searching for is amonst the
+    # suggestions from other completers.
+    factor *= 1 - Math.pow 0.8, query.length
 
     # This filter is applied to all of the suggestions from all of the completers, after they have been
     # aggregated by the MultiCompleter.
@@ -487,15 +484,19 @@ class SearchEngineCompleter
       forceAutoSelect: custom
       highlightTerms: not haveCompletionEngine
 
-    mkSuggestion = (suggestion) ->
+    mkSuggestion = (suggestion) =>
       new Suggestion
         queryTerms: queryTerms
         type: description
         url: Utils.createSearchUrl suggestion, searchUrl
         title: suggestion
-        relevancy: relevancy *= 0.9
         insertText: suggestion
         highlightTerms: false
+        isCustomSearch: custom
+        relevancyFunction: @computeRelevancy
+        # We reduce the relevancy factor as suggestions are added. This respects, to some extent, the
+        # suggestion order provided by the completion engine.
+        relevancyData: factor *= 0.95
 
     cachedSuggestions =
       if haveCompletionEngine then CompletionSearch.complete searchUrl, queryTerms else null
@@ -514,18 +515,18 @@ class SearchEngineCompleter
       onComplete suggestions,
         filter: filter
         continuation: (suggestions, onComplete) =>
-          # Fetch completion suggestions from suggestion engines.
-
-          # We can skip this if any new suggestions we propose cannot score highly enough to make the list
-          # anyway.
-          if 10 <= suggestions.length and relevancy < suggestions[suggestions.length-1].relevancy
-            console.log "skip (cannot make the grade):", suggestions.length, query if SearchEngineCompleter.debug
-            return onComplete []
-
           CompletionSearch.complete searchUrl, queryTerms, (suggestions = []) =>
             console.log "fetched suggestions:", suggestions.length, query if SearchEngineCompleter.debug
             onComplete suggestions.map mkSuggestion
 
+  computeRelevancy: ({ relevancyData, queryTerms, title }) ->
+    # Tweaks:
+    # - Calibration: we boost relevancy scores to try to achieve an appropriate balance between relevancy
+    #   scores here, and those provided by other completers.
+    # - Relevancy depends only on the title (which is the search terms), and not on the URL.
+    Suggestion.boostRelevancyScore 0.5,
+      relevancyData * RankingUtils.wordRelevancy queryTerms, title, title
+
 # A completer which calls filter() on many completers, aggregates the results, ranks them, and returns the top
 # 10. All queries from the vomnibar come through a multi completer.
 class MultiCompleter
-- 
cgit v1.2.3