From eb0e2964fca5ef2eccc06607944df6b208b2b99f Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Sat, 16 May 2015 07:26:44 +0100 Subject: Search completion; alternative relevancy scoring. This is an alternative relevancy-scoring scheme for search completion. It attempts to address the "clumping" effect described in #1651 by: - Using the regular relevancy scoring scheme (but based only on the title (so, not the URL). - Weighting relevancy scores (exponentially) by the length the query (so, search suggestions score higher as the length of the query gets longer). - Weighting suggestions (fairly moderately) by their position in the suggestion list as returned by the completion engine. We generally want to retain this ordering. - Applying a calibration fudge factor to roughly calibrate (boost) search-completion suggestions with those from other search engines. --- background_scripts/completion.coffee | 45 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 22 deletions(-) (limited to 'background_scripts') diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index 5755bfaf..c210878b 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -155,6 +155,11 @@ class Suggestion [ '.', [ "^https?://", "\\W+$" ].map (re) -> new RegExp re ] ] + # Boost a score by a factor (in the range (0,1.0)), while keeping the score in the range [0,1]. This makes + # greater adjustments to scores near the middle of the range (so, very poor relevancy scores remain poor). + @boostRelevancyScore: (factor, score) -> + score + if score < 0.5 then score * factor else (1.0 - score) * factor + class BookmarkCompleter folderSeparator: "/" currentSearch: null @@ -448,18 +453,10 @@ class SearchEngineCompleter factor = Math.max 0.0, Math.min 1.0, Settings.get "omniSearchWeight" haveCompletionEngine = (0.0 < factor or custom) and CompletionSearch.haveCompletionEngine searchUrl - # Relevancy: - # - Relevancy does not depend upon the actual suggestion (so, it does not depend upon word - # relevancy, say). We assume that the completion engine has already factored that in. Also, - # completion engines sometimes handle spelling mistakes, in which case we wouldn't find the query - # terms in the suggestion anyway. - # - Scores are weighted such that they retain the order provided by the completion engine. - # - The relavancy is higher if the query term is longer. The idea is that search suggestions are more - # likely to be relevant if, after typing some number of characters, the user hasn't yet found - # a useful suggestion from another completer. - # - characterCount = query.length - queryTerms.length + 1 - relevancy = (if custom then 0.5 else factor) * 12.0 / Math.max 12.0, characterCount + # We weight the relevancy-score factor by the length of the query (exponentially). The idea is that, the + # more the user has typed, the less likely it is that what the user is searching for is amonst the + # suggestions from other completers. + factor *= 1 - Math.pow 0.8, query.length # This filter is applied to all of the suggestions from all of the completers, after they have been # aggregated by the MultiCompleter. @@ -487,15 +484,19 @@ class SearchEngineCompleter forceAutoSelect: custom highlightTerms: not haveCompletionEngine - mkSuggestion = (suggestion) -> + mkSuggestion = (suggestion) => new Suggestion queryTerms: queryTerms type: description url: Utils.createSearchUrl suggestion, searchUrl title: suggestion - relevancy: relevancy *= 0.9 insertText: suggestion highlightTerms: false + isCustomSearch: custom + relevancyFunction: @computeRelevancy + # We reduce the relevancy factor as suggestions are added. This respects, to some extent, the + # suggestion order provided by the completion engine. + relevancyData: factor *= 0.95 cachedSuggestions = if haveCompletionEngine then CompletionSearch.complete searchUrl, queryTerms else null @@ -514,18 +515,18 @@ class SearchEngineCompleter onComplete suggestions, filter: filter continuation: (suggestions, onComplete) => - # Fetch completion suggestions from suggestion engines. - - # We can skip this if any new suggestions we propose cannot score highly enough to make the list - # anyway. - if 10 <= suggestions.length and relevancy < suggestions[suggestions.length-1].relevancy - console.log "skip (cannot make the grade):", suggestions.length, query if SearchEngineCompleter.debug - return onComplete [] - CompletionSearch.complete searchUrl, queryTerms, (suggestions = []) => console.log "fetched suggestions:", suggestions.length, query if SearchEngineCompleter.debug onComplete suggestions.map mkSuggestion + computeRelevancy: ({ relevancyData, queryTerms, title }) -> + # Tweaks: + # - Calibration: we boost relevancy scores to try to achieve an appropriate balance between relevancy + # scores here, and those provided by other completers. + # - Relevancy depends only on the title (which is the search terms), and not on the URL. + Suggestion.boostRelevancyScore 0.5, + relevancyData * RankingUtils.wordRelevancy queryTerms, title, title + # A completer which calls filter() on many completers, aggregates the results, ranks them, and returns the top # 10. All queries from the vomnibar come through a multi completer. class MultiCompleter -- cgit v1.2.3 From 148f1b44bcbe7df030ac9f95498826ee66f0a9cd Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Sat, 16 May 2015 10:17:37 +0100 Subject: Search completion; reinstate relevancy-score filter. See the comment in the diff. This test was in a previous version, but was dropped because with eb0e2964fca5ef2eccc06607944df6b208b2b99f, it was (thought to be) no longer possible to filter by relevancy score since we don't know what the relevancy score will be. The filter is reinstated here because we use the simple idea that, whatever relevancy scores are assigned to completion suggestions, they will be less than the score for a perfect suggestion. --- background_scripts/completion.coffee | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'background_scripts') diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index c210878b..0675d13f 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -515,6 +515,19 @@ class SearchEngineCompleter onComplete suggestions, filter: filter continuation: (suggestions, onComplete) => + + # We can skip querying the completion engine if any new suggestions we propose will not score highly + # enough to make the list anyway. We construct a suggestion which perfectly matches the query, and + # ask the relevancy function what score it would get. If that score is less than the score of the + # lowest-ranked suggestion from another completer (and there are already 10 suggestions), then + # there's no need to query the completion engine. + perfectRelevancyScore = @computeRelevancy new Suggestion + queryTerms: queryTerms, title: queryTerms.join(" "), relevancyData: factor + + if 10 <= suggestions.length and perfectRelevancyScore < suggestions[suggestions.length-1].relevancy + console.log "skip (cannot make the grade):", suggestions.length, query if SearchEngineCompleter.debug + return onComplete [] + CompletionSearch.complete searchUrl, queryTerms, (suggestions = []) => console.log "fetched suggestions:", suggestions.length, query if SearchEngineCompleter.debug onComplete suggestions.map mkSuggestion -- cgit v1.2.3 From 87c214ffd80ac436273f35fd95c800589e3f7d4a Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Sat, 16 May 2015 13:12:25 +0100 Subject: Search completion; tweak comments. --- background_scripts/completion.coffee | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'background_scripts') diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index 0675d13f..a8f160d0 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -155,8 +155,9 @@ class Suggestion [ '.', [ "^https?://", "\\W+$" ].map (re) -> new RegExp re ] ] - # Boost a score by a factor (in the range (0,1.0)), while keeping the score in the range [0,1]. This makes - # greater adjustments to scores near the middle of the range (so, very poor relevancy scores remain poor). + # Boost a relevancy score by a factor (in the range (0,1.0)), while keeping the score in the range [0,1]. + # This makes greater adjustments to scores near the middle of the range (so, very poor relevancy scores + # remain very poor). @boostRelevancyScore: (factor, score) -> score + if score < 0.5 then score * factor else (1.0 - score) * factor @@ -453,9 +454,8 @@ class SearchEngineCompleter factor = Math.max 0.0, Math.min 1.0, Settings.get "omniSearchWeight" haveCompletionEngine = (0.0 < factor or custom) and CompletionSearch.haveCompletionEngine searchUrl - # We weight the relevancy-score factor by the length of the query (exponentially). The idea is that, the - # more the user has typed, the less likely it is that what the user is searching for is amonst the - # suggestions from other completers. + # We weight the relevancy factor by the length of the query (exponentially). The idea is that, the + # more the user has typed, the less likely it is that another completer has proven fruitful. factor *= 1 - Math.pow 0.8, query.length # This filter is applied to all of the suggestions from all of the completers, after they have been @@ -495,7 +495,7 @@ class SearchEngineCompleter isCustomSearch: custom relevancyFunction: @computeRelevancy # We reduce the relevancy factor as suggestions are added. This respects, to some extent, the - # suggestion order provided by the completion engine. + # order provided by the completion engine. relevancyData: factor *= 0.95 cachedSuggestions = -- cgit v1.2.3