From 56325ab3858839a9eea61752665192c624c6e852 Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Fri, 16 May 2014 13:24:38 +0100 Subject: Import relevancy improvement code. --- background_scripts/completion.coffee | 127 ++++++++++++++++++++++++++++++++ tests/unit_tests/completion_test.coffee | 86 +++++++++++++++++++++ 2 files changed, 213 insertions(+) diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index beb7003a..2805dda9 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -296,8 +296,130 @@ RankingUtils = return false unless matchedTerm true + # TODO: Remove this long explanatory comment. + # + # The differences between the following version of `wordRelevancy` and the old one are: + # + # 1. Word relevancy scores: + # - Reduced by a factor of 3/1 for matches which are not at the start of a word. + # - Reduced by a factor of 3/2 for matches which are at the start of a word but not a whole word. + # - Unchanged for matches which are whole words. + # (These values come from the fudge factors in `matchWeights`, below) + # + # 2. Recency scores: + # - Reduced by a factor of 3/2. + # - This retains the existing balance between relevancy and recency for partial matches at the start of a word. + # - In the other two cases: + # The relative contribution of relevancy is reduced for partial matches. + # The relative contribution of relevancy is increased for whole-word matches. + # + # 3. Normalisation takes account of the number of times each term matched (previously, normalisation + # accounted only for the total length of the query terms). + # + # 4. `wordRelevancy()` now does not allow a poor `urlScore` to pull down a good `titleScore` + # (But see, also, the comments regarding three possible endings, below.) + + # Weights used for scoring matches. + matchWeights: + { + matchAnywhere: 1 + matchStartOfWord: 1 + matchWholeWord: 1 + # The following must be the sum of the weights above; it is used for normalization. + maximumScore: 3 + # + # Calibration factor for balancing word relevancy and recency. + recencyCalibrator: 2.0/3.0 + # The current value of 2.0/3.0 has the effect of: + # - favoring the contribution of recency when matches are not on word boundaries ( because 2.0/3.0 > (1)/3 ) + # - retaining the existing balance when matches are at the starts of words ( because 2.0/3.0 = (1+1)/3 ) + # - favoring the contribution of word relevance when matches are on whole words ( because 2.0/3.0 < (1+1+1)/3 ) + } + + # Calculate a score for matching `term` against `string`. + # The score is in the range [0, `matchWeights.maximumScore`], see above. + # Returns: [ score, count ], where count is the number of matched characters in `string` + scoreTerm: (term, string) -> + score = 0 + count = 0 + nonMatching = string.split(RegexpCache.get term) + if nonMatching.length > 1 + # Have match. + score = RankingUtils.matchWeights.matchAnywhere + count = nonMatching.reduce(((p,c) -> p - c.length), string.length) + if RegexpCache.get(term, "\\b").test string + # Have match at start of word. + score += RankingUtils.matchWeights.matchStartOfWord + if RegexpCache.get(term, "\\b", "\\b").test string + # Have match of whole word. + score += RankingUtils.matchWeights.matchWholeWord + [ score, if count < string.length then count else string.length ] + # Returns a number between [0, 1] indicating how often the query terms appear in the url and title.
wordRelevancy: (queryTerms, url, title) -> + urlScore = titleScore = 0.0 + urlCount = titleCount = 0 + # Calculate initial scores. + for term in queryTerms + [ s, c ] = RankingUtils.scoreTerm term, url + urlScore += s + urlCount += c + if title + [ s, c ] = RankingUtils.scoreTerm term, title + titleScore += s + titleCount += c + + maximumPossibleScore = RankingUtils.matchWeights.maximumScore * queryTerms.length + + # Normalize scores. + urlScore /= maximumPossibleScore + urlScore *= RankingUtils.normalizeDifference urlCount, url.length + + if title + titleScore /= maximumPossibleScore + titleScore *= RankingUtils.normalizeDifference titleCount, title.length + else + titleScore = urlScore + + # ###################################################### + # Up to this point, some weightings have changed, but the structure of + # calculation is pretty much as it was before. + # + # However, there are three possible endings to this story ... + + # ###################################################### + # Ending #1: The old ending ... + # + # return (urlScore + titleScore) / 2 + # + # It's difficult to argue with that. + + # ###################################################### + # Ending #2: An ending favoring `titleScore` ... + # + # Prefer matches in the title over matches in the URL. + # Here, this means "don't let a poor urlScore pull down the titleScore". + # For example, urlScore can be unreasonably poor if the URL is very long. + urlScore = titleScore if urlScore < titleScore + + # Return the average. + (urlScore + titleScore) / 2 + + # ###################################################### + # Ending #3: An alternative (better?) ending ... + # + # return Math.max(urlScore, titleScore) + # + # Why? + # - Don't let a poor urlScore pull down a good titleScore, as in Ending #2. + # - But also don't let a poor titleScore pull down a good urlScore. + # - The query may be targeting one or the other, so let the best one win. + + # Pick one of these three endings. + # ###################################################### + + # Returns a number between [0, 1] indicating how often the query terms appear in the url and title. + oldWordRelevancy: (queryTerms, url, title) -> queryLength = 0 urlScore = 0.0 titleScore = 0.0 for term in queryTerms queryLength += term.length urlScore += 1 if url && RankingUtils.matches [term], url titleScore += 1 if title && RankingUtils.matches [term], title urlScore = urlScore / queryTerms.length urlScore = urlScore * RankingUtils.normalizeDifference(queryLength, url.length) if title titleScore = titleScore / queryTerms.length titleScore = titleScore * RankingUtils.normalizeDifference(queryLength, title.length) else titleScore = urlScore (urlScore + titleScore) / 2 @@ -326,6 +448,11 @@ RankingUtils = # increasingly discount older history entries. recencyScore = recencyDifference * recencyDifference * recencyDifference + # Calibrate recencyScore vis-a-vis word-relevancy scores. + # This does not change the relative order of recency scores. + # See also comment in the definition of `matchWeights`, above. + recencyScore *= RankingUtils.matchWeights.recencyCalibrator + # Takes the difference of two numbers and returns a number between [0, 1] (the percentage difference).
normalizeDifference: (a, b) -> max = Math.max(a, b) diff --git a/tests/unit_tests/completion_test.coffee b/tests/unit_tests/completion_test.coffee index fb267f63..e52dbbc1 100644 --- a/tests/unit_tests/completion_test.coffee +++ b/tests/unit_tests/completion_test.coffee @@ -228,6 +228,92 @@ context "suggestions", suggestion = new Suggestion(["queryterm"], "tab", "http://ninjawords.com", "ninjawords", returns(1)) assert.equal -1, suggestion.generateHtml().indexOf("http://ninjawords.com") +context "RankingUtils.wordRelevancy", + should "get a higher relevancy score in shorter URLs", -> + highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/short", "nothing") + lowScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/longer", "nothing") + assert.isTrue highScore > lowScore + + should "get a higher relevancy score in shorter titles", -> + highScore = RankingUtils.wordRelevancy(["ffee"], "http://stackoverflow.com/same", "Coffeescript") + lowScore = RankingUtils.wordRelevancy(["ffee"], "http://stackoverflow.com/same", "Coffeescript rocks") + assert.isTrue highScore > lowScore + + should "get a higher relevancy score for matching the start of a word (in a URL)", -> + lowScore = RankingUtils.wordRelevancy(["stack"], "http://Xstackoverflow.com/same", "nothing") + highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflowX.com/same", "nothing") + assert.isTrue highScore > lowScore + + should "get a higher relevancy score for matching the start of a word (in a title)", -> + lowScore = RankingUtils.wordRelevancy(["ted"], "http://stackoverflow.com/same", "Dist racted") + highScore = RankingUtils.wordRelevancy(["ted"], "http://stackoverflow.com/same", "Distrac ted") + assert.isTrue highScore > lowScore + + should "get a higher relevancy score for matching a whole word (in a URL)", -> + lowScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.comX/same", "nothing") + highScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflowX.com/same", "nothing") + assert.isTrue highScore > lowScore + + should "get a higher relevancy score for matching a whole word (in a title)", -> + lowScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.com/same", "abc comX") + highScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.com/same", "abcX com") + assert.isTrue highScore > lowScore + + # # TODO: (smblott) + # # Word relevancy should take into account the number of matches (it doesn't currently). + # should "get a higher relevancy score for multiple matches (in a URL)", -> + # lowScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/Xxxxxx", "nothing") + # highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/Xstack", "nothing") + # assert.isTrue highScore > lowScore + + # should "get a higher relevancy score for multiple matches (in a title)", -> + # lowScore = RankingUtils.wordRelevancy(["bbc"], "http://stackoverflow.com/same", "BBC Radio 4 (XBCr4)") + # highScore = RankingUtils.wordRelevancy(["bbc"], "http://stackoverflow.com/same", "BBC Radio 4 (BBCr4)") + # assert.isTrue highScore > lowScore + +# WARNING: The following tests are hardware dependent. They depend upon +# different but algebraically-equivalent sequences of floating point +# operations yielding the same results. That they ever work at all is quite +# remarkable. +# TODO: (smblott) +# Remove these tests when `oldWordRelevancy` is removed. 
+context "RankingUtils.wordRelevancy (temporary hardware-dependent tests)", + should "exactly equal oldWordRelevancy for whole word matches (in a URL)", -> + newScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.com/same", "irrelevant") + oldScore = RankingUtils.oldWordRelevancy(["com"], "http://stackoverflow.com/same", "irrelevant") + assert.equal newScore, oldScore # remarkable! Exactly equal floats. + + should "yield 2/3 * oldWordRelevancy for matches at the start of a word (in a URL)", -> + newScore = RankingUtils.wordRelevancy(["sta"], "http://stackoverflow.com/same", "irrelevant") + oldScore = (2.0/3.0) * RankingUtils.oldWordRelevancy(["sta"], "http://stackoverflow.com/same", "irrelevant") + assert.equal newScore, oldScore # remarkable! Exactly equal floats. + + should "yield 1/3 * oldWordRelevancy for matches within a word (in a URL)", -> + newScore = RankingUtils.wordRelevancy(["over"], "http://stackoverflow.com/same", "irrelevant") + oldScore = (1.0/3.0) * RankingUtils.oldWordRelevancy(["over"], "http://stackoverflow.com/same", "irrelevant") + assert.equal newScore, oldScore # remarkable! Exactly equal floats. + + should "exactly equal oldWordRelevancy for whole word matches (in a title)", -> + newScore = RankingUtils.wordRelevancy(["relevant"], "http://stackoverflow.com/same", "XX relevant YY") + # Multiply by 2 to account for new wordRelevancy favoring title. + oldScore = 2 * RankingUtils.oldWordRelevancy(["relevant"], "http://stackoverflow.com/same", "XX relevant YY") + assert.equal newScore, oldScore # remarkable! Exactly equal floats. + + should "2/3 * oldWordRelevancy for matches at the start of a word (in a title)", -> + newScore = RankingUtils.wordRelevancy(["relev"], "http://stackoverflow.com/same", "XX relevant YY") + # Multiply by 2 to account for new wordRelevancy favoring title. + oldScore = (2.0/3.0) * 2 * RankingUtils.oldWordRelevancy(["relev"], "http://stackoverflow.com/same", "XX relevant YY") + assert.equal newScore, oldScore # remarkable! Exactly equal floats. + + should "1/3 * oldWordRelevancy for matches within a word (in a title)", -> + newScore = RankingUtils.wordRelevancy(["elev"], "http://stackoverflow.com/same", "XX relevant YY") + # Multiply by 2 to account for new wordRelevancy favoring title. + oldScore = (1.0/3.0) * 2 * RankingUtils.oldWordRelevancy(["elev"], "http://stackoverflow.com/same", "XX relevant YY") + assert.equal newScore, oldScore # remarkable! Exactly equal floats. +# +# End of hardware-dependent tests. + +context "Suggestion.pushMatchingRanges", should "extract ranges matching term (simple case, two matches)", -> ranges = [] [ one, two, three ] = [ "one", "two", "three" ] -- cgit v1.2.3 From 60b2baf8adfca1286b0303d8d136b92c2b15af22 Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Fri, 16 May 2014 13:33:01 +0100 Subject: Address philc's comments. 
--- tests/unit_tests/completion_test.coffee | 86 +++++++++------------------------ 1 file changed, 22 insertions(+), 64 deletions(-) diff --git a/tests/unit_tests/completion_test.coffee b/tests/unit_tests/completion_test.coffee index e52dbbc1..bba0a0f8 100644 --- a/tests/unit_tests/completion_test.coffee +++ b/tests/unit_tests/completion_test.coffee @@ -229,90 +229,48 @@ context "suggestions", assert.equal -1, suggestion.generateHtml().indexOf("http://ninjawords.com") context "RankingUtils.wordRelevancy", - should "get a higher relevancy score in shorter URLs", -> - highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/short", "nothing") - lowScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/longer", "nothing") + should "score higher in shorter URLs", -> + highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/short", "a-title") + lowScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/longer", "a-title") assert.isTrue highScore > lowScore - should "get a higher relevancy score in shorter titles", -> - highScore = RankingUtils.wordRelevancy(["ffee"], "http://stackoverflow.com/same", "Coffeescript") - lowScore = RankingUtils.wordRelevancy(["ffee"], "http://stackoverflow.com/same", "Coffeescript rocks") + should "score higher in shorter titles", -> + highScore = RankingUtils.wordRelevancy(["coffee"], "a-url", "Coffeescript") + lowScore = RankingUtils.wordRelevancy(["coffee"], "a-url", "Coffeescript rocks") assert.isTrue highScore > lowScore - should "get a higher relevancy score for matching the start of a word (in a URL)", -> - lowScore = RankingUtils.wordRelevancy(["stack"], "http://Xstackoverflow.com/same", "nothing") - highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflowX.com/same", "nothing") + should "score higher for matching the start of a word (in a URL)", -> + lowScore = RankingUtils.wordRelevancy(["stack"], "http://Xstackoverflow.com/same", "a-title") + highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflowX.com/same", "a-title") assert.isTrue highScore > lowScore - should "get a higher relevancy score for matching the start of a word (in a title)", -> - lowScore = RankingUtils.wordRelevancy(["ted"], "http://stackoverflow.com/same", "Dist racted") - highScore = RankingUtils.wordRelevancy(["ted"], "http://stackoverflow.com/same", "Distrac ted") + should "score higher for matching the start of a word (in a title)", -> + lowScore = RankingUtils.wordRelevancy(["te"], "a-url", "Dist racted") + highScore = RankingUtils.wordRelevancy(["te"], "a-url", "Distrac ted") assert.isTrue highScore > lowScore - should "get a higher relevancy score for matching a whole word (in a URL)", -> - lowScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.comX/same", "nothing") - highScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflowX.com/same", "nothing") + should "score higher for matching a whole word (in a URL)", -> + lowScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.comX/same", "a-title") + highScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflowX.com/same", "a-title") assert.isTrue highScore > lowScore - should "get a higher relevancy score for matching a whole word (in a title)", -> - lowScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.com/same", "abc comX") - highScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.com/same", "abcX com") + should "score higher for matching a whole word (in a 
title)", -> + lowScore = RankingUtils.wordRelevancy(["com"], "a-url", "abc comX") + highScore = RankingUtils.wordRelevancy(["com"], "a-url", "abcX com") assert.isTrue highScore > lowScore # # TODO: (smblott) # # Word relevancy should take into account the number of matches (it doesn't currently). - # should "get a higher relevancy score for multiple matches (in a URL)", -> - # lowScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/Xxxxxx", "nothing") - # highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/Xstack", "nothing") + # should "score higher for multiple matches (in a URL)", -> + # lowScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/Xxxxxx", "a-title") + # highScore = RankingUtils.wordRelevancy(["stack"], "http://stackoverflow.com/Xstack", "a-title") # assert.isTrue highScore > lowScore - # should "get a higher relevancy score for multiple matches (in a title)", -> + # should "score higher for multiple matches (in a title)", -> # lowScore = RankingUtils.wordRelevancy(["bbc"], "http://stackoverflow.com/same", "BBC Radio 4 (XBCr4)") # highScore = RankingUtils.wordRelevancy(["bbc"], "http://stackoverflow.com/same", "BBC Radio 4 (BBCr4)") # assert.isTrue highScore > lowScore -# WARNING: The following tests are hardware dependent. They depend upon -# different but algebraically-equivalent sequences of floating point -# operations yielding the same results. That they ever work at all is quite -# remarkable. -# TODO: (smblott) -# Remove these tests when `oldWordRelevancy` is removed. -context "RankingUtils.wordRelevancy (temporary hardware-dependent tests)", - should "exactly equal oldWordRelevancy for whole word matches (in a URL)", -> - newScore = RankingUtils.wordRelevancy(["com"], "http://stackoverflow.com/same", "irrelevant") - oldScore = RankingUtils.oldWordRelevancy(["com"], "http://stackoverflow.com/same", "irrelevant") - assert.equal newScore, oldScore # remarkable! Exactly equal floats. - - should "yield 2/3 * oldWordRelevancy for matches at the start of a word (in a URL)", -> - newScore = RankingUtils.wordRelevancy(["sta"], "http://stackoverflow.com/same", "irrelevant") - oldScore = (2.0/3.0) * RankingUtils.oldWordRelevancy(["sta"], "http://stackoverflow.com/same", "irrelevant") - assert.equal newScore, oldScore # remarkable! Exactly equal floats. - - should "yield 1/3 * oldWordRelevancy for matches within a word (in a URL)", -> - newScore = RankingUtils.wordRelevancy(["over"], "http://stackoverflow.com/same", "irrelevant") - oldScore = (1.0/3.0) * RankingUtils.oldWordRelevancy(["over"], "http://stackoverflow.com/same", "irrelevant") - assert.equal newScore, oldScore # remarkable! Exactly equal floats. - - should "exactly equal oldWordRelevancy for whole word matches (in a title)", -> - newScore = RankingUtils.wordRelevancy(["relevant"], "http://stackoverflow.com/same", "XX relevant YY") - # Multiply by 2 to account for new wordRelevancy favoring title. - oldScore = 2 * RankingUtils.oldWordRelevancy(["relevant"], "http://stackoverflow.com/same", "XX relevant YY") - assert.equal newScore, oldScore # remarkable! Exactly equal floats. - - should "2/3 * oldWordRelevancy for matches at the start of a word (in a title)", -> - newScore = RankingUtils.wordRelevancy(["relev"], "http://stackoverflow.com/same", "XX relevant YY") - # Multiply by 2 to account for new wordRelevancy favoring title. 
- oldScore = (2.0/3.0) * 2 * RankingUtils.oldWordRelevancy(["relev"], "http://stackoverflow.com/same", "XX relevant YY") - assert.equal newScore, oldScore # remarkable! Exactly equal floats. - - should "1/3 * oldWordRelevancy for matches within a word (in a title)", -> - newScore = RankingUtils.wordRelevancy(["elev"], "http://stackoverflow.com/same", "XX relevant YY") - # Multiply by 2 to account for new wordRelevancy favoring title. - oldScore = (1.0/3.0) * 2 * RankingUtils.oldWordRelevancy(["elev"], "http://stackoverflow.com/same", "XX relevant YY") - assert.equal newScore, oldScore # remarkable! Exactly equal floats. -# -# End of hardware-dependent tests. - context "Suggestion.pushMatchingRanges", should "extract ranges matching term (simple case, two matches)", -> ranges = [] -- cgit v1.2.3 From 37bdd510bd2d3fbe2674174d245f94664498f88b Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Fri, 16 May 2014 14:14:59 +0100 Subject: Remove verbose commentary. --- background_scripts/completion.coffee | 107 +++++++---------------------------- 1 file changed, 19 insertions(+), 88 deletions(-) diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index 2805dda9..0b2e8b0b 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -296,49 +296,23 @@ RankingUtils = return false unless matchedTerm true - # TODO: Remove this long explanatory comment. - # - # The differences between the following version of `wordRelevancy` and the old one are: - # - # 1. Word relevancy scores: - # - Reduced by a factor of 3/1 for matches which are not at the start of a word. - # - Reduced by a factor of 3/2 for matches which are at the start of a word but not a whole word. - # - Unchanged for matches which are whole words. - # (These values come from the fudge factors in `matchWeights`, below) - # - # 2. Recency scores: - # - Reduced by a factor of 3/2. - # - This retains the existing balance between relevancy and recency for partial matches at the start of a word. - # - In the other two cases: - # The relative contribution of relevancy is reduced for partial matches. - # The relative contribution of relevancy is increased for whole-word matches. - # - # 3. Normalisation takes account of the number of times each term matched (previously, normalisation - # accounted only for the total length of the query terms). - # - # 4. `wordRelevancy()` now does not allow a poor `urlScore` to pull down a good `titleScore` - # (But see, also, the comments regarding three possible endings, below.) - # Weights used for scoring matches. matchWeights: - { - matchAnywhere: 1 - matchStartOfWord: 1 - matchWholeWord: 1 - # The following must be the sum of the weights above; it is used for normalization. - maximumScore: 3 - # - # Calibration factor for balancing word relevancy and recency. - recencyCalibrator: 2.0/3.0 - # The current value of 2.0/3.0 has the effect of: - # - favoring the contribution of recency when matches are not on word boundaries ( because 2.0/3.0 > (1)/3 ) - # - retaining the existing balance when matches are at the starts of words ( because 2.0/3.0 = (1+1)/3 ) - # - favoring the contribution of word relevance when matches are on whole words ( because 2.0/3.0 < (1+1+1)/3 ) - } - - # Calculate a score for matching `term` against `string`. - # The score is in the range [0, `matchWeights.maximumScore`], see above.
- # Returns: [ score, count ], where count is the number of matched characters in `string` + matchAnywhere: 1 + matchStartOfWord: 1 + matchWholeWord: 1 + # The following must be the sum of the three weights above; it is used for normalization. + maximumScore: 3 + # + # Calibration factor for balancing word relevancy and recency. + recencyCalibrator: 2.0/3.0 + # The current value of 2.0/3.0 has the effect of: + # - favoring the contribution of recency when matches are not on word boundaries ( because 2.0/3.0 > (1)/3 ) + # - favoring the contribution of word relevance when matches are on whole words ( because 2.0/3.0 < (1+1+1)/3 ) + + # Calculate a score for matching term against string. + # The score is in the range [0, matchWeights.maximumScore], see above. + # Returns: [ score, count ], where count is the number of matched characters in string. scoreTerm: (term, string) -> score = 0 count = 0 @@ -381,60 +355,19 @@ RankingUtils = else titleScore = urlScore - # ###################################################### - # Up to this point, some weightings have changed, but the structure of - # calculation is pretty much as it was before. - # - # However, there are three possible endings to this story ... - - # ###################################################### - # Ending #1: The old ending ... - # - # return (urlScore + titleScore) / 2 - # - # It's difficult to argue with that. - - # ###################################################### - # Ending #2: An ending favoring `titleScore` ... - # # Prefer matches in the title over matches in the URL. - # Here, this means "don't let a poor urlScore pull down the titleScore". + # In other words, don't let a poor urlScore pull down the titleScore. # For example, urlScore can be unreasonably poor if the URL is very long. urlScore = titleScore if urlScore < titleScore # Return the average. (urlScore + titleScore) / 2 - # ###################################################### - # Ending #3: An alternative (better?) ending ... + # Untested alternative to the above: + # - Don't let a poor urlScore pull down a good titleScore, and don't let a poor titleScore pull down a + # good urlScore. # # return Math.max(urlScore, titleScore) - # - # Why? - # - Don't let a poor urlScore pull down a good titleScore, as in Ending #2. - # - But also don't let a poor titleScore pull down a good urlScore. - # - The query may be targeting one or the other, so let the best one win. - - # Pick one of these three endings. - # ###################################################### - - # Returns a number between [0, 1] indicating how often the query terms appear in the url and title. - oldWordRelevancy: (queryTerms, url, title) -> - queryLength = 0 - urlScore = 0.0 - titleScore = 0.0 - for term in queryTerms - queryLength += term.length - urlScore += 1 if url && RankingUtils.matches [term], url - titleScore += 1 if title && RankingUtils.matches [term], title - urlScore = urlScore / queryTerms.length - urlScore = urlScore * RankingUtils.normalizeDifference(queryLength, url.length) - if title - titleScore = titleScore / queryTerms.length - titleScore = titleScore * RankingUtils.normalizeDifference(queryLength, title.length) - else - titleScore = urlScore - (urlScore + titleScore) / 2 # Returns a score between [0, 1] which indicates how recent the given timestamp is. Items which are over # a month old are counted as 0. 
This range is quadratic, so an item from one day ago has a much stronger @@ -449,8 +382,6 @@ RankingUtils = recencyScore = recencyDifference * recencyDifference * recencyDifference # Calibrate recencyScore vis-a-vis word-relevancy scores. - # This does not change the relative order of recency scores. - # See also comment in the definition of `matchWeights`, above. recencyScore *= RankingUtils.matchWeights.recencyCalibrator # Takes the difference of two numbers and returns a number between [0, 1] (the percentage difference). -- cgit v1.2.3
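
The following standalone CoffeeScript sketch (not part of the patches above) mirrors the scoring scheme these commits introduce, so the weights and normalization can be tried at a coffee/node REPL without loading the extension. The `RegexpCache` stub, the top-level helper names, and the example strings are assumptions for illustration only; the real RegexpCache in completion.coffee also escapes regexp metacharacters and memoizes compiled regexps.

# Illustrative sketch -- mirrors the patched scoreTerm/wordRelevancy logic.
# RegexpCache here is a simplified stand-in (no caching, no escaping of
# regexp metacharacters in `term`).
RegexpCache =
  get: (term, prefix = "", suffix = "") ->
    new RegExp(prefix + term + suffix, "i")

matchWeights =
  matchAnywhere: 1
  matchStartOfWord: 1
  matchWholeWord: 1
  maximumScore: 3              # must remain the sum of the three weights above
  recencyCalibrator: 2.0/3.0   # applied to recencyScore elsewhere; unused in this sketch

# Returns [ score, count ]: score is in [0, matchWeights.maximumScore];
# count is the number of characters of `string` matched by `term`.
scoreTerm = (term, string) ->
  score = 0
  count = 0
  nonMatching = string.split(RegexpCache.get term)
  if nonMatching.length > 1
    # Matched somewhere in the string.
    score = matchWeights.matchAnywhere
    count = nonMatching.reduce(((p, c) -> p - c.length), string.length)
    if RegexpCache.get(term, "\\b").test string
      # Matched at the start of a word.
      score += matchWeights.matchStartOfWord
    if RegexpCache.get(term, "\\b", "\\b").test string
      # Matched a whole word.
      score += matchWeights.matchWholeWord
  [ score, if count < string.length then count else string.length ]

# Percentage difference between two lengths, in [0, 1] (as in completion.coffee).
normalizeDifference = (a, b) ->
  max = Math.max(a, b)
  (max - Math.abs(a - b)) / max

wordRelevancy = (queryTerms, url, title) ->
  urlScore = titleScore = 0.0
  urlCount = titleCount = 0
  for term in queryTerms
    [ s, c ] = scoreTerm(term, url)
    urlScore += s
    urlCount += c
    if title
      [ s, c ] = scoreTerm(term, title)
      titleScore += s
      titleCount += c
  maximumPossibleScore = matchWeights.maximumScore * queryTerms.length
  urlScore /= maximumPossibleScore
  urlScore *= normalizeDifference(urlCount, url.length)
  if title
    titleScore /= maximumPossibleScore
    titleScore *= normalizeDifference(titleCount, title.length)
  else
    titleScore = urlScore
  # Don't let a poor urlScore pull down the titleScore (the "favor the title" ending).
  urlScore = titleScore if urlScore < titleScore
  (urlScore + titleScore) / 2

# The three weights yield the 1/3, 2/3, 3/3 factors discussed in the commits:
console.log scoreTerm("over", "http://stackoverflow.com/")  # [ 1, 4 ]  match within a word
console.log scoreTerm("sta", "http://stackoverflow.com/")   # [ 2, 3 ]  match at the start of a word
console.log scoreTerm("com", "http://stackoverflow.com/")   # [ 3, 3 ]  whole-word match

# And, mirroring the unit tests, a match in a shorter URL scores higher:
shorter = wordRelevancy(["stack"], "http://stackoverflow.com/short", "a-title")
longer = wordRelevancy(["stack"], "http://stackoverflow.com/longer", "a-title")
console.log shorter > longer  # true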