From 28807bd25b27e5404228a638f2ab5e6c00f606cc Mon Sep 17 00:00:00 2001 From: Stephen Blott Date: Tue, 5 May 2015 16:43:48 +0100 Subject: Search completion; misc. --- background_scripts/completion.coffee | 2 +- background_scripts/completion_engines.coffee | 219 +++++++++++++++++++++++++++ background_scripts/search_engines.coffee | 219 --------------------------- lib/utils.coffee | 6 +- manifest.json | 2 +- 5 files changed, 224 insertions(+), 224 deletions(-) create mode 100644 background_scripts/completion_engines.coffee delete mode 100644 background_scripts/search_engines.coffee diff --git a/background_scripts/completion.coffee b/background_scripts/completion.coffee index 39f8a140..729e86ab 100644 --- a/background_scripts/completion.coffee +++ b/background_scripts/completion.coffee @@ -385,7 +385,7 @@ class SearchEngineCompleter # immediately. return onComplete [] - SearchEngines.complete searchUrl, queryTerms, (searchSuggestions = []) => + CompletionEngines.complete searchUrl, queryTerms, (searchSuggestions = []) => for suggestion in searchSuggestions insertText = if custom then "#{keyword} #{suggestion}" else suggestion suggestions.push @mkSuggestion insertText, queryTerms, type, mkUrl(suggestion), suggestion, @computeRelevancy, score diff --git a/background_scripts/completion_engines.coffee b/background_scripts/completion_engines.coffee new file mode 100644 index 00000000..0177806a --- /dev/null +++ b/background_scripts/completion_engines.coffee @@ -0,0 +1,219 @@ + +# A completion engine provides search suggestions for a search engine. A search engine is identified by a +# "searchUrl", e.g. Settings.get("searchUrl"), or a custom search engine. +# +# Each completion engine defines three functions: +# +# 1. "match" - This takes a searchUrl, and returns a boolean indicating whether this completion engine can +# perform completion for the given search engine. +# +# 2. 
"getUrl" - This takes a list of query terms (queryTerms) and generates a completion URL, that is, a URL +# which will provide completions for this completion engine. +# +# 3. "parse" - This takes a successful XMLHttpRequest object (the request has completed successfully), and +# returns a list of suggestions (a list of strings). +# +# The main completion entry point is CompletionEngines.complete(). This implements all lookup and caching +# logic. It is possible to add new completion engines without changing the CompletionEngines infrastructure +# itself. + +# A base class for common regexp-based matching engines. +class RegexpEngine + constructor: (@regexps) -> + match: (searchUrl) -> Utils.matchesAnyRegexp @regexps, searchUrl + +# Several Google completion engines package responses in this way. +class GoogleXMLRegexpEngine extends RegexpEngine + parse: (xhr) -> + for suggestion in xhr.responseXML.getElementsByTagName "suggestion" + continue unless suggestion = suggestion.getAttribute "data" + suggestion + +class Google extends GoogleXMLRegexpEngine + # Example search URL: http://www.google.com/search?q=%s + constructor: -> + super [ + # We match the major English-speaking TLDs. + new RegExp "^https?://[a-z]+\.google\.(com|ie|co\.uk|ca|com\.au)/" + new RegExp "localhost/cgi-bin/booky" # Only for smblott. 
+ ] + + getUrl: (queryTerms) -> + "http://suggestqueries.google.com/complete/search?ss_protocol=legace&client=toolbar&q=#{Utils.createSearchQuery queryTerms}" + +class Youtube extends GoogleXMLRegexpEngine + # Example search URL: http://www.youtube.com/results?search_query=%s + constructor: -> + super [ new RegExp "^https?://[a-z]+\.youtube\.com/results" ] + + getUrl: (queryTerms) -> + "http://suggestqueries.google.com/complete/search?client=youtube&ds=yt&xml=t&q=#{Utils.createSearchQuery queryTerms}" + +class Wikipedia extends RegexpEngine + # Example search URL: http://www.wikipedia.org/w/index.php?title=Special:Search&search=%s + constructor: -> + super [ new RegExp "^https?://[a-z]+\.wikipedia\.org/" ] + + getUrl: (queryTerms) -> + "https://en.wikipedia.org/w/api.php?action=opensearch&format=json&search=#{Utils.createSearchQuery queryTerms}" + + parse: (xhr) -> + JSON.parse(xhr.responseText)[1] + +## class GoogleMaps extends RegexpEngine +## # Example search URL: https://www.google.com/maps/search/%s +## constructor: -> +## super [ new RegExp "^https?://www\.google\.com/maps/search/" ] +## +## getUrl: (queryTerms) -> +## console.log "xxxxxxxxxxxxxxxxxxxxx" +## "https://www.google.com/s?tbm=map&fp=1&gs_ri=maps&source=hp&suggest=p&authuser=0&hl=en&pf=p&tch=1&ech=2&q=#{Utils.createSearchQuery queryTerms}" +## +## parse: (xhr) -> +## console.log "yyy", xhr.responseText +## data = JSON.parse xhr.responseText +## console.log "zzz" +## console.log data +## [] + +class Bing extends RegexpEngine + # Example search URL: https://www.bing.com/search?q=%s + constructor: -> super [ new RegExp "^https?://www\.bing\.com/search" ] + getUrl: (queryTerms) -> "http://api.bing.com/osjson.aspx?query=#{Utils.createSearchQuery queryTerms}" + parse: (xhr) -> JSON.parse(xhr.responseText)[1] + +class Amazon extends RegexpEngine + # Example search URL: http://www.amazon.com/s/?field-keywords=%s + constructor: -> super [ new RegExp "^https?://www\.amazon\.(com|co.uk|ca|com.au)/s/" ] + 
getUrl: (queryTerms) -> "https://completion.amazon.com/search/complete?method=completion&search-alias=aps&client=amazon-search-ui&mkt=1&q=#{Utils.createSearchQuery queryTerms}" + parse: (xhr) -> JSON.parse(xhr.responseText)[1] + +class DuckDuckGo extends RegexpEngine + # Example search URL: https://duckduckgo.com/?q=%s + constructor: -> super [ new RegExp "^https?://([a-z]+\.)?duckduckgo\.com/" ] + getUrl: (queryTerms) -> "https://duckduckgo.com/ac/?q=#{Utils.createSearchQuery queryTerms}" + parse: (xhr) -> + suggestion.phrase for suggestion in JSON.parse xhr.responseText + +# A dummy search engine which is guaranteed to match any search URL, but never produces completions. This +# allows the rest of the logic to be written knowing that there will be a search engine match. +class DummySearchEngine + match: -> true + # We return a useless URL which we know will succeed, but which won't generate any network traffic. + getUrl: -> chrome.runtime.getURL "content_scripts/vimium.css" + parse: -> [] + +completionEngines = [ + Youtube + Google + DuckDuckGo + Wikipedia + Bing + Amazon + DummySearchEngine +] + +CompletionEngines = + debug: true + + get: (searchUrl, url, callback) -> + xhr = new XMLHttpRequest() + xhr.open "GET", url, true + xhr.timeout = 1000 + xhr.ontimeout = xhr.onerror = -> callback null + xhr.send() + + xhr.onreadystatechange = -> + if xhr.readyState == 4 + callback(if xhr.status == 200 then xhr else null) + + # Look up the search-completion engine for this searchUrl. Because of DummySearchEngine, above, we know + # there will always be a match. Imagining that there may be many completion engines, and knowing that this + # is called for every input event in the vomnibar, we cache the result. + lookupEngine: (searchUrl) -> + @engineCache ?= new SimpleCache 30 * 60 * 60 * 1000 # 30 hours (these are small, we can keep them longer). 
+ if @engineCache.has searchUrl + @engineCache.get searchUrl + else + for engine in completionEngines + engine = new engine() + return @engineCache.set searchUrl, engine if engine.match searchUrl + + # This is the main (actually, the only) entry point. + # - searchUrl is the search engine's URL, e.g. Settings.get("searchUrl"), or a custom search engine's URL. + # This is only used as a key for determining the relevant completion engine. + # - queryTerms are the queryTerms. + # - callback will be applied to a list of suggestion strings (which may be an empty list, if anything goes + # wrong). + complete: (searchUrl, queryTerms, callback) -> + @mostRecentHandler = null + + # We can't complete empty queries. + return callback [] unless 0 < queryTerms.length + + if 1 == queryTerms.length + # We don't complete URLs. + return callback [] if Utils.isUrl queryTerms[0] + # We don't complete fewer than three characters: the results are usually useless. This also prevents + # one- and two-character custom search engine keywords from being sent to the default completer (e.g. + # the initial "w" before typing "w something"). + return callback [] unless 2 < queryTerms[0].length + + # We don't complete Javascript URLs. + return callback [] if Utils.hasJavascriptPrefix queryTerms[0] + + # Cache completions. However, completions depend upon both the searchUrl and the query terms. So we need + # to generate a key. We mix in some junk generated by pwgen. A key clash is possible, but vanishingly + # unlikely. + junk = "//Zi?ei5;o//" + completionCacheKey = searchUrl + junk + queryTerms.join junk + @completionCache ?= new SimpleCache 60 * 60 * 1000, 2000 # One hour, 2000 entries.
+ if @completionCache.has completionCacheKey + console.log "hit", completionCacheKey if @debug + return callback @completionCache.get completionCacheKey + + fetchSuggestions = (callback) => + engine = @lookupEngine searchUrl + url = engine.getUrl queryTerms + console.log "get", url if @debug + query = queryTerms.join(" ").toLowerCase() + @get searchUrl, url, (xhr = null) => + # Parsing the response may fail if we receive an unexpected or an unexpectedly-formatted response. In + # all cases, we fall back to the catch clause, below. + try + suggestions = engine.parse xhr + # Make sure we really do have an iterable of strings. + suggestions = (suggestion for suggestion in suggestions when "string" == typeof suggestion) + # Filter out the query itself. It's not adding anything. + suggestions = (suggestion for suggestion in suggestions when suggestion.toLowerCase() != query) + catch + suggestions = [] + # We cache failures, but remove them after just ten minutes. This (it is hoped) avoids repeated + # XMLHttpRequest failures over a short period of time. + removeCompletionCacheKey = => @completionCache.set completionCacheKey, null + setTimeout removeCompletionCacheKey, 10 * 60 * 1000 # Ten minutes. + + callback suggestions + + # We pause in case the user is still typing. + Utils.setTimeout 200, handler = @mostRecentHandler = => + if handler != @mostRecentHandler # Bail if another completion has begun, or the user is typing. + console.log "bail", completionCacheKey if @debug + return callback [] + # Don't allow duplicate identical active requests. This can happen, for example, when the user enters or + # removes a space, or when they enter a character and immediately delete it. 
+ @inTransit ?= {} + unless @inTransit[completionCacheKey]?.push callback + queue = @inTransit[completionCacheKey] = [] + fetchSuggestions (suggestions) => + callback @completionCache.set completionCacheKey, suggestions + delete @inTransit[completionCacheKey] + console.log "callbacks", queue.length, completionCacheKey if @debug and 0 < queue.length + callback suggestions for callback in queue + + userIsTyping: -> + console.log "reset (typing)" if @debug and @mostRecentHandler? + @mostRecentHandler = null + +root = exports ? window +root.CompletionEngines = CompletionEngines diff --git a/background_scripts/search_engines.coffee b/background_scripts/search_engines.coffee deleted file mode 100644 index 63c61a47..00000000 --- a/background_scripts/search_engines.coffee +++ /dev/null @@ -1,219 +0,0 @@ - -# A completion engine provides search suggestions for a search engine. A search engine is identified by a -# "searchUrl", e.g. Settings.get("searchUrl"), or a custom search engine. -# -# Each completion engine defines three functions: -# -# 1. "match" - This takes a searchUrl, and returns a boolean indicating whether this completion engine can -# perform completion for the given search engine. -# -# 2. "getUrl" - This takes a list of query terms (queryTerms) and generates a completion URL, that is, a URL -# which will provide completions for this completion engine. -# -# 3. "parse" - This takes a successful XMLHttpRequest object (the request has completed successfully), and -# returns a list of suggestions (a list of strings). -# -# The main completion entry point is SearchEngines.complete(). This implements all lookup and caching -# logic. It is possible to add new completion engines without changing the SearchEngines infrastructure -# itself. - -# A base class for common regexp-based matching engines. 
-class RegexpEngine - constructor: (@regexps) -> - match: (searchUrl) -> Utils.matchesAnyRegexp @regexps, searchUrl - -# Several Google completion engines package responses in this way. -class GoogleXMLRegexpEngine extends RegexpEngine - parse: (xhr) -> - for suggestion in xhr.responseXML.getElementsByTagName "suggestion" - continue unless suggestion = suggestion.getAttribute "data" - suggestion - -class Google extends GoogleXMLRegexpEngine - # Example search URL: http://www.google.com/search?q=%s - constructor: -> - super [ - # We match the major English-speaking TLDs. - new RegExp "^https?://[a-z]+\.google\.(com|ie|co\.uk|ca|com\.au)/" - new RegExp "localhost/cgi-bin/booky" # Only for smblott. - ] - - getUrl: (queryTerms) -> - "http://suggestqueries.google.com/complete/search?ss_protocol=legace&client=toolbar&q=#{Utils.createSearchQuery queryTerms}" - -class Youtube extends GoogleXMLRegexpEngine - # Example search URL: http://www.youtube.com/results?search_query=%s - constructor: -> - super [ new RegExp "^https?://[a-z]+\.youtube\.com/results" ] - - getUrl: (queryTerms) -> - "http://suggestqueries.google.com/complete/search?client=youtube&ds=yt&xml=t&q=#{Utils.createSearchQuery queryTerms}" - -class Wikipedia extends RegexpEngine - # Example search URL: http://www.wikipedia.org/w/index.php?title=Special:Search&search=%s - constructor: -> - super [ new RegExp "^https?://[a-z]+\.wikipedia\.org/" ] - - getUrl: (queryTerms) -> - "https://en.wikipedia.org/w/api.php?action=opensearch&format=json&search=#{Utils.createSearchQuery queryTerms}" - - parse: (xhr) -> - JSON.parse(xhr.responseText)[1] - -## class GoogleMaps extends RegexpEngine -## # Example search URL: https://www.google.com/maps/search/%s -## constructor: -> -## super [ new RegExp "^https?://www\.google\.com/maps/search/" ] -## -## getUrl: (queryTerms) -> -## console.log "xxxxxxxxxxxxxxxxxxxxx" -## 
"https://www.google.com/s?tbm=map&fp=1&gs_ri=maps&source=hp&suggest=p&authuser=0&hl=en&pf=p&tch=1&ech=2&q=#{Utils.createSearchQuery queryTerms}" -## -## parse: (xhr) -> -## console.log "yyy", xhr.responseText -## data = JSON.parse xhr.responseText -## console.log "zzz" -## console.log data -## [] - -class Bing extends RegexpEngine - # Example search URL: https://www.bing.com/search?q=%s - constructor: -> super [ new RegExp "^https?://www\.bing\.com/search" ] - getUrl: (queryTerms) -> "http://api.bing.com/osjson.aspx?query=#{Utils.createSearchQuery queryTerms}" - parse: (xhr) -> JSON.parse(xhr.responseText)[1] - -class Amazon extends RegexpEngine - # Example search URL: http://www.amazon.com/s/?field-keywords=%s - constructor: -> super [ new RegExp "^https?://www\.amazon\.(com|co.uk|ca|com.au)/s/" ] - getUrl: (queryTerms) -> "https://completion.amazon.com/search/complete?method=completion&search-alias=aps&client=amazon-search-ui&mkt=1&q=#{Utils.createSearchQuery queryTerms}" - parse: (xhr) -> JSON.parse(xhr.responseText)[1] - -class DuckDuckGo extends RegexpEngine - # Example search URL: https://duckduckgo.com/?q=%s - constructor: -> super [ new RegExp "^https?://([a-z]+\.)?duckduckgo\.com/" ] - getUrl: (queryTerms) -> "https://duckduckgo.com/ac/?q=#{Utils.createSearchQuery queryTerms}" - parse: (xhr) -> - suggestion.phrase for suggestion in JSON.parse xhr.responseText - -# A dummy search engine which is guaranteed to match any search URL, but never produces completions. This -# allows the rest of the logic to be written knowing that there will be a search engine match. -class DummySearchEngine - match: -> true - # We return a useless URL which we know will succeed, but which won't generate any network traffic. 
- getUrl: -> chrome.runtime.getURL "content_scripts/vimium.css" - parse: -> [] - -completionEngines = [ - Youtube - Google - DuckDuckGo - Wikipedia - Bing - Amazon - DummySearchEngine -] - -SearchEngines = - debug: true - - get: (searchUrl, url, callback) -> - xhr = new XMLHttpRequest() - xhr.open "GET", url, true - xhr.timeout = 1000 - xhr.ontimeout = xhr.onerror = -> callback null - xhr.send() - - xhr.onreadystatechange = -> - if xhr.readyState == 4 - callback(if xhr.status == 200 then xhr else null) - - # Look up the search-completion engine for this searchUrl. Because of DummySearchEngine, above, we know - # there will always be a match. Imagining that there may be many completion engines, and knowing that this - # is called for every input event in the vomnibar, we cache the result. - lookupEngine: (searchUrl) -> - @engineCache ?= new SimpleCache 30 * 60 * 60 * 1000 # 30 hours (these are small, we can keep them longer). - if @engineCache.has searchUrl - @engineCache.get searchUrl - else - for engine in completionEngines - engine = new engine() - return @engineCache.set searchUrl, engine if engine.match searchUrl - - # This is the main (actually, the only) entry point. - # - searchUrl is the search engine's URL, e.g. Settings.get("searchUrl"), or a custome search engine's URL. - # This is only used as a key for determining the relevant completion engine. - # - queryTerms are the queryTerms. - # - callback will be applied to a list of suggestion strings (which may be an empty list, if anything goes - # wrong). - complete: (searchUrl, queryTerms, callback) -> - @mostRecentHandler = null - - # We can't complete empty queries. - return callback [] unless 0 < queryTerms.length - - if 1 == queryTerms.length - # We don't complete URLs. - return callback [] if Utils.isUrl queryTerms[0] - # We don't complete less then three characters: the results are usually useless. 
This also prevents - # one- and two-character custom search engine keywords from being sent to the default completer (e.g. - # the initial "w" before typing "w something"). - return callback [] unless 2 < queryTerms[0].length - - # We don't complete Javascript URLs. - return callback [] if Utils.hasJavascriptPrefix queryTerms[0] - - # Cache completions. However, completions depend upon both the searchUrl and the query terms. So we need - # to generate a key. We mix in some junk generated by pwgen. A key clash is possible, but vanishingly - # unlikely. - junk = "//Zi?ei5;o//" - completionCacheKey = searchUrl + junk + queryTerms.join junk - @completionCache ?= new SimpleCache 60 * 60 * 1000, 2000 # One hour, 2000 entries. - if @completionCache.has completionCacheKey - console.log "hit", completionCacheKey if @debug - return callback @completionCache.get completionCacheKey - - fetchSuggestions = (callback) => - engine = @lookupEngine searchUrl - url = engine.getUrl queryTerms - console.log "get", url if @debug - query = queryTerms.join(" ").toLowerCase() - @get searchUrl, url, (xhr = null) => - # Parsing the response may fail if we receive an unexpected or an unexpectedly-formatted response. In - # all cases, we fall back to the catch clause, below. - try - suggestions = engine.parse xhr - # Make sure we really do have an iterable of strings. - suggestions = (suggestion for suggestion in suggestions when "string" == typeof suggestion) - # Filter out the query itself. It's not adding anything. - suggestions = (suggestion for suggestion in suggestions when suggestion.toLowerCase() != query) - catch - suggestions = [] - # We cache failures, but remove them after just ten minutes. This (it is hoped) avoids repeated - # XMLHttpRequest failures over a short period of time. - removeCompletionCacheKey = => @completionCache.set completionCacheKey, null - setTimeout removeCompletionCacheKey, 10 * 60 * 1000 # Ten minutes. 
- - callback suggestions - - # We pause in case the user is still typing. - Utils.setTimeout 200, handler = @mostRecentHandler = => - if handler != @mostRecentHandler # Bail if another completion has begun, or the user is typing. - console.log "bail", completionCacheKey if @debug - return callback [] - # Don't allow duplicate identical active requests. This can happen, for example, when the user enters or - # removes a space, or when they enter a character and immediately delete it. - @inTransit ?= {} - unless @inTransit[completionCacheKey]?.push callback - queue = @inTransit[completionCacheKey] = [] - fetchSuggestions (suggestions) => - callback @completionCache.set completionCacheKey, suggestions - delete @inTransit[completionCacheKey] - console.log "callbacks", queue.length, completionCacheKey if @debug and 0 < queue.length - callback suggestions for callback in queue - - userIsTyping: -> - console.log "reset (typing)" if @debug and @mostRecentHandler? - @mostRecentHandler = null - -root = exports ? window -root.SearchEngines = SearchEngines diff --git a/lib/utils.coffee b/lib/utils.coffee index 1b2a7a3f..e97872f0 100644 --- a/lib/utils.coffee +++ b/lib/utils.coffee @@ -207,14 +207,14 @@ globalRoot.extend = (hash1, hash2) -> hash1[key] = hash2[key] hash1 -# A simple cache. Entries used within an expiry period are retained (for one more expiry period), otherwise -# they are discarded. +# A simple cache. Entries used within two expiry periods are retained, otherwise they are discarded. +# At most 2 * @entries entries are retained. class SimpleCache # expiry: expiry time in milliseconds (default, one hour) # entries: maximum number of entries in @cache (there may be this many entries in @previous, too) constructor: (@expiry = 60 * 60 * 1000, @entries = 1000) -> @cache = {} - @rotate() # Force starts the rotation timer. + @rotate() # Force start the rotation timer. 
rotate: -> @previous = @cache diff --git a/manifest.json b/manifest.json index d3f6249f..195d16c8 100644 --- a/manifest.json +++ b/manifest.json @@ -14,7 +14,7 @@ "background_scripts/sync.js", "background_scripts/settings.js", "background_scripts/exclusions.js", - "background_scripts/search_engines.js", + "background_scripts/completion_engines.js", "background_scripts/completion.js", "background_scripts/marks.js", "background_scripts/main.js" -- cgit v1.2.3