From e363889d27bb7fddaea513fb733c3c050d3af144 Mon Sep 17 00:00:00 2001 From: Martin Schimandl Date: Sat, 14 Oct 2017 11:06:55 +0200 Subject: Add audit check for URL schema --- Library/Homebrew/cask/lib/hbc/audit.rb | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/Library/Homebrew/cask/lib/hbc/audit.rb b/Library/Homebrew/cask/lib/hbc/audit.rb index d757b0623..8520f651f 100644 --- a/Library/Homebrew/cask/lib/hbc/audit.rb +++ b/Library/Homebrew/cask/lib/hbc/audit.rb @@ -30,6 +30,7 @@ module Hbc check_url check_generic_artifacts check_token_conflicts + check_https_availability check_download check_single_pre_postflight check_single_uninstall_zap @@ -275,6 +276,78 @@ module Hbc "#{core_tap.default_remote}/blob/master/Formula/#{cask.token}.rb" end + def check_https_availability + check_url_for_https_availability(cask.homepage) unless cask.url.to_s.empty? + check_url_for_https_availability(cask.appcast) unless cask.appcast.to_s.empty? + check_url_for_https_availability(cask.homepage) unless cask.homepage.to_s.empty? + end + + def check_url_for_https_availability(url_to_check) + if schema_http?(url_to_check) + result, effective_url = access_url(url_to_check.sub(/^http:/, 'https:')) + if schema_https?(effective_url) && result == 1 + add_error "Change #{url_to_check} to #{url_to_check.sub(/^http:/, 'https:')}" + else + result, effective_url = access_url(url_to_check) + + if result == 0 + add_error "URL is not reachable #{url_to_check}" + end + end + else + result, effective_url = access_url(url_to_check) + if result == 1 && schema_https?(effective_url) + return + else + result, effective_url = access_url(url_to_check.sub(/^https:/, 'http:')) + if result == 1 && schema_http?(effective_url) + add_error "Change #{url_to_check} to #{url_to_check.sub(/^https:/, 'http:')}" + else + add_error "URL is not reachable #{url_to_check}" + end + end + end + end + + def access_url(url_to_access) + # return values: + # 1, effective URL : URL reachable, no schema change + # 0, nil : URL unreachable + # -1, effective URL : URL reachable, but schema changed + + curl_executable, *args = curl_args( + "--compressed", "--location", "--fail", + "--write-out", "%{http_code} %{url_effective}", + "--output", "/dev/null", + url_to_access, + user_agent: :fake + ) + result = @command.run(curl_executable, args: args, print_stderr: false) + if result.success? + http_code, url_effective = result.stdout.chomp.split(' ') + odebug "input: #{url_to_access} effective: #{url_effective} code: #{http_code}" + + # Fail if return code not 2XX or 3XX + return 0, nil if http_code.to_i < 200 && http_code.to_i > 300 + + # Fail if URL schema changed + # ([4] is either http[s]:// or http[:]// ) + return -1, url_effective if url_to_access[4] != url_effective[4] + + return 1, url_effective + else + return 0, nil + end + end + + def schema_http?(url) + url[/^http:/] ? 1 : nil + end + + def schema_https?(url) + url[/^https:/] ? 1 : nil + end + def check_download return unless download && cask.url odebug "Auditing download" -- cgit v1.2.3 From b2abc0b3ab918f9631a4bfaefba3a5091d5b0717 Mon Sep 17 00:00:00 2001 From: Martin Schimandl Date: Sat, 14 Oct 2017 17:47:08 +0200 Subject: Add --head to curl to speedup audit --- Library/Homebrew/cask/lib/hbc/audit.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Library/Homebrew/cask/lib/hbc/audit.rb b/Library/Homebrew/cask/lib/hbc/audit.rb index 8520f651f..8785fb01b 100644 --- a/Library/Homebrew/cask/lib/hbc/audit.rb +++ b/Library/Homebrew/cask/lib/hbc/audit.rb @@ -318,7 +318,7 @@ module Hbc curl_executable, *args = curl_args( "--compressed", "--location", "--fail", "--write-out", "%{http_code} %{url_effective}", - "--output", "/dev/null", + "--output", "/dev/null", "--head", url_to_access, user_agent: :fake ) -- cgit v1.2.3 From 5ed5e500e5680bb2276320070ce685f04df33c55 Mon Sep 17 00:00:00 2001 From: Martin Schimandl Date: Sun, 22 Oct 2017 11:12:08 +0200 Subject: Reuse FormulaAuditor to check Cask's URLs --- Library/Homebrew/cask/lib/hbc/audit.rb | 68 ++-------------------------------- 1 file changed, 4 insertions(+), 64 deletions(-) diff --git a/Library/Homebrew/cask/lib/hbc/audit.rb b/Library/Homebrew/cask/lib/hbc/audit.rb index 8785fb01b..6c0dbab3c 100644 --- a/Library/Homebrew/cask/lib/hbc/audit.rb +++ b/Library/Homebrew/cask/lib/hbc/audit.rb @@ -2,6 +2,7 @@ require "hbc/checkable" require "hbc/download" require "digest" require "utils/git" +require "dev-cmd/audit" module Hbc class Audit @@ -277,75 +278,14 @@ module Hbc end def check_https_availability - check_url_for_https_availability(cask.homepage) unless cask.url.to_s.empty? + check_url_for_https_availability(cask.url) unless cask.url.to_s.empty? check_url_for_https_availability(cask.appcast) unless cask.appcast.to_s.empty? check_url_for_https_availability(cask.homepage) unless cask.homepage.to_s.empty? end def check_url_for_https_availability(url_to_check) - if schema_http?(url_to_check) - result, effective_url = access_url(url_to_check.sub(/^http:/, 'https:')) - if schema_https?(effective_url) && result == 1 - add_error "Change #{url_to_check} to #{url_to_check.sub(/^http:/, 'https:')}" - else - result, effective_url = access_url(url_to_check) - - if result == 0 - add_error "URL is not reachable #{url_to_check}" - end - end - else - result, effective_url = access_url(url_to_check) - if result == 1 && schema_https?(effective_url) - return - else - result, effective_url = access_url(url_to_check.sub(/^https:/, 'http:')) - if result == 1 && schema_http?(effective_url) - add_error "Change #{url_to_check} to #{url_to_check.sub(/^https:/, 'http:')}" - else - add_error "URL is not reachable #{url_to_check}" - end - end - end - end - - def access_url(url_to_access) - # return values: - # 1, effective URL : URL reachable, no schema change - # 0, nil : URL unreachable - # -1, effective URL : URL reachable, but schema changed - - curl_executable, *args = curl_args( - "--compressed", "--location", "--fail", - "--write-out", "%{http_code} %{url_effective}", - "--output", "/dev/null", "--head", - url_to_access, - user_agent: :fake - ) - result = @command.run(curl_executable, args: args, print_stderr: false) - if result.success? - http_code, url_effective = result.stdout.chomp.split(' ') - odebug "input: #{url_to_access} effective: #{url_effective} code: #{http_code}" - - # Fail if return code not 2XX or 3XX - return 0, nil if http_code.to_i < 200 && http_code.to_i > 300 - - # Fail if URL schema changed - # ([4] is either http[s]:// or http[:]// ) - return -1, url_effective if url_to_access[4] != url_effective[4] - - return 1, url_effective - else - return 0, nil - end - end - - def schema_http?(url) - url[/^http:/] ? 1 : nil - end - - def schema_https?(url) - url[/^https:/] ? 1 : nil + problem = FormulaAuditor.check_http_content(url_to_check.to_s) + add_error problem unless problem.nil? end def check_download -- cgit v1.2.3 From 0db069602ec160736e150064894b5e33fefa2373 Mon Sep 17 00:00:00 2001 From: Martin Schimandl Date: Sun, 3 Dec 2017 14:02:55 +0100 Subject: Refactor: Move FormulaAudit.check_http_content to utils/curl --- Library/Homebrew/cask/lib/hbc/audit.rb | 4 +- Library/Homebrew/dev-cmd/audit.rb | 103 ++------------------------------- Library/Homebrew/utils/curl.rb | 92 +++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 99 deletions(-) diff --git a/Library/Homebrew/cask/lib/hbc/audit.rb b/Library/Homebrew/cask/lib/hbc/audit.rb index 6c0dbab3c..274a01ae2 100644 --- a/Library/Homebrew/cask/lib/hbc/audit.rb +++ b/Library/Homebrew/cask/lib/hbc/audit.rb @@ -2,7 +2,7 @@ require "hbc/checkable" require "hbc/download" require "digest" require "utils/git" -require "dev-cmd/audit" +require "utils/curl" module Hbc class Audit @@ -284,7 +284,7 @@ module Hbc end def check_url_for_https_availability(url_to_check) - problem = FormulaAuditor.check_http_content(url_to_check.to_s) + problem = curl_check_http_content(url_to_check.to_s) add_error problem unless problem.nil? end diff --git a/Library/Homebrew/dev-cmd/audit.rb b/Library/Homebrew/dev-cmd/audit.rb index bfe4dbc00..89ab79996 100644 --- a/Library/Homebrew/dev-cmd/audit.rb +++ b/Library/Homebrew/dev-cmd/audit.rb @@ -40,6 +40,7 @@ require "formula" require "formula_versions" require "utils" +require "utils/curl" require "extend/ENV" require "formula_cellar_checks" require "official_taps" @@ -202,98 +203,6 @@ class FormulaAuditor @specs = %w[stable devel head].map { |s| formula.send(s) }.compact end - def self.check_http_content(url, user_agents: [:default], check_content: false, strict: false, require_http: false) - return unless url.start_with? "http" - - details = nil - user_agent = nil - hash_needed = url.start_with?("http:") && !require_http - user_agents.each do |ua| - details = http_content_headers_and_checksum(url, hash_needed: hash_needed, user_agent: ua) - user_agent = ua - break if details[:status].to_s.start_with?("2") - end - - unless details[:status] - # Hack around https://github.com/Homebrew/brew/issues/3199 - return if MacOS.version == :el_capitan - return "The URL #{url} is not reachable" - end - - unless details[:status].start_with? "2" - return "The URL #{url} is not reachable (HTTP status code #{details[:status]})" - end - - return unless hash_needed - - secure_url = url.sub "http", "https" - secure_details = - http_content_headers_and_checksum(secure_url, hash_needed: true, user_agent: user_agent) - - if !details[:status].to_s.start_with?("2") || - !secure_details[:status].to_s.start_with?("2") - return - end - - etag_match = details[:etag] && - details[:etag] == secure_details[:etag] - content_length_match = - details[:content_length] && - details[:content_length] == secure_details[:content_length] - file_match = details[:file_hash] == secure_details[:file_hash] - - if etag_match || content_length_match || file_match - return "The URL #{url} should use HTTPS rather than HTTP" - end - - return unless check_content - - no_protocol_file_contents = %r{https?:\\?/\\?/} - details[:file] = details[:file].gsub(no_protocol_file_contents, "/") - secure_details[:file] = secure_details[:file].gsub(no_protocol_file_contents, "/") - - # Check for the same content after removing all protocols - if details[:file] == secure_details[:file] - return "The URL #{url} should use HTTPS rather than HTTP" - end - - return unless strict - - # Same size, different content after normalization - # (typical causes: Generated ID, Timestamp, Unix time) - if details[:file].length == secure_details[:file].length - return "The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser." - end - - lenratio = (100 * secure_details[:file].length / details[:file].length).to_i - return unless (90..110).cover?(lenratio) - "The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser." - end - - def self.http_content_headers_and_checksum(url, hash_needed: false, user_agent: :default) - max_time = hash_needed ? "600" : "25" - output, = curl_output( - "--connect-timeout", "15", "--include", "--max-time", max_time, "--location", url, - user_agent: user_agent - ) - - status_code = :unknown - while status_code == :unknown || status_code.to_s.start_with?("3") - headers, _, output = output.partition("\r\n\r\n") - status_code = headers[%r{HTTP\/.* (\d+)}, 1] - end - - output_hash = Digest::SHA256.digest(output) if hash_needed - - { - status: status_code, - etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2], - content_length: headers[/Content-Length: (\d+)/, 1], - file_hash: output_hash, - file: output, - } - end - def audit_style return unless @style_offenses display_cop_names = ARGV.include?("--display-cop-names") @@ -558,10 +467,10 @@ class FormulaAuditor return unless @online return unless DevelopmentTools.curl_handles_most_https_certificates? - if http_content_problem = FormulaAuditor.check_http_content(homepage, - user_agents: [:browser, :default], - check_content: true, - strict: @strict) + if http_content_problem = curl_check_http_content(homepage, + user_agents: [:browser, :default], + check_content: true, + strict: @strict) problem http_content_problem end end @@ -1037,7 +946,7 @@ class ResourceAuditor # A `brew mirror`'ed URL is usually not yet reachable at the time of # pull request. next if url =~ %r{^https://dl.bintray.com/homebrew/mirror/} - if http_content_problem = FormulaAuditor.check_http_content(url, require_http: curl_openssl_or_deps) + if http_content_problem = curl_check_http_content(url, require_http: curl_openssl_or_deps) problem http_content_problem end elsif strategy <= GitDownloadStrategy diff --git a/Library/Homebrew/utils/curl.rb b/Library/Homebrew/utils/curl.rb index eaa81352c..84853047c 100644 --- a/Library/Homebrew/utils/curl.rb +++ b/Library/Homebrew/utils/curl.rb @@ -59,3 +59,95 @@ end def curl_output(*args, **options) Open3.capture3(*curl_args(*args, show_output: true, **options)) end + +def curl_check_http_content(url, user_agents: [:default], check_content: false, strict: false, require_http: false) + return unless url.start_with? "http" + + details = nil + user_agent = nil + hash_needed = url.start_with?("http:") && !require_http + user_agents.each do |ua| + details = curl_http_content_headers_and_checksum(url, hash_needed: hash_needed, user_agent: ua) + user_agent = ua + break if details[:status].to_s.start_with?("2") + end + + unless details[:status] + # Hack around https://github.com/Homebrew/brew/issues/3199 + return if MacOS.version == :el_capitan + return "The URL #{url} is not reachable" + end + + unless details[:status].start_with? "2" + return "The URL #{url} is not reachable (HTTP status code #{details[:status]})" + end + + return unless hash_needed + + secure_url = url.sub "http", "https" + secure_details = + curl_http_content_headers_and_checksum(secure_url, hash_needed: true, user_agent: user_agent) + + if !details[:status].to_s.start_with?("2") || + !secure_details[:status].to_s.start_with?("2") + return + end + + etag_match = details[:etag] && + details[:etag] == secure_details[:etag] + content_length_match = + details[:content_length] && + details[:content_length] == secure_details[:content_length] + file_match = details[:file_hash] == secure_details[:file_hash] + + if etag_match || content_length_match || file_match + return "The URL #{url} should use HTTPS rather than HTTP" + end + + return unless check_content + + no_protocol_file_contents = %r{https?:\\?/\\?/} + details[:file] = details[:file].gsub(no_protocol_file_contents, "/") + secure_details[:file] = secure_details[:file].gsub(no_protocol_file_contents, "/") + + # Check for the same content after removing all protocols + if details[:file] == secure_details[:file] + return "The URL #{url} should use HTTPS rather than HTTP" + end + + return unless strict + + # Same size, different content after normalization + # (typical causes: Generated ID, Timestamp, Unix time) + if details[:file].length == secure_details[:file].length + return "The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser." + end + + lenratio = (100 * secure_details[:file].length / details[:file].length).to_i + return unless (90..110).cover?(lenratio) + "The URL #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser." +end + +def curl_http_content_headers_and_checksum(url, hash_needed: false, user_agent: :default) + max_time = hash_needed ? "600" : "25" + output, = curl_output( + "--connect-timeout", "15", "--include", "--max-time", max_time, "--location", url, + user_agent: user_agent + ) + + status_code = :unknown + while status_code == :unknown || status_code.to_s.start_with?("3") + headers, _, output = output.partition("\r\n\r\n") + status_code = headers[%r{HTTP\/.* (\d+)}, 1] + end + + output_hash = Digest::SHA256.digest(output) if hash_needed + + { + status: status_code, + etag: headers[%r{ETag: ([wW]\/)?"(([^"]|\\")*)"}, 2], + content_length: headers[/Content-Length: (\d+)/, 1], + file_hash: output_hash, + file: output, + } +end -- cgit v1.2.3 From b1328adaa9030a3a7ed6127fe0662b0ae51f270b Mon Sep 17 00:00:00 2001 From: Martin Schimandl Date: Mon, 4 Dec 2017 19:32:22 +0100 Subject: Use cask.url.user_agent when available --- Library/Homebrew/cask/lib/hbc/audit.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Library/Homebrew/cask/lib/hbc/audit.rb b/Library/Homebrew/cask/lib/hbc/audit.rb index 274a01ae2..d53d26ffc 100644 --- a/Library/Homebrew/cask/lib/hbc/audit.rb +++ b/Library/Homebrew/cask/lib/hbc/audit.rb @@ -278,13 +278,13 @@ module Hbc end def check_https_availability - check_url_for_https_availability(cask.url) unless cask.url.to_s.empty? + check_url_for_https_availability(cask.url, user_agents: [cask.url.user_agent]) unless cask.url.to_s.empty? check_url_for_https_availability(cask.appcast) unless cask.appcast.to_s.empty? check_url_for_https_availability(cask.homepage) unless cask.homepage.to_s.empty? end - def check_url_for_https_availability(url_to_check) - problem = curl_check_http_content(url_to_check.to_s) + def check_url_for_https_availability(url_to_check, user_agents: [:default]) + problem = curl_check_http_content(url_to_check.to_s, user_agents: user_agents) add_error problem unless problem.nil? end -- cgit v1.2.3