about | summary | refs | log | tree | commit | diff | stats
path: root/Library/Homebrew/cask/developer/bin/the_long_tail
diff options
context:
space:
mode:
Diffstat (limited to 'Library/Homebrew/cask/developer/bin/the_long_tail')
-rwxr-xr-x Library/Homebrew/cask/developer/bin/the_long_tail 252
1 files changed, 252 insertions, 0 deletions
diff --git a/Library/Homebrew/cask/developer/bin/the_long_tail b/Library/Homebrew/cask/developer/bin/the_long_tail
new file mode 100755
index 000000000..2a4cb8e7c
--- /dev/null
+++ b/Library/Homebrew/cask/developer/bin/the_long_tail
@@ -0,0 +1,252 @@
+#!/usr/bin/env ruby
+#
+# the_long_tail
+#
+# A histogram view on contributor stats
+#
+# notes
+#
+# Since this script does not track file-renames in the git history, the
+# dependence of Casks upon occasional contributors/non-maintainers can
+# only be expressed as a range or lower bound.
+#
+
+###
+### dependencies
+###
+
+require "open3"
+require "set"
+
+###
+### configurable constants
+###
+
+# Lower bounds of the histogram buckets, in ascending order; `histogram`
+# sorts each author's commit count between consecutive entries. Frozen like
+# the other constants so the bucket layout cannot be mutated at runtime.
+BINS = [*1..10, 100, 1000].freeze
+
+# Authors with at most this many commits are reported as "occasional".
+OCCASIONAL_CUTOFF = 5
+
+# Directory holding the Cask files, relative to the git root that
+# `cd_to_project_root` changes into.
+CASK_PATH = "Casks".freeze
+
+# author emails of all maintainers, past and present (compared against git's %ae in historic_nonmaintainer_cask_set)
+MAINTAINERS = %w[
+ paul.t.hinze@gmail.com
+ fanquake@users.noreply.github.com
+ fanquake@gmail.com
+ kevin@suttle.io
+ leoj3n@gmail.com
+ nano@fdp.io
+ nanoid.xd@gmail.com
+ me@passcod.name
+ walker@pobox.com
+ info@vitorgalvao.com
+ calebcenter@live.com
+ ndr@qef.io
+ josh@joshbutts.com
+ goxberry@gmail.com
+ radek.simko@gmail.com
+ federicobond@gmail.com
+ claui@users.noreply.github.com
+ amorymeltzer@gmail.com
+ hagins.josh@gmail.com
+ dragon.vctr@gmail.com
+ mail@sebastianroeder.de
+ github@adityadalal.com
+ adityadalal924@users.noreply.github.com
+ ].freeze
+
+###
+### git methods
+###
+
+def cd_to_project_root
+ Dir.chdir File.dirname(File.expand_path(__FILE__))
+ @git_root ||= Open3.popen3(*%w[
+ git rev-parse --show-toplevel
+ ]) do |_stdin, stdout, _stderr|
+ begin
+ stdout.gets.chomp
+ rescue
+ end
+ end
+ Dir.chdir @git_root
+ @git_root
+end
+
+def authors
+ @authors ||= Open3.popen3(*%w[
+ git log --no-merges --format=%ae --
+ ]) do |_stdin, stdout, _stderr|
+ h = {}
+ stdout.each_line do |line|
+ line.chomp!
+ h[line] ||= 0
+ h[line] += 1
+ end
+ h
+ end
+end
+
+def casks_by_author
+ @casks_by_author ||= Open3.popen3(*%w[
+ git log --no-merges --name-only --format=%ae --
+ ],
+ CASK_PATH) do |_stdin, stdout, _stderr|
+ email = nil
+ h = {}
+ stdout.each_line.to_a.join("").split("\n\n").each do |paragraph|
+ if paragraph.include?("Casks/")
+ lines = paragraph.split("\n")
+ email = lines.pop
+ h[email] ||= Set.new
+ h[email].merge(lines.compact)
+ else
+ email = paragraph.chomp
+ end
+ end
+ h
+ end
+end
+
+###
+### filesystem methods
+###
+
+def all_casks
+ @all_casks ||= Open3.popen2("/usr/bin/find",
+ CASK_PATH,
+ *%w[-type f -name *.rb]) do |_stdin, stdout|
+ stdout.each_line.map(&:chomp)
+ end
+end
+
+###
+### analysis and report methods
+###
+
+def histogram
+ if @histogram.nil?
+ @histogram = Hash[*BINS.map { |elt| [elt, 0] }.flatten]
+ authors.each do |_name, num_commits|
+ bottom = 0
+ BINS.each do |top|
+ @histogram[bottom] += 1 if num_commits >= bottom && num_commits < top
+ bottom = top
+ end
+ end
+ end
+ @histogram
+end
+
+def historic_occasional_cask_set
+ @historic_occasional_cask_set = authors.each.collect do |name, num_commits|
+ if num_commits > OCCASIONAL_CUTOFF
+ nil
+ elsif !casks_by_author.key?(name)
+ nil
+ else
+ casks_by_author[name].to_a
+ end
+ end.flatten.compact.to_set
+end
+
+def extant_occasional_cask_count
+ # avoid double-counting renames by intersecting with extant Casks
+ historic_occasional_cask_set.intersection(all_casks).count
+end
+
+def historic_nonmaintainer_cask_set
+ @historic_nonmaintainer_cask_set = authors.each.collect do |name, _num_commits|
+ if MAINTAINERS.include?(name)
+ nil
+ else
+ casks_by_author[name].to_a
+ end
+ end.flatten.compact.to_set
+end
+
+def extant_nonmaintainer_cask_count
+ # avoid double-counting renames by intersecting with extant Casks
+ historic_nonmaintainer_cask_set.intersection(all_casks).count
+end
+
+def extant_occasional_cask_percentage
+ @extant_occasional_cask_percentage ||= (100 * extant_occasional_cask_count / all_casks.count).to_i
+end
+
+def historic_occasional_cask_percentage
+ @historic_occasional_cask_percentage ||= (100 * historic_occasional_cask_set.count / all_casks.count).to_i
+end
+
+def extant_nonmaintainer_cask_percentage
+ @extant_nonmaintainer_cask_percentage ||= (100 * extant_nonmaintainer_cask_count / all_casks.count).to_i
+end
+
+def historic_nonmaintainer_cask_percentage
+ # this is so large, it might cross 100%
+ @historic_nonmaintainer_cask_percentage ||= [100, (100 * historic_nonmaintainer_cask_set.count / all_casks.count).to_i].min
+end
+
+def onetime_author_percentage
+ @onetime_author_percentage ||= (100 *
+ histogram[1] /
+ authors.length).to_i
+end
+
+def occasional_author_percentage
+ # why is it so hard to slice a hash?
+ @occasional_author_percentage ||= (100 *
+ (1..OCCASIONAL_CUTOFF).to_a.collect { |bin| histogram[bin] }.reduce(:+) /
+ authors.length).to_i
+end
+
+def graph_width
+ if @graph_width.nil?
+ @graph_width = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i
+ @graph_width = 80 if @graph_width <= 0
+ @graph_width -= 20 if @graph_width > 20
+ end
+ @graph_width
+end
+
+def graph_normalization
+ @graph_normalization ||= histogram.values.max.to_f
+end
+
+def print_header
+ puts "Commits\tContributors"
+ puts "---------------------"
+end
+
+def print_table
+ BINS.each do |bin|
+ plural = (bin % 10) == 0 ? "'s" : ""
+ graph = "." * ((histogram[bin] / graph_normalization) * graph_width)
+ puts "#{bin}#{plural}\t#{histogram[bin]}\t#{graph}"
+ end
+end
+
+def print_footer
+ puts %Q{\n#{occasional_author_percentage}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)}
+ puts "\n#{onetime_author_percentage}% of contributors commit only once"
+ puts "\n#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}% of Casks depend on an occasional contributor"
+ puts "\n#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}% of Casks depend on a contributor who is not a maintainer"
+ puts "\n"
+end
+
+def generate_report
+ print_header
+ print_table
+ print_footer
+end
+
+###
+### main: enter the git root so the relative CASK_PATH resolves, then print the report
+###
+
+cd_to_project_root
+generate_report