diff options
Diffstat (limited to 'Library/Homebrew/cask/developer/bin/the_long_tail')
| -rwxr-xr-x | Library/Homebrew/cask/developer/bin/the_long_tail | 252 |
1 files changed, 252 insertions, 0 deletions
diff --git a/Library/Homebrew/cask/developer/bin/the_long_tail b/Library/Homebrew/cask/developer/bin/the_long_tail new file mode 100755 index 000000000..2a4cb8e7c --- /dev/null +++ b/Library/Homebrew/cask/developer/bin/the_long_tail @@ -0,0 +1,252 @@ +#!/usr/bin/env ruby +# +# the_long_tail +# +# A histogram view on contributor stats +# +# notes +# +# Since this script does not track file-renames in the git history, the +# dependence of Casks upon occasional contributors/non-maintainers can +# only be expressed as a range or lower bound. +# + +### +### dependencies +### + +require "open3" +require "set" + +### +### configurable constants +### + +BINS = [ + (1..10).to_a, + 100, + 1000, + ].flatten + +OCCASIONAL_CUTOFF = 5 + +CASK_PATH = "Casks".freeze + +# all maintainers, past and present +MAINTAINERS = %w[ + paul.t.hinze@gmail.com + fanquake@users.noreply.github.com + fanquake@gmail.com + kevin@suttle.io + leoj3n@gmail.com + nano@fdp.io + nanoid.xd@gmail.com + me@passcod.name + walker@pobox.com + info@vitorgalvao.com + calebcenter@live.com + ndr@qef.io + josh@joshbutts.com + goxberry@gmail.com + radek.simko@gmail.com + federicobond@gmail.com + claui@users.noreply.github.com + amorymeltzer@gmail.com + hagins.josh@gmail.com + dragon.vctr@gmail.com + mail@sebastianroeder.de + github@adityadalal.com + adityadalal924@users.noreply.github.com + ].freeze + +### +### git methods +### + +def cd_to_project_root + Dir.chdir File.dirname(File.expand_path(__FILE__)) + @git_root ||= Open3.popen3(*%w[ + git rev-parse --show-toplevel + ]) do |_stdin, stdout, _stderr| + begin + stdout.gets.chomp + rescue + end + end + Dir.chdir @git_root + @git_root +end + +def authors + @authors ||= Open3.popen3(*%w[ + git log --no-merges --format=%ae -- + ]) do |_stdin, stdout, _stderr| + h = {} + stdout.each_line do |line| + line.chomp! + h[line] ||= 0 + h[line] += 1 + end + h + end +end + +def casks_by_author + @casks_by_author ||= Open3.popen3(*%w[ + git log --no-merges --name-only --format=%ae -- + ], + CASK_PATH) do |_stdin, stdout, _stderr| + email = nil + h = {} + stdout.each_line.to_a.join("").split("\n\n").each do |paragraph| + if paragraph.include?("Casks/") + lines = paragraph.split("\n") + email = lines.pop + h[email] ||= Set.new + h[email].merge(lines.compact) + else + email = paragraph.chomp + end + end + h + end +end + +### +### filesystem methods +### + +def all_casks + @all_casks ||= Open3.popen2("/usr/bin/find", + CASK_PATH, + *%w[-type f -name *.rb]) do |_stdin, stdout| + stdout.each_line.map(&:chomp) + end +end + +### +### analysis and report methods +### + +def histogram + if @histogram.nil? + @histogram = Hash[*BINS.map { |elt| [elt, 0] }.flatten] + authors.each do |_name, num_commits| + bottom = 0 + BINS.each do |top| + @histogram[bottom] += 1 if num_commits >= bottom && num_commits < top + bottom = top + end + end + end + @histogram +end + +def historic_occasional_cask_set + @historic_occasional_cask_set = authors.each.collect do |name, num_commits| + if num_commits > OCCASIONAL_CUTOFF + nil + elsif !casks_by_author.key?(name) + nil + else + casks_by_author[name].to_a + end + end.flatten.compact.to_set +end + +def extant_occasional_cask_count + # avoid double-counting renames by intersecting with extant Casks + historic_occasional_cask_set.intersection(all_casks).count +end + +def historic_nonmaintainer_cask_set + @historic_nonmaintainer_cask_set = authors.each.collect do |name, _num_commits| + if MAINTAINERS.include?(name) + nil + else + casks_by_author[name].to_a + end + end.flatten.compact.to_set +end + +def extant_nonmaintainer_cask_count + # avoid double-counting renames by intersecting with extant Casks + historic_nonmaintainer_cask_set.intersection(all_casks).count +end + +def extant_occasional_cask_percentage + @extant_occasional_cask_percentage ||= (100 * extant_occasional_cask_count / all_casks.count).to_i +end + +def historic_occasional_cask_percentage + @historic_occasional_cask_percentage ||= (100 * historic_occasional_cask_set.count / all_casks.count).to_i +end + +def extant_nonmaintainer_cask_percentage + @extant_nonmaintainer_cask_percentage ||= (100 * extant_nonmaintainer_cask_count / all_casks.count).to_i +end + +def historic_nonmaintainer_cask_percentage + # this is so large, it might cross 100% + @historic_nonmaintainer_cask_percentage ||= [100, (100 * historic_nonmaintainer_cask_set.count / all_casks.count).to_i].min +end + +def onetime_author_percentage + @onetime_author_percentage ||= (100 * + histogram[1] / + authors.length).to_i +end + +def occasional_author_percentage + # why is it so hard to slice a hash? + @occasional_author_percentage ||= (100 * + (1..OCCASIONAL_CUTOFF).to_a.collect { |bin| histogram[bin] }.reduce(:+) / + authors.length).to_i +end + +def graph_width + if @graph_width.nil? + @graph_width = `/bin/stty size 2>/dev/null`.chomp.split(" ").last.to_i + @graph_width = 80 if @graph_width <= 0 + @graph_width -= 20 if @graph_width > 20 + end + @graph_width +end + +def graph_normalization + @graph_normalization ||= histogram.values.max.to_f +end + +def print_header + puts "Commits\tContributors" + puts "---------------------" +end + +def print_table + BINS.each do |bin| + plural = (bin % 10) == 0 ? "'s" : "" + graph = "." * ((histogram[bin] / graph_normalization) * graph_width) + puts "#{bin}#{plural}\t#{histogram[bin]}\t#{graph}" + end +end + +def print_footer + puts %Q{\n#{occasional_author_percentage}% of contributors are "occasional" (with <= #{OCCASIONAL_CUTOFF} commits)} + puts "\n#{onetime_author_percentage}% of contributors commit only once" + puts "\n#{extant_occasional_cask_percentage}% - #{historic_occasional_cask_percentage}% of Casks depend on an occasional contributor" + puts "\n#{extant_nonmaintainer_cask_percentage}% - #{historic_nonmaintainer_cask_percentage}% of Casks depend on a contributor who is not a maintainer" + puts "\n" +end + +def generate_report + print_header + print_table + print_footer +end + +### +### main +### + +cd_to_project_root +generate_report |
