Created
January 27, 2026 13:26
-
-
Save andrew/386bccb606387dcd62bfe416959963ac to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| require "net/http" | |
| require "json" | |
| require "uri" | |
| require "digest" | |
| require "fileutils" | |
# Account to analyse and the Mastodon server that hosts it.
USERNAME = "andrewnez"
INSTANCE = "https://mastodon.social"

# API responses are cached on disk in a ".cache" directory next to this
# script; create it up front so later cache writes find it in place.
CACHE_DIR = File.join(__dir__, ".cache")
FileUtils.mkdir_p CACHE_DIR
# Maps a cache key to the file that stores its cached response.
#
# The file name is the SHA-256 hex digest of +key+, so arbitrary keys
# (e.g. full URLs with query strings) become safe, fixed-length names.
#
# @param key [String] cache key
# @return [String] path of the cache file inside CACHE_DIR
def cache_path(key)
  file_name = Digest::SHA256.hexdigest(key)
  File.join(CACHE_DIR, file_name)
end
# Fetches +path+ from INSTANCE and returns the parsed JSON body together with
# the response's Link header.
#
# Successful responses are cached on disk (one file per URL, see cache_path),
# so repeated runs reuse earlier results instead of calling the API again.
# A cache file that cannot be parsed (for example one left half-written by an
# interrupted run) is ignored and the request is made again.
#
# @param path [String] request path including any query string
# @return [Array] two elements: the parsed JSON body and the Link header
#   string (nil when the response had none)
# @raise [RuntimeError] when the server does not answer with a 2xx status
def get(path)
  key = "#{INSTANCE}#{path}"
  cp = cache_path(key)

  cached = read_cached_response(cp)
  return [cached["body"], cached["link"]] if cached

  response = Net::HTTP.get_response(URI(key))
  raise "HTTP #{response.code}: #{response.body}" unless response.is_a?(Net::HTTPSuccess)

  body = JSON.parse(response.body)
  link = response["Link"]
  write_cached_response(cp, { body: body, link: link })
  [body, link]
end

# Loads a cached response hash from +cache_file+; returns nil when the file is
# missing, is not valid JSON, or does not have the expected shape.
def read_cached_response(cache_file)
  return nil unless File.exist?(cache_file)

  cached = JSON.parse(File.read(cache_file))
  cached.is_a?(Hash) && cached.key?("body") ? cached : nil
rescue JSON::ParserError
  nil
end

# Stores +payload+ as JSON at +cache_file+. The data is written to a temporary
# sibling file and renamed into place so an interrupted run never leaves a
# truncated cache entry behind.
def write_cached_response(cache_file, payload)
  tmp_file = "#{cache_file}.#{Process.pid}.tmp"
  File.write(tmp_file, JSON.generate(payload))
  File.rename(tmp_file, cache_file)
end
# Extracts the URL tagged rel="next" from an HTTP Link header.
#
# @param link_header [String, nil] raw Link header value
# @return [String, nil] the next-page URL, or nil when the header is absent
#   or contains no rel="next" entry
def next_page_url(link_header)
  return nil unless link_header

  # String#[] with a capture index yields the group, or nil without a match.
  link_header[/<([^>]+)>;\s*rel="next"/, 1]
end
# Step 1: Look up account ID
# Only the response body is needed here; the Link header that get also
# returns is dropped.
account = get("/api/v1/accounts/lookup?acct=#{USERNAME}").first
account_id = account["id"]
display_name, followers_count = account.values_at("display_name", "followers_count")

puts "#{display_name} (@#{USERNAME}) — #{followers_count} followers"
puts
# Step 2: Paginate through followers

# Converts the HTML of a bio ("note") to plain text.
#
# Line breaks and paragraph ends become newlines before the remaining tags are
# removed; without that, text on either side of a <br> or </p><p> would be
# glued together ("Developer</p><p>Writer" -> "DeveloperWriter").
#
# @param note_html [String, nil] HTML bio
# @return [String, nil] stripped plain text, or nil when there is no bio
def plain_text_bio(note_html)
  return nil unless note_html

  note_html
    .gsub(%r{<br\s*/?>|</p>}i, "\n")
    .gsub(/<[^>]+>/, "")
    .strip
end

# Builds the record kept for one follower from its API account hash.
#
# bio_domains lists the unique hosts of http(s) links found in the bio,
# excluding hosts whose name contains "mastodon" or "fediverse".
#
# @param f [Hash] account hash as returned by the followers endpoint
# @return [Hash] symbol-keyed summary used by the reporting steps
def follower_record(f)
  bio_domains = (f["note"] || "").scan(/href="https?:\/\/([^"\/]+)/).flatten.uniq
  bio_domains.reject! { |d| d.include?("mastodon") || d.include?("fediverse") }

  {
    username: f["acct"],
    display_name: f["display_name"],
    bio: plain_text_bio(f["note"]),
    bio_domains: bio_domains,
    followers_count: f["followers_count"],
    following_count: f["following_count"],
    instance: f["url"]&.then { URI(_1).host },
    created_at: f["created_at"],
    bot: f["bot"]
  }
end

followers = []
path = "/api/v1/accounts/#{account_id}/followers?limit=80"
loop do
  puts "Fetching followers... (#{followers.size} so far)"
  batch, link_header = get(path)
  break if batch.empty?

  followers.concat(batch.map { |f| follower_record(f) })

  next_url = next_page_url(link_header)
  break unless next_url

  # get expects a path relative to INSTANCE, so keep only path + query of the
  # next-page URL.
  parsed = URI(next_url)
  path = "#{parsed.path}?#{parsed.query}"
  sleep 0.1 # brief pause between pages
end
puts "Fetched #{followers.size} followers total"
puts
# Step 3: Summary stats

# Returns the median of +values+, ignoring nils. For an even number of values
# the upper of the two middle elements is returned. Returns nil when there are
# no non-nil values.
#
# The index is derived from the compacted list's own length; using the length
# of the uncompacted list picks the wrong element (or runs off the end) as
# soon as any value is nil.
def median(values)
  sorted = values.compact.sort
  sorted[sorted.size / 2]
end

# [host, count] pairs, most common host first.
instances = followers.map { _1[:instance] }.compact.tally.sort_by { -_1[1] }
bots = followers.count { _1[:bot] }
with_bio = followers.count { _1[:bio] && !_1[:bio].empty? }
median_followers = median(followers.map { _1[:followers_count] }) || "n/a"
median_following = median(followers.map { _1[:following_count] }) || "n/a"

puts "--- Instance distribution (top 20) ---"
instances.first(20).each do |instance, count|
  puts " #{instance}: #{count}"
end
puts
puts "--- Quick stats ---"
puts " Bots: #{bots}"
puts " With bio: #{with_bio}/#{followers.size}"
puts " Median follower count: #{median_followers}"
puts " Median following count: #{median_following}"
puts
# Sorts each count into one of five size buckets and returns how many counts
# landed in each, keyed by bucket label in ascending order. Anything that
# matches none of the bounded ranges is counted under "1000+".
def bucket_counts(counts)
  bounded = {
    "0-50" => 0..50,
    "51-200" => 51..200,
    "201-500" => 201..500,
    "501-1000" => 501..1000
  }
  totals = { "0-50" => 0, "51-200" => 0, "201-500" => 0, "501-1000" => 0, "1000+" => 0 }

  counts.each do |value|
    label, = bounded.find { |_name, range| range === value }
    totals[label || "1000+"] += 1
  end
  totals
end

# Following count distribution (a missing count is treated as 0)
following_buckets = bucket_counts(followers.map { |f| f[:following_count] || 0 })
puts "--- Following count distribution ---"
following_buckets.each do |range, count|
  puts " #{range}: #{count}"
end
puts

# Follower count distribution (a missing count is treated as 0)
follower_buckets = bucket_counts(followers.map { |f| f[:followers_count] || 0 })
puts "--- Follower count distribution ---"
follower_buckets.each do |range, count|
  puts " #{range}: #{count}"
end
puts
# Account creation date cohorts
require "date"

# Followers without a creation timestamp are left out.
signup_dates = followers.filter_map { |f| f[:created_at] && Date.parse(f[:created_at]) }

# Label every date "<year> Q<quarter>" (months 1-3 => Q1, 4-6 => Q2, ...) and
# count followers per label.
cohorts = signup_dates.map { |day| "#{day.year} Q#{(day.month + 2) / 3}" }.tally

puts "--- Account creation cohorts ---"
cohorts.sort.each do |period, count|
  puts " #{period}: #{count}"
end
puts
# Domains from bios
# Count every linked domain across all bios, most frequent first.
domain_counts = followers.flat_map { |f| f[:bio_domains] }.compact.tally
all_domains = domain_counts.sort_by { |_domain, hits| -hits }

# The section is omitted entirely when no bio contained a link.
unless all_domains.empty?
  puts "--- Top domains in bios (top 30) ---"
  all_domains.first(30).each do |domain, hits|
    puts " #{domain}: #{hits}"
  end
  puts
end
# Step 4: Categorize bios using Claude CLI
# One "username: bio" entry per follower that has a non-empty bio.
bios = followers
  .select { _1[:bio] && !_1[:bio].empty? }
  .map { "#{_1[:username]}: #{_1[:bio]}" }

if bios.any?
  puts "--- Categorizing #{bios.size} bios with Claude CLI ---"
  instruction = <<~PROMPT.strip
    Here are bios of Mastodon followers for @#{USERNAME}. Categorize them into groups (e.g. "Software developers", "Designers", "Journalists", "Academics", etc.) and count how many fall into each category. A person can be in multiple categories. Also note any interesting patterns.
  PROMPT

  begin
    # Only the short instruction goes on the command line; the bios are piped
    # through stdin. Passing everything as one argument can exceed the OS
    # limit on argument size once there are many followers.
    # NOTE(review): relies on `claude -p` reading piped stdin as extra
    # content for the prompt — confirm against the installed CLI version.
    result = IO.popen(["claude", "-p", instruction, "--output-format", "text"], "r+") do |cli|
      cli.write(bios.join("\n\n"))
      cli.close_write # signal end of input so the CLI starts answering
      cli.read
    end
    puts result
  rescue Errno::ENOENT
    warn "claude CLI not found on PATH; skipping bio categorization"
  rescue Errno::EPIPE
    warn "claude CLI exited before reading the bios; skipping bio categorization"
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment