Created
April 27, 2026 17:39
-
-
Save jamiew/a265347b400276b9393f9b76d6a17d7c to your computer and use it in GitHub Desktop.
NYC abandoned bike 311 complaint analysis (Socrata API)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --script | |
| # /// script | |
| # requires-python = ">=3.11" | |
| # dependencies = [ | |
| # "requests", | |
| # "pandas", | |
| # "matplotlib", | |
| # ] | |
| # /// | |
| """ | |
| NYC abandoned/derelict bike 311 complaints — pull, dedupe, chart. | |
| Generates 3 charts used in the blog post: | |
| - abandoned-bikes-volume-by-year.png | |
| - abandoned-bikes-removal-rate.png | |
| - abandoned-bikes-outcomes.png | |
| Data via NYC Open Data Socrata API. No API key required. | |
| """ | |
| from __future__ import annotations | |
| import sys | |
| from collections import Counter, defaultdict | |
| from pathlib import Path | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| import requests | |
| ENDPOINTS = { | |
| "archive": "https://data.cityofnewyork.us/resource/76ig-c548.json", # 2010-2019 | |
| "live": "https://data.cityofnewyork.us/resource/erm2-nwe9.json", # 2020-present | |
| } | |
| TYPES = ("Derelict Bicycle", "Abandoned Bike") | |
| REMOVED_PHRASES = ( | |
| "The Department of Sanitation removed the items.", | |
| "The Department of Sanitation collected the requested items.", | |
| ) | |
| DID_NOT_MEET = "did not meet" | |
| NO_CONDITION = "no condition" | |
| PAGE_SIZE = 50000 | |
| OUT_DIR = Path(__file__).resolve().parent.parent / "images" / "blog-posts" | |
| def fetch_all(endpoint: str) -> list[dict]: | |
| types_clause = " OR ".join(f"complaint_type='{t}'" for t in TYPES) | |
| where = f"({types_clause})" | |
| rows: list[dict] = [] | |
| offset = 0 | |
| while True: | |
| params = {"$where": where, "$limit": PAGE_SIZE, "$offset": offset} | |
| r = requests.get(endpoint, params=params, timeout=60) | |
| r.raise_for_status() | |
| batch = r.json() | |
| if not batch: | |
| break | |
| rows.extend(batch) | |
| offset += len(batch) | |
| print(f" {endpoint.split('/')[-1]}: pulled {len(rows)} rows", file=sys.stderr) | |
| if len(batch) < PAGE_SIZE: | |
| break | |
| return rows | |
| def to_frame() -> pd.DataFrame: | |
| rows: list[dict] = [] | |
| for name, url in ENDPOINTS.items(): | |
| print(f"fetching {name}...", file=sys.stderr) | |
| rows.extend(fetch_all(url)) | |
| df = pd.DataFrame(rows) | |
| df["created_date"] = pd.to_datetime(df["created_date"], errors="coerce") | |
| df["year"] = df["created_date"].dt.year | |
| df["resolution_description"] = df.get("resolution_description", "").fillna("") | |
| return df | |
| def classify(desc: str) -> str: | |
| if any(p in desc for p in REMOVED_PHRASES): | |
| return "removed" | |
| low = desc.lower() | |
| if DID_NOT_MEET in low: | |
| return "did_not_meet" | |
| if NO_CONDITION in low: | |
| return "no_condition" | |
| if not desc: | |
| return "empty" | |
| return "other" | |
| def chart_volume_by_year(df: pd.DataFrame, out: Path) -> None: | |
| counts = df.groupby("year").size().sort_index() | |
| counts = counts[counts.index >= 2010] | |
| years = counts.index.astype(int) | |
| fig, ax = plt.subplots(figsize=(10, 5)) | |
| ax.bar(years, counts.values, color="#3b82f6") | |
| ax.set_title("NYC abandoned bike complaints by year, 2010-2026") | |
| ax.set_xlabel("Year") | |
| ax.set_ylabel("Complaints") | |
| ax.set_xticks(list(years)) | |
| ax.tick_params(axis="x", rotation=45) | |
| ax.grid(axis="y", alpha=0.3) | |
| fig.tight_layout() | |
| fig.savefig(out, dpi=144) | |
| print(f"wrote {out}", file=sys.stderr) | |
| def chart_removal_rate(df: pd.DataFrame, out: Path) -> None: | |
| df = df.copy() | |
| df["bucket"] = df["resolution_description"].map(classify) | |
| by_year = df.groupby("year")["bucket"].value_counts().unstack(fill_value=0) | |
| by_year = by_year[by_year.index >= 2010] | |
| naive = (by_year.get("removed", 0) / by_year.sum(axis=1)).fillna(0) * 100 | |
| df["addr"] = df.get("incident_address", "").fillna("") + "|" + df["year"].astype(str) | |
| deduped = df.drop_duplicates(subset=["addr"]) | |
| dby = deduped.groupby("year")["bucket"].value_counts().unstack(fill_value=0) | |
| dby = dby[dby.index >= 2010] | |
| dedup_rate = (dby.get("removed", 0) / dby.sum(axis=1)).fillna(0) * 100 | |
| fig, ax = plt.subplots(figsize=(10, 5)) | |
| years = naive.index.astype(int) | |
| ax.plot(years, naive.values, marker="o", label="Naive removal rate", color="#ef4444") | |
| ax.plot(dedup_rate.index.astype(int), dedup_rate.values, marker="s", label="Deduplicated removal rate", color="#10b981") | |
| ax.set_title("NYC derelict bike complaints: naive vs deduplicated removal rate") | |
| ax.set_xlabel("Year") | |
| ax.set_ylabel("Removal rate (%)") | |
| ax.set_xticks(list(years)) | |
| ax.tick_params(axis="x", rotation=45) | |
| ax.legend() | |
| ax.grid(alpha=0.3) | |
| fig.tight_layout() | |
| fig.savefig(out, dpi=144) | |
| print(f"wrote {out}", file=sys.stderr) | |
| def chart_outcomes(df: pd.DataFrame, out: Path) -> None: | |
| df = df.copy() | |
| df["bucket"] = df["resolution_description"].map(classify) | |
| by_year = df.groupby("year")["bucket"].value_counts().unstack(fill_value=0) | |
| by_year = by_year[by_year.index >= 2010] | |
| pct = by_year.div(by_year.sum(axis=1), axis=0) * 100 | |
| order = ["did_not_meet", "no_condition", "other", "empty", "removed"] | |
| colors = { | |
| "did_not_meet": "#f97316", | |
| "no_condition": "#3b82f6", | |
| "other": "#a3a3a3", | |
| "empty": "#e5e7eb", | |
| "removed": "#10b981", | |
| } | |
| labels = { | |
| "did_not_meet": "Did not meet criteria", | |
| "no_condition": "No condition found", | |
| "other": "Other", | |
| "empty": "No resolution logged", | |
| "removed": "Removed", | |
| } | |
| fig, ax = plt.subplots(figsize=(10, 5)) | |
| years = pct.index.astype(int) | |
| bottom = pd.Series(0.0, index=pct.index) | |
| for key in order: | |
| if key not in pct: | |
| continue | |
| ax.bar(years, pct[key], bottom=bottom, label=labels[key], color=colors[key]) | |
| bottom = bottom + pct[key] | |
| ax.set_title("What actually happens to NYC derelict bike complaints") | |
| ax.set_xlabel("Year") | |
| ax.set_ylabel("% of complaints") | |
| ax.set_xticks(list(years)) | |
| ax.tick_params(axis="x", rotation=45) | |
| ax.legend(loc="lower left", fontsize=8) | |
| ax.set_ylim(0, 100) | |
| fig.tight_layout() | |
| fig.savefig(out, dpi=144) | |
| print(f"wrote {out}", file=sys.stderr) | |
| def main() -> None: | |
| OUT_DIR.mkdir(parents=True, exist_ok=True) | |
| df = to_frame() | |
| print(f"total rows: {len(df)}", file=sys.stderr) | |
| print(f"resolution buckets: {Counter(df['resolution_description'].map(classify))}", file=sys.stderr) | |
| chart_volume_by_year(df, OUT_DIR / "abandoned-bikes-volume-by-year.png") | |
| chart_removal_rate(df, OUT_DIR / "abandoned-bikes-removal-rate.png") | |
| chart_outcomes(df, OUT_DIR / "abandoned-bikes-outcomes.png") | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment