Skip to content

Instantly share code, notes, and snippets.

@jamiew
Created April 27, 2026 17:39
Show Gist options
  • Select an option

  • Save jamiew/a265347b400276b9393f9b76d6a17d7c to your computer and use it in GitHub Desktop.

Select an option

Save jamiew/a265347b400276b9393f9b76d6a17d7c to your computer and use it in GitHub Desktop.
NYC abandoned bike 311 complaint analysis (Socrata API)
#!/usr/bin/env -S uv run --script
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "requests",
# "pandas",
# "matplotlib",
# ]
# ///
"""
NYC abandoned/derelict bike 311 complaints — pull, dedupe, chart.
Generates 3 charts used in the blog post:
- abandoned-bikes-volume-by-year.png
- abandoned-bikes-removal-rate.png
- abandoned-bikes-outcomes.png
Data via NYC Open Data Socrata API. No API key required.
"""
from __future__ import annotations
import sys
from collections import Counter, defaultdict
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import requests
ENDPOINTS = {
"archive": "https://data.cityofnewyork.us/resource/76ig-c548.json", # 2010-2019
"live": "https://data.cityofnewyork.us/resource/erm2-nwe9.json", # 2020-present
}
TYPES = ("Derelict Bicycle", "Abandoned Bike")
REMOVED_PHRASES = (
"The Department of Sanitation removed the items.",
"The Department of Sanitation collected the requested items.",
)
DID_NOT_MEET = "did not meet"
NO_CONDITION = "no condition"
PAGE_SIZE = 50000
OUT_DIR = Path(__file__).resolve().parent.parent / "images" / "blog-posts"
def fetch_all(endpoint: str) -> list[dict]:
types_clause = " OR ".join(f"complaint_type='{t}'" for t in TYPES)
where = f"({types_clause})"
rows: list[dict] = []
offset = 0
while True:
params = {"$where": where, "$limit": PAGE_SIZE, "$offset": offset}
r = requests.get(endpoint, params=params, timeout=60)
r.raise_for_status()
batch = r.json()
if not batch:
break
rows.extend(batch)
offset += len(batch)
print(f" {endpoint.split('/')[-1]}: pulled {len(rows)} rows", file=sys.stderr)
if len(batch) < PAGE_SIZE:
break
return rows
def to_frame() -> pd.DataFrame:
rows: list[dict] = []
for name, url in ENDPOINTS.items():
print(f"fetching {name}...", file=sys.stderr)
rows.extend(fetch_all(url))
df = pd.DataFrame(rows)
df["created_date"] = pd.to_datetime(df["created_date"], errors="coerce")
df["year"] = df["created_date"].dt.year
df["resolution_description"] = df.get("resolution_description", "").fillna("")
return df
def classify(desc: str) -> str:
if any(p in desc for p in REMOVED_PHRASES):
return "removed"
low = desc.lower()
if DID_NOT_MEET in low:
return "did_not_meet"
if NO_CONDITION in low:
return "no_condition"
if not desc:
return "empty"
return "other"
def chart_volume_by_year(df: pd.DataFrame, out: Path) -> None:
counts = df.groupby("year").size().sort_index()
counts = counts[counts.index >= 2010]
years = counts.index.astype(int)
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(years, counts.values, color="#3b82f6")
ax.set_title("NYC abandoned bike complaints by year, 2010-2026")
ax.set_xlabel("Year")
ax.set_ylabel("Complaints")
ax.set_xticks(list(years))
ax.tick_params(axis="x", rotation=45)
ax.grid(axis="y", alpha=0.3)
fig.tight_layout()
fig.savefig(out, dpi=144)
print(f"wrote {out}", file=sys.stderr)
def chart_removal_rate(df: pd.DataFrame, out: Path) -> None:
df = df.copy()
df["bucket"] = df["resolution_description"].map(classify)
by_year = df.groupby("year")["bucket"].value_counts().unstack(fill_value=0)
by_year = by_year[by_year.index >= 2010]
naive = (by_year.get("removed", 0) / by_year.sum(axis=1)).fillna(0) * 100
df["addr"] = df.get("incident_address", "").fillna("") + "|" + df["year"].astype(str)
deduped = df.drop_duplicates(subset=["addr"])
dby = deduped.groupby("year")["bucket"].value_counts().unstack(fill_value=0)
dby = dby[dby.index >= 2010]
dedup_rate = (dby.get("removed", 0) / dby.sum(axis=1)).fillna(0) * 100
fig, ax = plt.subplots(figsize=(10, 5))
years = naive.index.astype(int)
ax.plot(years, naive.values, marker="o", label="Naive removal rate", color="#ef4444")
ax.plot(dedup_rate.index.astype(int), dedup_rate.values, marker="s", label="Deduplicated removal rate", color="#10b981")
ax.set_title("NYC derelict bike complaints: naive vs deduplicated removal rate")
ax.set_xlabel("Year")
ax.set_ylabel("Removal rate (%)")
ax.set_xticks(list(years))
ax.tick_params(axis="x", rotation=45)
ax.legend()
ax.grid(alpha=0.3)
fig.tight_layout()
fig.savefig(out, dpi=144)
print(f"wrote {out}", file=sys.stderr)
def chart_outcomes(df: pd.DataFrame, out: Path) -> None:
df = df.copy()
df["bucket"] = df["resolution_description"].map(classify)
by_year = df.groupby("year")["bucket"].value_counts().unstack(fill_value=0)
by_year = by_year[by_year.index >= 2010]
pct = by_year.div(by_year.sum(axis=1), axis=0) * 100
order = ["did_not_meet", "no_condition", "other", "empty", "removed"]
colors = {
"did_not_meet": "#f97316",
"no_condition": "#3b82f6",
"other": "#a3a3a3",
"empty": "#e5e7eb",
"removed": "#10b981",
}
labels = {
"did_not_meet": "Did not meet criteria",
"no_condition": "No condition found",
"other": "Other",
"empty": "No resolution logged",
"removed": "Removed",
}
fig, ax = plt.subplots(figsize=(10, 5))
years = pct.index.astype(int)
bottom = pd.Series(0.0, index=pct.index)
for key in order:
if key not in pct:
continue
ax.bar(years, pct[key], bottom=bottom, label=labels[key], color=colors[key])
bottom = bottom + pct[key]
ax.set_title("What actually happens to NYC derelict bike complaints")
ax.set_xlabel("Year")
ax.set_ylabel("% of complaints")
ax.set_xticks(list(years))
ax.tick_params(axis="x", rotation=45)
ax.legend(loc="lower left", fontsize=8)
ax.set_ylim(0, 100)
fig.tight_layout()
fig.savefig(out, dpi=144)
print(f"wrote {out}", file=sys.stderr)
def main() -> None:
OUT_DIR.mkdir(parents=True, exist_ok=True)
df = to_frame()
print(f"total rows: {len(df)}", file=sys.stderr)
print(f"resolution buckets: {Counter(df['resolution_description'].map(classify))}", file=sys.stderr)
chart_volume_by_year(df, OUT_DIR / "abandoned-bikes-volume-by-year.png")
chart_removal_rate(df, OUT_DIR / "abandoned-bikes-removal-rate.png")
chart_outcomes(df, OUT_DIR / "abandoned-bikes-outcomes.png")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment