Skip to content

Instantly share code, notes, and snippets.

@patcon
Last active February 1, 2026 20:49
Show Gist options
  • Select an option

  • Save patcon/32fd34df75b18d066e343ae1ea78a134 to your computer and use it in GitHub Desktop.

Select an option

Save patcon/32fd34df75b18d066e343ae1ea78a134 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "psycopg[binary]>=3.1",
# ]
# ///
"""
polis_remove_duplicate_votes.py
Remove duplicate votes for a *single* Polis conversation, keeping only
the MOST RECENT vote per (conversation_id, participant_id, comment_id).
If multiple votes exist for the same participant on the same comment
within a conversation, this script keeps the newest one and removes
all earlier votes.
SAFETY GUARANTEES
-----------------
- Requires an explicit --conversation-id
- Will never operate on the full database
- Supports --dry-run to preview deletions
- Prints help when run with no arguments
Example:
uv run polis_remove_duplicate_votes.py \
--conversation-id 3ntrtcehat \
--dry-run
"""
import argparse
import os
import sys
import psycopg
# ---------------------------------------------------------------------
# Argument parsing
# ---------------------------------------------------------------------
def parse_args(argv: list[str]) -> argparse.Namespace:
parser = argparse.ArgumentParser(
description=(
"Remove duplicate votes for a single Polis conversation,\n"
"keeping only the most recent vote per participant/comment.\n\n"
"Example:\n"
" uv run polis_remove_duplicate_votes.py "
"--conversation-id 3ntrtcehat --dry-run"
),
formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
"--conversation-id",
required=True,
help="Conversation ID to operate on (e.g. 3ntrtcehat)",
)
parser.add_argument(
"--dry-run",
action="store_true",
help="Show what would be deleted, but do not modify the database",
)
return parser.parse_args(argv)
# ---------------------------------------------------------------------
# SQL
# ---------------------------------------------------------------------
# Source: https://github.com/search?q=repo%3Acrownshy%2Fpolis+votes_latest_unique+path%3Aserver%2Fpostgres%2Fmigrations&type=code
# Find all votes that are NOT the most recent within each duplicate set
FIND_DUPLICATE_VOTES_SQL = """
WITH ranked_votes AS (
SELECT
id,
conversation_id,
participant_id,
comment_id,
created_at,
ROW_NUMBER() OVER (
PARTITION BY conversation_id, participant_id, comment_id
ORDER BY created_at DESC, id DESC
) AS rn
FROM votes
WHERE conversation_id = %(conversation_id)s
)
SELECT
id,
participant_id,
comment_id,
created_at
FROM ranked_votes
WHERE rn > 1
ORDER BY participant_id, comment_id, created_at;
"""
# Delete all but the most recent vote per duplicate set
DELETE_DUPLICATE_VOTES_SQL = """
DELETE FROM votes
WHERE id IN (
WITH ranked_votes AS (
SELECT
id,
ROW_NUMBER() OVER (
PARTITION BY conversation_id, participant_id, comment_id
ORDER BY created_at DESC, id DESC
) AS rn
FROM votes
WHERE conversation_id = %(conversation_id)s
)
SELECT id
FROM ranked_votes
WHERE rn > 1
);
"""
# ---------------------------------------------------------------------
# Core logic
# ---------------------------------------------------------------------
def process_conversation(*, conversation_id: str, dry_run: bool) -> None:
db_url = os.environ.get("DATABASE_URL")
if not db_url:
raise RuntimeError("DATABASE_URL environment variable is not set")
print(f"Conversation ID: {conversation_id}")
with psycopg.connect(db_url) as conn:
with conn.cursor() as cur:
cur.execute(
FIND_DUPLICATE_VOTES_SQL,
{"conversation_id": conversation_id},
)
rows = cur.fetchall()
if not rows:
print("No duplicate votes found.")
return
print(
f"Found {len(rows)} duplicate vote rows "
"(older votes that will be removed):"
)
for vote_id, pid, cid, created_at in rows:
print(
f" vote_id={vote_id} "
f"participant_id={pid} "
f"comment_id={cid} "
f"created_at={created_at}"
)
if dry_run:
print("🧪 DRY RUN — no rows deleted")
return
print("Deleting older duplicate votes…")
cur.execute(
DELETE_DUPLICATE_VOTES_SQL,
{"conversation_id": conversation_id},
)
print(f"Deleted {cur.rowcount} rows.")
conn.commit()
print("Done.")
# ---------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------
def main(argv: list[str]) -> None:
# Print help if run with no arguments
if not argv:
parse_args(["--help"])
return
args = parse_args(argv)
process_conversation(
conversation_id=args.conversation_id,
dry_run=args.dry_run,
)
if __name__ == "__main__":
main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment