Last active
February 1, 2026 20:49
-
-
Save patcon/32fd34df75b18d066e343ae1ea78a134 to your computer and use it in GitHub Desktop.
Source LLM agent convo: https://chatgpt.com/share/697fbbe3-0304-800b-bcf5-fa8955c69132
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "psycopg[binary]>=3.1", | |
| # ] | |
| # /// | |
| """ | |
| polis_remove_duplicate_votes.py | |
| Remove duplicate votes for a *single* Polis conversation, keeping only | |
| the MOST RECENT vote per (conversation_id, participant_id, comment_id). | |
| If multiple votes exist for the same participant on the same comment | |
| within a conversation, this script keeps the newest one and removes | |
| all earlier votes. | |
| SAFETY GUARANTEES | |
| ----------------- | |
| - Requires an explicit --conversation-id | |
| - Will never operate on the full database | |
| - Supports --dry-run to preview deletions | |
| - Prints help when run with no arguments | |
| Example: | |
| uv run polis_remove_duplicate_votes.py \ | |
| --conversation-id 3ntrtcehat \ | |
| --dry-run | |
| """ | |
| import argparse | |
| import os | |
| import sys | |
| import psycopg | |
| # --------------------------------------------------------------------- | |
| # Argument parsing | |
| # --------------------------------------------------------------------- | |
| def parse_args(argv: list[str]) -> argparse.Namespace: | |
| parser = argparse.ArgumentParser( | |
| description=( | |
| "Remove duplicate votes for a single Polis conversation,\n" | |
| "keeping only the most recent vote per participant/comment.\n\n" | |
| "Example:\n" | |
| " uv run polis_remove_duplicate_votes.py " | |
| "--conversation-id 3ntrtcehat --dry-run" | |
| ), | |
| formatter_class=argparse.RawTextHelpFormatter, | |
| ) | |
| parser.add_argument( | |
| "--conversation-id", | |
| required=True, | |
| help="Conversation ID to operate on (e.g. 3ntrtcehat)", | |
| ) | |
| parser.add_argument( | |
| "--dry-run", | |
| action="store_true", | |
| help="Show what would be deleted, but do not modify the database", | |
| ) | |
| return parser.parse_args(argv) | |
| # --------------------------------------------------------------------- | |
| # SQL | |
| # --------------------------------------------------------------------- | |
| # Source: https://github.com/search?q=repo%3Acrownshy%2Fpolis+votes_latest_unique+path%3Aserver%2Fpostgres%2Fmigrations&type=code | |
| # Find all votes that are NOT the most recent within each duplicate set | |
| FIND_DUPLICATE_VOTES_SQL = """ | |
| WITH ranked_votes AS ( | |
| SELECT | |
| id, | |
| conversation_id, | |
| participant_id, | |
| comment_id, | |
| created_at, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY conversation_id, participant_id, comment_id | |
| ORDER BY created_at DESC, id DESC | |
| ) AS rn | |
| FROM votes | |
| WHERE conversation_id = %(conversation_id)s | |
| ) | |
| SELECT | |
| id, | |
| participant_id, | |
| comment_id, | |
| created_at | |
| FROM ranked_votes | |
| WHERE rn > 1 | |
| ORDER BY participant_id, comment_id, created_at; | |
| """ | |
| # Delete all but the most recent vote per duplicate set | |
| DELETE_DUPLICATE_VOTES_SQL = """ | |
| DELETE FROM votes | |
| WHERE id IN ( | |
| WITH ranked_votes AS ( | |
| SELECT | |
| id, | |
| ROW_NUMBER() OVER ( | |
| PARTITION BY conversation_id, participant_id, comment_id | |
| ORDER BY created_at DESC, id DESC | |
| ) AS rn | |
| FROM votes | |
| WHERE conversation_id = %(conversation_id)s | |
| ) | |
| SELECT id | |
| FROM ranked_votes | |
| WHERE rn > 1 | |
| ); | |
| """ | |
| # --------------------------------------------------------------------- | |
| # Core logic | |
| # --------------------------------------------------------------------- | |
| def process_conversation(*, conversation_id: str, dry_run: bool) -> None: | |
| db_url = os.environ.get("DATABASE_URL") | |
| if not db_url: | |
| raise RuntimeError("DATABASE_URL environment variable is not set") | |
| print(f"Conversation ID: {conversation_id}") | |
| with psycopg.connect(db_url) as conn: | |
| with conn.cursor() as cur: | |
| cur.execute( | |
| FIND_DUPLICATE_VOTES_SQL, | |
| {"conversation_id": conversation_id}, | |
| ) | |
| rows = cur.fetchall() | |
| if not rows: | |
| print("No duplicate votes found.") | |
| return | |
| print( | |
| f"Found {len(rows)} duplicate vote rows " | |
| "(older votes that will be removed):" | |
| ) | |
| for vote_id, pid, cid, created_at in rows: | |
| print( | |
| f" vote_id={vote_id} " | |
| f"participant_id={pid} " | |
| f"comment_id={cid} " | |
| f"created_at={created_at}" | |
| ) | |
| if dry_run: | |
| print("🧪 DRY RUN — no rows deleted") | |
| return | |
| print("Deleting older duplicate votes…") | |
| cur.execute( | |
| DELETE_DUPLICATE_VOTES_SQL, | |
| {"conversation_id": conversation_id}, | |
| ) | |
| print(f"Deleted {cur.rowcount} rows.") | |
| conn.commit() | |
| print("Done.") | |
| # --------------------------------------------------------------------- | |
| # Entrypoint | |
| # --------------------------------------------------------------------- | |
| def main(argv: list[str]) -> None: | |
| # Print help if run with no arguments | |
| if not argv: | |
| parse_args(["--help"]) | |
| return | |
| args = parse_args(argv) | |
| process_conversation( | |
| conversation_id=args.conversation_id, | |
| dry_run=args.dry_run, | |
| ) | |
| if __name__ == "__main__": | |
| main(sys.argv[1:]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment