Created
February 12, 2026 09:40
-
-
Save do-me/21bef0f095ee30fd95e886bfa9c4e962 to your computer and use it in GitHub Desktop.
Batch-delete Parquet files at the root level of a Hugging Face dataset repository (e.g. when they were pushed by accident), leaving everything else intact.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

from huggingface_hub import HfApi, CommitOperationDelete, RepoFile
# Configure your repo details.
repo_id = "user/repo"
# Read the token from the environment rather than hard-coding a secret in the
# script. If HF_TOKEN is unset this is None and HfApi falls back to the locally
# saved login token. Either way the token must have 'write' permissions.
token = os.environ.get("HF_TOKEN")


def is_root_parquet(path: str) -> bool:
    """Return True if *path* names a ``.parquet`` file at the repo root.

    A path containing "/" lives inside a folder and is never matched, so
    parquet files in subdirectories are left alone.
    """
    return "/" not in path and path.endswith(".parquet")


def is_confirmed(answer: str) -> bool:
    """Return True only for an explicit "y" / "yes" answer.

    Surrounding whitespace and letter case are ignored. Anything else,
    including an empty answer, counts as "no" so that deletion is opt-in.
    """
    return answer.strip().lower() in {"y", "yes"}


def main() -> None:
    """Delete root-level parquet files from the dataset repo in one commit.

    Lists the top level of ``repo_id``, prints the matching files, asks for
    confirmation on stdin, and only then creates a single commit that
    removes them. Nothing is changed if no file matches or the user declines.
    """
    api = HfApi(token=token)

    # 1. List files in the repo (non-recursive)
    entries = api.list_repo_tree(repo_id, repo_type="dataset")

    # 2. Keep only entries that are files (not folders), sit at the root,
    #    and end with .parquet
    to_delete = [
        CommitOperationDelete(path_in_repo=entry.path)
        for entry in entries
        if isinstance(entry, RepoFile) and is_root_parquet(entry.path)
    ]

    # 3. Execution logic
    if not to_delete:
        print("No root-level .parquet files found.")
        return

    print(f"Found {len(to_delete)} parquet files to delete from root:")
    for op in to_delete:
        print(f" - {op.path_in_repo}")

    try:
        answer = input("\nConfirm deletion? (y/n): ")
    except EOFError:
        # No interactive stdin (e.g. piped/CI run): never delete without
        # explicit consent.
        answer = ""

    if not is_confirmed(answer):
        print("Aborted.")
        return

    # All deletions go into one commit so the cleanup is atomic and easy
    # to revert.
    api.create_commit(
        repo_id=repo_id,
        operations=to_delete,
        commit_message="Cleanup: remove root-level parquet files",
        repo_type="dataset",
    )
    print("Deletion complete.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment