Created
January 30, 2026 06:10
-
-
Save pythonhacker/327599ca5f0f3598de0cb08dbfab6b0c to your computer and use it in GitHub Desktop.
Benchmark caseless dict vs caseless UserDict subclass
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Benchmark: dict-subclass vs UserDict-based case-insensitive dicts. | |
| Run with: | |
| python bench_caselessdict.py | |
| Requires: Python >= 3.10 | |
| """ | |
| from collections import UserDict | |
| from time import perf_counter | |
| import random | |
| import string | |
class CaselessDict(dict):
    """A dict subclass whose string keys are matched case-insensitively.

    Every key is normalized with ``str.casefold`` before it is stored or
    looked up, so ``d["Key"]``, ``d["KEY"]`` and ``d["key"]`` all address the
    same entry. Only the normalized form of each key is kept.

    The built-in ``dict`` constructor, ``update``, ``get``, ``pop``,
    ``setdefault`` and ``del`` do not go through an overridden
    ``__setitem__`` / ``__getitem__``, so they are overridden here too;
    otherwise they would silently bypass the normalization.
    """

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as ``dict`` and normalize every key."""
        super().__init__()
        self.update(*args, **kwargs)

    def _norm(self, key: str) -> str:
        """Return the case-insensitive canonical form of ``key``."""
        return key.casefold()

    def __setitem__(self, key, value):
        super().__setitem__(self._norm(key), value)

    def __getitem__(self, key):
        return super().__getitem__(self._norm(key))

    def __delitem__(self, key):
        super().__delitem__(self._norm(key))

    def __contains__(self, key):
        return super().__contains__(self._norm(key))

    def get(self, key, default=None):
        """Return the value for ``key`` (any casing), or ``default``."""
        return super().get(self._norm(key), default)

    def pop(self, key, *default):
        """Remove ``key`` (any casing) and return its value.

        Raises ``KeyError`` when the key is absent and no default is given,
        exactly like ``dict.pop``.
        """
        return super().pop(self._norm(key), *default)

    def setdefault(self, key, default=None):
        """Return the value for ``key``, inserting ``default`` if it is absent."""
        return super().setdefault(self._norm(key), default)

    def update(self, *args, **kwargs):
        """Merge a mapping / iterable of pairs / keyword arguments.

        A plain ``dict`` is built first so that argument validation matches
        the built-in; each pair is then routed through ``__setitem__`` so the
        key gets normalized. When two keys differ only by case, the later
        one wins.
        """
        for key, value in dict(*args, **kwargs).items():
            self[key] = value
class CaselessUserDict(UserDict):
    """A ``UserDict`` whose string keys are matched case-insensitively.

    Keys are normalized with ``str.casefold`` before they reach the
    underlying ``self.data`` dict, so lookups succeed regardless of the
    casing used. Only the normalized form of each key is stored.
    """

    def _norm(self, key: str) -> str:
        """Return the case-insensitive canonical form of ``key``."""
        return key.casefold()

    def __setitem__(self, key, value):
        self.data[self._norm(key)] = value

    def __getitem__(self, key):
        return self.data[self._norm(key)]

    def __delitem__(self, key):
        # Without this override ``del d["KEY"]`` (and therefore ``pop``)
        # would look for the un-normalized key in ``self.data`` and fail.
        del self.data[self._norm(key)]

    def __contains__(self, key):
        return self._norm(key) in self.data
def make_keys(n):
    """ Return a list of ``n`` random 8-character keys of mixed-case ASCII letters """
    alphabet = string.ascii_letters
    return [''.join(random.choices(alphabet, k=8)) for _ in range(n)]
def bench_write(mapping_cls, keys):
    """ Time how long it takes to store every key in a fresh mapping.

    A new ``mapping_cls()`` is created outside the timed region; each key is
    then assigned its position in ``keys``. Returns the elapsed seconds as
    measured by ``perf_counter``.
    """
    target = mapping_cls()
    began = perf_counter()
    for position, key in enumerate(keys):
        target[key] = position
    elapsed = perf_counter() - began
    return elapsed
def bench_read(mapping_cls, keys):
    """ Time how long it takes to read every key back from a mapping.

    The mapping is populated (untimed) with each key mapped to its position.
    Lookup keys are then prepared (also untimed), and only the read loop is
    measured. Returns the elapsed seconds as measured by ``perf_counter``.
    """
    table = mapping_cls()
    for position, key in enumerate(keys):
        table[key] = position

    # Alternate the casing of the lookup keys: even positions are lowercased,
    # odd positions are uppercased.
    lookups = []
    for position, key in enumerate(keys):
        if position % 2 == 0:
            lookups.append(key.lower())
        else:
            lookups.append(key.upper())

    began = perf_counter()
    for probe in lookups:
        _ = table[probe]
    elapsed = perf_counter() - began
    return elapsed
def bench_mixed(mapping_cls, keys):
    """ Time interleaved writes and reads on a fresh mapping.

    For each key, its position is stored under the key as given and then
    immediately read back using the case-swapped form of the same key.
    Returns the elapsed seconds as measured by ``perf_counter``.
    """
    table = mapping_cls()
    began = perf_counter()
    for position, key in enumerate(keys):
        table[key] = position
        flipped = key.swapcase()
        _ = table[flipped]
    elapsed = perf_counter() - began
    return elapsed
def run(n=100000):
    """Run the write / read / mixed benchmarks for both implementations.

    Args:
        n: Number of random keys to generate and benchmark with. Defaults to
            100000, the size the script previously hard-coded.

    Prints one result line per implementation; returns nothing.
    """
    keys = make_keys(n)
    print(f"Benchmarking with {n} keys")

    candidates = [
        ("dict subclass", CaselessDict),
        ("UserDict subclass", CaselessUserDict),
    ]
    # Pad every label to the longest one so the result columns line up; a
    # fixed width of 14 was shorter than "UserDict subclass".
    width = max(len(label) for label, _ in candidates)

    for name, cls in candidates:
        w = bench_write(cls, keys)
        r = bench_read(cls, keys)
        m = bench_mixed(cls, keys)
        print(f"{name:{width}s} | write: {w:.4f}s | read: {r:.4f}s | mixed: {m:.4f}s")
# Run the benchmark only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment