Created
February 11, 2026 08:54
-
-
Save 0xntpower/c1b613d7e63b0b44c0173f10ecd64ec0 to your computer and use it in GitHub Desktop.
A tool to compute the Shannon entropy score of a file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import sys | |
| import math | |
| import argparse | |
| from pathlib import Path | |
| from collections import Counter | |
| from typing import NoReturn | |
| try: | |
| import pefile | |
| except ImportError: | |
| pefile = None | |
| # ── ANSI helpers ───────────────────────────────────────────────────────────── | |
| COLORS = ["\033[31m", "\033[32m", "\033[33m", "\033[34m", "\033[35m", "\033[36m"] | |
| RESET = "\033[0m" | |
| def _colorize(text: str, index: int) -> str: | |
| return f"{COLORS[index % len(COLORS)]}{text}{RESET}" | |
| # ── Core logic ─────────────────────────────────────────────────────────────── | |
def calc_entropy(data: bytes) -> float:
    """Compute the Shannon entropy of *data* in bits per byte.

    Empty input yields 0.0.
    """
    total = len(data)
    if total == 0:
        return 0.0

    # Relative frequency of every byte value that actually occurs in *data*.
    probabilities = (occurrences / total for occurrences in Counter(data).values())
    return -sum(p * math.log2(p) for p in probabilities)
def calc_file_entropy(path: Path) -> None:
    """Read the file at *path* in full and print its overall entropy."""
    score = calc_entropy(path.read_bytes())
    print(f"[*] Entropy of \"{path.name}\" (whole file): {score:.5f}")
def calc_pe_entropy(path: Path) -> None:
    """Parse a PE and print per-section entropy.

    Each section is printed on its own line, with the section name and its
    entropy value coloured by the section's position in the section table.

    Exits through ``_fatal`` when the optional ``pefile`` package is not
    installed. Exceptions raised by ``pefile.PE`` while parsing (for example
    ``pefile.PEFormatError``) propagate to the caller.
    """
    if pefile is None:
        _fatal("The 'pefile' package is required for PE analysis. Install it with: pip install pefile")

    pe = pefile.PE(str(path))
    try:
        print(f"[*] Parsing PE section headers of \"{path.name}\" ...\n")
        for idx, section in enumerate(pe.sections):
            # Section names are NUL-padded byte strings and need not be valid
            # UTF-8, so strip the padding and decode leniently.
            name = section.Name.rstrip(b"\x00").decode(errors="replace")
            entropy = calc_entropy(section.get_data())
            colored_name = _colorize(f"\"{name}\"", idx)
            colored_val = _colorize(f"{entropy:.5f}", idx)
            print(f" >>> {colored_name} entropy: {colored_val}")
    finally:
        # Release the file data held by the PE object even if printing or
        # section access fails part-way through.
        pe.close()
| # ── CLI ────────────────────────────────────────────────────────────────────── | |
| def _fatal(msg: str) -> NoReturn: | |
| print(f"[!] {msg}", file=sys.stderr) | |
| sys.exit(1) | |
def _resolve_file(raw: str) -> Path:
    """Return *raw* as a Path, aborting via ``_fatal`` unless it names an existing file."""
    candidate = Path(raw)
    if candidate.is_file():
        return candidate
    _fatal(f"\"{raw}\" is not a valid file.")
| def _build_parser() -> argparse.ArgumentParser: | |
| parser = argparse.ArgumentParser( | |
| description="Calculate Shannon entropy of files and PE sections.", | |
| ) | |
| parser.add_argument( | |
| "file", | |
| help="target file path", | |
| ) | |
| parser.add_argument( | |
| "-pe", "--pe", | |
| action="store_true", | |
| help="parse PE section headers and display per-section entropy", | |
| ) | |
| return parser | |
def main() -> None:
    """Parse the command line and print the requested entropy report.

    Without ``--pe`` the entropy of the whole file is printed. With ``--pe``
    the file is parsed as a PE and per-section entropy is printed. The process
    exits with status 1 (through ``_fatal``) when the path is not a file, when
    the optional ``pefile`` package is missing, or when the file is not a
    valid PE.
    """
    args = _build_parser().parse_args()
    path = _resolve_file(args.file)

    if not args.pe:
        calc_file_entropy(path)
        return

    # Check for the optional dependency *before* entering the try block.
    # Otherwise the SystemExit raised by _fatal inside calc_pe_entropy would
    # be matched against ``pefile.PEFormatError`` while ``pefile`` is None,
    # and that attribute lookup raises AttributeError, giving the user a
    # traceback instead of a clean exit.
    if pefile is None:
        _fatal("The 'pefile' package is required for PE analysis. Install it with: pip install pefile")

    try:
        calc_pe_entropy(path)
    except pefile.PEFormatError:
        _fatal(f"\"{path.name}\" is not a valid PE file.")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment