Created
December 10, 2025 13:53
-
-
Save vinicius-oa/c71f1843532b5518cb9002795993e1a6 to your computer and use it in GitHub Desktop.
Compare two JSON files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import sys | |
| import hashlib | |
| from typing import Any, List, Tuple | |
def load_json_file(filepath):
    """Parse the file at ``filepath`` as JSON and return the resulting value.

    The file is opened in text mode and decoded as UTF-8. Errors raised by
    ``open`` or ``json.load`` are not handled here and reach the caller.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
def sort_json_recursively(obj):
    """Return a copy of ``obj`` with every dict and list put in a canonical order.

    * dicts are rebuilt with their items in sorted order, values canonicalized
      recursively;
    * lists have each element canonicalized first and are then ordered by the
      JSON text of each element, so lists holding the same elements in a
      different order come out identical;
    * any other value is returned unchanged.
    """
    if isinstance(obj, list):
        canonical_items = [sort_json_recursively(element) for element in obj]

        def as_json_text(element):
            # Ordering by serialized text lets dicts and mixed types be
            # compared, which plain ``<`` cannot do.
            return json.dumps(element, sort_keys=True)

        try:
            canonical_items = sorted(canonical_items, key=as_json_text)
        except (TypeError, KeyError):
            # Best effort: when the elements cannot be serialized, keep the
            # list in its current order.
            pass
        return canonical_items

    if isinstance(obj, dict):
        return {
            name: sort_json_recursively(value)
            for name, value in sorted(obj.items())
        }

    return obj
def normalize_json(data):
    """Return ``data`` as a compact JSON string in canonical order.

    The value is first passed through ``sort_json_recursively`` and then
    serialized with sorted keys and no whitespace around the separators.
    """
    canonical = sort_json_recursively(data)
    compact_separators = (',', ':')
    return json.dumps(canonical, separators=compact_separators, sort_keys=True)
def hash_json(data):
    """Return the SHA-256 hex digest of the normalized JSON text of ``data``.

    The text comes from ``normalize_json`` and is encoded with the default
    ``str.encode`` encoding before hashing.
    """
    canonical_bytes = normalize_json(data).encode()
    hasher = hashlib.sha256()
    hasher.update(canonical_bytes)
    return hasher.hexdigest()
def find_differences(obj1, obj2, path="root"):
    """Collect the differences between two JSON-like values.

    Args:
        obj1: Value from the first document.
        obj2: Value from the second document.
        path: Location label for the values being compared. Dict keys are
            appended as ``.key`` and list positions as ``[index]``.

    Returns:
        A list of dicts. Every dict has ``'path'`` and ``'type'``; ``'type'``
        is one of ``value_diff``, ``type_diff``, ``missing_in_first``,
        ``missing_in_second``, ``list_length_diff``, ``extra_in_first`` or
        ``extra_in_second``, and the remaining keys carry the values, type
        names or lengths involved. The list is empty when nothing differs.
    """
    # None on both sides is a match; None on one side is a plain value
    # difference (checked before the type comparison below).
    if obj1 is None and obj2 is None:
        return []
    if obj1 is None or obj2 is None:
        return [{
            'path': path,
            'type': 'value_diff',
            'value1': obj1,
            'value2': obj2
        }]

    # Exact type comparison: bool vs int and int vs float count as different.
    if type(obj1) != type(obj2):
        return [{
            'path': path,
            'type': 'type_diff',
            'type1': type(obj1).__name__,
            'type2': type(obj2).__name__,
            'value1': obj1,
            'value2': obj2
        }]

    found = []

    if isinstance(obj1, dict):
        for key in sorted(set(obj1).union(obj2)):
            child_path = f"{path}.{key}"
            in_first = key in obj1
            in_second = key in obj2
            if in_first and in_second:
                found += find_differences(obj1[key], obj2[key], child_path)
            elif in_second:
                found.append({
                    'path': child_path,
                    'type': 'missing_in_first',
                    'value2': obj2[key]
                })
            else:
                found.append({
                    'path': child_path,
                    'type': 'missing_in_second',
                    'value1': obj1[key]
                })
        return found

    if isinstance(obj1, list):
        count1, count2 = len(obj1), len(obj2)
        if count1 != count2:
            found.append({
                'path': path,
                'type': 'list_length_diff',
                'length1': count1,
                'length2': count2
            })
        # Positions present in both lists are compared pairwise.
        for index, (left, right) in enumerate(zip(obj1, obj2)):
            found += find_differences(left, right, f"{path}[{index}]")
        # At most one of the two ranges below is non-empty.
        for index in range(count2, count1):
            found.append({
                'path': f"{path}[{index}]",
                'type': 'extra_in_first',
                'value1': obj1[index]
            })
        for index in range(count1, count2):
            found.append({
                'path': f"{path}[{index}]",
                'type': 'extra_in_second',
                'value2': obj2[index]
            })
        return found

    # Same-typed scalars.
    if obj1 != obj2:
        found.append({
            'path': path,
            'type': 'value_diff',
            'value1': obj1,
            'value2': obj2
        })
    return found
def format_value(value, max_length=100):
    """Render ``value`` as a single display string, truncated if too long.

    Every value is rendered as JSON text, so scalars are spelled the same way
    whether they appear on their own or nested inside a dict or list
    (``None`` -> ``null``, ``True`` -> ``true``, strings in double quotes).
    This keeps, for example, ``None`` and the string ``"None"`` visually
    distinct in a diff report.

    Args:
        value: The value to display.
        max_length: Maximum number of characters kept before ``"..."`` is
            appended.

    Returns:
        The rendered text, cut to ``max_length`` characters plus ``"..."``
        when it is longer than ``max_length``.
    """
    try:
        text = json.dumps(value, ensure_ascii=False)
    except (TypeError, ValueError):
        # Not JSON-serializable (e.g. bytes, sets): fall back to str() so
        # formatting never aborts the report.
        text = str(value)

    if len(text) > max_length:
        return text[:max_length] + "..."
    return text
def print_differences(differences, file1_name, file2_name):
    """Print a numbered, human-readable report of ``differences`` to stdout.

    Args:
        differences: Difference dicts, each with ``'path'`` and ``'type'``
            plus the keys that belong to that type (``value1``/``value2``,
            ``type1``/``type2``, ``length1``/``length2``).
        file1_name: Label used for the first document.
        file2_name: Label used for the second document.

    Nothing is printed when ``differences`` is empty. An entry whose type is
    not recognised still gets its number and location printed.
    """
    if not differences:
        return

    def show_value_diff(entry):
        print(" Type: Value differs")
        print(f" {file1_name}: {format_value(entry['value1'])}")
        print(f" {file2_name}: {format_value(entry['value2'])}")

    def show_type_diff(entry):
        print(" Type: Data type differs")
        print(f" {file1_name}: {entry['type1']} = {format_value(entry['value1'])}")
        print(f" {file2_name}: {entry['type2']} = {format_value(entry['value2'])}")

    def show_missing_in_first(entry):
        print(f" Type: Key/element missing in {file1_name}")
        print(f" {file2_name}: {format_value(entry['value2'])}")

    def show_missing_in_second(entry):
        print(f" Type: Key/element missing in {file2_name}")
        print(f" {file1_name}: {format_value(entry['value1'])}")

    def show_list_length_diff(entry):
        print(" Type: List length differs")
        print(f" {file1_name}: {entry['length1']} elements")
        print(f" {file2_name}: {entry['length2']} elements")

    def show_extra_in_first(entry):
        print(f" Type: Extra element in {file1_name}")
        print(f" Value: {format_value(entry['value1'])}")

    def show_extra_in_second(entry):
        print(f" Type: Extra element in {file2_name}")
        print(f" Value: {format_value(entry['value2'])}")

    # One printer per difference type; each prints its lines in order.
    printers = {
        'value_diff': show_value_diff,
        'type_diff': show_type_diff,
        'missing_in_first': show_missing_in_first,
        'missing_in_second': show_missing_in_second,
        'list_length_diff': show_list_length_diff,
        'extra_in_first': show_extra_in_first,
        'extra_in_second': show_extra_in_second,
    }

    rule = '=' * 80
    print(f"\n{rule}")
    print(f"Found {len(differences)} difference(s):")
    print(f"{rule}\n")

    for number, entry in enumerate(differences, start=1):
        print(f"Difference #{number}:")
        print(f" Location: {entry['path']}")
        printer = printers.get(entry['type'])
        if printer is not None:
            printer(entry)
        print()
def compare_json_files(file1, file2, save_normalized=False, show_diff=True):
    """Compare two JSON files after normalization and report the result.

    Both files are loaded, hashed via ``hash_json`` and the hashes compared.
    Progress, hashes and the verdict are printed to stdout.

    Args:
        file1: Path of the first JSON file.
        file2: Path of the second JSON file.
        save_normalized: When true, write the normalized form of each input
            next to it as ``<path>.normalized.json``.
        show_diff: When true and the files differ, print a detailed list of
            differences computed on the normalized data.

    Returns:
        ``True`` when the hashes match, ``False`` otherwise.

    Errors raised while opening or parsing the files are not handled here
    and propagate to the caller.
    """
    print(f"Loading {file1}...")
    data1 = load_json_file(file1)
    print(f"Loading {file2}...")
    data2 = load_json_file(file2)

    # Normalized copies are built at most once and shared between the
    # "save" step and the "diff" step.
    normalized = None

    if save_normalized:
        print("\nSaving normalized versions...")
        normalized = (sort_json_recursively(data1), sort_json_recursively(data2))
        for source_path, content in zip((file1, file2), normalized):
            with open(f"{source_path}.normalized.json", 'w', encoding='utf-8') as f:
                json.dump(content, f, indent=2, sort_keys=True, ensure_ascii=False)
        print(f"Saved: {file1}.normalized.json")
        print(f"Saved: {file2}.normalized.json")
        print("\n💡 Tip: You can diff these normalized files with a tool like 'diff' or 'meld':")
        print(f" diff {file1}.normalized.json {file2}.normalized.json")

    # Calculate hashes
    hash1 = hash_json(data1)
    hash2 = hash_json(data2)
    print(f"\nFile 1 hash: {hash1}")
    print(f"File 2 hash: {hash2}")

    # Compare
    if hash1 == hash2:
        print("\n✅ JSON files are identical (after normalization)!")
        return True

    print("\n❌ JSON files differ!")
    if show_diff:
        print("\n🔍 Analyzing differences (on normalized/sorted data)...")
        if normalized is None:
            normalized = (sort_json_recursively(data1), sort_json_recursively(data2))
        differences = find_differences(normalized[0], normalized[1])
        if differences:
            print_differences(differences, file1, file2)
        else:
            # Differing hashes with no reported difference means some values
            # compare equal in Python yet serialize to different JSON text
            # (0.0 and -0.0 are one such pair). It is not a hash collision:
            # a collision would make the hashes equal.
            print("⚠️ No structural differences found after normalization.")
            print(" (Hashes differ because some values compare equal but serialize differently, e.g. 0.0 vs -0.0)")
    return False
if __name__ == "__main__":
    # Command-line entry point:
    #   python compare_json.py <file1.json> <file2.json> [--save-normalized] [--no-diff]
    # The two flags may appear anywhere on the command line; every other
    # argument is positional and the first two are the files to compare.
    known_flags = {"--save-normalized", "--no-diff"}
    positional = [arg for arg in sys.argv[1:] if arg not in known_flags]

    if len(positional) < 2:
        print("Usage: python compare_json.py <file1.json> <file2.json> [--save-normalized] [--no-diff]")
        print("\nOptions:")
        print(" --save-normalized Save normalized versions of both files")
        print(" --no-diff Don't show detailed differences")
        sys.exit(1)

    file1 = positional[0]
    file2 = positional[1]
    save_normalized = "--save-normalized" in sys.argv
    show_diff = "--no-diff" not in sys.argv

    try:
        compare_json_files(file1, file2, save_normalized, show_diff)
    except json.JSONDecodeError as e:
        print(f"❌ Error: Invalid JSON - {e}")
        sys.exit(1)
    except OSError as e:
        # Covers FileNotFoundError as before, plus other file-access
        # failures such as PermissionError and IsADirectoryError.
        print(f"❌ Error: {e}")
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment