Skip to content

Instantly share code, notes, and snippets.

@0xatm
Created August 22, 2025 23:59
Show Gist options
  • Select an option

  • Save 0xatm/d56c936777101c6b7c5a0a23a40621c4 to your computer and use it in GitHub Desktop.

Select an option

Save 0xatm/d56c936777101c6b7c5a0a23a40621c4 to your computer and use it in GitHub Desktop.
Defining per-field evaluation metadata in spec models that are matched against an entity model's fields (unfortunately, this breaks static type checking)
from typing import Optional, Dict, Any, List
from typing_extensions import Self
from pydantic import (
BaseModel,
ConfigDict,
Field,
model_validator,
)
# Existing evaluation config models
class ListMatchConfig(BaseModel):
    """Settings that tune how list-valued fields are matched.

    Every option has a default, so ``ListMatchConfig()`` is a valid,
    fully populated configuration.
    """

    # NOTE(review): presumably the minimum similarity two items need in order
    # to be paired -- confirm against the comparator that reads this value.
    min_pair_similarity: float = 1.0

    # NOTE(review): presumably lets items match regardless of position -- confirm.
    allow_reorder: bool = True

    # NOTE(review): presumably drops duplicate items before matching -- confirm.
    dedupe: bool = True
class FieldEvalMetadata(BaseModel):
    """Evaluation settings attached to a single field of an entity.

    All attributes are optional at construction time; the defaults are
    an ``"exact"`` comparator with weight and threshold both ``1.0``,
    and no list-matching configuration or normalizer.
    """

    # Name of the comparison strategy; this file uses "exact", "fuzzy",
    # "numeric" and "list" in its example.
    comparator: str = "exact"

    # NOTE(review): presumably the field's relative weight in an aggregate
    # score -- confirm against the scoring code.
    weight: float = 1.0

    # NOTE(review): presumably the minimum score counted as a match -- confirm.
    threshold: float = 1.0

    # Extra list-matching options; None when not provided.
    list_match: Optional[ListMatchConfig] = None

    # Name of a normalizer; None when not provided.
    normalizer: Optional[str] = None
# Provide evaluation spec
# Dynamically validate that provided metadata matches entity fields
class EvaluationMetadataSpec(BaseModel):
    """Base class for evaluation metadata specifications.

    A spec names the entity model it describes via ``mapped_to`` and
    carries one ``FieldEvalMetadata`` per entity field. Because the model
    is configured with ``extra="allow"``, per-field metadata can be passed
    as undeclared keyword arguments; pydantic keeps those in
    ``model_extra``. Subclasses may instead declare the metadata entries
    as regular fields.

    Two "after" validators enforce the contract:

    * every extra value is a ``FieldEvalMetadata`` instance, and
    * the set of metadata names equals the set of field names of
      ``mapped_to`` (nothing missing, nothing unknown).

    Both validators signal failure with ``ValueError``.
    """

    model_config = ConfigDict(extra="allow")

    # The entity model class whose fields this spec must cover one-to-one.
    mapped_to: type[BaseModel]

    @model_validator(mode="after")
    def assert_same_extra_field_types(self, exclude="mapped_to") -> Self:
        """Ensure every extra value is a ``FieldEvalMetadata`` instance.

        Args:
            exclude: Name of an extra key to skip during the check.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If any non-excluded extra value is not a
                ``FieldEvalMetadata``.
        """
        # model_extra may be None or empty; an empty mapping passes trivially.
        extras: Dict[str, Any] = self.model_extra or {}
        has_wrong_type = any(
            key != exclude and not isinstance(value, FieldEvalMetadata)
            for key, value in extras.items()
        )
        if has_wrong_type:
            raise ValueError(
                "All fields in metadata spec must be of type FieldEvalMetadata."
            )
        return self

    @model_validator(mode="after")
    def assert_all_extra_fields_match(self, exclude="corresponding_entity") -> Self:
        """Ensure metadata names and entity field names are identical sets.

        Metadata names are the extra keys plus any fields declared on the
        spec class itself (so subclasses that declare their entries
        explicitly are also covered); ``mapped_to`` is never counted as
        metadata. The check also runs when no extras were supplied, so a
        spec that omits metadata for an entity with fields is rejected.

        Args:
            exclude: Additional metadata name to ignore.
                NOTE(review): the default "corresponding_entity" looks
                like a leftover from an earlier name of ``mapped_to`` --
                confirm whether it is still needed.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: If the spec names a field the entity lacks, or the
                entity has a field the spec does not describe.
        """
        extras: Dict[str, Any] = self.model_extra or {}

        # Always skip the declared ``mapped_to`` field. Only honour
        # ``exclude`` when it is a string: pydantic invokes this validator
        # itself. NOTE(review): confirm the pinned pydantic version never
        # binds a non-string (e.g. validation info) to this parameter.
        skipped = {"mapped_to"}
        if isinstance(exclude, str):
            skipped.add(exclude)

        declared = set(type(self).model_fields)
        metadata_fields = (set(extras) | declared) - skipped
        entity_fields = set(self.mapped_to.model_fields)

        missing_in_entity = metadata_fields - entity_fields
        if missing_in_entity:
            raise ValueError(
                f"Fields {missing_in_entity} in metadata spec do not exist in entity."
            )

        missing_in_metadata = entity_fields - metadata_fields
        if missing_in_metadata:
            raise ValueError(
                f"Fields {missing_in_metadata} in entity do not exist in metadata spec."
            )
        return self
# Example usage
class PricedListing(BaseModel):
    """Example entity model used to demonstrate an evaluation spec.

    All four fields are required; none has a default value.
    """

    name: str = Field(description="Name of the sample entity")
    description: str = Field(description="Description of the sample entity")
    price: float = Field(description="Price of the sample entity")
    features: List[str] = Field(
        description="List of features of the sample entity",
    )
# Per-field evaluation settings, keyed by the PricedListing field they describe.
_priced_listing_field_metadata = {
    "name": FieldEvalMetadata(comparator="exact", weight=1.0, threshold=1.0),
    "description": FieldEvalMetadata(
        comparator="fuzzy", weight=0.5, threshold=0.8
    ),
    "price": FieldEvalMetadata(comparator="numeric", weight=1.0, threshold=0.95),
    "features": FieldEvalMetadata(
        comparator="list",
        weight=0.7,
        threshold=0.9,
        list_match=ListMatchConfig(
            min_pair_similarity=0.8,
            allow_reorder=True,
            dedupe=True,
        ),
    ),
}

priced_listing_evaluation_spec = EvaluationMetadataSpec(
    mapped_to=PricedListing,
    # The per-field entries are not explicitly declared parameters of the
    # model, which is why static type checking fails on them.
    **_priced_listing_field_metadata,
)

# Access metadata for a specific field
name_metadata = priced_listing_evaluation_spec.name
print(name_metadata.model_dump_json(indent=2))
# {
# "comparator": "exact",
# "weight": 1.0,
# "threshold": 1.0,
# "list_match": null,
# "normalizer": null
# }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment