Feature Spec¶
Feature specs act as source of truth for all metadata related to features: their dependencies, fields, code versions, and so on.
FeatureSpec
pydantic-model
¶
FeatureSpec(*, key: str, id_columns: IDColumns, deps: list[FeatureDep] | None = None, fields: Sequence[str | FieldSpec] | None = None, metadata: Mapping[str, JsonValue] | None = None, **kwargs: Any)
Bases: FrozenBaseModel
Show JSON schema:
{
"$defs": {
"AllFieldsMapping": {
"description": "Field mapping that explicitly depends on all upstream features and all their fields.\n\nExamples:\n >>> # Explicitly depend on all upstream fields\n >>> AllFieldsMapping()\n\n >>> # Or use the classmethod\n >>> FieldsMapping.all()",
"properties": {
"type": {
"const": "all",
"default": "all",
"title": "Type",
"type": "string"
}
},
"title": "AllFieldsMapping",
"type": "object"
},
"DefaultFieldsMapping": {
"description": "Default automatic field mapping configuration.\n\nWhen used, automatically maps fields to matching upstream fields based on field keys.\n\nAttributes:\n type: Always \"default\" for discriminated union serialization\n match_suffix: If True, allows suffix matching (e.g., \"french\" matches \"audio/french\")\n exclude_fields: List of field keys to exclude from auto-mapping\n\nExamples:\n >>> # Exact match only (default)\n >>> DefaultFieldsMapping()\n\n >>> # Enable suffix matching\n >>> DefaultFieldsMapping(match_suffix=True)\n\n >>> # Exclude specific fields from being auto-mapped\n >>> DefaultFieldsMapping(exclude_fields=[FieldKey([\"metadata\"])])",
"properties": {
"type": {
"const": "default",
"default": "default",
"title": "Type",
"type": "string"
},
"match_suffix": {
"default": false,
"title": "Match Suffix",
"type": "boolean"
},
"exclude_fields": {
"items": {
"$ref": "#/$defs/FieldKey"
},
"title": "Exclude Fields",
"type": "array"
}
},
"title": "DefaultFieldsMapping",
"type": "object"
},
"FeatureDep": {
"description": "Feature dependency specification with optional column selection and renaming.\n\nAttributes:\n key: The feature key to depend on. Accepts string (\"a/b/c\"), list ([\"a\", \"b\", \"c\"]),\n or FeatureKey instance.\n columns: Optional tuple of column names to select from upstream feature.\n - None (default): Keep all columns from upstream\n - Empty tuple (): Keep only system columns (sample_uid, provenance_by_field, etc.)\n - Tuple of names: Keep only specified columns (plus system columns)\n rename: Optional mapping of old column names to new names.\n Applied after column selection.\n fields_mapping: Optional field mapping configuration for automatic field dependency resolution.\n When provided, fields without explicit deps will automatically map to matching upstream fields.\n Defaults to using `[FieldsMapping.default()][metaxy.models.fields_mapping.DefaultFieldsMapping]`.\n\nExamples:\n ```py\n # Keep all columns with default field mapping\n FeatureDep(feature=\"upstream\")\n\n # Keep all columns with suffix matching\n FeatureDep(feature=\"upstream\", fields_mapping=FieldsMapping.default(match_suffix=True))\n\n # Keep all columns with all fields mapping\n FeatureDep(feature=\"upstream\", fields_mapping=FieldsMapping.all())\n\n # Keep only specific columns\n FeatureDep(\n feature=\"upstream/feature\",\n columns=(\"col1\", \"col2\")\n )\n\n # Rename columns to avoid conflicts\n FeatureDep(\n feature=\"upstream/feature\",\n rename={\"old_name\": \"new_name\"}\n )\n\n # Select and rename\n FeatureDep(\n feature=\"upstream/feature\",\n columns=(\"col1\", \"col2\"),\n rename={\"col1\": \"upstream_col1\"}\n )\n ```",
"properties": {
"feature": {
"$ref": "#/$defs/FeatureKey"
},
"columns": {
"anyOf": [
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Columns"
},
"rename": {
"anyOf": [
{
"additionalProperties": {
"type": "string"
},
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Rename"
},
"fields_mapping": {
"$ref": "#/$defs/FieldsMapping"
}
},
"required": [
"feature",
"fields_mapping"
],
"title": "FeatureDep",
"type": "object"
},
"FeatureKey": {
"description": "Feature key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExamples:\n ```py\n FeatureKey(\"a/b/c\") # String format\n # FeatureKey(parts=['a', 'b', 'c'])\n\n FeatureKey([\"a\", \"b\", \"c\"]) # List format\n # FeatureKey(parts=['a', 'b', 'c'])\n\n FeatureKey(FeatureKey([\"a\", \"b\", \"c\"])) # FeatureKey copy\n # FeatureKey(parts=['a', 'b', 'c'])\n\n FeatureKey(\"a\", \"b\", \"c\") # Variadic format\n # FeatureKey(parts=['a', 'b', 'c'])\n ```",
"properties": {
"parts": {
"items": {
"type": "string"
},
"title": "Parts",
"type": "array"
}
},
"required": [
"parts"
],
"title": "FeatureKey",
"type": "object"
},
"FieldDep": {
"properties": {
"feature": {
"$ref": "#/$defs/FeatureKey"
},
"fields": {
"anyOf": [
{
"items": {
"$ref": "#/$defs/FieldKey"
},
"type": "array"
},
{
"const": "__METAXY_ALL_DEP__",
"type": "string"
}
],
"default": "__METAXY_ALL_DEP__",
"title": "Fields"
}
},
"required": [
"feature"
],
"title": "FieldDep",
"type": "object"
},
"FieldKey": {
"description": "Field key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExamples:\n ```py\n FieldKey(\"a/b/c\") # String format\n # FieldKey(parts=['a', 'b', 'c'])\n\n FieldKey([\"a\", \"b\", \"c\"]) # List format\n # FieldKey(parts=['a', 'b', 'c'])\n\n FieldKey(FieldKey([\"a\", \"b\", \"c\"])) # FieldKey copy\n # FieldKey(parts=['a', 'b', 'c'])\n\n FieldKey(\"a\", \"b\", \"c\") # Variadic format\n # FieldKey(parts=['a', 'b', 'c'])\n ```",
"properties": {
"parts": {
"items": {
"type": "string"
},
"title": "Parts",
"type": "array"
}
},
"required": [
"parts"
],
"title": "FieldKey",
"type": "object"
},
"FieldsMapping": {
"description": "Base class for field mapping configurations.\n\nField mappings define how a field automatically resolves its dependencies\nbased on upstream feature fields. This is separate from explicit field\ndependencies which are defined directly.",
"properties": {
"mapping": {
"discriminator": {
"mapping": {
"all": "#/$defs/AllFieldsMapping",
"default": "#/$defs/DefaultFieldsMapping",
"none": "#/$defs/NoneFieldsMapping",
"specific": "#/$defs/SpecificFieldsMapping"
},
"propertyName": "type"
},
"oneOf": [
{
"$ref": "#/$defs/AllFieldsMapping"
},
{
"$ref": "#/$defs/SpecificFieldsMapping"
},
{
"$ref": "#/$defs/NoneFieldsMapping"
},
{
"$ref": "#/$defs/DefaultFieldsMapping"
}
],
"title": "Mapping"
}
},
"required": [
"mapping"
],
"title": "FieldsMapping",
"type": "object"
},
"JsonValue": {},
"NoneFieldsMapping": {
"description": "Field mapping that never matches any upstream fields.",
"properties": {
"type": {
"const": "none",
"default": "none",
"title": "Type",
"type": "string"
}
},
"title": "NoneFieldsMapping",
"type": "object"
},
"SpecialFieldDep": {
"enum": [
"__METAXY_ALL_DEP__"
],
"title": "SpecialFieldDep",
"type": "string"
},
"SpecificFieldsMapping": {
"description": "Field mapping that explicitly depends on specific upstream fields.\n\nArguments:\n type: Always \"specific\" for discriminated union serialization\n mapping: Mapping of downstream field keys to their corresponding upstream field keys.\n\nExamples:\n >>> # Explicitly depend on specific upstream fields\n >>> SpecificFieldsMapping({\"downstream\": {\"upstream_1\", \"upstream_2\"}})\n\n >>> # Or use the classmethod\n >>> FieldsMapping.specific({\"field1\", \"field2\"})",
"properties": {
"type": {
"const": "specific",
"default": "specific",
"title": "Type",
"type": "string"
},
"mapping": {
"additionalProperties": {
"items": {
"$ref": "#/$defs/FieldKey"
},
"type": "array",
"uniqueItems": true
},
"propertyNames": {
"$ref": "#/$defs/FieldKey"
},
"title": "Mapping",
"type": "object"
}
},
"required": [
"mapping"
],
"title": "SpecificFieldsMapping",
"type": "object"
}
},
"properties": {
"key": {
"$ref": "#/$defs/FeatureKey"
},
"id_columns": {
"description": "Columns that uniquely identify a sample in this feature.",
"items": {
"type": "string"
},
"title": "Id Columns",
"type": "array"
},
"deps": {
"items": {
"$ref": "#/$defs/FeatureDep"
},
"title": "Deps",
"type": "array"
},
"fields": {
"items": {
"properties": {
"key": {
"$ref": "#/$defs/FieldKey"
},
"code_version": {
"default": "__metaxy_initial__",
"title": "Code Version",
"type": "string"
},
"deps": {
"anyOf": [
{
"$ref": "#/$defs/SpecialFieldDep"
},
{
"items": {
"$ref": "#/$defs/FieldDep"
},
"type": "array"
}
],
"title": "Deps"
}
},
"title": "FieldSpec",
"type": "object"
},
"title": "Fields",
"type": "array"
},
"metadata": {
"additionalProperties": {
"$ref": "#/$defs/JsonValue"
},
"description": "Metadata attached to this feature.",
"title": "Metadata",
"type": "object"
}
},
"required": [
"key",
"id_columns"
],
"title": "FeatureSpec",
"type": "object"
}
Fields:
-
key(FeatureKey) -
id_columns(tuple[str, ...]) -
deps(list[FeatureDep]) -
fields(list[FieldSpec]) -
metadata(dict[str, JsonValue])
Validators:
Source code in src/metaxy/models/feature_spec.py
Attributes¶
id_columns
pydantic-field
¶
Columns that uniquely identify a sample in this feature.
code_version
cached
property
¶
code_version: str
Hash of this feature's field code_versions only (no dependencies).
feature_spec_version
property
¶
feature_spec_version: str
Compute SHA256 hash of the complete feature specification.
This property provides a deterministic hash of ALL specification properties, including key, deps, fields, and any metadata/tags. Used for audit trail and tracking specification changes.
Unlike feature_version which only hashes computational properties (for migration triggering), feature_spec_version captures the entire specification for complete reproducibility and audit purposes.
Returns:
-
str–SHA256 hex digest of the specification
Functions¶
validate_unique_field_keys
pydantic-validator
¶
Validate that all fields have unique keys.
Source code in src/metaxy/models/feature_spec.py
@pydantic.model_validator(mode="after")
def validate_unique_field_keys(self) -> Self:
    """Reject a spec in which two fields share the same key.

    Returns:
        The instance itself, unchanged, when every field key is distinct.

    Raises:
        ValueError: At the first field whose key repeats an earlier one.
    """
    observed: set[tuple[str, ...]] = set()
    for spec_field in self.fields:
        # Normalise the key to a tuple so it can be stored in a set even
        # when it is a plain (unhashable) list.
        parts = tuple(spec_field.key)
        if parts not in observed:
            observed.add(parts)
            continue
        raise ValueError(
            f"Duplicate field key found: {spec_field.key}. "
            f"All fields must have unique keys."
        )
    return self
validate_id_columns
pydantic-validator
¶
Validate that id_columns is non-empty if specified.
Source code in src/metaxy/models/feature_spec.py
Feature Dependencies¶
FeatureDep
pydantic-model
¶
FeatureDep(*, feature: str, columns: tuple[str, ...] | None = None, rename: dict[str, str] | None = None, fields_mapping: FieldsMapping | None = None)
FeatureDep(*, feature: Sequence[str], columns: tuple[str, ...] | None = None, rename: dict[str, str] | None = None, fields_mapping: FieldsMapping | None = None)
FeatureDep(*, feature: FeatureKey, columns: tuple[str, ...] | None = None, rename: dict[str, str] | None = None, fields_mapping: FieldsMapping | None = None)
FeatureDep(*, feature: CoercibleToFeatureKey | FeatureSpecProtocol | type[BaseFeature], columns: tuple[str, ...] | None = None, rename: dict[str, str] | None = None, fields_mapping: FieldsMapping | None = None, **kwargs: Any)
Bases: BaseModel
Feature dependency specification with optional column selection and renaming.
Attributes:
-
feature(FeatureKey) –The feature key to depend on. Accepts string ("a/b/c"), list (["a", "b", "c"]), or FeatureKey instance.
-
columns(tuple[str, ...] | None) –Optional tuple of column names to select from upstream feature. - None (default): Keep all columns from upstream - Empty tuple (): Keep only system columns (sample_uid, provenance_by_field, etc.) - Tuple of names: Keep only specified columns (plus system columns)
-
rename(dict[str, str] | None) –Optional mapping of old column names to new names. Applied after column selection.
-
fields_mapping(FieldsMapping) –Optional field mapping configuration for automatic field dependency resolution. When provided, fields without explicit deps will automatically map to matching upstream fields. Defaults to
FieldsMapping.default() (see metaxy.models.fields_mapping.DefaultFieldsMapping).
Examples:
# Keep all columns with default field mapping
FeatureDep(feature="upstream")
# Keep all columns with suffix matching
FeatureDep(feature="upstream", fields_mapping=FieldsMapping.default(match_suffix=True))
# Keep all columns with all fields mapping
FeatureDep(feature="upstream", fields_mapping=FieldsMapping.all())
# Keep only specific columns
FeatureDep(
feature="upstream/feature",
columns=("col1", "col2")
)
# Rename columns to avoid conflicts
FeatureDep(
feature="upstream/feature",
rename={"old_name": "new_name"}
)
# Select and rename
FeatureDep(
feature="upstream/feature",
columns=("col1", "col2"),
rename={"col1": "upstream_col1"}
)
Show JSON schema:
{
"$defs": {
"AllFieldsMapping": {
"description": "Field mapping that explicitly depends on all upstream features and all their fields.\n\nExamples:\n >>> # Explicitly depend on all upstream fields\n >>> AllFieldsMapping()\n\n >>> # Or use the classmethod\n >>> FieldsMapping.all()",
"properties": {
"type": {
"const": "all",
"default": "all",
"title": "Type",
"type": "string"
}
},
"title": "AllFieldsMapping",
"type": "object"
},
"DefaultFieldsMapping": {
"description": "Default automatic field mapping configuration.\n\nWhen used, automatically maps fields to matching upstream fields based on field keys.\n\nAttributes:\n type: Always \"default\" for discriminated union serialization\n match_suffix: If True, allows suffix matching (e.g., \"french\" matches \"audio/french\")\n exclude_fields: List of field keys to exclude from auto-mapping\n\nExamples:\n >>> # Exact match only (default)\n >>> DefaultFieldsMapping()\n\n >>> # Enable suffix matching\n >>> DefaultFieldsMapping(match_suffix=True)\n\n >>> # Exclude specific fields from being auto-mapped\n >>> DefaultFieldsMapping(exclude_fields=[FieldKey([\"metadata\"])])",
"properties": {
"type": {
"const": "default",
"default": "default",
"title": "Type",
"type": "string"
},
"match_suffix": {
"default": false,
"title": "Match Suffix",
"type": "boolean"
},
"exclude_fields": {
"items": {
"$ref": "#/$defs/FieldKey"
},
"title": "Exclude Fields",
"type": "array"
}
},
"title": "DefaultFieldsMapping",
"type": "object"
},
"FeatureKey": {
"description": "Feature key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExamples:\n ```py\n FeatureKey(\"a/b/c\") # String format\n # FeatureKey(parts=['a', 'b', 'c'])\n\n FeatureKey([\"a\", \"b\", \"c\"]) # List format\n # FeatureKey(parts=['a', 'b', 'c'])\n\n FeatureKey(FeatureKey([\"a\", \"b\", \"c\"])) # FeatureKey copy\n # FeatureKey(parts=['a', 'b', 'c'])\n\n FeatureKey(\"a\", \"b\", \"c\") # Variadic format\n # FeatureKey(parts=['a', 'b', 'c'])\n ```",
"properties": {
"parts": {
"items": {
"type": "string"
},
"title": "Parts",
"type": "array"
}
},
"required": [
"parts"
],
"title": "FeatureKey",
"type": "object"
},
"FieldKey": {
"description": "Field key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExamples:\n ```py\n FieldKey(\"a/b/c\") # String format\n # FieldKey(parts=['a', 'b', 'c'])\n\n FieldKey([\"a\", \"b\", \"c\"]) # List format\n # FieldKey(parts=['a', 'b', 'c'])\n\n FieldKey(FieldKey([\"a\", \"b\", \"c\"])) # FieldKey copy\n # FieldKey(parts=['a', 'b', 'c'])\n\n FieldKey(\"a\", \"b\", \"c\") # Variadic format\n # FieldKey(parts=['a', 'b', 'c'])\n ```",
"properties": {
"parts": {
"items": {
"type": "string"
},
"title": "Parts",
"type": "array"
}
},
"required": [
"parts"
],
"title": "FieldKey",
"type": "object"
},
"FieldsMapping": {
"description": "Base class for field mapping configurations.\n\nField mappings define how a field automatically resolves its dependencies\nbased on upstream feature fields. This is separate from explicit field\ndependencies which are defined directly.",
"properties": {
"mapping": {
"discriminator": {
"mapping": {
"all": "#/$defs/AllFieldsMapping",
"default": "#/$defs/DefaultFieldsMapping",
"none": "#/$defs/NoneFieldsMapping",
"specific": "#/$defs/SpecificFieldsMapping"
},
"propertyName": "type"
},
"oneOf": [
{
"$ref": "#/$defs/AllFieldsMapping"
},
{
"$ref": "#/$defs/SpecificFieldsMapping"
},
{
"$ref": "#/$defs/NoneFieldsMapping"
},
{
"$ref": "#/$defs/DefaultFieldsMapping"
}
],
"title": "Mapping"
}
},
"required": [
"mapping"
],
"title": "FieldsMapping",
"type": "object"
},
"NoneFieldsMapping": {
"description": "Field mapping that never matches any upstream fields.",
"properties": {
"type": {
"const": "none",
"default": "none",
"title": "Type",
"type": "string"
}
},
"title": "NoneFieldsMapping",
"type": "object"
},
"SpecificFieldsMapping": {
"description": "Field mapping that explicitly depends on specific upstream fields.\n\nArguments:\n type: Always \"specific\" for discriminated union serialization\n mapping: Mapping of downstream field keys to their corresponding upstream field keys.\n\nExamples:\n >>> # Explicitly depend on specific upstream fields\n >>> SpecificFieldsMapping({\"downstream\": {\"upstream_1\", \"upstream_2\"}})\n\n >>> # Or use the classmethod\n >>> FieldsMapping.specific({\"field1\", \"field2\"})",
"properties": {
"type": {
"const": "specific",
"default": "specific",
"title": "Type",
"type": "string"
},
"mapping": {
"additionalProperties": {
"items": {
"$ref": "#/$defs/FieldKey"
},
"type": "array",
"uniqueItems": true
},
"propertyNames": {
"$ref": "#/$defs/FieldKey"
},
"title": "Mapping",
"type": "object"
}
},
"required": [
"mapping"
],
"title": "SpecificFieldsMapping",
"type": "object"
}
},
"description": "Feature dependency specification with optional column selection and renaming.\n\nAttributes:\n key: The feature key to depend on. Accepts string (\"a/b/c\"), list ([\"a\", \"b\", \"c\"]),\n or FeatureKey instance.\n columns: Optional tuple of column names to select from upstream feature.\n - None (default): Keep all columns from upstream\n - Empty tuple (): Keep only system columns (sample_uid, provenance_by_field, etc.)\n - Tuple of names: Keep only specified columns (plus system columns)\n rename: Optional mapping of old column names to new names.\n Applied after column selection.\n fields_mapping: Optional field mapping configuration for automatic field dependency resolution.\n When provided, fields without explicit deps will automatically map to matching upstream fields.\n Defaults to using `[FieldsMapping.default()][metaxy.models.fields_mapping.DefaultFieldsMapping]`.\n\nExamples:\n ```py\n # Keep all columns with default field mapping\n FeatureDep(feature=\"upstream\")\n\n # Keep all columns with suffix matching\n FeatureDep(feature=\"upstream\", fields_mapping=FieldsMapping.default(match_suffix=True))\n\n # Keep all columns with all fields mapping\n FeatureDep(feature=\"upstream\", fields_mapping=FieldsMapping.all())\n\n # Keep only specific columns\n FeatureDep(\n feature=\"upstream/feature\",\n columns=(\"col1\", \"col2\")\n )\n\n # Rename columns to avoid conflicts\n FeatureDep(\n feature=\"upstream/feature\",\n rename={\"old_name\": \"new_name\"}\n )\n\n # Select and rename\n FeatureDep(\n feature=\"upstream/feature\",\n columns=(\"col1\", \"col2\"),\n rename={\"col1\": \"upstream_col1\"}\n )\n ```",
"properties": {
"feature": {
"$ref": "#/$defs/FeatureKey"
},
"columns": {
"anyOf": [
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Columns"
},
"rename": {
"anyOf": [
{
"additionalProperties": {
"type": "string"
},
"type": "object"
},
{
"type": "null"
}
],
"default": null,
"title": "Rename"
},
"fields_mapping": {
"$ref": "#/$defs/FieldsMapping"
}
},
"required": [
"feature",
"fields_mapping"
],
"title": "FeatureDep",
"type": "object"
}
Fields:
-
feature(FeatureKey) -
columns(tuple[str, ...] | None) -
rename(dict[str, str] | None) -
fields_mapping(FieldsMapping)
Source code in src/metaxy/models/feature_spec.py
def __init__(
    self,
    *,
    feature: CoercibleToFeatureKey | FeatureSpecProtocol | type[BaseFeature],
    columns: tuple[str, ...] | None = None,
    rename: dict[str, str] | None = None,
    fields_mapping: FieldsMapping | None = None,
    **kwargs: Any,
) -> None:
    """Build a dependency on an upstream feature.

    Args:
        feature: The upstream feature, given in any of the supported forms
            (checked in this order): an object satisfying
            ``FeatureSpecProtocol`` (its ``key`` is used), a class exposing
            a ``spec`` attribute (``spec().key`` is used), a ``FeatureKey``
            (used as-is), or any other value, which is passed through
            ``FeatureKeyAdapter.validate_python``.
        columns: Forwarded unchanged to the base model.
        rename: Forwarded unchanged to the base model.
        fields_mapping: Field mapping to use; when falsy (e.g. ``None``),
            ``FieldsMapping.default()`` is substituted.
        **kwargs: Extra keyword arguments forwarded to the base initializer.
    """
    key: FeatureKey
    if isinstance(feature, FeatureSpecProtocol):
        # A spec-like object already carries its key.
        key = feature.key
    elif isinstance(feature, type) and hasattr(feature, "spec"):
        # A class exposing ``spec``: take the key from the spec it returns.
        key = feature.spec().key
    elif isinstance(feature, FeatureKey):
        key = feature
    else:
        # Anything else is coerced/validated into a FeatureKey.
        key = FeatureKeyAdapter.validate_python(feature)

    mapping = fields_mapping if fields_mapping else FieldsMapping.default()

    super().__init__(
        feature=key,
        columns=columns,
        rename=rename,
        fields_mapping=mapping,
        **kwargs,
    )