Skip to content

Feature

FeatureKey pydantic-model

FeatureKey(key: str)
FeatureKey(key: Sequence[str])
FeatureKey(key: Self)
FeatureKey(*parts: str)
FeatureKey(*, parts: Sequence[str])
FeatureKey(*args: str | _CoercibleToKey | Self, **kwargs: Any)

Bases: _Key

Feature key as a sequence of string parts.

Hashable for use as dict keys in registries. Parts cannot contain forward slashes (/) or double underscores (__).

Examples:

FeatureKey("a/b/c")  # String format
# FeatureKey(parts=['a', 'b', 'c'])

FeatureKey(["a", "b", "c"])  # List format
# FeatureKey(parts=['a', 'b', 'c'])

FeatureKey(FeatureKey(["a", "b", "c"]))  # FeatureKey copy
# FeatureKey(parts=['a', 'b', 'c'])

FeatureKey("a", "b", "c")  # Variadic format
# FeatureKey(parts=['a', 'b', 'c'])
Show JSON schema:
{
  "description": "Feature key as a sequence of string parts.\n\nHashable for use as dict keys in registries.\nParts cannot contain forward slashes (/) or double underscores (__).\n\nExamples:\n    ```py\n    FeatureKey(\"a/b/c\")  # String format\n    # FeatureKey(parts=['a', 'b', 'c'])\n\n    FeatureKey([\"a\", \"b\", \"c\"])  # List format\n    # FeatureKey(parts=['a', 'b', 'c'])\n\n    FeatureKey(FeatureKey([\"a\", \"b\", \"c\"]))  # FeatureKey copy\n    # FeatureKey(parts=['a', 'b', 'c'])\n\n    FeatureKey(\"a\", \"b\", \"c\")  # Variadic format\n    # FeatureKey(parts=['a', 'b', 'c'])\n    ```",
  "properties": {
    "parts": {
      "items": {
        "type": "string"
      },
      "title": "Parts",
      "type": "array"
    }
  },
  "required": [
    "parts"
  ],
  "title": "FeatureKey",
  "type": "object"
}

Fields:

Validators:

  • _validate_input
  • _validate_parts_content → parts
Source code in src/metaxy/models/types.py
def __init__(self, *args: str | _CoercibleToKey | Self, **kwargs: Any) -> None:
    """Initialize FeatureKey from various input types.

    Every positional and keyword argument is forwarded unchanged to the
    parent class initializer; this override adds no logic of its own.

    Args:
        *args: Key given positionally (typed as ``str``, ``_CoercibleToKey``
            or another key instance).
        **kwargs: Keyword arguments passed through to the parent initializer.
    """
    # NOTE(review): presumably this override exists only to narrow the accepted
    # argument types for type checkers -- confirm against the base class.
    super().__init__(*args, **kwargs)

Attributes

table_name property

table_name: str

Get SQL-like table name for this feature key.

Functions

to_string

to_string() -> str

Convert to string representation with "/" separator.

Source code in src/metaxy/models/types.py
def to_string(self) -> str:
    """Return the key as a single string.

    The parts are joined with ``KEY_SEPARATOR`` (documented as "/").
    """
    join_parts = KEY_SEPARATOR.join
    return join_parts(self.parts)

__repr__

__repr__() -> str

Return string representation.

Source code in src/metaxy/models/types.py
def __repr__(self) -> str:
    """Return the same text as ``to_string()``.

    Note that this is the joined string form of the key, not a
    constructor-style representation.
    """
    rendered = self.to_string()
    return rendered

__str__

__str__() -> str

Return string representation.

Source code in src/metaxy/models/types.py
def __str__(self) -> str:
    """Return the same text as ``to_string()``."""
    rendered = self.to_string()
    return rendered

__lt__

__lt__(other: Any) -> bool

Less than comparison for sorting.

Source code in src/metaxy/models/types.py
def __lt__(self, other: Any) -> bool:
    """Order keys by their parts (``self < other``).

    Returns ``NotImplemented`` when ``other`` is not an instance of this
    key's class, so Python can fall back to its default handling.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented
    return self.parts < other.parts

__le__

__le__(other: Any) -> bool

Less than or equal comparison for sorting.

Source code in src/metaxy/models/types.py
def __le__(self, other: Any) -> bool:
    """Order keys by their parts (``self <= other``).

    Returns ``NotImplemented`` when ``other`` is not an instance of this
    key's class, so Python can fall back to its default handling.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented
    return self.parts <= other.parts

__gt__

__gt__(other: Any) -> bool

Greater than comparison for sorting.

Source code in src/metaxy/models/types.py
def __gt__(self, other: Any) -> bool:
    """Order keys by their parts (``self > other``).

    Returns ``NotImplemented`` when ``other`` is not an instance of this
    key's class, so Python can fall back to its default handling.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented
    return self.parts > other.parts

__ge__

__ge__(other: Any) -> bool

Greater than or equal comparison for sorting.

Source code in src/metaxy/models/types.py
def __ge__(self, other: Any) -> bool:
    """Order keys by their parts (``self >= other``).

    Returns ``NotImplemented`` when ``other`` is not an instance of this
    key's class, so Python can fall back to its default handling.
    """
    if not isinstance(other, self.__class__):
        return NotImplemented
    return self.parts >= other.parts

__iter__

__iter__() -> Iterator[str]

Return iterator over parts.

Source code in src/metaxy/models/types.py
def __iter__(self) -> Iterator[str]:  # pyright: ignore[reportIncompatibleMethodOverride]
    """Iterate over the key's parts in order."""
    parts_iterator = iter(self.parts)
    return parts_iterator

__getitem__

__getitem__(index: int) -> str

Get part by index.

Source code in src/metaxy/models/types.py
def __getitem__(self, index: int) -> str:
    """Return the part stored at position ``index``."""
    parts = self.parts
    return parts[index]

__len__

__len__() -> int

Get number of parts.

Source code in src/metaxy/models/types.py
def __len__(self) -> int:
    """Return how many parts the key has."""
    part_count = len(self.parts)
    return part_count

__contains__

__contains__(item: str) -> bool

Check if part is in key.

Source code in src/metaxy/models/types.py
def __contains__(self, item: str) -> bool:
    """Report whether ``item`` is one of the key's parts."""
    is_member = item in self.parts
    return is_member

__reversed__

__reversed__()

Return reversed iterator over parts.

Source code in src/metaxy/models/types.py
def __reversed__(self):
    """Iterate over the key's parts from last to first."""
    backwards = reversed(self.parts)
    return backwards

__get_validators__ classmethod

__get_validators__()

Pydantic validator for when used as a field type.

Source code in src/metaxy/models/types.py
@classmethod
def __get_validators__(cls):
    """Pydantic validator for when used as a field type.

    Yields:
        ``cls.validate`` -- the single callable offered as this type's
        validator.
    """
    # NOTE(review): ``__get_validators__`` looks like the Pydantic v1 custom-type
    # hook -- confirm it is still honoured by the Pydantic version in use.
    yield cls.validate

validate classmethod

validate(value: Any) -> FeatureKey

Convert various inputs to FeatureKey.

Source code in src/metaxy/models/types.py
@classmethod
def validate(cls, value: Any) -> FeatureKey:
    """Coerce ``value`` into an instance of this class.

    An existing instance is returned as-is (no copy); anything else is
    passed to the constructor.
    """
    return value if isinstance(value, cls) else cls(value)

model_dump

model_dump(**kwargs: Any) -> Any

Serialize to list format for backward compatibility.

Source code in src/metaxy/models/types.py
def model_dump(self, **kwargs: Any) -> Any:
    """Serialize the key as a plain list of its parts.

    Kept in list form for backward compatibility, rather than emitting the
    full Pydantic model structure. ``kwargs`` are accepted but not used.
    """
    return [*self.parts]

__hash__

__hash__() -> int

Return hash for use as dict keys.

Source code in src/metaxy/models/types.py
def __hash__(self) -> int:
    """Return hash for use as dict keys.

    The parts are converted to a tuple before hashing so the key stays
    hashable even when ``parts`` is held as a list (lists are unhashable).
    For parts already stored as a tuple the result is unchanged, because
    ``hash(tuple(t)) == hash(t)``.
    """
    return hash(tuple(self.parts))

__eq__

__eq__(other: Any) -> bool

Check equality with another instance.

Source code in src/metaxy/models/types.py
def __eq__(self, other: Any) -> bool:
    """Compare with another object.

    Instances of this key's class are equal when their parts are equal;
    any other object is handed to the parent class's ``__eq__``.
    """
    if not isinstance(other, self.__class__):
        return super().__eq__(other)
    return self.parts == other.parts

BaseFeature is the most important class in Metaxy.

Users can extend this class to define their features.

BaseFeature pydantic-model

Bases: FrozenBaseModel

Show JSON schema:
{
  "properties": {},
  "title": "BaseFeature",
  "type": "object"
}

Functions

table_name classmethod

table_name() -> str

Get SQL-like table name for this feature.

Converts feature key to SQL-compatible table name by joining parts with double underscores, consistent with IbisMetadataStore.

Returns:

  • str

    Table name string (e.g., "my_namespace__my_feature")

Example
class VideoFeature(Feature, spec=FeatureSpec(
    key=FeatureKey(["video", "processing"]),
    ...
)):
    pass
VideoFeature.table_name()
# 'video__processing'
Source code in src/metaxy/models/feature.py
@classmethod
def table_name(cls) -> str:
    """Return the SQL-like table name of this feature.

    Delegates to the feature spec's ``table_name()``. The name is documented
    as the feature key's parts joined with double underscores, consistent
    with IbisMetadataStore.

    Returns:
        Table name string (e.g., "my_namespace__my_feature")

    Example:
        ```py
        class VideoFeature(Feature, spec=FeatureSpec(
            key=FeatureKey(["video", "processing"]),
            ...
        )):
            pass
        VideoFeature.table_name()
        # 'video__processing'
        ```
    """
    feature_spec = cls.spec()
    return feature_spec.table_name()

feature_version classmethod

feature_version() -> str

Get hash of feature specification.

Returns a hash representing the feature's complete configuration:

  • Feature key
  • Field definitions and code versions
  • Dependencies (feature-level and field-level)

This hash changes when you modify:

  • Field code versions
  • Dependencies
  • Field definitions

Used to distinguish current vs historical metafield provenance hashes. Stored in the 'metaxy_feature_version' column of metadata DataFrames.

Returns:

  • str

    SHA256 hex digest (like git short hashes)

Example
class MyFeature(Feature, spec=FeatureSpec(
    key=FeatureKey(["my", "feature"]),
    fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
)):
    pass
MyFeature.feature_version()
# 'a3f8b2c1...'
Source code in src/metaxy/models/feature.py
@classmethod
def feature_version(cls) -> str:
    """Return the version hash of this feature, as computed by the graph.

    The value is looked up on ``cls.graph`` using the feature's spec key.
    It is documented as covering:
    - Feature key
    - Field definitions and code versions
    - Dependencies (feature-level and field-level)

    and therefore as changing whenever field code versions, dependencies
    or field definitions are modified.

    Used to distinguish current from historical field provenance hashes;
    stored in the 'metaxy_feature_version' column of metadata DataFrames.

    Returns:
        SHA256 hex digest (like git short hashes)

    Example:
        ```py
        class MyFeature(Feature, spec=FeatureSpec(
            key=FeatureKey(["my", "feature"]),
            fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
        )):
            pass
        MyFeature.feature_version()
        # 'a3f8b2c1...'
        ```
    """
    version_for = cls.graph.get_feature_version
    own_key = cls.spec().key
    return version_for(own_key)

feature_spec_version classmethod

feature_spec_version() -> str

Get hash of the complete feature specification.

Returns a hash representing ALL specification properties including:

  • Feature key
  • Dependencies
  • Fields
  • Code versions
  • Any future metadata, tags, or other properties

Unlike feature_version which only hashes computational properties (for migration triggering), feature_spec_version captures the entire specification for complete reproducibility and audit purposes.

Stored in the 'metaxy_feature_spec_version' column of metadata DataFrames.

Returns:

  • str

    SHA256 hex digest of the complete specification

Example
class MyFeature(Feature, spec=FeatureSpec(
    key=FeatureKey(["my", "feature"]),
    fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
)):
    pass
MyFeature.feature_spec_version()
# 'def456...'  # Different from feature_version
Source code in src/metaxy/models/feature.py
@classmethod
def feature_spec_version(cls) -> str:
    """Return the hash of the complete feature specification.

    Reads the ``feature_spec_version`` attribute of the feature's spec. It is
    documented as covering ALL specification properties:
    - Feature key
    - Dependencies
    - Fields
    - Code versions
    - Any future metadata, tags, or other properties

    In contrast to feature_version, which only hashes computational
    properties (for migration triggering), this value captures the entire
    specification for complete reproducibility and audit purposes.

    Stored in the 'metaxy_feature_spec_version' column of metadata DataFrames.

    Returns:
        SHA256 hex digest of the complete specification

    Example:
        ```py
        class MyFeature(Feature, spec=FeatureSpec(
            key=FeatureKey(["my", "feature"]),
            fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
        )):
            pass
        MyFeature.feature_spec_version()
        # 'def456...'  # Different from feature_version
        ```
    """
    feature_spec = cls.spec()
    return feature_spec.feature_spec_version

feature_tracking_version classmethod

feature_tracking_version() -> str

Get hash combining feature spec version and project.

This version is used in system tables to track when features move between projects or when their specifications change. It combines:

  • feature_spec_version: Complete feature specification hash
  • project: The project this feature belongs to

This allows the migration system to detect when a feature moves from one project to another, triggering appropriate migrations.

Returns:

  • str

    SHA256 hex digest of feature_spec_version + project

Example
class MyFeature(Feature, spec=FeatureSpec(...)):
    pass
MyFeature.feature_tracking_version()  # Combines spec + project
# 'abc789...'
Source code in src/metaxy/models/feature.py
@classmethod
def feature_tracking_version(cls) -> str:
    """Return a hash that combines the feature spec version and the project.

    System tables use this version to track when features move between
    projects or when their specifications change. Two inputs are hashed,
    in this order:
    - feature_spec_version: Complete feature specification hash
    - project: The project this feature belongs to

    This lets the migration system detect a feature moving from one project
    to another and trigger the appropriate migrations.

    Returns:
        SHA256 hex digest of feature_spec_version + project, passed through
        ``truncate_hash`` before being returned

    Example:
        ```py
        class MyFeature(Feature, spec=FeatureSpec(...)):
            pass
        MyFeature.feature_tracking_version()  # Combines spec + project
        # 'abc789...'
        ```
    """
    # Seeding the hasher with the first chunk is equivalent to update()-ing it.
    hasher = hashlib.sha256(cls.feature_spec_version().encode())
    hasher.update(cls.project.encode())
    full_digest = hasher.hexdigest()
    return truncate_hash(full_digest)

provenance_by_field classmethod

provenance_by_field() -> dict[str, str]

Get the code-level field provenance for this feature.

This returns a static hash based on code versions and dependencies, not sample-level field provenance computed from upstream data.

Returns:

  • dict[str, str]

    Dictionary mapping field keys to their provenance hashes.

Source code in src/metaxy/models/feature.py
@classmethod
def provenance_by_field(cls) -> dict[str, str]:
    """Return the code-level provenance hash of every field of this feature.

    The values are static hashes derived from code versions and
    dependencies; they are not the sample-level field provenance computed
    from upstream data. The lookup is delegated to the feature graph.

    Returns:
        Dictionary mapping field keys to their provenance hashes.
    """
    versions_for = cls.graph.get_feature_version_by_field
    own_key = cls.spec().key
    return versions_for(own_key)

load_input classmethod

load_input(joiner: UpstreamJoiner, upstream_refs: dict[str, LazyFrame[Any]]) -> tuple[LazyFrame[Any], dict[str, str]]

Join upstream feature metadata.

Override for custom join logic (1:many, different keys, filtering, etc.).

Parameters:

  • joiner (UpstreamJoiner) –

    UpstreamJoiner from MetadataStore

  • upstream_refs (dict[str, LazyFrame[Any]]) –

    Upstream feature metadata references (lazy where possible)

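Returns:

  • tuple[LazyFrame[Any], dict[str, str]] –

    (joined_upstream, upstream_column_mapping): joined_upstream is all upstream data joined together; upstream_column_mapping maps upstream_key -> column name.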

Source code in src/metaxy/models/feature.py
@classmethod
def load_input(
    cls,
    joiner: "UpstreamJoiner",
    upstream_refs: dict[str, "nw.LazyFrame[Any]"],
) -> tuple["nw.LazyFrame[Any]", dict[str, str]]:
    """Join the metadata of all upstream features.

    Subclasses may override this to implement custom join logic (1:many,
    different keys, filtering, etc.).

    Args:
        joiner: UpstreamJoiner from MetadataStore
        upstream_refs: Upstream feature metadata references (lazy where possible)

    Returns:
        (joined_upstream, upstream_column_mapping)
        - joined_upstream: All upstream data joined together
        - upstream_column_mapping: Maps upstream_key -> column name
    """
    from metaxy.models.feature_spec import FeatureDep

    # Per-upstream column selection and renames, keyed by the dep's key string.
    columns_by_upstream: dict[str, tuple[str, ...] | None] = {}
    renames_by_upstream: dict[str, dict[str, str] | None] = {}

    # ``or ()`` covers both a missing and an empty deps collection.
    for dependency in cls.spec().deps or ():
        if not isinstance(dependency, FeatureDep):
            continue
        upstream_key = dependency.feature.to_string()
        columns_by_upstream[upstream_key] = dependency.columns
        renames_by_upstream[upstream_key] = dependency.rename

    return joiner.join_upstream(
        upstream_refs=upstream_refs,
        feature_spec=cls.spec(),
        feature_plan=cls.graph.get_feature_plan(cls.spec().key),
        upstream_columns=columns_by_upstream,
        upstream_renames=renames_by_upstream,
    )

resolve_data_version_diff classmethod

resolve_data_version_diff(diff_resolver: MetadataDiffResolver, target_provenance: LazyFrame[Any], current_metadata: LazyFrame[Any] | None, *, lazy: bool = False) -> Increment | LazyIncrement

Resolve differences between target and current field provenance.

Override for custom diff logic (ignore certain fields, custom rules, etc.).

Parameters:

  • diff_resolver (MetadataDiffResolver) –

    MetadataDiffResolver from MetadataStore

  • target_provenance (LazyFrame[Any]) –

    Calculated target field provenance (Narwhals LazyFrame)

  • current_metadata (LazyFrame[Any] | None) –

    Current metadata for this feature (Narwhals LazyFrame, or None). Should be pre-filtered by feature_version at the store level.

  • lazy (bool, default: False ) –

    If True, return LazyIncrement. If False, return Increment.

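Returns:

  • Increment | LazyIncrement –

    Increment (eager) or LazyIncrement (lazy) with added, changed, removed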

Example (default):

class MyFeature(Feature, spec=...):
    pass  # Uses diff resolver's default implementation

Example (ignore certain field changes):

class MyFeature(Feature, spec=...):
    @classmethod
    def resolve_data_version_diff(cls, diff_resolver, target_provenance, current_metadata, **kwargs):
        # Get standard diff
        result = diff_resolver.find_changes(target_provenance, current_metadata, cls.spec().id_columns)

        # Custom: Only consider 'frames' field changes, ignore 'audio'
        # Users can filter/modify the increment here

        return result  # Return modified Increment

Source code in src/metaxy/models/feature.py
@classmethod
def resolve_data_version_diff(
    cls,
    diff_resolver: "MetadataDiffResolver",
    target_provenance: "nw.LazyFrame[Any]",
    current_metadata: "nw.LazyFrame[Any] | None",
    *,
    lazy: bool = False,
) -> "Increment | LazyIncrement":
    """Compute what differs between target and current field provenance.

    Subclasses may override this for custom diff logic (ignore certain
    fields, custom rules, etc.).

    Args:
        diff_resolver: MetadataDiffResolver from MetadataStore
        target_provenance: Calculated target field provenance (Narwhals LazyFrame)
        current_metadata: Current metadata for this feature (Narwhals LazyFrame, or None).
            Should be pre-filtered by feature_version at the store level.
        lazy: If True, return LazyIncrement. If False, return Increment.

    Returns:
        Increment (eager) or LazyIncrement (lazy) with added, changed, removed

    Example (default):
        ```py
        class MyFeature(Feature, spec=...):
            pass  # Uses diff resolver's default implementation
        ```

    Example (ignore certain field changes):
        ```py
        class MyFeature(Feature, spec=...):
            @classmethod
            def resolve_data_version_diff(cls, diff_resolver, target_provenance, current_metadata, **kwargs):
                # Get standard diff
                result = diff_resolver.find_changes(target_provenance, current_metadata, cls.spec().id_columns)

                # Custom: Only consider 'frames' field changes, ignore 'audio'
                # Users can filter/modify the increment here

                return result  # Return modified Increment
        ```
    """
    # The resolver's result is treated as lazy; ID columns come from the spec.
    pending = diff_resolver.find_changes(
        target_provenance=target_provenance,
        current_metadata=current_metadata,
        id_columns=cls.spec().id_columns,
    )

    if lazy:
        return pending

    # Eager mode: collect each of the three frames into an Increment.
    from metaxy.data_versioning.diff import Increment

    return Increment(
        added=pending.added.collect(),
        changed=pending.changed.collect(),
        removed=pending.removed.collect(),
    )

Note — "Code Version Access": retrieve a feature's code version from its spec with MyFeature.spec().code_version.

Feature pydantic-model

Bases: BaseFeature

A default specialization of BaseFeature that uses a sample_uid ID column.

Show JSON schema:
{
  "description": "A default specialization of BaseFeature that uses a `sample_uid` ID column.",
  "properties": {},
  "title": "Feature",
  "type": "object"
}

Functions

table_name classmethod

table_name() -> str

Get SQL-like table name for this feature.

Converts feature key to SQL-compatible table name by joining parts with double underscores, consistent with IbisMetadataStore.

Returns:

  • str

    Table name string (e.g., "my_namespace__my_feature")

Example
class VideoFeature(Feature, spec=FeatureSpec(
    key=FeatureKey(["video", "processing"]),
    ...
)):
    pass
VideoFeature.table_name()
# 'video__processing'
Source code in src/metaxy/models/feature.py
@classmethod
def table_name(cls) -> str:
    """Return the SQL-like table name of this feature.

    Delegates to the feature spec's ``table_name()``. The name is documented
    as the feature key's parts joined with double underscores, consistent
    with IbisMetadataStore.

    Returns:
        Table name string (e.g., "my_namespace__my_feature")

    Example:
        ```py
        class VideoFeature(Feature, spec=FeatureSpec(
            key=FeatureKey(["video", "processing"]),
            ...
        )):
            pass
        VideoFeature.table_name()
        # 'video__processing'
        ```
    """
    feature_spec = cls.spec()
    return feature_spec.table_name()

feature_version classmethod

feature_version() -> str

Get hash of feature specification.

Returns a hash representing the feature's complete configuration:

  • Feature key
  • Field definitions and code versions
  • Dependencies (feature-level and field-level)

This hash changes when you modify:

  • Field code versions
  • Dependencies
  • Field definitions

Used to distinguish current vs historical metafield provenance hashes. Stored in the 'metaxy_feature_version' column of metadata DataFrames.

Returns:

  • str

    SHA256 hex digest (like git short hashes)

Example
class MyFeature(Feature, spec=FeatureSpec(
    key=FeatureKey(["my", "feature"]),
    fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
)):
    pass
MyFeature.feature_version()
# 'a3f8b2c1...'
Source code in src/metaxy/models/feature.py
@classmethod
def feature_version(cls) -> str:
    """Return the version hash of this feature, as computed by the graph.

    The value is looked up on ``cls.graph`` using the feature's spec key.
    It is documented as covering:
    - Feature key
    - Field definitions and code versions
    - Dependencies (feature-level and field-level)

    and therefore as changing whenever field code versions, dependencies
    or field definitions are modified.

    Used to distinguish current from historical field provenance hashes;
    stored in the 'metaxy_feature_version' column of metadata DataFrames.

    Returns:
        SHA256 hex digest (like git short hashes)

    Example:
        ```py
        class MyFeature(Feature, spec=FeatureSpec(
            key=FeatureKey(["my", "feature"]),
            fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
        )):
            pass
        MyFeature.feature_version()
        # 'a3f8b2c1...'
        ```
    """
    version_for = cls.graph.get_feature_version
    own_key = cls.spec().key
    return version_for(own_key)

feature_spec_version classmethod

feature_spec_version() -> str

Get hash of the complete feature specification.

Returns a hash representing ALL specification properties including:

  • Feature key
  • Dependencies
  • Fields
  • Code versions
  • Any future metadata, tags, or other properties

Unlike feature_version which only hashes computational properties (for migration triggering), feature_spec_version captures the entire specification for complete reproducibility and audit purposes.

Stored in the 'metaxy_feature_spec_version' column of metadata DataFrames.

Returns:

  • str

    SHA256 hex digest of the complete specification

Example
class MyFeature(Feature, spec=FeatureSpec(
    key=FeatureKey(["my", "feature"]),
    fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
)):
    pass
MyFeature.feature_spec_version()
# 'def456...'  # Different from feature_version
Source code in src/metaxy/models/feature.py
@classmethod
def feature_spec_version(cls) -> str:
    """Return the hash of the complete feature specification.

    Reads the ``feature_spec_version`` attribute of the feature's spec. It is
    documented as covering ALL specification properties:
    - Feature key
    - Dependencies
    - Fields
    - Code versions
    - Any future metadata, tags, or other properties

    In contrast to feature_version, which only hashes computational
    properties (for migration triggering), this value captures the entire
    specification for complete reproducibility and audit purposes.

    Stored in the 'metaxy_feature_spec_version' column of metadata DataFrames.

    Returns:
        SHA256 hex digest of the complete specification

    Example:
        ```py
        class MyFeature(Feature, spec=FeatureSpec(
            key=FeatureKey(["my", "feature"]),
            fields=[FieldSpec(key=FieldKey(["default"]), code_version="1")],
        )):
            pass
        MyFeature.feature_spec_version()
        # 'def456...'  # Different from feature_version
        ```
    """
    feature_spec = cls.spec()
    return feature_spec.feature_spec_version

feature_tracking_version classmethod

feature_tracking_version() -> str

Get hash combining feature spec version and project.

This version is used in system tables to track when features move between projects or when their specifications change. It combines:

  • feature_spec_version: Complete feature specification hash
  • project: The project this feature belongs to

This allows the migration system to detect when a feature moves from one project to another, triggering appropriate migrations.

Returns:

  • str

    SHA256 hex digest of feature_spec_version + project

Example
class MyFeature(Feature, spec=FeatureSpec(...)):
    pass
MyFeature.feature_tracking_version()  # Combines spec + project
# 'abc789...'
Source code in src/metaxy/models/feature.py
@classmethod
def feature_tracking_version(cls) -> str:
    """Return a hash that combines the feature spec version and the project.

    System tables use this version to track when features move between
    projects or when their specifications change. Two inputs are hashed,
    in this order:
    - feature_spec_version: Complete feature specification hash
    - project: The project this feature belongs to

    This lets the migration system detect a feature moving from one project
    to another and trigger the appropriate migrations.

    Returns:
        SHA256 hex digest of feature_spec_version + project, passed through
        ``truncate_hash`` before being returned

    Example:
        ```py
        class MyFeature(Feature, spec=FeatureSpec(...)):
            pass
        MyFeature.feature_tracking_version()  # Combines spec + project
        # 'abc789...'
        ```
    """
    # Seeding the hasher with the first chunk is equivalent to update()-ing it.
    hasher = hashlib.sha256(cls.feature_spec_version().encode())
    hasher.update(cls.project.encode())
    full_digest = hasher.hexdigest()
    return truncate_hash(full_digest)

provenance_by_field classmethod

provenance_by_field() -> dict[str, str]

Get the code-level field provenance for this feature.

This returns a static hash based on code versions and dependencies, not sample-level field provenance computed from upstream data.

Returns:

  • dict[str, str]

    Dictionary mapping field keys to their provenance hashes.

Source code in src/metaxy/models/feature.py
@classmethod
def provenance_by_field(cls) -> dict[str, str]:
    """Return the code-level provenance hash of every field of this feature.

    The values are static hashes derived from code versions and
    dependencies; they are not the sample-level field provenance computed
    from upstream data. The lookup is delegated to the feature graph.

    Returns:
        Dictionary mapping field keys to their provenance hashes.
    """
    versions_for = cls.graph.get_feature_version_by_field
    own_key = cls.spec().key
    return versions_for(own_key)

load_input classmethod

load_input(joiner: UpstreamJoiner, upstream_refs: dict[str, LazyFrame[Any]]) -> tuple[LazyFrame[Any], dict[str, str]]

Join upstream feature metadata.

Override for custom join logic (1:many, different keys, filtering, etc.).

Parameters:

  • joiner (UpstreamJoiner) –

    UpstreamJoiner from MetadataStore

  • upstream_refs (dict[str, LazyFrame[Any]]) –

    Upstream feature metadata references (lazy where possible)

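Returns:

  • tuple[LazyFrame[Any], dict[str, str]] –

    (joined_upstream, upstream_column_mapping): joined_upstream is all upstream data joined together; upstream_column_mapping maps upstream_key -> column name.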

Source code in src/metaxy/models/feature.py
@classmethod
def load_input(
    cls,
    joiner: "UpstreamJoiner",
    upstream_refs: dict[str, "nw.LazyFrame[Any]"],
) -> tuple["nw.LazyFrame[Any]", dict[str, str]]:
    """Join the metadata of all upstream features.

    Subclasses may override this to implement custom join logic (1:many,
    different keys, filtering, etc.).

    Args:
        joiner: UpstreamJoiner from MetadataStore
        upstream_refs: Upstream feature metadata references (lazy where possible)

    Returns:
        (joined_upstream, upstream_column_mapping)
        - joined_upstream: All upstream data joined together
        - upstream_column_mapping: Maps upstream_key -> column name
    """
    from metaxy.models.feature_spec import FeatureDep

    # Per-upstream column selection and renames, keyed by the dep's key string.
    columns_by_upstream: dict[str, tuple[str, ...] | None] = {}
    renames_by_upstream: dict[str, dict[str, str] | None] = {}

    # ``or ()`` covers both a missing and an empty deps collection.
    for dependency in cls.spec().deps or ():
        if not isinstance(dependency, FeatureDep):
            continue
        upstream_key = dependency.feature.to_string()
        columns_by_upstream[upstream_key] = dependency.columns
        renames_by_upstream[upstream_key] = dependency.rename

    return joiner.join_upstream(
        upstream_refs=upstream_refs,
        feature_spec=cls.spec(),
        feature_plan=cls.graph.get_feature_plan(cls.spec().key),
        upstream_columns=columns_by_upstream,
        upstream_renames=renames_by_upstream,
    )

resolve_data_version_diff classmethod

resolve_data_version_diff(diff_resolver: MetadataDiffResolver, target_provenance: LazyFrame[Any], current_metadata: LazyFrame[Any] | None, *, lazy: bool = False) -> Increment | LazyIncrement

Resolve differences between target and current field provenance.

Override for custom diff logic (ignore certain fields, custom rules, etc.).

Parameters:

  • diff_resolver (MetadataDiffResolver) –

    MetadataDiffResolver from MetadataStore

  • target_provenance (LazyFrame[Any]) –

    Calculated target field provenance (Narwhals LazyFrame)

  • current_metadata (LazyFrame[Any] | None) –

    Current metadata for this feature (Narwhals LazyFrame, or None). Should be pre-filtered by feature_version at the store level.

  • lazy (bool, default: False ) –

    If True, return LazyIncrement. If False, return Increment.

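Returns:

  • Increment | LazyIncrement –

    Increment (eager) or LazyIncrement (lazy) with added, changed, removed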

Example (default):

class MyFeature(Feature, spec=...):
    pass  # Uses diff resolver's default implementation

Example (ignore certain field changes):

class MyFeature(Feature, spec=...):
    @classmethod
    def resolve_data_version_diff(cls, diff_resolver, target_provenance, current_metadata, **kwargs):
        # Get standard diff
        result = diff_resolver.find_changes(target_provenance, current_metadata, cls.spec().id_columns)

        # Custom: Only consider 'frames' field changes, ignore 'audio'
        # Users can filter/modify the increment here

        return result  # Return modified Increment

Source code in src/metaxy/models/feature.py
@classmethod
def resolve_data_version_diff(
    cls,
    diff_resolver: "MetadataDiffResolver",
    target_provenance: "nw.LazyFrame[Any]",
    current_metadata: "nw.LazyFrame[Any] | None",
    *,
    lazy: bool = False,
) -> "Increment | LazyIncrement":
    """Compute what differs between target and current field provenance.

    Subclasses may override this for custom diff logic (ignore certain
    fields, custom rules, etc.).

    Args:
        diff_resolver: MetadataDiffResolver from MetadataStore
        target_provenance: Calculated target field provenance (Narwhals LazyFrame)
        current_metadata: Current metadata for this feature (Narwhals LazyFrame, or None).
            Should be pre-filtered by feature_version at the store level.
        lazy: If True, return LazyIncrement. If False, return Increment.

    Returns:
        Increment (eager) or LazyIncrement (lazy) with added, changed, removed

    Example (default):
        ```py
        class MyFeature(Feature, spec=...):
            pass  # Uses diff resolver's default implementation
        ```

    Example (ignore certain field changes):
        ```py
        class MyFeature(Feature, spec=...):
            @classmethod
            def resolve_data_version_diff(cls, diff_resolver, target_provenance, current_metadata, **kwargs):
                # Get standard diff
                result = diff_resolver.find_changes(target_provenance, current_metadata, cls.spec().id_columns)

                # Custom: Only consider 'frames' field changes, ignore 'audio'
                # Users can filter/modify the increment here

                return result  # Return modified Increment
        ```
    """
    # The resolver's result is treated as lazy; ID columns come from the spec.
    pending = diff_resolver.find_changes(
        target_provenance=target_provenance,
        current_metadata=current_metadata,
        id_columns=cls.spec().id_columns,
    )

    if lazy:
        return pending

    # Eager mode: collect each of the three frames into an Increment.
    from metaxy.data_versioning.diff import Increment

    return Increment(
        added=pending.added.collect(),
        changed=pending.changed.collect(),
        removed=pending.removed.collect(),
    )

get_feature_by_key

get_feature_by_key(key: FeatureKey) -> type[BaseFeature]

Get a feature class by its key from the active graph.

Convenience function that retrieves Metaxy feature class from the currently active feature graph. Can be useful when receiving a feature key from storage or across process boundaries.

Parameters:

  • key (FeatureKey) –

    Feature key to look up

Returns:

  • type[BaseFeature] –

    Feature class

Raises:

  • KeyError

    If no feature with the given key is registered

Example
from metaxy import get_feature_by_key, FeatureKey
parent_key = FeatureKey(["examples", "parent"])
ParentFeature = get_feature_by_key(parent_key)
Source code in src/metaxy/models/feature.py
def get_feature_by_key(key: "FeatureKey") -> type["BaseFeature"]:
    """Look up a feature class by key in the active graph.

    Convenience wrapper that fetches the currently active [feature graph][metaxy.FeatureGraph] and asks it for the Metaxy feature class registered under ``key``. Handy when a feature key arrives from storage or from another process.

    Args:
        key: Feature key to look up

    Returns:
        Feature class

    Raises:
        KeyError: If no feature with the given key is registered

    Example:
        ```py
        from metaxy import get_feature_by_key, FeatureKey
        parent_key = FeatureKey(["examples", "parent"])
        ParentFeature = get_feature_by_key(parent_key)
        ```
    """
    return FeatureGraph.get_active().get_feature_by_key(key)