In-Memory Metadata Store¶
This one is mostly useful for testing.
Configuration¶
fallback_stores¶
List of fallback store names to search when features are not found in the current store.
Type: list[str]
hash_algorithm¶
Hash algorithm for versioning. If None, uses store's default.
Type: metaxy.versioning.types.HashAlgorithm | None
versioning_engine¶
Which versioning engine to use: 'auto' (prefer native), 'native', or 'polars'.
Type: Literal['auto', 'native', 'polars'] | Default: "auto"
API Reference¶
metaxy.InMemoryMetadataStore
¶
InMemoryMetadataStore(**kwargs: Any)
Bases: MetadataStore
In-memory metadata store using dict-based storage.
Features:
- Simple dict storage: {FeatureKey: pl.DataFrame}
- Fast for testing and prototyping
- No persistence (data lost when process exits)
- Schema validation on write
- Uses Polars components for all operations
Limitations:
- Not suitable for production
- Data lost on process exit
- No concurrency support across processes
- Memory-bound (all data in RAM)
Notes
Uses Narwhals LazyFrames (nw.LazyFrame) for all operations
Components
Components are created on-demand in resolve_update(). Uses Polars internally but exposes Narwhals interface. Only supports Polars components (no native backend).
Parameters:
-
**kwargs (Any, default: {}) – Passed to MetadataStore.__init__ (e.g., fallback_stores, hash_algorithm)
Source code in src/metaxy/metadata_store/memory.py
def __init__(self, **kwargs: Any):
    """Create an empty in-memory metadata store.

    Args:
        **kwargs: Forwarded to ``MetadataStore.__init__``
            (e.g., ``fallback_stores``, ``hash_algorithm``).
    """
    # Keyed by feature-key tuples rather than strings: tuples are
    # hashable and sidestep any string-parsing ambiguity.
    self._storage: dict[tuple[str, ...], pl.DataFrame] = {}
    super().__init__(**kwargs, versioning_engine_cls=PolarsVersioningEngine)
Functions¶
metaxy.InMemoryMetadataStore.write_metadata_to_store
¶
write_metadata_to_store(feature_key: FeatureKey, df: Frame, **kwargs: Any) -> None
Internal write implementation for in-memory storage.
Parameters:
-
feature_key(FeatureKey) –Feature key to write to
-
df(Frame) –Narwhals Frame (eager or lazy) with metadata (already validated)
-
**kwargs(Any, default:{}) –Backend-specific parameters (currently unused)
Source code in src/metaxy/metadata_store/memory.py
def write_metadata_to_store(
    self,
    feature_key: FeatureKey,
    df: Frame,
    **kwargs: Any,
) -> None:
    """
    Internal write implementation for in-memory storage.

    Appends to any existing frame for the same feature, padding either
    side with typed null columns when schemas have diverged.

    Args:
        feature_key: Feature key to write to
        df: Narwhals Frame (eager or lazy) with metadata (already validated)
        **kwargs: Backend-specific parameters (currently unused)
    """
    incoming: pl.DataFrame = collect_to_polars(df)
    storage_key = self._get_storage_key(feature_key)

    current = self._storage.get(storage_key)
    if current is None:
        # First write for this feature key: store as-is.
        self._storage[storage_key] = incoming
        return

    def _pad(target: pl.DataFrame, source: pl.DataFrame) -> pl.DataFrame:
        # Schema evolution: add, as typed nulls, every column that
        # `source` has but `target` lacks.
        missing = [c for c in source.columns if c not in target.columns]
        if not missing:
            return target
        return target.with_columns(
            [pl.lit(None).cast(source.schema[c]).alias(c) for c in missing]
        )

    current = _pad(current, incoming)
    incoming = _pad(incoming, current)

    # Select both sides in a deterministic (sorted) column order so the
    # vertical concat lines up.
    ordered = sorted(set(current.columns) | set(incoming.columns))
    self._storage[storage_key] = pl.concat(
        [current.select(ordered), incoming.select(ordered)],
        how="vertical",
    )
metaxy.InMemoryMetadataStore.read_metadata_in_store
¶
read_metadata_in_store(feature: CoercibleToFeatureKey, *, feature_version: str | None = None, filters: Sequence[Expr] | None = None, columns: Sequence[str] | None = None, **kwargs: Any) -> LazyFrame[Any] | None
Read metadata from this store only (no fallback).
Parameters:
-
feature(CoercibleToFeatureKey) –Feature to read
-
feature_version(str | None, default:None) –Filter by specific feature_version
-
filters(Sequence[Expr] | None, default:None) –List of Narwhals filter expressions
-
columns(Sequence[str] | None, default:None) –Optional list of columns to select
-
**kwargs(Any, default:{}) –Backend-specific parameters (currently unused)
Returns:
-
LazyFrame[Any] | None – Narwhals LazyFrame with metadata, or None if not found
Raises:
-
StoreNotOpenError–If store is not open
Source code in src/metaxy/metadata_store/memory.py
def read_metadata_in_store(
    self,
    feature: CoercibleToFeatureKey,
    *,
    feature_version: str | None = None,
    filters: Sequence[nw.Expr] | None = None,
    columns: Sequence[str] | None = None,
    **kwargs: Any,
) -> nw.LazyFrame[Any] | None:
    """
    Read metadata from this store only (no fallback).

    Args:
        feature: Feature to read
        feature_version: Filter by specific feature_version
        filters: List of Narwhals filter expressions
        columns: Optional list of columns to select
        **kwargs: Backend-specific parameters (currently unused)

    Returns:
        Narwhals LazyFrame with metadata, or None if not found

    Raises:
        StoreNotOpenError: If store is not open
    """
    self._check_open()
    key = self._get_storage_key(self._resolve_feature_key(feature))

    stored = self._storage.get(key)
    if stored is None:
        return None

    # Wrap the lazy Polars frame in the Narwhals interface.
    lazy = nw.from_native(stored.lazy())

    if feature_version is not None:
        lazy = lazy.filter(nw.col("metaxy_feature_version") == feature_version)

    # Apply caller-supplied Narwhals filters, if any.
    for expr in filters or ():
        lazy = lazy.filter(expr)

    if columns is not None:
        lazy = lazy.select(columns)

    # Emptiness is only observable when the caller materializes the frame.
    return lazy
metaxy.InMemoryMetadataStore.clear
¶
metaxy.InMemoryMetadataStore.open
¶
open(mode: AccessMode = 'read') -> Iterator[Self]
Open the in-memory store (no-op for in-memory, but accepts mode for consistency).
Parameters:
-
mode(AccessMode, default:'read') –Access mode (accepted for consistency but ignored).
Yields:
-
Self(Self) –The store instance
Source code in src/metaxy/metadata_store/memory.py
@contextmanager
def open(self, mode: AccessMode = "read") -> Iterator[Self]:
    """Open the in-memory store (no-op for in-memory, but accepts mode for consistency).

    Args:
        mode: Access mode (accepted for consistency but ignored).

    Yields:
        Self: The store instance
    """
    # Track nesting depth so only the outermost context actually
    # opens/closes the store.
    self._context_depth += 1
    try:
        if self._context_depth == 1:
            # No real connection to establish; just flip the flag and
            # run the post-open validation hook.
            self._is_open = True
            self._validate_after_open()
        yield self
    finally:
        self._context_depth -= 1
        if self._context_depth == 0:
            # Nothing to tear down for in-memory storage.
            self._is_open = False
metaxy.InMemoryMetadataStore.config_model
classmethod
¶
config_model() -> type[InMemoryMetadataStoreConfig]
Return the configuration model class for this store type.
Subclasses must override this to return their specific config class.
Returns:
-
type[MetadataStoreConfig]–The config class type (e.g., DuckDBMetadataStoreConfig)
Note
Subclasses override this with a more specific return type. Type checkers may show a warning about incompatible override, but this is intentional - each store returns its own config type.