Skip to content

Configuration

This is the Python SDK for Metaxy's configuration. See config file reference to learn how to configure Metaxy via TOML files.

metaxy.MetaxyConfig

Bases: BaseSettings

Main Metaxy configuration.

Loads from (in order of precedence):

  1. Init arguments

  2. Environment variables (METAXY_*)

  3. Config file (metaxy.toml or [tool.metaxy] in pyproject.toml)

Environment variables can be templated with ${MY_VAR:-default} syntax.

Accessing current configuration
config = MetaxyConfig.load()
Getting a configured metadata store
store = config.get_store("prod")
Templating environment variables
metaxy.toml
[stores.branch.config]
root_path = "s3://my-bucket/${BRANCH_NAME}"

The default store is "dev"; METAXY_STORE can be used to override it.

Attributes

metaxy.MetaxyConfig.config_file property

config_file: Path | None

The config file path used to load this configuration.

Returns None if the config was created directly (not via load()).

metaxy.MetaxyConfig.plugins property

plugins: list[str]

Returns all enabled plugin names from ext configuration.

Functions

metaxy.MetaxyConfig.validate_project classmethod

validate_project(v: str) -> str

Validate project name follows naming rules.

Source code in src/metaxy/config.py
@field_validator("project")
@classmethod
def validate_project(cls, v: str) -> str:
    """Validate that a project name follows the naming rules.

    A valid name is non-empty, contains no forward slash and no double
    underscore, and consists only of ASCII letters, digits, underscores and
    hyphens.

    Args:
        v: Candidate project name.

    Returns:
        ``v`` unchanged when it is valid.

    Raises:
        ValueError: If the name is empty, contains ``/`` or ``__``, or
            contains any character outside ``[a-zA-Z0-9_-]``.
    """
    import re

    if not v:
        raise ValueError("project name cannot be empty")
    if "/" in v:
        raise ValueError(
            f"project name '{v}' cannot contain forward slashes (/). "
            "Forward slashes are reserved for FeatureKey separation"
        )
    if "__" in v:
        raise ValueError(
            f"project name '{v}' cannot contain double underscores (__). "
            "Double underscores are reserved for table name generation"
        )

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let a name such as "proj\n" through.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", v):
        raise ValueError(
            f"project name '{v}' must contain only alphanumeric characters, underscores, and hyphens"
        )
    return v

metaxy.MetaxyConfig.get_plugin classmethod

get_plugin(name: str, plugin_cls: type[PluginConfigT]) -> PluginConfigT

Get the plugin config from the global Metaxy config.

Unlike get(), this method does not warn when the global config is not initialized. This is intentional because plugins may call this at import time to read their configuration, and returning default plugin config is always safe.

Source code in src/metaxy/config.py
@classmethod
def get_plugin(cls, name: str, plugin_cls: type[PluginConfigT]) -> PluginConfigT:
    """Return the configuration of plugin ``name`` as a ``plugin_cls`` instance.

    The config is obtained with ``cls.get(_allow_default_config=True)``, so,
    unlike a plain `get()`, no warning is emitted when the global config has
    not been initialized. This is intentional: plugins may read their
    configuration at import time, and handing back a default plugin config
    is always safe.

    Args:
        name: Key of the plugin in the config's ``ext`` mapping.
        plugin_cls: Plugin config class the caller expects.

    Returns:
        The stored entry if it already is a ``plugin_cls`` instance; a
        ``plugin_cls`` validated from the stored entry's ``model_dump()`` if
        it is of another type; ``plugin_cls()`` if ``name`` has no entry.
    """
    registered = cls.get(_allow_default_config=True).ext

    # Plugin not configured at all: fall back to the plugin's defaults.
    if name not in registered:
        return plugin_cls()

    entry = registered[name]
    if isinstance(entry, plugin_cls):
        # Already the requested type, hand it back untouched.
        return entry

    # Stored under a different config class: round-trip through its dumped
    # fields so the result is validated as ``plugin_cls``.
    return plugin_cls.model_validate(entry.model_dump())

metaxy.MetaxyConfig.validate_hash_truncation_length classmethod

validate_hash_truncation_length(v: int | None) -> int | None

Validate hash truncation length is at least 8 if set.

Source code in src/metaxy/config.py
@field_validator("hash_truncation_length")
@classmethod
def validate_hash_truncation_length(cls, v: int | None) -> int | None:
    """Reject hash truncation lengths below 8; ``None`` passes through.

    Args:
        v: Configured truncation length, or ``None`` when unset.

    Returns:
        ``v`` unchanged.

    Raises:
        ValueError: If ``v`` is set and smaller than 8.
    """
    if v is None:
        return v
    if v < 8:
        raise ValueError(
            f"hash_truncation_length must be at least 8 characters, got {v}"
        )
    return v

metaxy.MetaxyConfig.settings_customise_sources classmethod

settings_customise_sources(settings_cls: type[BaseSettings], init_settings: PydanticBaseSettingsSource, env_settings: PydanticBaseSettingsSource, dotenv_settings: PydanticBaseSettingsSource, file_secret_settings: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]

Customize settings sources: init → env → TOML.

Priority (first wins): 1. Init arguments 2. Environment variables 3. TOML file

Source code in src/metaxy/config.py
@classmethod
def settings_customise_sources(
    cls,
    settings_cls: type[BaseSettings],
    init_settings: PydanticBaseSettingsSource,
    env_settings: PydanticBaseSettingsSource,
    dotenv_settings: PydanticBaseSettingsSource,
    file_secret_settings: PydanticBaseSettingsSource,
) -> tuple[PydanticBaseSettingsSource, ...]:
    """Select the settings sources: init → env → TOML.

    Priority (first wins):
    1. Init arguments
    2. Environment variables
    3. TOML file

    ``dotenv_settings`` and ``file_secret_settings`` are accepted but are
    not part of the returned tuple.
    """
    sources = (
        init_settings,
        env_settings,
        TomlConfigSettingsSource(settings_cls),
    )
    return sources

metaxy.MetaxyConfig.get classmethod

get(*, _allow_default_config: bool = False) -> MetaxyConfig

Get the current Metaxy configuration.

Parameters:

  • _allow_default_config (bool, default: False ) –

    Internal parameter. When True, returns default config without warning if global config is not set. Used by methods like get_plugin that may be called at import time.

Source code in src/metaxy/config.py
@classmethod
def get(cls, *, _allow_default_config: bool = False) -> "MetaxyConfig":
    """Return the current Metaxy configuration.

    When no configuration has been set, a fresh ``cls(project="default")``
    is returned instead, preceded by a `UserWarning` unless
    ``_allow_default_config`` is true.

    Args:
        _allow_default_config: Internal parameter. When True, returns default
            config without warning if global config is not set. Used by methods
            like `get_plugin` that may be called at import time.
    """
    current = _metaxy_config.get()
    if current is not None:
        return current

    if not _allow_default_config:
        warnings.warn(
            UserWarning(
                "Global Metaxy configuration not initialized. It can be set with MetaxyConfig.set(config) typically after loading it from a toml file. Returning default configuration (with environment variables and other pydantic settings sources resolved, project='default')."
            ),
            stacklevel=2,
        )
    return cls(project="default")

metaxy.MetaxyConfig.set classmethod

set(config: Self | None) -> None

Set the current Metaxy configuration.

Source code in src/metaxy/config.py
@classmethod
def set(cls, config: Self | None) -> None:
    """Set the current Metaxy configuration.

    Args:
        config: Configuration to store in the ``_metaxy_config`` holder.
            Passing ``None`` stores ``None``, which is the same call
            `reset()` makes.
    """
    _metaxy_config.set(config)

metaxy.MetaxyConfig.is_set classmethod

is_set() -> bool

Check if the current Metaxy configuration is set.

Source code in src/metaxy/config.py
@classmethod
def is_set(cls) -> bool:
    """Report whether a Metaxy configuration is currently set.

    Returns:
        ``True`` when the ``_metaxy_config`` holder contains anything other
        than ``None``.
    """
    current = _metaxy_config.get()
    return current is not None

metaxy.MetaxyConfig.reset classmethod

reset() -> None

Reset the current Metaxy configuration to None.

Source code in src/metaxy/config.py
@classmethod
def reset(cls) -> None:
    """Reset the current Metaxy configuration to None.

    Stores ``None`` in the ``_metaxy_config`` holder, so a subsequent
    `is_set()` reports ``False``.
    """
    _metaxy_config.set(None)

metaxy.MetaxyConfig.use

use() -> Iterator[Self]

Use this configuration temporarily, restoring previous config on exit.

Example
config = MetaxyConfig(project="test")
with config.use():
    # Code here uses test config
    assert MetaxyConfig.get().project == "test"
# Previous config restored
Source code in src/metaxy/config.py
@contextmanager
def use(self) -> Iterator[Self]:
    """Install this configuration as the current one for the duration of a block.

    Whatever configuration was current on entry (possibly ``None``) is put
    back on exit, whether the block finishes normally or raises.

    Yields:
        This configuration instance.

    Example:
        ```py
        config = MetaxyConfig(project="test")
        with config.use():
            # Code here uses test config
            assert MetaxyConfig.get().project == "test"
        # Previous config restored
        ```
    """
    saved = _metaxy_config.get()
    _metaxy_config.set(self)
    try:
        yield self
    finally:
        # Runs on normal exit and on exceptions alike.
        _metaxy_config.set(saved)

metaxy.MetaxyConfig.load classmethod

load(config_file: str | Path | None = None, *, search_parents: bool = True, auto_discovery_start: Path | None = None) -> MetaxyConfig

Load config with auto-discovery and parent directory search.

Parameters:

  • config_file (str | Path | None, default: None ) –

    Optional config file path.

    Tip

    METAXY_CONFIG environment variable can be used to set this parameter

  • search_parents (bool, default: True ) –

    Search parent directories for config file

  • auto_discovery_start (Path | None, default: None ) –

    Directory to start search from. Defaults to current working directory.

Returns:

  • MetaxyConfig –

    Loaded config (TOML + env vars merged)

Example
# Auto-discover with parent search
config = MetaxyConfig.load()

# Explicit file
config = MetaxyConfig.load("custom.toml")

# Auto-discover without parent search
config = MetaxyConfig.load(search_parents=False)

# Auto-discover from a specific directory
config = MetaxyConfig.load(auto_discovery_start=Path("/path/to/project"))
Source code in src/metaxy/config.py
@classmethod
def load(
    cls,
    config_file: str | Path | None = None,
    *,
    search_parents: bool = True,
    auto_discovery_start: Path | None = None,
) -> "MetaxyConfig":
    """Load config with auto-discovery and parent directory search.

    The loaded config is also installed as the current configuration via
    `set()`, and its plugins are loaded afterwards.

    Args:
        config_file: Optional config file path.

            !!! tip
                `METAXY_CONFIG` environment variable can be used to set this parameter

            When `METAXY_CONFIG` is set to a non-empty value it replaces
            this argument, even if a path was passed explicitly.

        search_parents: Search parent directories for config file
        auto_discovery_start: Directory to start search from.
            Defaults to current working directory.

    Returns:
        Loaded config (TOML + env vars merged)

    Example:
        ```py
        # Auto-discover with parent search
        config = MetaxyConfig.load()

        # Explicit file
        config = MetaxyConfig.load("custom.toml")

        # Auto-discover without parent search
        config = MetaxyConfig.load(search_parents=False)

        # Auto-discover from a specific directory
        config = MetaxyConfig.load(auto_discovery_start=Path("/path/to/project"))
        ```
    """
    # The environment variable wins over the argument when it is non-empty.
    if config_from_env := os.getenv("METAXY_CONFIG"):
        config_file = Path(config_from_env)

    # Search for a config file only if none was provided.
    if config_file is None and search_parents:
        config_file = cls._discover_config_with_parents(auto_discovery_start)

    # For a known file, temporarily patch the settings sources so the TOML
    # source reads exactly that file, then instantiate normally. This keeps
    # init arguments and env vars working as usual.
    if config_file:
        toml_path = Path(config_file)

        class CustomTomlSource(TomlConfigSettingsSource):
            def __init__(self, settings_cls: type[BaseSettings]):
                # Skip auto-discovery: bypass TomlConfigSettingsSource.__init__
                # and initialise from the class after it in the MRO, then
                # point the source at the explicit file.
                super(TomlConfigSettingsSource, self).__init__(settings_cls)
                self.toml_file = toml_path
                self.toml_data = self._load_toml()

        original_method = cls.settings_customise_sources

        @classmethod
        def custom_sources(
            cls_inner,
            settings_cls,
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        ):
            toml_settings = CustomTomlSource(settings_cls)
            return (init_settings, env_settings, toml_settings)

        # Temporarily replace the method on the class.
        cls.settings_customise_sources = custom_sources  # ty: ignore[invalid-assignment]
        try:
            config = cls()
        finally:
            # Always undo the patch: if instantiation fails (e.g. invalid
            # config), the class must not stay bound to this one-off source.
            cls.settings_customise_sources = original_method  # ty: ignore[invalid-assignment]
        # Store the resolved config file path
        config._config_file = toml_path.resolve()
    else:
        # Use default sources (auto-discovery + env vars)
        config = cls()
        # No config file recorded for this instance
        config._config_file = None

    cls.set(config)

    # Load plugins after config is set (plugins may access MetaxyConfig.get())
    config._load_plugins()

    return config

metaxy.MetaxyConfig.get_store

get_store(name: str | None = None, *, expected_type: Literal[None] = None, **kwargs: Any) -> MetadataStore
get_store(name: str | None = None, *, expected_type: type[StoreTypeT], **kwargs: Any) -> StoreTypeT
get_store(name: str | None = None, *, expected_type: type[StoreTypeT] | None = None, **kwargs: Any) -> MetadataStore | StoreTypeT

Instantiate metadata store by name.

Parameters:

  • name (str | None, default: None ) –

    Store name (uses config.store if None)

  • expected_type (type[StoreTypeT] | None, default: None ) –

    Expected type of the store. If the actual store type does not match the expected type, a TypeError is raised.

  • **kwargs (Any, default: {} ) –

    Additional keyword arguments to pass to the store constructor.

Returns:

  • MetadataStore | StoreTypeT –

    Instantiated metadata store

Raises:

  • ValueError

    If store name not found in config, or if fallback stores have different hash algorithms than the parent store

  • ImportError

    If store class cannot be imported

  • TypeError

    If the actual store type does not match the expected type

Example
config = MetaxyConfig.load()
store = config.get_store("prod")

# Use default store
store = config.get_store()
Source code in src/metaxy/config.py
def get_store(
    self,
    name: str | None = None,
    *,
    expected_type: type[StoreTypeT] | None = None,
    **kwargs: Any,
) -> "MetadataStore | StoreTypeT":
    """Instantiate metadata store by name.

    Args:
        name: Store name (uses config.store if None or empty)
        expected_type: Expected type of the store.
            If the actual store type does not match the expected type, an
            `InvalidConfigError` is raised.
        **kwargs: Additional keyword arguments. Keys that are fields of the
            store's config model override the configured values; all other
            keys are passed on to the store's `from_config()`.

    Returns:
        Instantiated metadata store

    Raises:
        InvalidConfigError: If no stores are configured, the store name is
            not found in config, the store class or the created store is not
            of `expected_type`, the store config fails validation, the store
            cannot be instantiated, or the created store reports a hash
            algorithm different from the one configured for it.

    Example:
        ```py
        config = MetaxyConfig.load()
        store = config.get_store("prod")

        # Use default store
        store = config.get_store()
        ```
    """
    from metaxy.versioning.types import HashAlgorithm

    if not self.stores:
        raise InvalidConfigError.from_config(
            self,
            "No Metaxy stores available. They should be configured in metaxy.toml|pyproject.toml or via environment variables.",
        )

    name = name or self.store

    if name not in self.stores:
        raise InvalidConfigError.from_config(
            self,
            f"Store '{name}' not found in config. Available stores: {list(self.stores.keys())}",
        )

    store_config = self.stores[name]

    # Get store class (already imported by Pydantic's ImportString)
    store_class = store_config.type

    # Cheap early check on the class; the instance is checked again below.
    if expected_type is not None and not issubclass(store_class, expected_type):
        raise InvalidConfigError.from_config(
            self,
            f"Store '{name}' is not of type '{expected_type.__name__}'",
        )

    # Work on a copy so the stored configuration is not modified.
    config_copy = store_config.config.copy()

    # Get the store's config model class; its fields decide which values
    # belong in the typed config.
    config_model_cls = store_class.config_model()
    config_fields = set(config_model_cls.model_fields.keys())

    # Use the global auto_create_tables setting only if the config model
    # supports it and the store does not specify its own value.
    if (
        "auto_create_tables" not in config_copy
        and self.auto_create_tables is not None
        and "auto_create_tables" in config_fields
    ):
        config_copy["auto_create_tables"] = self.auto_create_tables

    # Separate kwargs into config-field overrides and extra constructor args.
    extra_kwargs = {
        key: value for key, value in kwargs.items() if key not in config_fields
    }
    config_copy.update(
        (key, value) for key, value in kwargs.items() if key in config_fields
    )

    # Resolve the hash algorithm only after the overrides are merged, so the
    # verification below compares against the value the store really gets
    # (a `hash_algorithm` kwarg must not be checked against the stale
    # configured value). No default is set here: the store picks its own.
    # An exception from HashAlgorithm(...) for an unknown name propagates.
    configured_hash_algorithm = config_copy.get("hash_algorithm")
    if isinstance(configured_hash_algorithm, str):
        configured_hash_algorithm = HashAlgorithm(configured_hash_algorithm)
        config_copy["hash_algorithm"] = configured_hash_algorithm

    try:
        typed_config = config_model_cls.model_validate(config_copy)
    except Exception as e:
        raise InvalidConfigError.from_config(
            self,
            f"Failed to validate config for store '{name}': {e}",
        ) from e

    # Instantiate using from_config() - fallback stores are resolved via MetaxyConfig.get()
    # Use self.use() to ensure this config is available for fallback resolution
    try:
        with self.use():
            store = store_class.from_config(typed_config, **extra_kwargs)
    except InvalidConfigError:
        # Don't re-wrap InvalidConfigError (e.g., from nested fallback store resolution)
        raise
    except Exception as e:
        raise InvalidConfigError.from_config(
            self,
            f"Failed to instantiate store '{name}' ({store_class.__name__}): {e}",
        ) from e

    # Verify the store actually uses the hash algorithm we configured
    # (in case a store subclass overrides the default or ignores the parameter)
    # Only check if a hash algorithm was explicitly configured
    if (
        configured_hash_algorithm is not None
        and store.hash_algorithm != configured_hash_algorithm
    ):
        raise InvalidConfigError.from_config(
            self,
            f"Store '{name}' ({store_class.__name__}) was configured with "
            f"hash_algorithm='{configured_hash_algorithm.value}' but is using "
            f"'{store.hash_algorithm.value}'. The store class may have overridden "
            f"the hash algorithm. All stores must use the same hash algorithm.",
        )

    if expected_type is not None and not isinstance(store, expected_type):
        raise InvalidConfigError.from_config(
            self,
            f"Store '{name}' is not of type '{expected_type.__name__}'",
        )

    return store

metaxy.StoreConfig

Bases: BaseSettings

Configuration for a single metadata store.

Example
config = StoreConfig(
    type="metaxy_delta.DeltaMetadataStore",
    config={
        "root_path": "s3://bucket/metadata",
        "region": "us-west-2",
        "fallback_stores": ["prod"],
    }
)

metaxy.config.InvalidConfigError

InvalidConfigError(message: str, *, config_file: Path | None = None)

Bases: Exception

Raised when Metaxy configuration is invalid.

This error includes helpful context about where the configuration was loaded from and how environment variables can affect configuration.

Source code in src/metaxy/config.py
def __init__(
    self,
    message: str,
    *,
    config_file: Path | None = None,
):
    """Build the exception text from ``message`` plus configuration context.

    Args:
        message: Description of the problem; kept on ``base_message`` and
            used as the first line of the exception text.
        config_file: Path of the config file involved, if any; kept on
            ``config_file``. When truthy, a ``Config file: ...`` line is
            added to the exception text.
    """
    self.base_message = message
    self.config_file = config_file

    # Context lines follow the message, one per line; the environment
    # variable note is always last.
    file_lines = [f"Config file: {config_file}"] if config_file else []
    env_note = (
        "Note: METAXY_* environment variables can override config file settings "
    )

    super().__init__("\n".join([message, *file_lines, env_note]))

Functions

metaxy.config.InvalidConfigError.from_config classmethod

from_config(config: MetaxyConfig, message: str) -> InvalidConfigError

Create an InvalidConfigError from a MetaxyConfig instance.

Parameters:

  • config (MetaxyConfig) –

    The MetaxyConfig instance that has the invalid configuration.

  • message (str) –

    The error message describing what's wrong.

Returns:

  • InvalidConfigError –

    An InvalidConfigError with context from the config.

Source code in src/metaxy/config.py
@classmethod
def from_config(cls, config: "MetaxyConfig", message: str) -> "InvalidConfigError":
    """Build an InvalidConfigError that carries the config's file path.

    Args:
        config: The MetaxyConfig instance that has the invalid configuration;
            its ``_config_file`` is passed on as ``config_file``.
        message: The error message describing what's wrong.

    Returns:
        An InvalidConfigError with context from the config.
    """
    source_file = config._config_file
    return cls(message, config_file=source_file)