Skip to content

Configuration

This is the Python SDK for Metaxy's configuration. See config file reference to learn how to configure Metaxy via TOML files.

MetaxyConfig

Bases: BaseSettings

Main Metaxy configuration.

Loads from (in order of precedence):

  1. Init arguments

  2. Environment variables (METAXY_*)

  3. Config file (metaxy.toml or [tool.metaxy] in pyproject.toml)

Accessing current configuration
config = MetaxyConfig.load()
Getting a configured metadata store
store = config.get_store("prod")

The default store is "dev"; METAXY_STORE can be used to override it.

Attributes

plugins property

plugins: list[str]

Returns all enabled plugin names from ext configuration.

Functions

validate_project classmethod

validate_project(v: str) -> str

Validate project name follows naming rules.

Source code in src/metaxy/config.py
@field_validator("project")
@classmethod
def validate_project(cls, v: str) -> str:
    """Validate that a project name follows the naming rules.

    Args:
        v: Candidate project name.

    Returns:
        The name, unchanged, when it passes every check.

    Raises:
        ValueError: If the name is empty, contains a forward slash or a
            double underscore, or contains any character other than ASCII
            letters, digits, underscores, and hyphens.
    """
    import re

    if not v:
        raise ValueError("project name cannot be empty")
    if "/" in v:
        raise ValueError(
            f"project name '{v}' cannot contain forward slashes (/). "
            f"Forward slashes are reserved for FeatureKey separation"
        )
    if "__" in v:
        raise ValueError(
            f"project name '{v}' cannot contain double underscores (__). "
            f"Double underscores are reserved for table name generation"
        )
    # Use fullmatch rather than match with a "$" anchor: "$" also matches
    # just before a trailing newline, which would let "name\n" through.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", v):
        raise ValueError(
            f"project name '{v}' must contain only alphanumeric characters, underscores, and hyphens"
        )
    return v

validate_hash_truncation_length classmethod

validate_hash_truncation_length(v: int | None) -> int | None

Validate hash truncation length is at least 8 if set.

Source code in src/metaxy/config.py
@field_validator("hash_truncation_length")
@classmethod
def validate_hash_truncation_length(cls, v: int | None) -> int | None:
    """Reject hash truncation lengths shorter than 8.

    Args:
        v: Requested truncation length, or ``None`` when it is not set.

    Returns:
        ``v`` unchanged when it is ``None`` or at least 8.

    Raises:
        ValueError: If ``v`` is set and smaller than 8.
    """
    # An unset length is always acceptable.
    if v is None:
        return None
    if v < 8:
        message = f"hash_truncation_length must be at least 8 characters, got {v}"
        raise ValueError(message)
    return v

settings_customise_sources classmethod

settings_customise_sources(settings_cls: type[BaseSettings], init_settings: PydanticBaseSettingsSource, env_settings: PydanticBaseSettingsSource, dotenv_settings: PydanticBaseSettingsSource, file_secret_settings: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]

Customize settings sources: init → env → TOML.

Priority (first wins): 1. Init arguments 2. Environment variables 3. TOML file

Source code in src/metaxy/config.py
@classmethod
def settings_customise_sources(
    cls,
    settings_cls: type[BaseSettings],
    init_settings: PydanticBaseSettingsSource,
    env_settings: PydanticBaseSettingsSource,
    dotenv_settings: PydanticBaseSettingsSource,
    file_secret_settings: PydanticBaseSettingsSource,
) -> tuple[PydanticBaseSettingsSource, ...]:
    """Choose the settings sources and their order: init → env → TOML.

    Sources listed earlier take priority over later ones:

    1. Init arguments
    2. Environment variables
    3. TOML file

    ``dotenv_settings`` and ``file_secret_settings`` are accepted but are
    not part of the returned tuple.
    """
    return (
        init_settings,
        env_settings,
        TomlConfigSettingsSource(settings_cls),
    )

get classmethod

get() -> MetaxyConfig

Get the current Metaxy configuration.

Source code in src/metaxy/config.py
@classmethod
def get(cls) -> "MetaxyConfig":
    """Get the current Metaxy configuration.

    Returns:
        The configuration currently held in ``_metaxy_config``. When none
        is held, a new ``cls(project="default")`` instance is returned
        instead; that instance is not stored, so later calls build a
        fresh one again.

    Warns:
        UserWarning: When no configuration has been set.
    """
    cfg = _metaxy_config.get()
    if cfg is not None:
        return cfg

    warnings.warn(
        UserWarning(
            "Global Metaxy configuration not initialized. It can be set with MetaxyConfig.set(config) typically after loading it from a toml file. Returning default configuration (with environment variables and other pydantic settings sources resolved, project='default')."
        ),
        # Attribute the warning to the caller of get(), not to this line.
        stacklevel=2,
    )
    return cls(project="default")

set classmethod

set(config: Self | None) -> None

Set the current Metaxy configuration.

Source code in src/metaxy/config.py
@classmethod
def set(cls, config: Self | None) -> None:
    """Set the current Metaxy configuration.

    Args:
        config: Configuration to store as the current one, or ``None`` to
            leave no configuration set.
    """
    # Delegates to the module-level ``_metaxy_config`` holder.
    _metaxy_config.set(config)

is_set classmethod

is_set() -> bool

Check if the current Metaxy configuration is set.

Source code in src/metaxy/config.py
@classmethod
def is_set(cls) -> bool:
    """Report whether a current Metaxy configuration has been stored.

    Returns:
        ``True`` when ``_metaxy_config`` holds something other than
        ``None``, ``False`` otherwise.
    """
    current = _metaxy_config.get()
    return current is not None

reset classmethod

reset() -> None

Reset the current Metaxy configuration to None.

Source code in src/metaxy/config.py
@classmethod
def reset(cls) -> None:
    """Reset the current Metaxy configuration to None."""
    # Stores ``None`` directly in the module-level ``_metaxy_config`` holder.
    _metaxy_config.set(None)

load classmethod

load(config_file: str | Path | None = None, *, search_parents: bool = True, auto_discovery_start: Path | None = None) -> MetaxyConfig

Load config with auto-discovery and parent directory search.

Parameters:

  • config_file (str | Path | None, default: None ) –

    Optional config file path (overrides auto-discovery)

  • search_parents (bool, default: True ) –

    Search parent directories for config file (default: True)

  • auto_discovery_start (Path | None, default: None ) –

    Directory to start search from (defaults to cwd)

Returns:

  • MetaxyConfig –

    Loaded config (TOML + env vars merged)

Example
# Auto-discover with parent search
config = MetaxyConfig.load()

# Explicit file
config = MetaxyConfig.load("custom.toml")

# Auto-discover without parent search
config = MetaxyConfig.load(search_parents=False)

# Auto-discover from a specific directory
config = MetaxyConfig.load(auto_discovery_start=Path("/path/to/project"))
Source code in src/metaxy/config.py
@classmethod
def load(
    cls,
    config_file: str | Path | None = None,
    *,
    search_parents: bool = True,
    auto_discovery_start: Path | None = None,
) -> "MetaxyConfig":
    """Load config with auto-discovery and parent directory search.

    The loaded config is also stored as the current configuration via
    ``cls.set`` before it is returned.

    Args:
        config_file: Optional config file path (overrides auto-discovery)
        search_parents: Search parent directories for config file (default: True)
        auto_discovery_start: Directory to start search from (defaults to cwd)

    Returns:
        Loaded config (TOML + env vars merged)

    Example:
        ```py
        # Auto-discover with parent search
        config = MetaxyConfig.load()

        # Explicit file
        config = MetaxyConfig.load("custom.toml")

        # Auto-discover without parent search
        config = MetaxyConfig.load(search_parents=False)

        # Auto-discover from a specific directory
        config = MetaxyConfig.load(auto_discovery_start=Path("/path/to/project"))
        ```
    """
    # Search for config file if not explicitly provided
    if config_file is None and search_parents:
        config_file = cls._discover_config_with_parents(auto_discovery_start)

    # For an explicit (or discovered) file, temporarily patch the sources hook
    # to read that file, then use normal instantiation so that init arguments
    # and environment variables are still applied on top of it.
    if config_file:
        toml_path = Path(config_file)

        class CustomTomlSource(TomlConfigSettingsSource):
            def __init__(self, settings_cls: type[BaseSettings]):
                # Skip TomlConfigSettingsSource.__init__ (and its
                # auto-discovery) by calling the next __init__ in the MRO,
                # then point the source at the explicit file.
                super(TomlConfigSettingsSource, self).__init__(settings_cls)
                self.toml_file = toml_path
                self.toml_data = self._load_toml()

        original_method = cls.settings_customise_sources

        @classmethod  # type: ignore[misc]
        def custom_sources(
            cls_inner,
            settings_cls,
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        ):
            toml_settings = CustomTomlSource(settings_cls)
            return (init_settings, env_settings, toml_settings)

        # NOTE(review): this swaps a class attribute, so the patch is visible
        # to anything else instantiating the class while it is in place.
        cls.settings_customise_sources = custom_sources  # type: ignore[assignment]
        try:
            config = cls()
        finally:
            # Always restore the hook, even when instantiation raises (for
            # example on a validation error); otherwise every later cls()
            # would keep reading this explicit file.
            cls.settings_customise_sources = original_method  # type: ignore[method-assign]
    else:
        # Use default sources (auto-discovery + env vars)
        config = cls()

    cls.set(config)

    return config

get_store

get_store(name: str | None = None) -> MetadataStore

Instantiate metadata store by name.

Parameters:

  • name (str | None, default: None ) –

    Store name (uses config.store if None)

Returns:

  • MetadataStore –

    Instantiated metadata store

Raises:

  • ValueError

    If store name not found in config, or if fallback stores have different hash algorithms than the parent store

  • ImportError

    If store class cannot be imported

Example
config = MetaxyConfig.load()
store = config.get_store("prod")

# Use default store
store = config.get_store()
Source code in src/metaxy/config.py
def get_store(self, name: str | None = None) -> "MetadataStore":
    """Instantiate metadata store by name.

    Stores named in a store's ``fallback_stores`` config entry are
    instantiated recursively and passed to the store class as
    ``fallback_stores``.

    Args:
        name: Store name (uses config.store if None)

    Returns:
        Instantiated metadata store

    Raises:
        ValueError: If no stores are configured, if a store name is not
            found in config, if ``fallback_stores`` entries reference each
            other in a cycle, or if an instantiated store reports a hash
            algorithm different from the one it was configured with
        ImportError: If store class cannot be imported

    Example:
        ```py
        config = MetaxyConfig.load()
        store = config.get_store("prod")

        # Use default store
        store = config.get_store()
        ```
    """
    from metaxy.data_versioning.hash_algorithms import HashAlgorithm

    if not self.stores:
        raise ValueError(
            "No Metaxy stores available. They should be configured in metaxy.toml|pyproject.toml or via environment variables."
        )

    def build(requested: str | None, chain: tuple[str, ...]) -> "MetadataStore":
        # ``chain`` holds the names of the stores currently being built, from
        # the outermost request down to the parent of this one.
        store_name = requested or self.store

        # A store that (transitively) lists itself as a fallback would
        # otherwise recurse until RecursionError.
        if store_name in chain:
            cycle = " -> ".join((*chain, store_name))
            raise ValueError(
                f"Circular fallback_stores reference detected: {cycle}"
            )

        if store_name not in self.stores:
            raise ValueError(
                f"Store '{store_name}' not found in config. "
                f"Available stores: {list(self.stores.keys())}"
            )

        store_config = self.stores[store_name]

        # Import store class
        store_class = self._import_class(store_config.type)

        # Work on a copy so the configured dict is not mutated by pop().
        config_copy = store_config.config.copy()
        fallback_store_names = config_copy.pop("fallback_stores", [])

        # Get hash_algorithm from config (if specified) and convert to enum
        configured_hash_algorithm = config_copy.get("hash_algorithm")
        if configured_hash_algorithm is None:
            # Use default
            configured_hash_algorithm = HashAlgorithm.XXHASH64
            config_copy["hash_algorithm"] = configured_hash_algorithm
        elif isinstance(configured_hash_algorithm, str):
            configured_hash_algorithm = HashAlgorithm(configured_hash_algorithm)
            config_copy["hash_algorithm"] = configured_hash_algorithm

        # Global settings apply only when the store config does not set them.
        if (
            "hash_truncation_length" not in config_copy
            and self.hash_truncation_length is not None
        ):
            config_copy["hash_truncation_length"] = self.hash_truncation_length

        if (
            "auto_create_tables" not in config_copy
            and self.auto_create_tables is not None
        ):
            config_copy["auto_create_tables"] = self.auto_create_tables

        # Build fallback stores recursively, remembering the path taken.
        nested_chain = (*chain, store_name)
        fallback_stores = [
            build(fallback_name, nested_chain)
            for fallback_name in fallback_store_names
        ]

        # Instantiate store with config + fallback_stores
        store = store_class(
            fallback_stores=fallback_stores,
            **config_copy,
        )

        # Verify the store actually uses the hash algorithm we configured
        # (in case a store subclass overrides the default or ignores the parameter)
        if store.hash_algorithm != configured_hash_algorithm:
            raise ValueError(
                f"Store '{store_name}' ({store_class.__name__}) was configured with "
                f"hash_algorithm='{configured_hash_algorithm.value}' but is using "
                f"'{store.hash_algorithm.value}'. The store class may have overridden "
                f"the hash algorithm. All stores must use the same hash algorithm."
            )

        return store

    return build(name, ())

StoreConfig

Bases: BaseSettings

Configuration for a single metadata store.

Example
config = StoreConfig(
    type="metaxy_delta.DeltaMetadataStore",
    config={
        "table_uri": "s3://bucket/metadata",
        "region": "us-west-2",
        "fallback_stores": ["prod"],
    }
)