Configuration¶
This is the Python SDK for Metaxy's configuration. See config file reference to learn how to configure Metaxy via TOML files.
MetaxyConfig
¶
Bases: BaseSettings
Main Metaxy configuration.
Loads from (in order of precedence):
-
Init arguments
-
Environment variables (METAXY_*)
-
Config file (metaxy.toml or [tool.metaxy] in pyproject.toml)
The default store is "dev"; METAXY_STORE can be used to override it.
Attributes¶
Functions¶
validate_project
classmethod
¶
Validate project name follows naming rules.
Source code in src/metaxy/config.py
@field_validator("project")
@classmethod
def validate_project(cls, v: str) -> str:
    """Validate project name follows naming rules.

    Args:
        v: Candidate project name.

    Returns:
        The project name, unchanged, when it passes every check.

    Raises:
        ValueError: If the name is empty, contains a forward slash,
            contains a double underscore, or contains any character other
            than ASCII letters, digits, underscores and hyphens.
    """
    import re

    if not v:
        raise ValueError("project name cannot be empty")
    if "/" in v:
        raise ValueError(
            f"project name '{v}' cannot contain forward slashes (/). "
            f"Forward slashes are reserved for FeatureKey separation"
        )
    if "__" in v:
        raise ValueError(
            f"project name '{v}' cannot contain double underscores (__). "
            f"Double underscores are reserved for table name generation"
        )
    # Use fullmatch rather than match with a trailing `$`: `$` also matches
    # just before a final newline, so a name such as "proj\n" would otherwise
    # slip through the character check.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", v):
        raise ValueError(
            f"project name '{v}' must contain only alphanumeric characters, underscores, and hyphens"
        )
    return v
validate_hash_truncation_length
classmethod
¶
Validate hash truncation length is at least 8 if set.
Source code in src/metaxy/config.py
@field_validator("hash_truncation_length")
@classmethod
def validate_hash_truncation_length(cls, v: int | None) -> int | None:
    """Check that the hash truncation length, when given, is not below 8.

    Args:
        v: Requested truncation length, or ``None`` when unset.

    Returns:
        ``v`` unchanged.

    Raises:
        ValueError: If ``v`` is set and smaller than 8.
    """
    # An unset value is passed through untouched.
    if v is None:
        return v
    if v < 8:
        message = f"hash_truncation_length must be at least 8 characters, got {v}"
        raise ValueError(message)
    return v
settings_customise_sources
classmethod
¶
settings_customise_sources(settings_cls: type[BaseSettings], init_settings: PydanticBaseSettingsSource, env_settings: PydanticBaseSettingsSource, dotenv_settings: PydanticBaseSettingsSource, file_secret_settings: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]
Customize settings sources: init → env → TOML.
Priority (first wins): 1. Init arguments 2. Environment variables 3. TOML file
Source code in src/metaxy/config.py
@classmethod
def settings_customise_sources(
    cls,
    settings_cls: type[BaseSettings],
    init_settings: PydanticBaseSettingsSource,
    env_settings: PydanticBaseSettingsSource,
    dotenv_settings: PydanticBaseSettingsSource,
    file_secret_settings: PydanticBaseSettingsSource,
) -> tuple[PydanticBaseSettingsSource, ...]:
    """Choose the settings sources and their precedence.

    The returned tuple is ordered so that the first source wins:

    1. Init arguments
    2. Environment variables
    3. TOML file

    ``dotenv_settings`` and ``file_secret_settings`` are accepted but are
    not part of the returned tuple.
    """
    ordered_sources = (
        init_settings,
        env_settings,
        TomlConfigSettingsSource(settings_cls),
    )
    return ordered_sources
get
classmethod
¶
get() -> MetaxyConfig
Get the current Metaxy configuration.
Source code in src/metaxy/config.py
@classmethod
def get(cls) -> "MetaxyConfig":
    """Get the current Metaxy configuration.

    Returns:
        The configuration currently held by ``_metaxy_config``. When none
        is held, a ``UserWarning`` is emitted and a fresh
        ``cls(project="default")`` instance is returned; that fallback
        instance is not stored by this method.
    """
    cfg = _metaxy_config.get()
    if cfg is not None:
        return cfg

    # stacklevel=2 attributes the warning to the code that called get(),
    # which is where the missing initialization has to be fixed.
    warnings.warn(
        "Global Metaxy configuration not initialized. It can be set with MetaxyConfig.set(config) typically after loading it from a toml file. Returning default configuration (with environment variables and other pydantic settings sources resolved, project='default').",
        UserWarning,
        stacklevel=2,
    )
    return cls(project="default")
set
classmethod
¶
reset
classmethod
¶
load
classmethod
¶
load(config_file: str | Path | None = None, *, search_parents: bool = True, auto_discovery_start: Path | None = None) -> MetaxyConfig
Load config with auto-discovery and parent directory search.
Parameters:
-
config_file (str | Path | None, default: None) – Optional config file path (overrides auto-discovery)
-
search_parents (bool, default: True) – Search parent directories for config file (default: True)
-
auto_discovery_start (Path | None, default: None) – Directory to start search from (defaults to cwd)
Returns:
-
MetaxyConfig – Loaded config (TOML + env vars merged)
Example
# Auto-discover with parent search
config = MetaxyConfig.load()
# Explicit file
config = MetaxyConfig.load("custom.toml")
# Auto-discover without parent search
config = MetaxyConfig.load(search_parents=False)
# Auto-discover from a specific directory
config = MetaxyConfig.load(auto_discovery_start=Path("/path/to/project"))
Source code in src/metaxy/config.py
@classmethod
def load(
    cls,
    config_file: str | Path | None = None,
    *,
    search_parents: bool = True,
    auto_discovery_start: Path | None = None,
) -> "MetaxyConfig":
    """Load config with auto-discovery and parent directory search.

    The loaded config is passed to ``cls.set`` before it is returned.

    Args:
        config_file: Optional config file path (overrides auto-discovery)
        search_parents: Search parent directories for config file (default: True)
        auto_discovery_start: Directory to start search from (defaults to cwd)

    Returns:
        Loaded config (TOML + env vars merged)

    Raises:
        Exception: Anything raised while instantiating ``cls`` propagates
            to the caller; the temporary settings-source override is
            undone before it does.

    Example:
        ```py
        # Auto-discover with parent search
        config = MetaxyConfig.load()

        # Explicit file
        config = MetaxyConfig.load("custom.toml")

        # Auto-discover without parent search
        config = MetaxyConfig.load(search_parents=False)

        # Auto-discover from a specific directory
        config = MetaxyConfig.load(auto_discovery_start=Path("/path/to/project"))
        ```
    """
    # Search for config file if not explicitly provided
    if config_file is None and search_parents:
        config_file = cls._discover_config_with_parents(auto_discovery_start)

    # For an explicit (or discovered) file, temporarily swap the settings
    # sources so the TOML source reads that file, then instantiate normally.
    # This keeps init arguments and env vars working as usual.
    if config_file:
        toml_path = Path(config_file)

        class CustomTomlSource(TomlConfigSettingsSource):
            def __init__(self, settings_cls: type[BaseSettings]):
                # Skip auto-discovery, use explicit file: the super() chain
                # deliberately starts *after* TomlConfigSettingsSource so
                # its own __init__ is bypassed.
                super(TomlConfigSettingsSource, self).__init__(settings_cls)
                self.toml_file = toml_path
                self.toml_data = self._load_toml()

        original_method = cls.settings_customise_sources

        @classmethod  # type: ignore[misc]
        def custom_sources(
            cls_inner,
            settings_cls,
            init_settings,
            env_settings,
            dotenv_settings,
            file_secret_settings,
        ):
            toml_settings = CustomTomlSource(settings_cls)
            return (init_settings, env_settings, toml_settings)

        # Temporarily replace method
        cls.settings_customise_sources = custom_sources  # type: ignore[assignment]
        try:
            config = cls()
        finally:
            # Restore even when instantiation fails (e.g. a validation
            # error); otherwise the class would stay patched to this one
            # file for every later instantiation.
            cls.settings_customise_sources = original_method  # type: ignore[method-assign]
    else:
        # Use default sources (auto-discovery + env vars)
        config = cls()

    cls.set(config)
    return config
get_store
¶
get_store(name: str | None = None) -> MetadataStore
Instantiate metadata store by name.
Parameters:
-
name (str | None, default: None) – Store name (uses config.store if None)
Returns:
-
MetadataStore – Instantiated metadata store
Raises:
-
ValueError – If the store name is not found in the config, or if fallback stores have different hash algorithms than the parent store
-
ImportError – If the store class cannot be imported
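Example
config = MetaxyConfig.load()
store = config.get_store("prod")
# Use default store
store = config.get_store()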
Source code in src/metaxy/config.py
def get_store(self, name: str | None = None) -> "MetadataStore":
    """Build the metadata store registered under ``name``.

    Args:
        name: Key into ``self.stores``; ``self.store`` is used when it is
            ``None`` (or otherwise falsy).

    Returns:
        The instantiated store, constructed with its configured options and
        with its fallback stores built recursively through this method.

    Raises:
        ValueError: If no stores are configured, if ``name`` is not among
            the configured stores, or if the instantiated store reports a
            hash algorithm different from the configured one.
        ImportError: If store class cannot be imported (presumably raised
            by ``self._import_class`` - not visible here).

    Example:
        ```py
        config = MetaxyConfig.load()
        store = config.get_store("prod")

        # Use default store
        store = config.get_store()
        ```
    """
    from metaxy.data_versioning.hash_algorithms import HashAlgorithm

    if len(self.stores) == 0:
        raise ValueError(
            "No Metaxy stores available. They should be configured in metaxy.toml|pyproject.toml or via environment variables."
        )

    name = name or self.store
    if name not in self.stores:
        raise ValueError(
            f"Store '{name}' not found in config. "
            f"Available stores: {list(self.stores.keys())}"
        )

    store_config = self.stores[name]
    store_class = self._import_class(store_config.type)

    # Work on a copy so popping "fallback_stores" leaves the stored config intact.
    store_kwargs = store_config.config.copy()
    fallback_store_names = store_kwargs.pop("fallback_stores", [])

    # Resolve the hash algorithm: default when absent, enum when given as a string.
    configured_hash_algorithm = store_kwargs.get("hash_algorithm")
    if configured_hash_algorithm is None:
        configured_hash_algorithm = HashAlgorithm.XXHASH64
    elif isinstance(configured_hash_algorithm, str):
        configured_hash_algorithm = HashAlgorithm(configured_hash_algorithm)
    store_kwargs["hash_algorithm"] = configured_hash_algorithm

    # Global settings apply only where the store config does not set the
    # option itself and the global value is set.
    for option in ("hash_truncation_length", "auto_create_tables"):
        if option not in store_kwargs and getattr(self, option) is not None:
            store_kwargs[option] = getattr(self, option)

    # Each fallback store is resolved by name through this same method.
    fallback_stores = [
        self.get_store(fallback_name) for fallback_name in fallback_store_names
    ]

    store = store_class(fallback_stores=fallback_stores, **store_kwargs)

    # Guard against a store class that ignores or overrides the algorithm
    # it was handed.
    if store.hash_algorithm != configured_hash_algorithm:
        raise ValueError(
            f"Store '{name}' ({store_class.__name__}) was configured with "
            f"hash_algorithm='{configured_hash_algorithm.value}' but is using "
            f"'{store.hash_algorithm.value}'. The store class may have overridden "
            f"the hash algorithm. All stores must use the same hash algorithm."
        )
    return store