Skip to content

API Reference

Schema

tacit.Schema

Base class for tacit schema definitions.

Subclass and declare columns as annotated class attributes:

class Iris(Schema):
    sepal_length: float
    species: str
Source code in src/tacit/schema.py
class Schema:
    """Base class for tacit schema definitions.

    Subclass and declare columns as annotated class attributes:

        class Iris(Schema):
            sepal_length: float
            species: str
    """

    # Populated per subclass by __init_subclass__:
    _fields: ClassVar[dict[str, type]]  # column name -> declared type
    _field_checks: ClassVar[dict[str, list[pa.Check]]]  # column name -> pandera checks
    _field_nullable: ClassVar[dict[str, bool]]  # column name -> nullability flag

    def __init_subclass__(cls, **kwargs: object) -> None:
        """Collect the subclass's annotations into its field registries.

        Plain annotations become columns as-is. ``Annotated[T, ...]``
        annotations contribute column type ``T`` plus any ``pa.Check``
        instances and the first ``Nullable`` marker found in the metadata.
        ``ClassVar`` annotations (the registries themselves) are skipped.
        """
        super().__init_subclass__(**kwargs)
        fields: dict[str, type] = {}
        field_checks: dict[str, list[pa.Check]] = {}
        field_nullable: dict[str, bool] = {}

        for name, hint in get_type_hints(cls, include_extras=True).items():
            if get_origin(hint) is ClassVar:
                continue

            if get_origin(hint) is Annotated:
                args = get_args(hint)
                fields[name] = args[0]
                checks = [a for a in args[1:] if isinstance(a, pa.Check)]
                if checks:
                    field_checks[name] = checks
                # First Nullable marker wins; any later ones are ignored.
                for a in args[1:]:
                    if isinstance(a, Nullable):
                        field_nullable[name] = a.allow
                        break
            else:
                fields[name] = hint

        cls._fields = fields
        cls._field_checks = field_checks
        cls._field_nullable = field_nullable

    @classmethod
    def _get_fields(cls) -> dict[str, type]:
        """Return the column-name -> type mapping declared on this schema."""
        return cls._fields

    @classmethod
    def _ibis_schema(cls) -> ibis.Schema:
        """Build the ibis schema corresponding to the declared fields."""
        return ibis.schema(cls._get_fields())

    @classmethod
    def _pandera_schema(cls) -> pa.DataFrameSchema:
        """Build a strict pandera schema carrying per-column checks/nullability."""
        columns = {}
        for name, dtype in cls._ibis_schema().items():
            checks = cls._field_checks.get(name, [])
            # Columns are non-nullable unless a Nullable marker said otherwise.
            nullable = cls._field_nullable.get(name, False)
            columns[name] = pa.Column(dtype, checks=checks, nullable=nullable)
        return pa.DataFrameSchema(columns, strict=True)

    @classmethod
    def _check_columns(
        cls,
        target: ibis.Schema,
        actual: ibis.Schema,
        *,
        phase: ValidationPhase,
    ) -> None:
        """Raise a structural error if column name sets don't match exactly."""
        target_names = set(target.names)  # type: ignore[reportArgumentType]  # ibis has no py.typed
        actual_names = set(actual.names)  # type: ignore[reportArgumentType]  # ibis has no py.typed

        missing = sorted(target_names - actual_names)
        extra = sorted(actual_names - target_names)
        if missing or extra:
            raise structural_error_for_columns(
                schema=cls,
                phase=phase,
                missing=missing,
                extra=extra,
            )

    @classmethod
    def parse(cls, table: ir.Table) -> DataFrame[Self]:
        """Full parsing: coerce types + validate with pandera + wrap as DataFrame.

        Executes queries against the engine. Use at pipeline boundaries where
        you're ingesting untrusted data.

        Raises:
            tacit.errors.ValidationError: Data fails structural, coercion, or
                validation checks.
        """
        target = cls._ibis_schema()
        actual = table.schema()
        cls._check_columns(target, actual, phase=ValidationPhase.PARSE)

        # Pandera's ibis backend doesn't support coercion — handle it ourselves
        cast_map = {
            col: target_type
            for col, target_type in target.items()
            if actual[col] != target_type
        }
        if cast_map:
            try:
                table = table.cast(cast_map)
            except Exception as exc:
                coercion_error = coercion_error_for_cast_failure(
                    schema=cls,
                    phase=ValidationPhase.PARSE,
                    cast_map=cast_map,
                    original=exc,
                )
                raise coercion_error from exc

        try:
            validated = cls._pandera_schema().validate(table)
        except (pe.SchemaError, pe.SchemaErrors) as exc:
            validation_error = validation_error_from_pandera(
                schema=cls,
                phase=ValidationPhase.PARSE,
                original=exc,
            )
            raise validation_error from exc
        except Exception as exc:
            # A cast may only surface at execution time on some backends;
            # re-classify those failures as coercion errors before giving up.
            if cast_map and looks_like_coercion_failure(exc):
                coercion_error = coercion_error_for_cast_failure(
                    schema=cls,
                    phase=ValidationPhase.PARSE,
                    cast_map=cast_map,
                    original=exc,
                )
                raise coercion_error from exc
            validation_error = validation_error_from_execution(
                schema=cls,
                phase=ValidationPhase.PARSE,
                original=exc,
            )
            raise validation_error from exc
        return DataFrame._from_table(validated, cls)

    @classmethod
    def cast(cls, table: ir.Table) -> DataFrame[Self]:
        """Structural check: verify column names and types match, wrap as DataFrame.

        Metadata-only — does not execute queries. Use at internal pipeline
        boundaries where you trust the data but want type safety.

        Raises:
            tacit.errors.StructuralError: Missing, extra, or wrong-type columns.
        """
        target = cls._ibis_schema()
        actual = table.schema()
        # Reuse the shared name check instead of duplicating it inline,
        # keeping cast() consistent with parse().
        cls._check_columns(target, actual, phase=ValidationPhase.CAST)

        # Names match; now require exact dtype equality (no coercion in cast).
        type_errors: list[tuple[str, object, object]] = []
        for col_name, expected_type in target.items():
            actual_type = actual[col_name]
            if actual_type != expected_type:
                type_errors.append((col_name, expected_type, actual_type))
        if type_errors:
            raise structural_error_for_type_mismatches(
                schema=cls,
                phase=ValidationPhase.CAST,
                mismatches=type_errors,
            )

        return DataFrame._from_table(table, cls)

parse(table) classmethod

Full parsing: coerce types + validate with pandera + wrap as DataFrame.

Executes queries against the engine. Use at pipeline boundaries where you're ingesting untrusted data.

Raises:

Type Description
ValidationError

Data fails structural, coercion, or validation checks.

Source code in src/tacit/schema.py
@classmethod
def parse(cls, table: ir.Table) -> DataFrame[Self]:
    """Full parsing: coerce types + validate with pandera + wrap as DataFrame.

    Executes queries against the engine. Use at pipeline boundaries where
    you're ingesting untrusted data.

    Raises:
        tacit.errors.ValidationError: Data fails structural, coercion, or
            validation checks.
    """
    target = cls._ibis_schema()
    actual = table.schema()
    # Structural check first: missing/extra columns raise before any casting.
    cls._check_columns(target, actual, phase=ValidationPhase.PARSE)

    # Pandera's ibis backend doesn't support coercion — handle it ourselves
    # Only cast columns whose actual type differs from the declared one.
    cast_map = {
        col: target_type
        for col, target_type in target.items()
        if actual[col] != target_type
    }
    if cast_map:
        try:
            table = table.cast(cast_map)
        except Exception as exc:
            # Cast failed eagerly (while building the expression).
            coercion_error = coercion_error_for_cast_failure(
                schema=cls,
                phase=ValidationPhase.PARSE,
                cast_map=cast_map,
                original=exc,
            )
            raise coercion_error from exc

    try:
        validated = cls._pandera_schema().validate(table)
    except (pe.SchemaError, pe.SchemaErrors) as exc:
        # Pandera reported a constraint/validation failure.
        validation_error = validation_error_from_pandera(
            schema=cls,
            phase=ValidationPhase.PARSE,
            original=exc,
        )
        raise validation_error from exc
    except Exception as exc:
        # Backend raised during execution: a cast may only fail lazily here,
        # so re-classify coercion-looking failures before giving up.
        if cast_map and looks_like_coercion_failure(exc):
            coercion_error = coercion_error_for_cast_failure(
                schema=cls,
                phase=ValidationPhase.PARSE,
                cast_map=cast_map,
                original=exc,
            )
            raise coercion_error from exc
        validation_error = validation_error_from_execution(
            schema=cls,
            phase=ValidationPhase.PARSE,
            original=exc,
        )
        raise validation_error from exc
    return DataFrame._from_table(validated, cls)

cast(table) classmethod

Structural check: verify column names and types match, wrap as DataFrame.

Metadata-only — does not execute queries. Use at internal pipeline boundaries where you trust the data but want type safety.

Raises:

Type Description
StructuralError

Missing, extra, or wrong-type columns.

Source code in src/tacit/schema.py
@classmethod
def cast(cls, table: ir.Table) -> DataFrame[Self]:
    """Structural check: verify column names and types match, wrap as DataFrame.

    Metadata-only — does not execute queries. Use at internal pipeline
    boundaries where you trust the data but want type safety.

    Raises:
        tacit.errors.StructuralError: Missing, extra, or wrong-type columns.
    """
    target = cls._ibis_schema()
    actual = table.schema()
    # Compare column name sets: declared-but-absent is "missing",
    # present-but-undeclared is "extra".
    target_names = set(target.names)  # type: ignore[reportArgumentType]
    actual_names = set(actual.names)  # type: ignore[reportArgumentType]
    missing = sorted(target_names - actual_names)
    extra = sorted(actual_names - target_names)
    if missing or extra:
        raise structural_error_for_columns(
            schema=cls,
            phase=ValidationPhase.CAST,
            missing=missing,
            extra=extra,
        )

    # Names match; now require exact dtype equality (no coercion in cast).
    type_errors: list[tuple[str, object, object]] = []
    for col_name, expected_type in target.items():
        actual_type = actual[col_name]
        if actual_type != expected_type:
            type_errors.append((col_name, expected_type, actual_type))
    if type_errors:
        raise structural_error_for_type_mismatches(
            schema=cls,
            phase=ValidationPhase.CAST,
            mismatches=type_errors,
        )

    return DataFrame._from_table(table, cls)

DataFrame

tacit.DataFrame

Bases: Table

Schema-aware DataFrame. Wraps an ibis Table with a schema type parameter.

DataFrame[S] IS an ibis Table (subclass), so the full ibis API works transparently. ibis operations (.mutate(), .filter(), etc.) return plain ir.Table — the schema type drops off, which is correct by design.

Source code in src/tacit/schema.py
class DataFrame[S: "Schema"](ir.Table):
    """Schema-aware DataFrame. Wraps an ibis Table with a schema type parameter.

    DataFrame[S] IS an ibis Table (subclass), so the full ibis API works
    transparently. ibis operations (.mutate(), .filter(), etc.) return plain
    ir.Table — the schema type drops off, which is correct by design.
    """

    __slots__ = ("_tacit_schema",)

    # S is phantom (not stored in instance attrs). Without this stub, pyright
    # infers S as covariant, making DataFrame[Child] assignable to
    # DataFrame[Parent]. Both positions force invariance inference.
    def __tacit_type_is_invariant__(self, x: S) -> S: ...

    @classmethod
    def _from_table(cls, table: ir.Table, schema_type: type[Schema]) -> DataFrame[S]:
        df = cls(table.op())
        object.__setattr__(df, "_tacit_schema", schema_type)
        return df

contract

tacit.contract

contract(fn=None, /, *, validate=False, returns=None)

contract(fn: Callable[P, R]) -> Callable[P, R]
contract(
    *, returns: type[S], validate: bool = ...
) -> Callable[
    [Callable[P, ir.Table]], Callable[P, DataFrame[S]]
]
contract(
    *, validate: bool = ...
) -> Callable[[Callable[P, R]], Callable[P, R]]

Decorator that enforces DataFrame schema contracts at function boundaries.

Inspects type annotations to find DataFrame[S] parameters and return type. Calls Schema.cast() on inputs and outputs by default (structural checks only, zero execution cost). With validate=True, calls Schema.parse() instead (full pandera validation, executes queries).

Non-DataFrame parameters and return values are passed through unchanged.

The returns parameter lets the decorator own the output schema so the function body can return a plain ir.Table without a type error::

@tacit.contract(returns=IrisFeatures)
def transform(df: DataFrame[Iris]) -> ir.Table:
    return df.mutate(sepal_ratio=df.sepal_length / df.sepal_width)

Call sites still see DataFrame[IrisFeatures] as the return type.

Usage

@tacit.contract
def transform(df: DataFrame[Iris]) -> DataFrame[IrisFeatures]:
    return IrisFeatures.cast(df.mutate(...))

@tacit.contract(validate=True)
def ingest(df: DataFrame[Iris]) -> DataFrame[IrisFeatures]:
    return IrisFeatures.cast(df.mutate(...))

@tacit.contract(returns=IrisFeatures)
def transform(df: DataFrame[Iris]) -> ir.Table:
    return df.mutate(...)

Source code in src/tacit/contract.py
def contract(fn=None, /, *, validate=False, returns=None) -> Any:
    """Decorator enforcing DataFrame schema contracts at function boundaries.

    Type annotations are inspected to locate DataFrame[S] parameters and the
    return type. By default inputs and outputs go through Schema.cast()
    (structural checks only, zero execution cost); with validate=True they go
    through Schema.parse() instead (full pandera validation, executes queries).

    Parameters and return values that are not DataFrames pass through
    untouched.

    The ``returns`` parameter lets the decorator own the output schema so the
    function body can return a plain ``ir.Table`` without a type error::

        @tacit.contract(returns=IrisFeatures)
        def transform(df: DataFrame[Iris]) -> ir.Table:
            return df.mutate(sepal_ratio=df.sepal_length / df.sepal_width)

    Call sites still see ``DataFrame[IrisFeatures]`` as the return type.

    Usage:
        @tacit.contract
        def transform(df: DataFrame[Iris]) -> DataFrame[IrisFeatures]:
            return IrisFeatures.cast(df.mutate(...))

        @tacit.contract(validate=True)
        def ingest(df: DataFrame[Iris]) -> DataFrame[IrisFeatures]:
            return IrisFeatures.cast(df.mutate(...))

        @tacit.contract(returns=IrisFeatures)
        def transform(df: DataFrame[Iris]) -> ir.Table:
            return df.mutate(...)
    """
    def decorate(func):
        return _wrap(func, validate=validate, returns=returns)

    # Bare ``@contract`` passes the function in directly; parenthesized
    # ``@contract(...)`` must hand back the decorator itself.
    return decorate(fn) if fn is not None else decorate

tacit.errors.ValidationError

tacit.errors.ValidationError

Bases: Exception

Base class for tacit validation failures.

Source code in src/tacit/errors.py
class ValidationError(Exception):
    """Base class for tacit validation failures."""

    _summary = "Validation"

    def __init__(
        self,
        *,
        schema: type[Schema],
        phase: ValidationPhase,
        detail: str | None = None,
        boundary_label: str | None = None,
        reason_code: str | None = None,
        check: object | None = None,
        failure_cases: object | None = None,
        column: str | None = None,
        original: BaseException | None = None,
    ) -> None:
        self.schema = schema
        self.phase = phase
        self.detail = detail
        self.boundary_label = boundary_label
        self.reason_code = reason_code
        self.check = check
        self.failure_cases = failure_cases
        self.column = column
        self.original = original
        super().__init__(self._build_message())

    def _build_message(self) -> str:
        message = f"{self._summary} failed for schema {self.schema.__name__} {self._phase_phrase()}."
        if self.detail:
            return f"{message[:-1]}: {self.detail}"
        return message

    def _phase_phrase(self) -> str:
        if self.phase is ValidationPhase.CAST:
            return "during cast"
        if self.phase is ValidationPhase.PARSE:
            return "during parse"
        if self.phase is ValidationPhase.CONTRACT_INPUT:
            return f"on {self.boundary_label or 'contract input'}"
        return f"on {self.boundary_label or 'return value'}"

tacit.errors.StructuralError

tacit.errors.StructuralError

Bases: ValidationError

Source code in src/tacit/errors.py
# Raised for structural mismatches: missing/extra columns or wrong types.
class StructuralError(ValidationError):
    _summary = "Structural validation"

tacit.errors.CoercionError

tacit.errors.CoercionError

Bases: ValidationError

Source code in src/tacit/errors.py
# Raised when casting columns to the declared types fails.
class CoercionError(ValidationError):
    _summary = "Coercion"

tacit.errors.ConstraintError

tacit.errors.ConstraintError

Bases: ValidationError

Source code in src/tacit/errors.py
# Raised when data fails a declared constraint check.
class ConstraintError(ValidationError):
    _summary = "Constraint validation"

tacit.errors.CheckExecutionError

tacit.errors.CheckExecutionError

Bases: ValidationError

Source code in src/tacit/errors.py
# Raised when running the checks themselves fails (not a data failure).
class CheckExecutionError(ValidationError):
    _summary = "Check execution"

tacit.errors.ValidationPhase

tacit.errors.ValidationPhase

Bases: Enum

Source code in src/tacit/errors.py
# Which API boundary triggered validation; used to phrase error messages.
class ValidationPhase(Enum):
    CAST = "cast"  # Schema.cast(): structural, metadata-only check
    PARSE = "parse"  # Schema.parse(): full coercion + pandera validation
    CONTRACT_INPUT = "contract_input"  # @contract-checked function argument
    CONTRACT_OUTPUT = "contract_output"  # @contract-checked return value

Check

tacit.Check = pa.Check module-attribute

Nullable

tacit.Nullable dataclass

Source code in src/tacit/constraints.py
# Annotation marker placed in Annotated[...] metadata to allow (or, with
# allow=False, explicitly forbid) nulls in the annotated column.
@dataclass(frozen=True)
class Nullable:
    allow: bool = True