Skip to content

Pydantic

Pydantic is a data validation library in Python. LanceDB integrates with Pydantic for schema inference, data ingestion, and query result casting. Using LanceModel, users can seamlessly integrate Pydantic with the rest of the LanceDB APIs.

import lancedb
from lancedb.pydantic import Vector, LanceModel

# Schema of the "person" table: two scalar columns plus a vector column
# declared with Vector(2), i.e. a vector of dimension 2.
class PersonModel(LanceModel):
    name: str
    age: int
    vector: Vector(2)


# Location handed to lancedb.connect(); presumably a local directory path
# (NOTE(review): confirm against the lancedb.connect documentation).
url = "./example"
db = lancedb.connect(url)
# The table schema is taken from the model class itself, not from sample data.
table = db.create_table("person", schema=PersonModel)
# Rows are ingested directly as PersonModel instances.
table.add(
    [
        PersonModel(name="bob", age=1, vector=[1.0, 2.0]),
        PersonModel(name="alice", age=2, vector=[3.0, 4.0]),
    ]
)
assert table.count_rows() == 2
# Query with the vector [0.0, 0.0], keep a single result, and cast the result
# back to PersonModel. The value is indexable (a sequence of models), so
# `person[0]` below is the one hit; the example expects it to be "bob".
person = table.search([0.0, 0.0]).limit(1).to_pydantic(PersonModel)
assert person[0].name == "bob"

Vector Field

LanceDB provides a Vector(dim) function to define a vector field in a Pydantic model.

lancedb.pydantic.Vector

Vector(dim: int, value_type: DataType = pa.float32(), nullable: bool = True) -> Type[FixedSizeListMixin]

Pydantic Vector Type.

Warning

Experimental feature.

Parameters:

  • dim (int) –

    The dimension of the vector.

  • value_type (DataType, default: float32() ) –

    The value type of the vector, by default pa.float32()

  • nullable (bool, default: True ) –

    Whether the vector is nullable, by default it is True.

Examples:

>>> import pydantic
>>> from lancedb.pydantic import Vector
...
>>> class MyModel(pydantic.BaseModel):
...     id: int
...     url: str
...     embeddings: Vector(768)
>>> schema = pydantic_to_schema(MyModel)
>>> assert schema == pa.schema([
...     pa.field("id", pa.int64(), False),
...     pa.field("url", pa.utf8(), False),
...     pa.field("embeddings", pa.list_(pa.float32(), 768))
... ])
Source code in lancedb/pydantic.py
def Vector(
    dim: int, value_type: pa.DataType = pa.float32(), nullable: bool = True
) -> Type[FixedSizeListMixin]:
    """Pydantic Vector Type.

    Each call builds and returns a new ``list`` subclass that carries the
    vector's dimension, Arrow value type and nullability, and that plugs into
    both Pydantic v1 and Pydantic v2 validation.

    !!! warning
        Experimental feature.

    Parameters
    ----------
    dim : int
        The dimension of the vector.
    value_type : pyarrow.DataType, optional
        The value type of the vector, by default pa.float32().
        It is only reported through ``value_arrow_type()``; the validation
        hooks defined here do not consult it.
    nullable : bool, optional
        Whether the vector is nullable, by default it is True.

    Returns
    -------
    Type[FixedSizeListMixin]
        A ``FixedSizeList`` class to be used as a field annotation.

    Examples
    --------

    >>> import pyarrow as pa
    >>> import pydantic
    >>> from lancedb.pydantic import Vector, pydantic_to_schema
    >>> class MyModel(pydantic.BaseModel):
    ...     id: int
    ...     url: str
    ...     embeddings: Vector(768)
    >>> schema = pydantic_to_schema(MyModel)
    >>> assert schema == pa.schema([
    ...     pa.field("id", pa.int64(), False),
    ...     pa.field("url", pa.utf8(), False),
    ...     pa.field("embeddings", pa.list_(pa.float32(), 768))
    ... ])
    """

    # TODO: make a public parameterized type.
    #
    # NOTE: inside the method bodies below, ``dim``, ``nullable`` and
    # ``value_type`` resolve to the arguments of ``Vector`` (closure
    # variables), not to the same-named static methods: a class body is not an
    # enclosing scope for the functions defined in it.
    class FixedSizeList(list, FixedSizeListMixin):
        def __repr__(self):
            # Shows the dimension only, not the list contents.
            return f"FixedSizeList(dim={dim})"

        @staticmethod
        def nullable() -> bool:
            """Return the ``nullable`` flag passed to ``Vector``."""
            return nullable

        @staticmethod
        def dim() -> int:
            """Return the dimension passed to ``Vector``."""
            return dim

        @staticmethod
        def value_arrow_type() -> pa.DataType:
            """Return the Arrow value type passed to ``Vector``."""
            return value_type

        # Pydantic v2 hook: validate a list of exactly ``dim`` floats, then
        # wrap the validated list in this class.
        @classmethod
        def __get_pydantic_core_schema__(
            cls, _source_type: Any, _handler: pydantic.GetCoreSchemaHandler
        ) -> CoreSchema:
            return core_schema.no_info_after_validator_function(
                cls,
                core_schema.list_schema(
                    min_length=dim,
                    max_length=dim,
                    items_schema=core_schema.float_schema(),
                ),
            )

        # Pydantic v1 hook: yields the validators to run for this type.
        @classmethod
        def __get_validators__(cls) -> Generator[Callable, None, None]:
            yield cls.validate

        # For pydantic v1
        @classmethod
        def validate(cls, v):
            """Check container type and length, then wrap ``v`` in this class.

            Raises
            ------
            TypeError
                If ``v`` is not a list, range or numpy.ndarray, or if its
                length differs from the vector dimension.
            """
            if not isinstance(v, (list, range, np.ndarray)):
                raise TypeError("A list of numbers or numpy.ndarray is needed")
            if len(v) != dim:
                # Still a TypeError so existing handlers keep working, but the
                # message now names the real cause instead of implying that
                # the container type was wrong.
                raise TypeError(
                    f"A vector of dimension {dim} is needed, "
                    f"got one of length {len(v)}"
                )
            return cls(v)

        # Only Pydantic v1 gets ``__modify_schema__``; it describes the field
        # as exactly ``dim`` numbers in the generated schema.
        if PYDANTIC_VERSION.major < 2:

            @classmethod
            def __modify_schema__(cls, field_schema: Dict[str, Any]):
                field_schema["items"] = {"type": "number"}
                field_schema["maxItems"] = dim
                field_schema["minItems"] = dim

    return FixedSizeList

Type Conversion

LanceDB automatically converts Pydantic fields to Apache Arrow data types.

Current supported type conversions:

| Pydantic Field Type | PyArrow Data Type |
| --- | --- |
| int | pyarrow.int64 |
| float | pyarrow.float64 |
| bool | pyarrow.bool |
| str | pyarrow.utf8() |
| list | pyarrow.List |
| BaseModel | pyarrow.Struct |
| Vector(n) | pyarrow.FixedSizeList(float32, n) |

LanceDB supports creating an Apache Arrow Schema from a Pydantic BaseModel via the pydantic_to_schema() function.

lancedb.pydantic.pydantic_to_schema

pydantic_to_schema(model: Type[BaseModel]) -> Schema

Convert a Pydantic Model to a PyArrow Schema.

Parameters:

  • model (Type[BaseModel]) –

    The Pydantic BaseModel to convert to Arrow Schema.

Returns:

  • Schema –

    The Arrow Schema

Examples:

>>> from typing import List, Optional
>>> import pydantic
>>> from lancedb.pydantic import pydantic_to_schema, Vector
>>> class FooModel(pydantic.BaseModel):
...     id: int
...     s: str
...     vec: Vector(1536)  # fixed_size_list<item: float32>[1536]
...     li: List[int]
...
>>> schema = pydantic_to_schema(FooModel)
>>> assert schema == pa.schema([
...     pa.field("id", pa.int64(), False),
...     pa.field("s", pa.utf8(), False),
...     pa.field("vec", pa.list_(pa.float32(), 1536)),
...     pa.field("li", pa.list_(pa.int64()), False),
... ])
Source code in lancedb/pydantic.py
def pydantic_to_schema(model: Type[pydantic.BaseModel]) -> pa.Schema:
    """Build a [PyArrow Schema][pyarrow.Schema] from a
       [Pydantic Model][pydantic.BaseModel].

    The model is turned into a list of Arrow fields, which is then wrapped
    in a schema.

    Parameters
    ----------
    model : Type[pydantic.BaseModel]
        The Pydantic BaseModel class to convert.

    Returns
    -------
    pyarrow.Schema
        The Arrow Schema describing the model.

    Examples
    --------

    >>> from typing import List, Optional
    >>> import pyarrow as pa
    >>> import pydantic
    >>> from lancedb.pydantic import pydantic_to_schema, Vector
    >>> class FooModel(pydantic.BaseModel):
    ...     id: int
    ...     s: str
    ...     vec: Vector(1536)  # fixed_size_list<item: float32>[1536]
    ...     li: List[int]
    ...
    >>> schema = pydantic_to_schema(FooModel)
    >>> assert schema == pa.schema([
    ...     pa.field("id", pa.int64(), False),
    ...     pa.field("s", pa.utf8(), False),
    ...     pa.field("vec", pa.list_(pa.float32(), 1536)),
    ...     pa.field("li", pa.list_(pa.int64()), False),
    ... ])
    """
    return pa.schema(_pydantic_model_to_fields(model))