Skip to content

API Docs

geneva.connect

connect(
    uri: str | Path | None = None,
    *,
    region: str | None = None,
    api_key: Credential | str | None = None,
    host_override: str | None = None,
    storage_options: dict[str, str] | None = None,
    checkpoint: str | CheckpointStore | None = None,
    system_namespace: list[str] | None = None,
    namespace_impl: str | None = None,
    namespace_properties: dict[str, str] | None = None,
    **kwargs,
) -> Connection

Create a Geneva Connection to an existing database.

Examples:

>>> import geneva
>>> # Connect to a database in object storage
>>> conn = geneva.connect("s3://my-storage-bucket/my-database")
>>> # Connect using directory namespace
>>> conn = geneva.connect(
...     namespace_impl="dir", namespace_properties={"root": "/path"}
... )
>>> # Connect using REST namespace
>>> conn = geneva.connect(
...     namespace_impl="rest", namespace_properties={"uri": f"http://127.0.0.1:1234"}
... )
>>> tbl = conn.open_table("youtube_dataset")

Parameters:

  • uri (str | Path | None, default: None ) –

    LanceDB Database URI, or an S3/GCS path. If not provided and namespace_impl is set, defaults to "namespace://".

  • region (str | None, default: None ) –

    LanceDB cloud region. Set to None on LanceDB Enterprise.

  • api_key (Credential | str | None, default: None ) –

    API key to connect to the DB instance.

  • host_override (str | None, default: None ) –

    Set to the host of the enterprise stack.

  • system_namespace (list[str] | None, default: None ) –

    Namespace for system tables (manifests, clusters, jobs, errors). Defaults to config value if not provided.

  • namespace_impl (str | None, default: None ) –

    The namespace implementation to use (e.g., "dir", "rest"). If provided, connects using namespace instead of local database.

  • namespace_properties (dict[str, str] | None, default: None ) –

    Configuration properties for the namespace implementation.

Returns:

  • Connection - A LanceDB connection
Source code in geneva/db.py
def connect(
    uri: str | Path | None = None,
    *,
    region: str | None = None,
    api_key: Credential | str | None = None,
    host_override: str | None = None,
    storage_options: dict[str, str] | None = None,
    checkpoint: str | CheckpointStore | None = None,
    system_namespace: list[str] | None = None,
    namespace_impl: str | None = None,
    namespace_properties: dict[str, str] | None = None,
    **kwargs,
) -> Connection:
    """Create a Geneva Connection to an existing database.

    Examples
    --------
        >>> import geneva
        >>> # Connect to a database in object storage
        >>> conn = geneva.connect("s3://my-storage-bucket/my-database")
        >>> # Connect using directory namespace
        >>> conn = geneva.connect(
        ...     namespace_impl="dir", namespace_properties={"root": "/path"}
        ... )
        >>> # Connect using REST namespace
        >>> conn = geneva.connect(
        ...     namespace_impl="rest", namespace_properties={"uri": "http://127.0.0.1:1234"}
        ... )
        >>> tbl = conn.open_table("youtube_dataset")

    Parameters
    ----------
    uri: str | Path | None
        LanceDB Database URI, or an S3/GCS path.
        If not provided and namespace_impl is set, defaults to "namespace://".
    region: str | None
        LanceDB cloud region. Set to `None` on LanceDB Enterprise.
    api_key: Credential | str | None
        API key to connect to the DB instance.
    host_override: str | None
        Set to the host of the enterprise stack.
    storage_options: dict[str, str] | None
        Forwarded unchanged to the ``Connection`` as ``storage_options``.
    checkpoint: str | CheckpointStore | None
        Checkpoint location or store. It is resolved by ``_pre_connect`` into
        the checkpoint store that is handed to the ``Connection``.
    system_namespace: list[str] | None
        Namespace for system tables (manifests, clusters, jobs, errors).
        Defaults to config value if not provided.
    namespace_impl: str | None
        The namespace implementation to use (e.g., "dir", "rest").
        If provided, connects using namespace instead of local database.
    namespace_properties: dict[str, str] | None
        Configuration properties for the namespace implementation.
    **kwargs
        Any additional keyword arguments are forwarded to ``Connection``.

    Returns
    -------
    Connection
        The connection built from the resolved settings.

    Notes
    -----
    If ``Uploader.get()`` raises ``TypeError`` or ``ValueError`` (no uploader
    configured), the config key ``uploader.upload_dir`` is overridden to
    ``"<uri>/zips"``, using the resolved URI.
    """

    # _pre_connect resolves/normalizes the raw arguments; the parameters are
    # deliberately rebound to the resolved values it returns.
    api_key, checkpoint_store, host_override, region, uri, system_namespace = (
        _pre_connect(
            api_key,
            checkpoint,
            host_override,
            region,
            uri,
            namespace_impl,
            namespace_properties,
            system_namespace,
        )
    )

    # Stringify the resolved URI once; it is needed both for the connection
    # and for the default upload directory below.
    uri_str = str(uri)

    conn = Connection(
        uri_str,
        region=region,
        api_key=api_key,
        host_override=host_override,
        storage_options=storage_options,
        checkpoint_store=checkpoint_store,
        namespace_impl=namespace_impl,
        namespace_properties=namespace_properties,
        system_namespace=system_namespace,
        **kwargs,
    )

    # Set up default uploader if not already configured
    # This is needed for cluster operations (like upload_local_env) that don't have
    # table context
    from geneva.config import override_config_kv

    try:
        Uploader.get()
    except (TypeError, ValueError):
        # Uploader not configured - set a default upload_dir
        default_upload_dir = f"{uri_str}/zips"
        override_config_kv({"uploader.upload_dir": default_upload_dir})

    # Validate and create system_namespace if using namespace connection
    if namespace_impl is not None:
        _ensure_system_namespace_exists(conn)

    return conn