Sessions#

Note

Daft’s session APIs are early in their development and may change.

class Session[source]#

Session holds a connection’s state and orchestrates execution of DataFrame and SQL queries against catalogs.

attach(object: daft.catalog.Catalog | daft.catalog.Table, alias: Optional[str] = None) None[source]#

Attaches a known attachable object like a Catalog or Table.

Parameters:
  • object (Catalog|Table) – object which is attachable to a session

  • alias (str|None) – optional alias for name resolution

Returns:

None

attach_catalog(catalog: daft.catalog.Catalog | object, alias: Optional[str] = None) Catalog[source]#

Attaches an external catalog to this session.

Parameters:
  • catalog (Catalog | object) – catalog instance or supported catalog object

  • alias (str|None) – optional alias for name resolution

Returns:

new daft catalog instance

Return type:

Catalog

attach_table(table: daft.catalog.Table | object, alias: Optional[str] = None) Table[source]#

Attaches an external table instance to this session.

Parameters:
  • table (Table | object) – table instance or supported table object

  • alias (str | None) – optional alias for name resolution

Returns:

new daft table instance

Return type:

Table

create_namespace(identifier: daft.catalog.Identifier | str)[source]#

Creates a namespace in the current catalog.

create_table(identifier: daft.catalog.Identifier | str, source: daft.catalog.TableSource | object) Table[source]#

Creates a table in the current catalog.

create_temp_table(identifier: str, source: Optional[Union[TableSource, object]] = None) Table[source]#

Creates a temp table scoped to this session’s lifetime.

Example

>>> import daft
>>> from daft.session import Session
>>> sess = Session()
>>> sess.create_temp_table("T", daft.from_pydict({"x": [1, 2, 3]}))
>>> sess.create_temp_table("S", daft.from_pydict({"y": [4, 5, 6]}))
>>> sess.list_tables()
[Identifier(''T''), Identifier(''S'')]

Parameters:
  • identifier (str) – table identifier (name)

  • source (TableSource|object) – table source like a schema or dataframe

Returns:

new table instance

Return type:

Table

current_catalog() daft.catalog.Catalog | None[source]#

Get the session’s current catalog or None.

Parameters:

None

Returns:

current catalog or None if one is not set

Return type:

Catalog | None

current_namespace() daft.catalog.Identifier | None[source]#

Get the session’s current namespace or None.

Parameters:

None

Returns:

current namespace or None if one is not set

Return type:

Identifier | None

detach_catalog(alias: str)[source]#

Detaches the catalog from this session or raises if the catalog does not exist.

Parameters:

alias (str) – catalog alias to detach

Returns:

None

detach_table(alias: str)[source]#

Detaches the table from this session or raises if the table does not exist.

Parameters:

alias (str) – table alias to detach

Returns:

None

drop_namespace(identifier: daft.catalog.Identifier | str)[source]#

Drop the given namespace in the current catalog.

Parameters:

identifier (Identifier|str) – namespace identifier

Returns:

None

drop_table(identifier: daft.catalog.Identifier | str)[source]#

Drop the given table in the current catalog.

Parameters:

identifier (Identifier|str) – table identifier

Returns:

None

get_catalog(identifier: str) Catalog[source]#

Returns the catalog or raises an exception if it does not exist.

Parameters:

identifier (str) – catalog identifier (name)

Returns:

The catalog object.

Return type:

Catalog

Raises:

ValueError – If the catalog does not exist.

get_table(identifier: daft.catalog.Identifier | str) Table[source]#

Returns the table or raises an exception if it does not exist.

Parameters:

identifier (Identifier|str) – table identifier or identifier string

Returns:

The table object.

Return type:

Table

Raises:

ValueError – If the table does not exist.

has_catalog(identifier: str) bool[source]#

Returns true if a catalog with the given identifier exists.

has_table(identifier: daft.catalog.Identifier | str) bool[source]#

Returns true if a table with the given identifier exists.

list_catalogs(pattern: Optional[str] = None) list[str][source]#

Returns a list of available catalogs matching the pattern.

This API currently returns a list of catalog names for backwards compatibility. In 0.5.0 this API will return a list of Catalog objects.

Parameters:

pattern (str) – catalog name pattern

Returns:

list of available catalog names

Return type:

list[str]

list_namespaces(pattern: Optional[str] = None) list[daft.catalog.Identifier][source]#

Returns a list of matching namespaces in the current catalog.

list_tables(pattern: Optional[str] = None) list[daft.catalog.Identifier][source]#

Returns a list of available tables.

Parameters:

pattern (str) – table name pattern

Returns:

list of available tables

Return type:

list[Identifier]

read_table(identifier: daft.catalog.Identifier | str, **options) DataFrame[source]#

Returns the table as a DataFrame or raises an exception if it does not exist.

Parameters:

identifier (Identifier|str) – table identifier

Return type:

DataFrame

Raises:

ValueError – If the table does not exist.

set_catalog(identifier: str | None)[source]#

Sets the given catalog as current_catalog or raises an error if it does not exist.

Parameters:

identifier (str) – sets the current catalog

Returns:

None

Raises:

ValueError – If the catalog does not exist.

set_namespace(identifier: daft.catalog.Identifier | str | None)[source]#

Set the given namespace as current_namespace for table resolution.

Parameters:

identifier (Identifier | str) – namespace identifier

sql(sql: str) daft.dataframe.dataframe.DataFrame | None[source]#

Executes the SQL statement using this session.

Parameters:

sql (str) – input SQL statement

Returns:

DataFrame instance if this was a data statement (DQL, DDL, DML); otherwise None.

Return type:

DataFrame | None

use(identifier: Optional[Union[Identifier, str]] = None)[source]#

Use sets the current catalog and namespace.

write_table(identifier: daft.catalog.Identifier | str, df: daft.dataframe.dataframe.DataFrame | object, mode: Literal['append', 'overwrite'] = 'append', **options)[source]#

Writes the DataFrame to the table specified by the identifier.

Parameters:
  • identifier (Identifier|str) – table identifier

  • df (DataFrame|object) – dataframe to write

  • mode ("append"|"overwrite") – write mode, defaults to “append”

  • **options – additional, format-specific write options

Returns:

None