678 lines
23 KiB
Python
678 lines
23 KiB
Python
from __future__ import annotations
|
|
|
|
import contextlib
|
|
import re
|
|
from typing import (
|
|
TYPE_CHECKING,
|
|
Callable,
|
|
Generic,
|
|
Union,
|
|
overload,
|
|
)
|
|
|
|
from polars._dependencies import _check_for_pandas, _check_for_pyarrow
|
|
from polars._dependencies import pandas as pd
|
|
from polars._dependencies import pyarrow as pa
|
|
from polars._typing import FrameType
|
|
from polars._utils.deprecation import deprecate_renamed_parameter
|
|
from polars._utils.pycapsule import is_pycapsule
|
|
from polars._utils.unstable import issue_unstable_warning
|
|
from polars._utils.various import _get_stack_locals, qualified_type_name
|
|
from polars._utils.wrap import wrap_ldf
|
|
from polars.convert import from_arrow, from_pandas
|
|
from polars.dataframe import DataFrame
|
|
from polars.lazyframe import LazyFrame
|
|
from polars.series import Series
|
|
|
|
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
from polars._plr import PySQLContext
|
|
|
|
if TYPE_CHECKING:
|
|
import sys
|
|
from collections.abc import Collection, Mapping
|
|
from types import TracebackType
|
|
from typing import Any, Final, Literal
|
|
|
|
if sys.version_info >= (3, 10):
|
|
from typing import TypeAlias
|
|
else:
|
|
from typing_extensions import TypeAlias
|
|
|
|
if sys.version_info >= (3, 11):
|
|
from typing import Self
|
|
else:
|
|
from typing_extensions import Self
|
|
|
|
CompatibleFrameType: TypeAlias = Union[
|
|
DataFrame,
|
|
LazyFrame,
|
|
Series,
|
|
pd.DataFrame,
|
|
pd.Series[Any],
|
|
pa.Table,
|
|
pa.RecordBatch,
|
|
]
|
|
|
|
__all__ = ["SQLContext"]
|
|
|
|
|
|
def _compatible_frame(obj: Any) -> bool:
    """
    Report whether `obj` is a frame-like object the SQL engine can ingest.

    The checks run in this order (stopping at the first match): anything
    flagged by `is_pycapsule`, a polars LazyFrame, a pandas DataFrame/Series,
    or a pyarrow Table/RecordBatch.
    """
    if is_pycapsule(obj) or isinstance(obj, LazyFrame):
        return True
    # the `_check_for_*` guards run before the isinstance tests, so the
    # `pd` / `pa` attributes are only touched for objects that pass them
    if _check_for_pandas(obj) and isinstance(obj, (pd.DataFrame, pd.Series)):
        return True
    return _check_for_pyarrow(obj) and isinstance(obj, (pa.Table, pa.RecordBatch))
|
|
|
|
|
|
def _ensure_lazyframe(obj: Any) -> LazyFrame:
    """
    Return a LazyFrame built from any compatible input object.

    Parameters
    ----------
    obj
        A polars DataFrame, LazyFrame or Series, a pandas DataFrame or Series,
        a pyarrow Table or RecordBatch, or any object accepted by
        `is_pycapsule`.

    Returns
    -------
    LazyFrame

    Raises
    ------
    ValueError
        If `obj` is not one of the recognised frame types.
    """
    # native polars objects need no conversion
    if isinstance(obj, (DataFrame, LazyFrame)):
        return obj.lazy()
    if isinstance(obj, Series):
        return obj.to_frame().lazy()

    if _check_for_pandas(obj) and isinstance(obj, (pd.DataFrame, pd.Series)):
        frame = from_pandas(obj)
    elif is_pycapsule(obj) or (
        _check_for_pyarrow(obj) and isinstance(obj, (pa.Table, pa.RecordBatch))
    ):
        frame = from_arrow(obj)
    else:
        msg = f"unrecognised frame type: {qualified_type_name(obj)}"
        raise ValueError(msg)

    # both converters may hand back a Series (a single column); promote it to
    # a frame so that `.lazy()` is always available, whichever path was taken
    if isinstance(frame, Series):
        frame = frame.to_frame()
    return frame.lazy()
|
|
|
|
|
|
def _get_frame_locals(
    *,
    all_compatible: bool,
    n_objects: int | None = None,
    named: str | Collection[str] | Callable[[str], bool] | None = None,
) -> dict[str, Any]:
    """
    Collect frame-like objects from the local stack, keyed by variable name.

    Parameters
    ----------
    all_compatible
        If True, match objects using the `_compatible_frame` predicate;
        otherwise match only polars DataFrame, LazyFrame and Series instances.
    n_objects
        Forwarded to `_get_stack_locals` unchanged.
    named
        Forwarded to `_get_stack_locals` unchanged.
    """
    if all_compatible:
        of_type = _compatible_frame
    else:
        of_type = (DataFrame, LazyFrame, Series)
    return _get_stack_locals(of_type=of_type, n_objects=n_objects, named=named)  # type: ignore[arg-type]
|
|
|
|
|
|
class SQLContext(Generic[FrameType]):
    """
    Run SQL queries against DataFrame, LazyFrame, and Series data.

    .. warning::
        This functionality is considered **unstable**, although it is close to being
        considered stable. It may be changed at any point without it being considered
        a breaking change.
    """

    # handle to the underlying SQL engine context
    _ctxt: PySQLContext
    # init-time default for whether `execute` returns an eager DataFrame
    _eager_execution: Final[bool]
    # one entry per active `with` scope: the table names present on scope entry
    _tables_scope_stack: list[set[str]]

    # note: the type-overloaded methods are required to support accurate typing
    # of the frame return from "execute" (which may be DataFrame or LazyFrame),
    # as that is influenced by both the "eager" flag at init-time AND the "eager"
    # flag at query-time (if anyone can find a lighter-weight set of annotations
    # that successfully resolves this, please go for it... ;)

    @overload
    def __init__(
        self: SQLContext[LazyFrame],
        frames: Mapping[str, CompatibleFrameType | None] | None = ...,
        *,
        register_globals: bool | int = ...,
        all_compatible: bool = ...,
        eager: Literal[False] = False,
        **named_frames: CompatibleFrameType | None,
    ) -> None: ...

    @overload
    def __init__(
        self: SQLContext[DataFrame],
        frames: Mapping[str, CompatibleFrameType | None] | None = ...,
        *,
        register_globals: bool | int = ...,
        all_compatible: bool = ...,
        eager: Literal[True],
        **named_frames: CompatibleFrameType | None,
    ) -> None: ...

    @overload
    def __init__(
        self: SQLContext[DataFrame],
        frames: Mapping[str, CompatibleFrameType | None] | None = ...,
        *,
        register_globals: bool | int = ...,
        all_compatible: bool = ...,
        eager: bool,
        **named_frames: CompatibleFrameType | None,
    ) -> None: ...

    @deprecate_renamed_parameter("eager_execution", "eager", version="0.20.31")
    def __init__(
        self,
        frames: Mapping[str, CompatibleFrameType | None] | None = None,
        *,
        register_globals: bool | int = False,
        all_compatible: bool = False,
        eager: bool = False,
        **named_frames: CompatibleFrameType | None,
    ) -> None:
        """
        Initialize a new `SQLContext`.

        .. versionchanged:: 0.20.31
            The `eager_execution` parameter was renamed `eager`.

        Parameters
        ----------
        frames
            A `{name:frame, ...}` mapping which can include Polars frames *and*
            pandas DataFrames, Series and pyarrow Table and RecordBatch objects.
        register_globals
            Register compatible objects (polars DataFrame, LazyFrame, and Series) found
            in the globals, automatically mapping their variable name to a table name.
            To register other objects (pandas/pyarrow data) pass them explicitly, set
            `all_compatible`, or call the `execute_global` classmethod. If given an
            integer then only the most recent "n" objects found will be registered.
        all_compatible
            Only relevant when `register_globals` is set. If True, pandas DataFrame,
            Series, and pyarrow Table and RecordBatch objects found in the globals
            are registered too; if False (the default) only Polars classes are.
        eager
            If True, returns execution results as `DataFrame` instead of `LazyFrame`.
            (Note that the query itself is always executed in lazy-mode; this parameter
            impacts whether :meth:`execute` returns an eager or lazy result frame).
        **named_frames
            Named eager/lazy frames, provided as kwargs.

        Examples
        --------
        >>> lf = pl.LazyFrame({"a": [1, 2, 3], "b": ["x", None, "z"]})
        >>> res = pl.SQLContext(frame=lf).execute(
        ...     "SELECT b, a*2 AS two_a FROM frame WHERE b IS NOT NULL"
        ... )
        >>> res.collect()
        shape: (2, 2)
        ┌─────┬───────┐
        │ b   ┆ two_a │
        │ --- ┆ ---   │
        │ str ┆ i64   │
        ╞═════╪═══════╡
        │ x   ┆ 2     │
        │ z   ┆ 6     │
        └─────┴───────┘
        """
        issue_unstable_warning(
            "`SQLContext` is considered **unstable**, although it is close to being considered stable."
        )
        self._ctxt = PySQLContext.new()
        self._eager_execution = eager

        frames = dict(frames or {})
        if register_globals:
            # `True` means "no limit"; any other truthy value is the documented
            # cap on the number of most-recent objects to pick up
            n_objects = None if register_globals is True else int(register_globals)
            for name, obj in _get_frame_locals(
                all_compatible=all_compatible,
                n_objects=n_objects,
            ).items():
                # explicitly supplied frames always win over discovered globals
                if name not in frames and name not in named_frames:
                    named_frames[name] = obj

        if frames or named_frames:
            frames.update(named_frames)
            self.register_many(frames)

    @overload
    @classmethod
    def execute_global(
        cls, query: str, *, eager: Literal[False] = False
    ) -> LazyFrame: ...

    @overload
    @classmethod
    def execute_global(cls, query: str, *, eager: Literal[True]) -> DataFrame: ...

    @overload
    @classmethod
    def execute_global(cls, query: str, *, eager: bool) -> DataFrame | LazyFrame: ...

    @classmethod
    def execute_global(
        cls, query: str, *, eager: bool = False
    ) -> DataFrame | LazyFrame:
        """
        Immediately execute a SQL query, automatically registering frame globals.

        Notes
        -----
        * This convenience method automatically registers all compatible objects in
          the local stack that are referenced in the query, mapping their variable name
          to a table name. Note that in addition to polars DataFrame, LazyFrame, and
          Series this method *also* registers pandas DataFrame, Series, and pyarrow
          Table and RecordBatch objects.
        * Instead of calling this classmethod you should consider using `pl.sql`,
          which will use this code internally.

        Parameters
        ----------
        query
            A valid SQL query string.
        eager
            If True, returns execution results as `DataFrame` instead of `LazyFrame`.
            (Note that the query itself is always executed in lazy-mode).

        Examples
        --------
        >>> import pandas as pd
        >>> df = pl.LazyFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> df_pandas = pd.DataFrame({"a": [2, 3, 4], "c": [7, 8, 9]})

        Join a polars LazyFrame with a pandas DataFrame (note use of the preferred
        `pl.sql` method, which is equivalent to `SQLContext.execute_global`):

        >>> pl.sql("SELECT df.*, c FROM df JOIN df_pandas USING(a)").collect()
        shape: (2, 3)
        ┌─────┬─────┬─────┐
        │ a   ┆ b   ┆ c   │
        │ --- ┆ --- ┆ --- │
        │ i64 ┆ i64 ┆ i64 │
        ╞═════╪═════╪═════╡
        │ 2   ┆ 5   ┆ 7   │
        │ 3   ┆ 6   ┆ 8   │
        └─────┴─────┴─────┘
        """
        # basic extraction of possible table names from the query, so we don't register
        # unnecessary objects from the globals (ideally we should look to make the
        # underlying `sqlparser-rs` lib parse the query to identify table names)
        q = re.split(r"\bFROM\b", query, maxsplit=1, flags=re.I)
        possible_names = (
            {
                nm.strip('"')
                for nm in re.split(r"\b", q[1])
                if re.match(r'^("[^"]+")$', nm) or nm.isidentifier()
            }
            if len(q) > 1
            else set()
        )
        # get compatible frame objects from the globals, constraining by possible names
        named_frames = _get_frame_locals(all_compatible=True, named=possible_names)
        with cls(frames=named_frames, register_globals=False) as ctx:
            return ctx.execute(query=query, eager=eager)

    def __enter__(self) -> SQLContext[FrameType]:
        """Track currently registered tables on scope entry; supports nested scopes."""
        # the stack is created lazily on first use as a context manager
        self._tables_scope_stack = getattr(self, "_tables_scope_stack", [])
        self._tables_scope_stack.append(set(self.tables()))
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> None:
        """
        Unregister any tables created within the given scope on context exit.

        See Also
        --------
        unregister
        """
        # anything present now that was not present on entry was added in-scope
        self.unregister(
            names=(set(self.tables()) - self._tables_scope_stack.pop()),
        )

    def __repr__(self) -> str:
        """Return a short summary: the table count and the object address."""
        n_tables = len(self.tables())
        return f"<SQLContext [tables:{n_tables}] at 0x{id(self):x}>"

    # these overloads are necessary to cover the possible permutations
    # of the init-time "eager" param, and the local "eager" param.

    @overload
    def execute(
        self: SQLContext[DataFrame], query: str, *, eager: None = ...
    ) -> DataFrame: ...

    @overload
    def execute(
        self: SQLContext[DataFrame], query: str, *, eager: Literal[False]
    ) -> LazyFrame: ...

    @overload
    def execute(
        self: SQLContext[DataFrame], query: str, *, eager: Literal[True]
    ) -> DataFrame: ...

    @overload
    def execute(
        self: SQLContext[LazyFrame], query: str, *, eager: None = ...
    ) -> LazyFrame: ...

    @overload
    def execute(
        self: SQLContext[LazyFrame], query: str, *, eager: Literal[False]
    ) -> LazyFrame: ...

    @overload
    def execute(
        self: SQLContext[LazyFrame], query: str, *, eager: Literal[True]
    ) -> DataFrame: ...

    @overload
    def execute(
        self, query: str, *, eager: bool | None = ...
    ) -> LazyFrame | DataFrame: ...

    def execute(
        self, query: str, *, eager: bool | None = None
    ) -> LazyFrame | DataFrame:
        """
        Parse the given SQL query and execute it against the registered frame data.

        Parameters
        ----------
        query
            A valid string SQL query.
        eager
            Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
            If unset, the value of the init-time "eager" parameter will be used;
            an explicit True/False always takes precedence over that default.
            Note that the query itself is always executed in lazy-mode; this
            parameter only impacts the type of the returned frame.

        Examples
        --------
        Declare frame data and register with a SQLContext:

        >>> df = pl.DataFrame(
        ...     data=[
        ...         ("The Godfather", 1972, 6_000_000, 134_821_952, 9.2),
        ...         ("The Dark Knight", 2008, 185_000_000, 533_316_061, 9.0),
        ...         ("Schindler's List", 1993, 22_000_000, 96_067_179, 8.9),
        ...         ("Pulp Fiction", 1994, 8_000_000, 107_930_000, 8.9),
        ...         ("The Shawshank Redemption", 1994, 25_000_000, 28_341_469, 9.3),
        ...     ],
        ...     schema=["title", "release_year", "budget", "gross", "imdb_score"],
        ...     orient="row",
        ... )
        >>> ctx = pl.SQLContext(films=df)

        Execute a SQL query against the registered frame data:

        >>> ctx.execute(
        ...     '''
        ...     SELECT title, release_year, imdb_score
        ...     FROM films
        ...     WHERE release_year > 1990
        ...     ORDER BY imdb_score DESC
        ...     ''',
        ...     eager=True,
        ... )
        shape: (4, 3)
        ┌──────────────────────────┬──────────────┬────────────┐
        │ title                    ┆ release_year ┆ imdb_score │
        │ ---                      ┆ ---          ┆ ---        │
        │ str                      ┆ i64          ┆ f64        │
        ╞══════════════════════════╪══════════════╪════════════╡
        │ The Shawshank Redemption ┆ 1994         ┆ 9.3        │
        │ The Dark Knight          ┆ 2008         ┆ 9.0        │
        │ Schindler's List         ┆ 1993         ┆ 8.9        │
        │ Pulp Fiction             ┆ 1994         ┆ 8.9        │
        └──────────────────────────┴──────────────┴────────────┘

        Execute a GROUP BY query:

        >>> ctx.execute(
        ...     '''
        ...     SELECT
        ...         MAX(release_year / 10) * 10 AS decade,
        ...         SUM(gross) AS total_gross,
        ...         COUNT(title) AS n_films,
        ...     FROM films
        ...     GROUP BY (release_year / 10) -- decade
        ...     ORDER BY total_gross DESC
        ...     ''',
        ...     eager=True,
        ... )
        shape: (3, 3)
        ┌────────┬─────────────┬─────────┐
        │ decade ┆ total_gross ┆ n_films │
        │ ---    ┆ ---         ┆ ---     │
        │ i64    ┆ i64         ┆ u32     │
        ╞════════╪═════════════╪═════════╡
        │ 2000   ┆ 533316061   ┆ 1       │
        │ 1990   ┆ 232338648   ┆ 3       │
        │ 1970   ┆ 134821952   ┆ 1       │
        └────────┴─────────────┴─────────┘
        """
        res = wrap_ldf(self._ctxt.execute(query))
        # only fall back to the init-time default when the caller expressed no
        # preference; an explicit `eager=False` must yield a LazyFrame even on
        # a context that was constructed with `eager=True`
        if eager is None:
            eager = self._eager_execution
        return res.collect() if eager else res

    def register(self, name: str, frame: CompatibleFrameType | None) -> Self:
        """
        Register a single frame as a table, using the given name.

        Parameters
        ----------
        name
            Name of the table.
        frame
            eager/lazy frame to associate with this table name. If None, an
            empty LazyFrame is registered under the given name.

        See Also
        --------
        register_globals
        register_many
        unregister

        Examples
        --------
        >>> df = pl.DataFrame({"hello": ["world"]})
        >>> ctx = pl.SQLContext()
        >>> ctx.register("frame_data", df).execute("SELECT * FROM frame_data").collect()
        shape: (1, 1)
        ┌───────┐
        │ hello │
        │ ---   │
        │ str   │
        ╞═══════╡
        │ world │
        └───────┘
        """
        frame = LazyFrame() if frame is None else _ensure_lazyframe(frame)
        self._ctxt.register(name, frame._ldf)
        return self

    def register_globals(
        self, n: int | None = None, *, all_compatible: bool = True
    ) -> Self:
        """
        Register all frames (lazy or eager) found in the current globals scope.

        Automatically maps variable names to table names.

        Parameters
        ----------
        n
            Register only the most recent "n" frames.
        all_compatible
            Control whether we *also* register pandas DataFrame, Series, and
            pyarrow Table and RecordBatch objects. If False, only Polars
            classes are registered with the SQL engine.

        See Also
        --------
        register
        register_many
        unregister

        Examples
        --------
        >>> df1 = pl.DataFrame({"a": [1, 2, 3], "b": ["x", None, "z"]})
        >>> df2 = pl.DataFrame({"a": [2, 3, 4], "c": ["t", "w", "v"]})

        Register frames directly from variables found in the current globals scope:

        >>> ctx = pl.SQLContext(register_globals=True)
        >>> ctx.tables()
        ['df1', 'df2']

        Query using the registered variable/frame names

        >>> ctx.execute(
        ...     "SELECT a, b, c FROM df1 LEFT JOIN df2 USING (a) ORDER BY a DESC"
        ... ).collect()
        shape: (3, 3)
        ┌─────┬──────┬──────┐
        │ a   ┆ b    ┆ c    │
        │ --- ┆ ---  ┆ ---  │
        │ i64 ┆ str  ┆ str  │
        ╞═════╪══════╪══════╡
        │ 3   ┆ z    ┆ w    │
        │ 2   ┆ null ┆ t    │
        │ 1   ┆ x    ┆ null │
        └─────┴──────┴──────┘
        """
        frames = _get_frame_locals(all_compatible=all_compatible, n_objects=n)
        return self.register_many(frames=frames)

    def register_many(
        self,
        frames: Mapping[str, CompatibleFrameType | None] | None = None,
        **named_frames: CompatibleFrameType | None,
    ) -> Self:
        """
        Register multiple eager/lazy frames as tables, using the associated names.

        Parameters
        ----------
        frames
            A `{name:frame, ...}` mapping.
        **named_frames
            Named eager/lazy frames, provided as kwargs. These take precedence
            over same-named entries in `frames`.

        See Also
        --------
        register
        register_globals
        unregister

        Examples
        --------
        >>> lf1 = pl.LazyFrame({"a": [1, 2, 3], "b": ["m", "n", "o"]})
        >>> lf2 = pl.LazyFrame({"a": [2, 3, 4], "c": ["p", "q", "r"]})
        >>> lf3 = pl.LazyFrame({"a": [3, 4, 5], "b": ["s", "t", "u"]})
        >>> lf4 = pl.LazyFrame({"a": [4, 5, 6], "c": ["v", "w", "x"]})

        Register multiple frames at once, either by passing in as a dict...

        >>> ctx = pl.SQLContext().register_many({"tbl1": lf1, "tbl2": lf2})
        >>> ctx.tables()
        ['tbl1', 'tbl2']

        ...or using keyword args:

        >>> ctx.register_many(tbl3=lf3, tbl4=lf4).tables()
        ['tbl1', 'tbl2', 'tbl3', 'tbl4']
        """
        # copy first so that the caller's mapping is never mutated
        frames = dict(frames or {})
        frames.update(named_frames)
        for name, frame in frames.items():
            self.register(name, frame)
        return self

    def unregister(self, names: str | Collection[str]) -> Self:
        """
        Unregister one or more eager/lazy frames by name.

        Parameters
        ----------
        names
            Names of the tables to unregister.

        See Also
        --------
        register
        register_globals
        register_many

        Notes
        -----
        You can also control table registration lifetime by using `SQLContext` as a
        context manager; this can often be more useful when such control is wanted:

        >>> df0 = pl.DataFrame({"colx": [0, 1, 2]})
        >>> df1 = pl.DataFrame({"colx": [1, 2, 3]})
        >>> df2 = pl.DataFrame({"colx": [2, 3, 4]})

        Frames registered in-scope are automatically unregistered on scope-exit. Note
        that frames registered on construction will persist through subsequent scopes.

        >>> # register one frame at construction time, and the other two in-scope
        >>> with pl.SQLContext(tbl0=df0) as ctx:
        ...     ctx.register_many(tbl1=df1, tbl2=df2).tables()
        ['tbl0', 'tbl1', 'tbl2']

        After scope exit, none of the tables registered in-scope remain:

        >>> ctx.tables()
        ['tbl0']

        Examples
        --------
        >>> df0 = pl.DataFrame({"ints": [9, 8, 7, 6, 5]})
        >>> lf1 = pl.LazyFrame({"text": ["a", "b", "c"]})
        >>> lf2 = pl.LazyFrame({"misc": ["testing1234"]})

        Register with a SQLContext object:

        >>> ctx = pl.SQLContext(test1=df0, test2=lf1, test3=lf2)
        >>> ctx.tables()
        ['test1', 'test2', 'test3']

        Unregister one or more of the tables:

        >>> ctx.unregister(["test1", "test3"]).tables()
        ['test2']
        >>> ctx.unregister("test2").tables()
        []
        """
        # a bare string is a single table name, not a collection of characters
        if isinstance(names, str):
            names = [names]
        for nm in names:
            self._ctxt.unregister(nm)
        return self

    def tables(self) -> list[str]:
        """
        Return a list of the registered table names.

        Notes
        -----
        The :meth:`tables` method will return the same values as the
        "SHOW TABLES" SQL statement, but as a list instead of a frame.

        Executing as SQL:

        >>> frame_data = pl.DataFrame({"hello": ["world"]})
        >>> ctx = pl.SQLContext(hello_world=frame_data)
        >>> ctx.execute("SHOW TABLES", eager=True)
        shape: (1, 1)
        ┌─────────────┐
        │ name        │
        │ ---         │
        │ str         │
        ╞═════════════╡
        │ hello_world │
        └─────────────┘

        Calling the method:

        >>> ctx.tables()
        ['hello_world']

        Examples
        --------
        >>> df1 = pl.DataFrame({"hello": ["world"]})
        >>> df2 = pl.DataFrame({"foo": ["bar", "baz"]})
        >>> ctx = pl.SQLContext(hello_data=df1, foo_bar=df2)
        >>> ctx.tables()
        ['foo_bar', 'hello_data']
        """
        return sorted(self._ctxt.get_tables())
|