DriverTrac/venv/lib/python3.12/site-packages/polars/sql/functions.py

from __future__ import annotations

from typing import TYPE_CHECKING, Literal, overload

if TYPE_CHECKING:
    from polars.dataframe import DataFrame
    from polars.lazyframe import LazyFrame


__all__ = ["sql"]


@overload
def sql(query: str, *, eager: Literal[False] = False) -> LazyFrame: ...


@overload
def sql(query: str, *, eager: Literal[True]) -> DataFrame: ...


def sql(query: str, *, eager: bool = False) -> DataFrame | LazyFrame:
    """
    Execute a SQL query against frames in the global namespace.

    .. versionadded:: 0.20.31

    Parameters
    ----------
    query
        SQL query to execute.
    eager
        Automatically collect the result and return a DataFrame instead of a LazyFrame.

    Notes
    -----
    * The Polars SQL engine can operate against Polars DataFrame, LazyFrame, and Series
      objects, as well as Pandas DataFrame and Series, PyArrow Table and RecordBatch.
    * Additional control over registration and execution behaviour is available
      with the :class:`SQLContext` object.

    See Also
    --------
    SQLContext

    Examples
    --------
    >>> lf1 = pl.LazyFrame({"a": [1, 2, 3], "b": [6, 7, 8], "c": ["z", "y", "x"]})
    >>> lf2 = pl.LazyFrame({"a": [3, 2, 1], "d": [125, -654, 888]})

    Query the LazyFrame using SQL:

    >>> lf1.sql("SELECT c, b FROM self WHERE a > 1").collect()
    shape: (2, 2)
    ┌─────┬─────┐
    │ c   ┆ b   │
    │ --- ┆ --- │
    │ str ┆ i64 │
    ╞═════╪═════╡
    │ y   ┆ 7   │
    │ x   ┆ 8   │
    └─────┴─────┘

    Join two LazyFrames:

    >>> pl.sql(
    ...     '''
    ...     SELECT lf1.*, d
    ...     FROM lf1
    ...     INNER JOIN lf2 USING (a)
    ...     WHERE a > 1 AND b < 8
    ...     '''
    ... ).collect()
    shape: (1, 4)
    ┌─────┬─────┬─────┬──────┐
    │ a   ┆ b   ┆ c   ┆ d    │
    │ --- ┆ --- ┆ --- ┆ ---  │
    │ i64 ┆ i64 ┆ str ┆ i64  │
    ╞═════╪═════╪═════╪══════╡
    │ 2   ┆ 7   ┆ y   ┆ -654 │
    └─────┴─────┴─────┴──────┘

    Apply SQL transforms and subsequently filter natively (you can freely mix SQL and
    native operations):

    >>> pl.sql(
    ...     query='''
    ...         SELECT
    ...             a,
    ...             (a % 2 == 0) AS a_is_even,
    ...             (b::float4 / 2) AS "b/2",
    ...             CONCAT_WS(':', c, c, c) AS c_c_c
    ...         FROM lf1
    ...         ORDER BY a
    ...     ''',
    ... ).filter(~pl.col("c_c_c").str.starts_with("x")).collect()
    shape: (2, 4)
    ┌─────┬───────────┬─────┬───────┐
    │ a   ┆ a_is_even ┆ b/2 ┆ c_c_c │
    │ --- ┆ ---       ┆ --- ┆ ---   │
    │ i64 ┆ bool      ┆ f32 ┆ str   │
    ╞═════╪═══════════╪═════╪═══════╡
    │ 1   ┆ false     ┆ 3.0 ┆ z:z:z │
    │ 2   ┆ true      ┆ 3.5 ┆ y:y:y │
    └─────┴───────────┴─────┴───────┘

    Join polars LazyFrame with a pandas DataFrame and a pyarrow Table:

    >>> import pandas as pd
    >>> import pyarrow as pa
    >>> pl_frame = lf1
    >>> pd_frame = pd.DataFrame({"a": [2, 3, 4], "d": [-0.5, 0.0, 0.5]})
    >>> pa_table = pa.Table.from_arrays(
    ...     [pa.array([1, 2, 3]), pa.array(["x", "y", "z"])],
    ...     names=["a", "e"],
    ... )
    >>> pl.sql(
    ...     query='''
    ...         SELECT pl_frame.*, d, e
    ...         FROM pl_frame
    ...         JOIN pd_frame USING(a)
    ...         JOIN pa_table USING(a)
    ...     ''',
    ... ).collect()
    shape: (2, 5)
    ┌─────┬─────┬─────┬──────┬─────┐
    │ a   ┆ b   ┆ c   ┆ d    ┆ e   │
    │ --- ┆ --- ┆ --- ┆ ---  ┆ --- │
    │ i64 ┆ i64 ┆ str ┆ f64  ┆ str │
    ╞═════╪═════╪═════╪══════╪═════╡
    │ 2   ┆ 7   ┆ y   ┆ -0.5 ┆ y   │
    │ 3   ┆ 8   ┆ x   ┆ 0.0  ┆ z   │
    └─────┴─────┴─────┴──────┴─────┘
    """
    from polars.sql import SQLContext

    return SQLContext.execute_global(
        query=query,
        eager=eager,
    )