DriverTrac/venv/lib/python3.12/site-packages/polars/sql/functions.py
2025-11-28 09:08:33 +05:30

140 lines
4.5 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING, Literal, overload
if TYPE_CHECKING:
from polars.dataframe import DataFrame
from polars.lazyframe import LazyFrame
__all__ = ["sql"]
@overload
def sql(query: str, *, eager: Literal[False] = False) -> LazyFrame: ...
@overload
def sql(query: str, *, eager: Literal[True]) -> DataFrame: ...
def sql(query: str, *, eager: bool = False) -> DataFrame | LazyFrame:
"""
Execute a SQL query against frames in the global namespace.
.. versionadded:: 0.20.31
Parameters
----------
query
SQL query to execute.
eager
Automatically collect the result and return a DataFrame instead of a LazyFrame.
Notes
-----
* The Polars SQL engine can operate against Polars DataFrame, LazyFrame, and Series
objects, as well as Pandas DataFrame and Series, PyArrow Table and RecordBatch.
* Additional control over registration and execution behaviour is available
with the :class:`SQLContext` object.
See Also
--------
SQLContext
Examples
--------
>>> lf1 = pl.LazyFrame({"a": [1, 2, 3], "b": [6, 7, 8], "c": ["z", "y", "x"]})
>>> lf2 = pl.LazyFrame({"a": [3, 2, 1], "d": [125, -654, 888]})
Query the LazyFrame using SQL:
>>> lf1.sql("SELECT c, b FROM self WHERE a > 1").collect()
shape: (2, 2)
┌─────┬─────┐
│ c ┆ b │
│ --- ┆ --- │
│ str ┆ i64 │
╞═════╪═════╡
│ y ┆ 7 │
│ x ┆ 8 │
└─────┴─────┘
Join two LazyFrames:
>>> pl.sql(
... '''
... SELECT lf1.*, d
... FROM lf1
... INNER JOIN lf2 USING (a)
... WHERE a > 1 AND b < 8
... '''
... ).collect()
shape: (1, 4)
┌─────┬─────┬─────┬──────┐
│ a ┆ b ┆ c ┆ d │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ i64 │
╞═════╪═════╪═════╪══════╡
│ 2 ┆ 7 ┆ y ┆ -654 │
└─────┴─────┴─────┴──────┘
Apply SQL transforms and subsequently filter natively (you can freely mix SQL and
native operations):
>>> pl.sql(
... query='''
... SELECT
... a,
... (a % 2 == 0) AS a_is_even,
... (b::float4 / 2) AS "b/2",
... CONCAT_WS(':', c, c, c) AS c_c_c
... FROM lf1
... ORDER BY a
... ''',
... ).filter(~pl.col("c_c_c").str.starts_with("x")).collect()
shape: (2, 4)
┌─────┬───────────┬─────┬───────┐
│ a ┆ a_is_even ┆ b/2 ┆ c_c_c │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ bool ┆ f32 ┆ str │
╞═════╪═══════════╪═════╪═══════╡
│ 1 ┆ false ┆ 3.0 ┆ z:z:z │
│ 2 ┆ true ┆ 3.5 ┆ y:y:y │
└─────┴───────────┴─────┴───────┘
Join polars LazyFrame with a pandas DataFrame and a pyarrow Table:
>>> import pandas as pd
>>> import pyarrow as pa
>>> pl_frame = lf1
>>> pd_frame = pd.DataFrame({"a": [2, 3, 4], "d": [-0.5, 0.0, 0.5]})
>>> pa_table = pa.Table.from_arrays(
... [pa.array([1, 2, 3]), pa.array(["x", "y", "z"])],
... names=["a", "e"],
... )
>>> pl.sql(
... query='''
... SELECT pl_frame.*, d, e
... FROM pl_frame
... JOIN pd_frame USING(a)
... JOIN pa_table USING(a)
... ''',
... ).collect()
shape: (2, 5)
┌─────┬─────┬─────┬──────┬─────┐
│ a ┆ b ┆ c ┆ d ┆ e │
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ f64 ┆ str │
╞═════╪═════╪═════╪══════╪═════╡
│ 2 ┆ 7 ┆ y ┆ -0.5 ┆ y │
│ 3 ┆ 8 ┆ x ┆ 0.0 ┆ z │
└─────┴─────┴─────┴──────┴─────┘
"""
from polars.sql import SQLContext
return SQLContext.execute_global(
query=query,
eager=eager,
)