140 lines
4.5 KiB
Python
140 lines
4.5 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING, Literal, overload
|
|
|
|
if TYPE_CHECKING:
|
|
from polars.dataframe import DataFrame
|
|
from polars.lazyframe import LazyFrame
|
|
|
|
|
|
__all__ = ["sql"]
|
|
|
|
|
|
@overload
|
|
def sql(query: str, *, eager: Literal[False] = False) -> LazyFrame: ...
|
|
|
|
|
|
@overload
|
|
def sql(query: str, *, eager: Literal[True]) -> DataFrame: ...
|
|
|
|
|
|
def sql(query: str, *, eager: bool = False) -> DataFrame | LazyFrame:
|
|
"""
|
|
Execute a SQL query against frames in the global namespace.
|
|
|
|
.. versionadded:: 0.20.31
|
|
|
|
Parameters
|
|
----------
|
|
query
|
|
SQL query to execute.
|
|
eager
|
|
Automatically collect the result and return a DataFrame instead of a LazyFrame.
|
|
|
|
Notes
|
|
-----
|
|
* The Polars SQL engine can operate against Polars DataFrame, LazyFrame, and Series
|
|
objects, as well as Pandas DataFrame and Series, PyArrow Table and RecordBatch.
|
|
* Additional control over registration and execution behaviour is available
|
|
with the :class:`SQLContext` object.
|
|
|
|
See Also
|
|
--------
|
|
SQLContext
|
|
|
|
Examples
|
|
--------
|
|
>>> lf1 = pl.LazyFrame({"a": [1, 2, 3], "b": [6, 7, 8], "c": ["z", "y", "x"]})
|
|
>>> lf2 = pl.LazyFrame({"a": [3, 2, 1], "d": [125, -654, 888]})
|
|
|
|
Query the LazyFrame using SQL:
|
|
|
|
>>> lf1.sql("SELECT c, b FROM self WHERE a > 1").collect()
|
|
shape: (2, 2)
|
|
┌─────┬─────┐
|
|
│ c ┆ b │
|
|
│ --- ┆ --- │
|
|
│ str ┆ i64 │
|
|
╞═════╪═════╡
|
|
│ y ┆ 7 │
|
|
│ x ┆ 8 │
|
|
└─────┴─────┘
|
|
|
|
Join two LazyFrames:
|
|
|
|
>>> pl.sql(
|
|
... '''
|
|
... SELECT lf1.*, d
|
|
... FROM lf1
|
|
... INNER JOIN lf2 USING (a)
|
|
... WHERE a > 1 AND b < 8
|
|
... '''
|
|
... ).collect()
|
|
shape: (1, 4)
|
|
┌─────┬─────┬─────┬──────┐
|
|
│ a ┆ b ┆ c ┆ d │
|
|
│ --- ┆ --- ┆ --- ┆ --- │
|
|
│ i64 ┆ i64 ┆ str ┆ i64 │
|
|
╞═════╪═════╪═════╪══════╡
|
|
│ 2 ┆ 7 ┆ y ┆ -654 │
|
|
└─────┴─────┴─────┴──────┘
|
|
|
|
Apply SQL transforms and subsequently filter natively (you can freely mix SQL and
|
|
native operations):
|
|
|
|
>>> pl.sql(
|
|
... query='''
|
|
... SELECT
|
|
... a,
|
|
... (a % 2 == 0) AS a_is_even,
|
|
... (b::float4 / 2) AS "b/2",
|
|
... CONCAT_WS(':', c, c, c) AS c_c_c
|
|
... FROM lf1
|
|
... ORDER BY a
|
|
... ''',
|
|
... ).filter(~pl.col("c_c_c").str.starts_with("x")).collect()
|
|
shape: (2, 4)
|
|
┌─────┬───────────┬─────┬───────┐
|
|
│ a ┆ a_is_even ┆ b/2 ┆ c_c_c │
|
|
│ --- ┆ --- ┆ --- ┆ --- │
|
|
│ i64 ┆ bool ┆ f32 ┆ str │
|
|
╞═════╪═══════════╪═════╪═══════╡
|
|
│ 1 ┆ false ┆ 3.0 ┆ z:z:z │
|
|
│ 2 ┆ true ┆ 3.5 ┆ y:y:y │
|
|
└─────┴───────────┴─────┴───────┘
|
|
|
|
Join polars LazyFrame with a pandas DataFrame and a pyarrow Table:
|
|
|
|
>>> import pandas as pd
|
|
>>> import pyarrow as pa
|
|
>>> pl_frame = lf1
|
|
>>> pd_frame = pd.DataFrame({"a": [2, 3, 4], "d": [-0.5, 0.0, 0.5]})
|
|
>>> pa_table = pa.Table.from_arrays(
|
|
... [pa.array([1, 2, 3]), pa.array(["x", "y", "z"])],
|
|
... names=["a", "e"],
|
|
... )
|
|
>>> pl.sql(
|
|
... query='''
|
|
... SELECT pl_frame.*, d, e
|
|
... FROM pl_frame
|
|
... JOIN pd_frame USING(a)
|
|
... JOIN pa_table USING(a)
|
|
... ''',
|
|
... ).collect()
|
|
shape: (2, 5)
|
|
┌─────┬─────┬─────┬──────┬─────┐
|
|
│ a ┆ b ┆ c ┆ d ┆ e │
|
|
│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
|
|
│ i64 ┆ i64 ┆ str ┆ f64 ┆ str │
|
|
╞═════╪═════╪═════╪══════╪═════╡
|
|
│ 2 ┆ 7 ┆ y ┆ -0.5 ┆ y │
|
|
│ 3 ┆ 8 ┆ x ┆ 0.0 ┆ z │
|
|
└─────┴─────┴─────┴──────┴─────┘
|
|
"""
|
|
from polars.sql import SQLContext
|
|
|
|
return SQLContext.execute_global(
|
|
query=query,
|
|
eager=eager,
|
|
)
|