DriverTrac/venv/lib/python3.12/site-packages/polars/api.py
2025-11-28 09:08:33 +05:30

371 lines
13 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING, Callable, Generic, TypeVar
from warnings import warn
import polars._reexport as pl
from polars._utils.various import find_stacklevel
if TYPE_CHECKING:
from polars import DataFrame, Expr, LazyFrame, Series
# Public API of this module: the four namespace-registration decorators.
__all__ = [
    "register_dataframe_namespace",
    "register_expr_namespace",
    "register_lazyframe_namespace",
    "register_series_namespace",
]
# do not allow override of polars' own namespaces (as registered by '_accessors')
# NOTE: this is evaluated once, at import time, and `set.union` builds a new set;
# names that `_create_namespace` later adds to a class's `_accessors` are therefore
# not part of this collection.
_reserved_namespaces: set[str] = set.union(
    *(cls._accessors for cls in (pl.DataFrame, pl.Expr, pl.LazyFrame, pl.Series))
)
NS = TypeVar("NS")


class NameSpace(Generic[NS]):
    """
    Descriptor exposing a user-defined namespace class as an attribute.

    Looked up on the owning class, the descriptor yields the registered namespace
    class itself. Looked up on an instance, it constructs the namespace around
    that instance and stores the result on the instance under the accessor name.
    """

    def __init__(self, name: str, namespace: type[NS]) -> None:
        """
        Remember the accessor name and the namespace class it maps to.

        Parameters
        ----------
        name
            Attribute name under which the namespace is reachable.
        namespace
            Class that is called with the owning instance as its only argument
            to build the namespace object.
        """
        self._ns = namespace
        self._accessor = name

    def __get__(self, instance: NS | None, cls: type[NS]) -> NS | type[NS]:
        """
        Resolve attribute access for the registered namespace.

        Parameters
        ----------
        instance
            Object the attribute was looked up on, or None for class-level access.
        cls
            Owning class (unused; part of the descriptor protocol).

        Returns
        -------
        The namespace class for class-level access, otherwise a namespace object
        constructed from `instance`.
        """
        if instance is not None:
            bound = self._ns(instance)  # type: ignore[call-arg]
            # Keep the constructed namespace on the instance under the same name;
            # this class defines no `__set__`, so for instances with a regular
            # `__dict__` later lookups resolve to the stored object directly.
            setattr(instance, self._accessor, bound)
            return bound

        # Class-level access: hand back the namespace class itself.
        return self._ns
def _create_namespace(
    name: str, cls: type[Expr | DataFrame | LazyFrame | Series]
) -> Callable[[type[NS]], type[NS]]:
    """
    Build a class decorator that attaches a custom namespace to a Polars class.

    Parameters
    ----------
    name
        Attribute name under which the namespace will be reachable on `cls`.
    cls
        Polars class that receives the namespace.

    Returns
    -------
    Callable
        Decorator that installs the decorated class on `cls` as a `NameSpace`
        descriptor, records `name` in `cls._accessors`, and returns the
        decorated class unchanged.

    Raises
    ------
    AttributeError
        Raised by the returned decorator when `name` is a reserved namespace.
    """

    def namespace(ns_class: type[NS]) -> type[NS]:
        # Polars' own namespaces can never be replaced.
        if name in _reserved_namespaces:
            msg = f"cannot override reserved namespace {name!r}"
            raise AttributeError(msg)

        # Any other attribute that already exists is replaced, but not silently.
        if hasattr(cls, name):
            message = (
                f"Overriding existing custom namespace {name!r} (on {cls.__name__!r})"
            )
            # Resolved in this frame so the warning points at the caller's code.
            level = find_stacklevel()
            warn(message, UserWarning, stacklevel=level)

        descriptor = NameSpace(name, ns_class)
        setattr(cls, name, descriptor)
        cls._accessors.add(name)
        return ns_class

    return namespace
def register_expr_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Decorator for registering custom functionality with a Polars Expr.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that registers the decorated class under `name` on
        `pl.Expr` and returns that class unchanged.

    Raises
    ------
    AttributeError
        When the returned decorator is applied and `name` is one of Polars'
        reserved namespaces.

    Warns
    -----
    UserWarning
        When the returned decorator is applied and `pl.Expr` already has an
        attribute called `name`; the existing attribute is replaced.

    See Also
    --------
    register_dataframe_namespace : Register functionality on a DataFrame.
    register_lazyframe_namespace : Register functionality on a LazyFrame.
    register_series_namespace : Register functionality on a Series.

    Examples
    --------
    >>> @pl.api.register_expr_namespace("pow_n")
    ... class PowersOfN:
    ...     def __init__(self, expr: pl.Expr) -> None:
    ...         self._expr = expr
    ...
    ...     def next(self, p: int) -> pl.Expr:
    ...         return (p ** (self._expr.log(p).ceil()).cast(pl.Int64)).cast(pl.Int64)
    ...
    ...     def previous(self, p: int) -> pl.Expr:
    ...         return (p ** (self._expr.log(p).floor()).cast(pl.Int64)).cast(pl.Int64)
    ...
    ...     def nearest(self, p: int) -> pl.Expr:
    ...         return (p ** (self._expr.log(p)).round(0).cast(pl.Int64)).cast(pl.Int64)
    >>>
    >>> df = pl.DataFrame([1.4, 24.3, 55.0, 64.001], schema=["n"])
    >>> df.select(
    ...     pl.col("n"),
    ...     pl.col("n").pow_n.next(p=2).alias("next_pow2"),
    ...     pl.col("n").pow_n.previous(p=2).alias("prev_pow2"),
    ...     pl.col("n").pow_n.nearest(p=2).alias("nearest_pow2"),
    ... )
    shape: (4, 4)
    ┌────────┬───────────┬───────────┬──────────────┐
    │ n      ┆ next_pow2 ┆ prev_pow2 ┆ nearest_pow2 │
    │ ---    ┆ ---       ┆ ---       ┆ ---          │
    │ f64    ┆ i64       ┆ i64       ┆ i64          │
    ╞════════╪═══════════╪═══════════╪══════════════╡
    │ 1.4    ┆ 2         ┆ 1         ┆ 1            │
    │ 24.3   ┆ 32        ┆ 16        ┆ 32           │
    │ 55.0   ┆ 64        ┆ 32        ┆ 64           │
    │ 64.001 ┆ 128       ┆ 64        ┆ 64           │
    └────────┴───────────┴───────────┴──────────────┘
    """
    return _create_namespace(name, pl.Expr)
def register_dataframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Decorator for registering custom functionality with a Polars DataFrame.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that registers the decorated class under `name` on
        `pl.DataFrame` and returns that class unchanged.

    Raises
    ------
    AttributeError
        When the returned decorator is applied and `name` is one of Polars'
        reserved namespaces.

    Warns
    -----
    UserWarning
        When the returned decorator is applied and `pl.DataFrame` already has an
        attribute called `name`; the existing attribute is replaced.

    See Also
    --------
    register_expr_namespace : Register functionality on an Expr.
    register_lazyframe_namespace : Register functionality on a LazyFrame.
    register_series_namespace : Register functionality on a Series.

    Examples
    --------
    >>> @pl.api.register_dataframe_namespace("split")
    ... class SplitFrame:
    ...     def __init__(self, df: pl.DataFrame) -> None:
    ...         self._df = df
    ...
    ...     def by_first_letter_of_column_names(self) -> list[pl.DataFrame]:
    ...         return [
    ...             self._df.select([col for col in self._df.columns if col[0] == f])
    ...             for f in dict.fromkeys(col[0] for col in self._df.columns)
    ...         ]
    ...
    ...     def by_first_letter_of_column_values(self, col: str) -> list[pl.DataFrame]:
    ...         return [
    ...             self._df.filter(pl.col(col).str.starts_with(c))
    ...             for c in sorted(
    ...                 set(self._df.select(pl.col(col).str.slice(0, 1)).to_series())
    ...             )
    ...         ]
    >>>
    >>> df = pl.DataFrame(
    ...     data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]],
    ...     schema=["a1", "a2", "b1", "b2"],
    ...     orient="row",
    ... )
    >>> df
    shape: (4, 4)
    ┌─────┬─────┬─────┬─────┐
    │ a1  ┆ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ xx  ┆ 2   ┆ 3   ┆ 4   │
    │ xy  ┆ 4   ┆ 5   ┆ 6   │
    │ yy  ┆ 5   ┆ 6   ┆ 7   │
    │ yz  ┆ 6   ┆ 7   ┆ 8   │
    └─────┴─────┴─────┴─────┘
    >>> df.split.by_first_letter_of_column_names()
    [shape: (4, 2)
     ┌─────┬─────┐
     │ a1  ┆ a2  │
     │ --- ┆ --- │
     │ str ┆ i64 │
     ╞═════╪═════╡
     │ xx  ┆ 2   │
     │ xy  ┆ 4   │
     │ yy  ┆ 5   │
     │ yz  ┆ 6   │
     └─────┴─────┘,
     shape: (4, 2)
     ┌─────┬─────┐
     │ b1  ┆ b2  │
     │ --- ┆ --- │
     │ i64 ┆ i64 │
     ╞═════╪═════╡
     │ 3   ┆ 4   │
     │ 5   ┆ 6   │
     │ 6   ┆ 7   │
     │ 7   ┆ 8   │
     └─────┴─────┘]
    >>> df.split.by_first_letter_of_column_values("a1")
    [shape: (2, 4)
     ┌─────┬─────┬─────┬─────┐
     │ a1  ┆ a2  ┆ b1  ┆ b2  │
     │ --- ┆ --- ┆ --- ┆ --- │
     │ str ┆ i64 ┆ i64 ┆ i64 │
     ╞═════╪═════╪═════╪═════╡
     │ xx  ┆ 2   ┆ 3   ┆ 4   │
     │ xy  ┆ 4   ┆ 5   ┆ 6   │
     └─────┴─────┴─────┴─────┘, shape: (2, 4)
     ┌─────┬─────┬─────┬─────┐
     │ a1  ┆ a2  ┆ b1  ┆ b2  │
     │ --- ┆ --- ┆ --- ┆ --- │
     │ str ┆ i64 ┆ i64 ┆ i64 │
     ╞═════╪═════╪═════╪═════╡
     │ yy  ┆ 5   ┆ 6   ┆ 7   │
     │ yz  ┆ 6   ┆ 7   ┆ 8   │
     └─────┴─────┴─────┴─────┘]
    """
    return _create_namespace(name, pl.DataFrame)
def register_lazyframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Decorator for registering custom functionality with a Polars LazyFrame.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that registers the decorated class under `name` on
        `pl.LazyFrame` and returns that class unchanged.

    Raises
    ------
    AttributeError
        When the returned decorator is applied and `name` is one of Polars'
        reserved namespaces.

    Warns
    -----
    UserWarning
        When the returned decorator is applied and `pl.LazyFrame` already has an
        attribute called `name`; the existing attribute is replaced.

    See Also
    --------
    register_expr_namespace : Register functionality on an Expr.
    register_dataframe_namespace : Register functionality on a DataFrame.
    register_series_namespace : Register functionality on a Series.

    Examples
    --------
    >>> @pl.api.register_lazyframe_namespace("types")
    ... class DTypeOperations:
    ...     def __init__(self, lf: pl.LazyFrame) -> None:
    ...         self._lf = lf
    ...
    ...     def split_by_column_dtypes(self) -> list[pl.LazyFrame]:
    ...         return [
    ...             self._lf.select(pl.col(tp))
    ...             for tp in dict.fromkeys(self._lf.collect_schema().dtypes())
    ...         ]
    ...
    ...     def upcast_integer_types(self) -> pl.LazyFrame:
    ...         return self._lf.with_columns(
    ...             pl.col(tp).cast(pl.Int64) for tp in (pl.Int8, pl.Int16, pl.Int32)
    ...         )
    >>>
    >>> lf = pl.LazyFrame(
    ...     data={"a": [1, 2], "b": [3, 4], "c": [5.6, 6.7]},
    ...     schema=[("a", pl.Int16), ("b", pl.Int32), ("c", pl.Float32)],
    ... )
    >>> lf.collect()
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i16 ┆ i32 ┆ f32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 5.6 │
    │ 2   ┆ 4   ┆ 6.7 │
    └─────┴─────┴─────┘
    >>> lf.types.upcast_integer_types().collect()
    shape: (2, 3)
    ┌─────┬─────┬─────┐
    │ a   ┆ b   ┆ c   │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ f32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 5.6 │
    │ 2   ┆ 4   ┆ 6.7 │
    └─────┴─────┴─────┘

    >>> lf = pl.LazyFrame(
    ...     data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]],
    ...     schema=["a1", "a2", "b1", "b2"],
    ...     orient="row",
    ... )
    >>> lf.collect()
    shape: (4, 4)
    ┌─────┬─────┬─────┬─────┐
    │ a1  ┆ a2  ┆ b1  ┆ b2  │
    │ --- ┆ --- ┆ --- ┆ --- │
    │ str ┆ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╪═════╡
    │ xx  ┆ 2   ┆ 3   ┆ 4   │
    │ xy  ┆ 4   ┆ 5   ┆ 6   │
    │ yy  ┆ 5   ┆ 6   ┆ 7   │
    │ yz  ┆ 6   ┆ 7   ┆ 8   │
    └─────┴─────┴─────┴─────┘
    >>> pl.collect_all(lf.types.split_by_column_dtypes())
    [shape: (4, 1)
     ┌─────┐
     │ a1  │
     │ --- │
     │ str │
     ╞═════╡
     │ xx  │
     │ xy  │
     │ yy  │
     │ yz  │
     └─────┘, shape: (4, 3)
     ┌─────┬─────┬─────┐
     │ a2  ┆ b1  ┆ b2  │
     │ --- ┆ --- ┆ --- │
     │ i64 ┆ i64 ┆ i64 │
     ╞═════╪═════╪═════╡
     │ 2   ┆ 3   ┆ 4   │
     │ 4   ┆ 5   ┆ 6   │
     │ 5   ┆ 6   ┆ 7   │
     │ 6   ┆ 7   ┆ 8   │
     └─────┴─────┴─────┘]
    """
    return _create_namespace(name, pl.LazyFrame)
def register_series_namespace(name: str) -> Callable[[type[NS]], type[NS]]:
    """
    Decorator for registering custom functionality with a Polars Series.

    Parameters
    ----------
    name
        Name under which the functionality will be accessed.

    Returns
    -------
    Callable
        Class decorator that registers the decorated class under `name` on
        `pl.Series` and returns that class unchanged.

    Raises
    ------
    AttributeError
        When the returned decorator is applied and `name` is one of Polars'
        reserved namespaces.

    Warns
    -----
    UserWarning
        When the returned decorator is applied and `pl.Series` already has an
        attribute called `name`; the existing attribute is replaced.

    See Also
    --------
    register_expr_namespace : Register functionality on an Expr.
    register_dataframe_namespace : Register functionality on a DataFrame.
    register_lazyframe_namespace : Register functionality on a LazyFrame.

    Examples
    --------
    >>> @pl.api.register_series_namespace("math")
    ... class MathShortcuts:
    ...     def __init__(self, s: pl.Series) -> None:
    ...         self._s = s
    ...
    ...     def square(self) -> pl.Series:
    ...         return self._s * self._s
    ...
    ...     def cube(self) -> pl.Series:
    ...         return self._s * self._s * self._s
    >>>
    >>> s = pl.Series("n", [1.5, 31.0, 42.0, 64.5])
    >>> s.math.square().alias("s^2")
    shape: (4,)
    Series: 's^2' [f64]
    [
        2.25
        961.0
        1764.0
        4160.25
    ]
    >>> s = pl.Series("n", [1, 2, 3, 4, 5])
    >>> s.math.cube().alias("s^3")
    shape: (5,)
    Series: 's^3' [i64]
    [
        1
        8
        27
        64
        125
    ]
    """
    return _create_namespace(name, pl.Series)