# DriverTrac/venv/lib/python3.12/site-packages/polars/expr/struct.py
# 2025-11-28 09:08:33 +05:30

# 358 lines
# 14 KiB
# Python

from __future__ import annotations

import os
from typing import TYPE_CHECKING

from polars._utils.parse import parse_into_list_of_expressions
from polars._utils.various import qualified_type_name
from polars._utils.wrap import wrap_expr

if TYPE_CHECKING:
    from collections.abc import Iterable, Sequence

    from polars import Expr
    from polars._typing import IntoExpr
class ExprStructNameSpace:
    """Namespace for struct related expressions."""

    _accessor = "struct"

    def __init__(self, expr: Expr) -> None:
        """Keep a reference to the underlying `_pyexpr` of `expr`."""
        self._pyexpr = expr._pyexpr

    def __getitem__(self, item: str | int) -> Expr:
        """
        Retrieve a single struct field by name or by position.

        Parameters
        ----------
        item
            A string is forwarded to :func:`field`; an integer is looked up
            as a field index.

        Raises
        ------
        TypeError
            If `item` is neither a string nor an integer.
        """
        if isinstance(item, str):
            return self.field(item)
        if isinstance(item, int):
            return wrap_expr(self._pyexpr.struct_field_by_index(item))

        msg = f"expected type 'int | str', got {qualified_type_name(item)!r} ({item!r})"
        raise TypeError(msg)

    def field(self, name: str | list[str], *more_names: str) -> Expr:
        """
        Retrieve one or multiple `Struct` field(s) as a new Series.

        Parameters
        ----------
        name
            Name of the struct field to retrieve.
        *more_names
            Additional struct field names.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "aaa": [1, 2],
        ...         "bbb": ["ab", "cd"],
        ...         "ccc": [True, None],
        ...         "ddd": [[1, 2], [3]],
        ...     }
        ... ).select(pl.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
        >>> df
        shape: (2, 1)
        ┌──────────────────────┐
        │ struct_col           │
        │ ---                  │
        │ struct[4]            │
        ╞══════════════════════╡
        │ {1,"ab",true,[1, 2]} │
        │ {2,"cd",null,[3]}    │
        └──────────────────────┘

        Retrieve struct field(s) as Series:

        >>> df.select(pl.col("struct_col").struct.field("bbb"))
        shape: (2, 1)
        ┌─────┐
        │ bbb │
        │ --- │
        │ str │
        ╞═════╡
        │ ab  │
        │ cd  │
        └─────┘

        >>> df.select(
        ...     pl.col("struct_col").struct.field("bbb"),
        ...     pl.col("struct_col").struct.field("ddd"),
        ... )
        shape: (2, 2)
        ┌─────┬───────────┐
        │ bbb ┆ ddd       │
        │ --- ┆ ---       │
        │ str ┆ list[i64] │
        ╞═════╪═══════════╡
        │ ab  ┆ [1, 2]    │
        │ cd  ┆ [3]       │
        └─────┴───────────┘

        Use wildcard expansion:

        >>> df.select(pl.col("struct_col").struct.field("*"))
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ aaa ┆ bbb ┆ ccc  ┆ ddd       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘

        Retrieve multiple fields by name:

        >>> df.select(pl.col("struct_col").struct.field("aaa", "bbb"))
        shape: (2, 2)
        ┌─────┬─────┐
        │ aaa ┆ bbb │
        │ --- ┆ --- │
        │ i64 ┆ str │
        ╞═════╪═════╡
        │ 1   ┆ ab  │
        │ 2   ┆ cd  │
        └─────┴─────┘

        Retrieve multiple fields by regex expansion:

        >>> df.select(pl.col("struct_col").struct.field("^a.*|b.*$"))
        shape: (2, 2)
        ┌─────┬─────┐
        │ aaa ┆ bbb │
        │ --- ┆ --- │
        │ i64 ┆ str │
        ╞═════╪═════╡
        │ 1   ┆ ab  │
        │ 2   ┆ cd  │
        └─────┴─────┘

        Notes
        -----
        The `struct` namespace has implemented `__getitem__`
        so you can also access fields by index:

        >>> df.select(pl.col("struct_col").struct[1])
        shape: (2, 1)
        ┌─────┐
        │ bbb │
        │ --- │
        │ str │
        ╞═════╡
        │ ab  │
        │ cd  │
        └─────┘
        """
        if more_names:
            # Fold the positional extras into one flat list, whether `name`
            # arrived as a single string or already as a list of names.
            name = [*([name] if isinstance(name, str) else name), *more_names]
        if isinstance(name, list):
            return wrap_expr(self._pyexpr.struct_multiple_fields(name))

        return wrap_expr(self._pyexpr.struct_field_by_name(name))

    def unnest(self) -> Expr:
        """
        Expand the struct into its individual fields.

        Alias for `Expr.struct.field("*")`.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "aaa": [1, 2],
        ...         "bbb": ["ab", "cd"],
        ...         "ccc": [True, None],
        ...         "ddd": [[1, 2], [3]],
        ...     }
        ... ).select(pl.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
        >>> df
        shape: (2, 1)
        ┌──────────────────────┐
        │ struct_col           │
        │ ---                  │
        │ struct[4]            │
        ╞══════════════════════╡
        │ {1,"ab",true,[1, 2]} │
        │ {2,"cd",null,[3]}    │
        └──────────────────────┘
        >>> df.select(pl.col("struct_col").struct.unnest())
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ aaa ┆ bbb ┆ ccc  ┆ ddd       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘
        """
        return self.field("*")

    def rename_fields(self, names: Sequence[str]) -> Expr:
        """
        Rename the fields of the struct.

        Parameters
        ----------
        names
            New names, given in the same order as the struct's fields.

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "aaa": [1, 2],
        ...         "bbb": ["ab", "cd"],
        ...         "ccc": [True, None],
        ...         "ddd": [[1, 2], [3]],
        ...     }
        ... ).select(pl.struct("aaa", "bbb", "ccc", "ddd").alias("struct_col"))
        >>> df
        shape: (2, 1)
        ┌──────────────────────┐
        │ struct_col           │
        │ ---                  │
        │ struct[4]            │
        ╞══════════════════════╡
        │ {1,"ab",true,[1, 2]} │
        │ {2,"cd",null,[3]}    │
        └──────────────────────┘

        >>> df.unnest("struct_col")
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ aaa ┆ bbb ┆ ccc  ┆ ddd       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘

        Rename fields:

        >>> df = df.select(
        ...     pl.col("struct_col").struct.rename_fields(["www", "xxx", "yyy", "zzz"])
        ... )
        >>> df.unnest("struct_col")
        shape: (2, 4)
        ┌─────┬─────┬──────┬───────────┐
        │ www ┆ xxx ┆ yyy  ┆ zzz       │
        │ --- ┆ --- ┆ ---  ┆ ---       │
        │ i64 ┆ str ┆ bool ┆ list[i64] │
        ╞═════╪═════╪══════╪═══════════╡
        │ 1   ┆ ab  ┆ true ┆ [1, 2]    │
        │ 2   ┆ cd  ┆ null ┆ [3]       │
        └─────┴─────┴──────┴───────────┘

        Following a rename, the previous field names (obviously) cannot be referenced:

        >>> df.select(pl.col("struct_col").struct.field("aaa"))  # doctest: +SKIP
        StructFieldNotFoundError: aaa
        """
        return wrap_expr(self._pyexpr.struct_rename_fields(names))

    def json_encode(self) -> Expr:
        """
        Convert this struct to a string column with json values.

        Examples
        --------
        >>> pl.DataFrame(
        ...     {"a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]}
        ... ).with_columns(pl.col("a").struct.json_encode().alias("encoded"))
        shape: (2, 2)
        ┌──────────────────┬────────────────────────┐
        │ a                ┆ encoded                │
        │ ---              ┆ ---                    │
        │ struct[2]        ┆ str                    │
        ╞══════════════════╪════════════════════════╡
        │ {[1, 2],[45]}    ┆ {"a":[1,2],"b":[45]}   │
        │ {[9, 1, 3],null} ┆ {"a":[9,1,3],"b":null} │
        └──────────────────┴────────────────────────┘
        """
        return wrap_expr(self._pyexpr.struct_json_encode())

    def with_fields(
        self,
        *exprs: IntoExpr | Iterable[IntoExpr],
        **named_exprs: IntoExpr,
    ) -> Expr:
        """
        Add or overwrite fields of this struct.

        This is similar to `with_columns` on `DataFrame`.

        .. versionadded:: 0.20.27

        Parameters
        ----------
        *exprs
            Field(s) to add, specified as positional arguments.
            Accepts expression input. Strings are parsed as column names, other
            non-expression inputs are parsed as literals.
        **named_exprs
            Additional fields to add, specified as keyword arguments.
            The columns will be renamed to the keyword used.

        See Also
        --------
        field

        Examples
        --------
        >>> df = pl.DataFrame(
        ...     {
        ...         "coords": [{"x": 1, "y": 4}, {"x": 4, "y": 9}, {"x": 9, "y": 16}],
        ...         "multiply": [10, 2, 3],
        ...     }
        ... )
        >>> df
        shape: (3, 2)
        ┌───────────┬──────────┐
        │ coords    ┆ multiply │
        │ ---       ┆ ---      │
        │ struct[2] ┆ i64      │
        ╞═══════════╪══════════╡
        │ {1,4}     ┆ 10       │
        │ {4,9}     ┆ 2        │
        │ {9,16}    ┆ 3        │
        └───────────┴──────────┘
        >>> df = df.with_columns(
        ...     pl.col("coords").struct.with_fields(
        ...         pl.field("x").sqrt(),
        ...         y_mul=pl.field("y") * pl.col("multiply"),
        ...     )
        ... )
        >>> df
        shape: (3, 2)
        ┌─────────────┬──────────┐
        │ coords      ┆ multiply │
        │ ---         ┆ ---      │
        │ struct[3]   ┆ i64      │
        ╞═════════════╪══════════╡
        │ {1.0,4,40}  ┆ 10       │
        │ {2.0,9,18}  ┆ 2        │
        │ {3.0,16,48} ┆ 3        │
        └─────────────┴──────────┘
        >>> df.unnest("coords")
        shape: (3, 4)
        ┌─────┬─────┬───────┬──────────┐
        │ x   ┆ y   ┆ y_mul ┆ multiply │
        │ --- ┆ --- ┆ ---   ┆ ---      │
        │ f64 ┆ i64 ┆ i64   ┆ i64      │
        ╞═════╪═════╪═══════╪══════════╡
        │ 1.0 ┆ 4   ┆ 40    ┆ 10       │
        │ 2.0 ┆ 9   ┆ 18    ┆ 2        │
        │ 3.0 ┆ 16  ┆ 48    ┆ 3        │
        └─────┴─────┴───────┴──────────┘
        """
        # The POLARS_AUTO_STRUCTIFY environment variable (default 0) is read
        # on every call, parsed as an integer, and forwarded as a boolean flag
        # to the expression parser.
        structify = bool(int(os.environ.get("POLARS_AUTO_STRUCTIFY", 0)))

        pyexprs = parse_into_list_of_expressions(
            *exprs, **named_exprs, __structify=structify
        )
        return wrap_expr(self._pyexpr.struct_with_fields(pyexprs))