DriverTrac/venv/lib/python3.12/site-packages/polars/expr/meta.py

445 lines
14 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING, Literal, overload
import polars._reexport as pl
from polars._utils.deprecation import deprecated
from polars._utils.serde import serialize_polars_object
from polars._utils.various import display_dot_graph
from polars._utils.wrap import wrap_expr
from polars.exceptions import ComputeError
if TYPE_CHECKING:
import sys
from io import IOBase
from pathlib import Path
from polars import Expr
from polars._typing import SchemaDict, SerializationFormat
if sys.version_info >= (3, 13):
from warnings import deprecated
else:
from typing_extensions import deprecated # noqa: TC004
class ExprMetaNameSpace:
"""Namespace for expressions on a meta level."""
_accessor = "meta"
def __init__(self, expr: Expr) -> None:
    """Keep a reference to the `_pyexpr` object held by `expr`."""
    pyexpr = expr._pyexpr
    self._pyexpr = pyexpr
def __eq__(self, other: ExprMetaNameSpace | Expr) -> bool:  # type: ignore[override]
    """Compare the wrapped expression with the one wrapped by `other`."""
    # `other` only needs to expose `_pyexpr`; both `Expr` and this namespace do.
    compare = self._pyexpr.meta_eq
    return compare(other._pyexpr)
def __ne__(self, other: ExprMetaNameSpace | Expr) -> bool:  # type: ignore[override]
    """Return the negation of `self == other`."""
    are_equal = self == other
    return not are_equal
def eq(self, other: ExprMetaNameSpace | Expr) -> bool:
    """
    Check whether this expression and `other` are the same expression.

    Parameters
    ----------
    other
        Expression (or its `meta` namespace) to compare against.

    Examples
    --------
    >>> foo_bar = pl.col("foo").alias("bar")
    >>> foo = pl.col("foo")
    >>> foo_bar.meta.eq(foo)
    False
    >>> foo_bar2 = pl.col("foo").alias("bar")
    >>> foo_bar.meta.eq(foo_bar2)
    True
    """
    compare = self._pyexpr.meta_eq
    return compare(other._pyexpr)
def ne(self, other: ExprMetaNameSpace | Expr) -> bool:
    """
    Check whether this expression differs from `other`.

    This is the logical negation of :meth:`eq`.

    Parameters
    ----------
    other
        Expression (or its `meta` namespace) to compare against.

    Examples
    --------
    >>> foo_bar = pl.col("foo").alias("bar")
    >>> foo = pl.col("foo")
    >>> foo_bar.meta.ne(foo)
    True
    >>> foo_bar2 = pl.col("foo").alias("bar")
    >>> foo_bar.meta.ne(foo_bar2)
    False
    """
    if self.eq(other):
        return False
    return True
def has_multiple_outputs(self) -> bool:
    """
    Check whether this expression expands into more than one expression.

    Examples
    --------
    >>> e = pl.col(["a", "b"]).name.suffix("_foo")
    >>> e.meta.has_multiple_outputs()
    True
    """
    pyexpr = self._pyexpr
    return pyexpr.meta_has_multiple_outputs()
def is_column(self) -> bool:
    r"""
    Check whether this expression is a plain column: no alias and no regex.

    Examples
    --------
    >>> e = pl.col("foo")
    >>> e.meta.is_column()
    True
    >>> e = pl.col("foo") * pl.col("bar")
    >>> e.meta.is_column()
    False
    >>> e = pl.col(r"^col.*\d+$")
    >>> e.meta.is_column()
    False
    """
    pyexpr = self._pyexpr
    return pyexpr.meta_is_column()
def is_regex_projection(self) -> bool:
    """
    Check whether this expression expands to columns matching a regex pattern.

    Examples
    --------
    >>> e = pl.col("^.*$").name.prefix("foo_")
    >>> e.meta.is_regex_projection()
    True
    """
    pyexpr = self._pyexpr
    return pyexpr.meta_is_regex_projection()
def is_column_selection(self, *, allow_aliasing: bool = False) -> bool:
    """
    Check whether this expression does nothing but select columns.

    Column selection covers bare columns, columns matched by regex or dtype,
    selectors and exclude ops. Aliasing of columns/expressions is accepted
    as well when `allow_aliasing` is set.

    .. versionadded:: 0.20.30

    Parameters
    ----------
    allow_aliasing
        With the default of False, an expression containing any aliasing is
        not treated as column selection. Pass True to accept aliasing too.

    Examples
    --------
    >>> import polars.selectors as cs
    >>> e = pl.col("foo")
    >>> e.meta.is_column_selection()
    True
    >>> e = pl.col("foo").alias("bar")
    >>> e.meta.is_column_selection()
    False
    >>> e.meta.is_column_selection(allow_aliasing=True)
    True
    >>> e = pl.col("foo") * pl.col("bar")
    >>> e.meta.is_column_selection()
    False
    >>> e = cs.starts_with("foo")
    >>> e.meta.is_column_selection()
    True
    >>> e = cs.starts_with("foo").exclude("foo!")
    >>> e.meta.is_column_selection()
    True
    """
    pyexpr = self._pyexpr
    return pyexpr.meta_is_column_selection(allow_aliasing)
def is_literal(self, *, allow_aliasing: bool = False) -> bool:
    """
    Check whether this expression is a literal value, optionally aliased.

    .. versionadded:: 1.14

    Parameters
    ----------
    allow_aliasing
        With the default of False, only a bare literal matches.
        Pass True so that aliased literals match as well.

    Examples
    --------
    >>> from datetime import datetime
    >>> e = pl.lit(123)
    >>> e.meta.is_literal()
    True
    >>> e = pl.lit(987.654321).alias("foo")
    >>> e.meta.is_literal()
    False
    >>> e = pl.lit(datetime.now()).alias("bar")
    >>> e.meta.is_literal(allow_aliasing=True)
    True
    """
    pyexpr = self._pyexpr
    return pyexpr.meta_is_literal(allow_aliasing)
@overload
def output_name(self, *, raise_if_undetermined: Literal[True] = True) -> str: ...


@overload
def output_name(self, *, raise_if_undetermined: Literal[False]) -> str | None: ...


def output_name(self, *, raise_if_undetermined: bool = True) -> str | None:
    """
    Return the name of the column this expression would produce.

    The output name cannot always be worked out up front, because it may
    depend on the schema of the context the expression runs in.

    Parameters
    ----------
    raise_if_undetermined
        When True (the default), a `ComputeError` is raised if the name
        cannot be determined. When False, `None` is returned instead.

    Examples
    --------
    >>> e = pl.col("foo") * pl.col("bar")
    >>> e.meta.output_name()
    'foo'
    >>> e_filter = pl.col("foo").filter(pl.col("bar") == 13)
    >>> e_filter.meta.output_name()
    'foo'
    >>> e_sum_over = pl.sum("foo").over("groups")
    >>> e_sum_over.meta.output_name()
    'foo'
    >>> e_sum_slice = pl.sum("foo").slice(pl.len() - 10, pl.col("bar"))
    >>> e_sum_slice.meta.output_name()
    'foo'
    >>> pl.len().meta.output_name()
    'len'
    """
    try:
        name = self._pyexpr.meta_output_name()
    except ComputeError:
        if raise_if_undetermined:
            # Bare `raise` keeps the original exception and traceback.
            raise
        return None
    return name
def pop(self, *, schema: SchemaDict | None = None) -> list[Expr]:
    """
    Pop the latest expression and return the input(s) it was applied to.

    Parameters
    ----------
    schema
        Optional schema, forwarded unchanged to the underlying `meta_pop` call.

    Returns
    -------
    list of Expr
        Usually a list of length one. Expressions that take several inputs
        (a `fold` expression, for instance) give a longer list.

    Examples
    --------
    >>> e = pl.col("foo") + pl.col("bar")
    >>> first = e.meta.pop()[0]
    >>> first.meta == pl.col("bar")
    True
    >>> first.meta == pl.col("foo")
    False
    """
    py_inputs = self._pyexpr.meta_pop(schema)
    return [wrap_expr(py_input) for py_input in py_inputs]
def root_names(self) -> list[str]:
    """
    Return the names of the root columns this expression is built from.

    Examples
    --------
    >>> e = pl.col("foo") * pl.col("bar")
    >>> e.meta.root_names()
    ['foo', 'bar']
    >>> e_filter = pl.col("foo").filter(pl.col("bar") == 13)
    >>> e_filter.meta.root_names()
    ['foo', 'bar']
    >>> e_sum_over = pl.sum("foo").over("groups")
    >>> e_sum_over.meta.root_names()
    ['foo', 'groups']
    >>> e_sum_slice = pl.sum("foo").slice(pl.len() - 10, pl.col("bar"))
    >>> e_sum_slice.meta.root_names()
    ['foo', 'bar']
    """
    pyexpr = self._pyexpr
    return pyexpr.meta_root_names()
def undo_aliases(self) -> Expr:
    """
    Return this expression with renaming operations removed.

    Renaming operations include `alias` and `name.keep`.

    Examples
    --------
    >>> e = pl.col("foo").alias("bar")
    >>> e.meta.undo_aliases().meta == pl.col("foo")
    True
    >>> e = pl.col("foo").sum().over("bar")
    >>> e.name.keep().meta.undo_aliases().meta == e
    True
    """
    unaliased = self._pyexpr.meta_undo_aliases()
    return wrap_expr(unaliased)
def as_selector(self) -> pl.Selector:
    """
    Try to turn this expression into a selector.

    Raises if the underlying expression is not a column or selector.

    .. warning::
        This functionality is considered **unstable**. It may be changed
        at any point without it being considered a breaking change.
    """
    pyselector = self._pyexpr.into_selector()
    return pl.Selector._from_pyselector(pyselector)
@overload
def serialize(
    self, file: None = ..., *, format: Literal["binary"] = ...
) -> bytes: ...


@overload
def serialize(self, file: None = ..., *, format: Literal["json"]) -> str: ...


@overload
def serialize(
    self, file: IOBase | str | Path, *, format: SerializationFormat = ...
) -> None: ...


def serialize(
    self,
    file: IOBase | str | Path | None = None,
    *,
    format: SerializationFormat = "binary",
) -> bytes | str | None:
    r"""
    Serialize this expression, either to a file or as a returned value.

    Parameters
    ----------
    file
        File path or file object to which the result should be written.
        If set to `None` (default), the serialized output is returned
        instead: `bytes` for the binary format, `str` for JSON.
    format
        The format in which to serialize. Options:

        - `"binary"`: Serialize to binary format (bytes). This is the default.
        - `"json"`: Serialize to JSON format (string).

    Raises
    ------
    ValueError
        If `format` is neither `"binary"` nor `"json"`.

    See Also
    --------
    Expr.deserialize

    Notes
    -----
    Serialization is not stable across Polars versions: an expression
    serialized in one Polars version may not be deserializable in another
    Polars version.

    Examples
    --------
    Serialize the expression into a binary representation.

    >>> expr = pl.col("foo").sum().over("bar")
    >>> bytes = expr.meta.serialize()
    >>> type(bytes)
    <class 'bytes'>

    The bytes can later be deserialized back into an `Expr` object.

    >>> import io
    >>> pl.Expr.deserialize(io.BytesIO(bytes))
    <Expr ['col("foo").sum().over([col("ba…'] at ...>
    """
    # Reject unknown formats up front, before touching the expression.
    if format not in ("binary", "json"):
        msg = f"`format` must be one of {{'binary', 'json'}}, got {format!r}"
        raise ValueError(msg)

    pyexpr = self._pyexpr
    serializer = (
        pyexpr.serialize_binary if format == "binary" else pyexpr.serialize_json
    )
    return serialize_polars_object(serializer, file, format)
@overload
def write_json(self, file: None = ...) -> str: ...


@overload
def write_json(self, file: IOBase | str | Path) -> None: ...


@deprecated("`meta.write_json` was renamed; use `meta.serialize` instead")
def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
    """
    Write the expression as JSON.

    .. deprecated:: 0.20.11
        This method has been renamed to :meth:`serialize`.
    """
    # Thin compatibility shim: delegate to `serialize` with the JSON format.
    json_result = self.serialize(file, format="json")
    return json_result
@overload
def tree_format(
    self,
    *,
    return_as_string: Literal[False] = ...,
    schema: None | SchemaDict = None,
) -> None: ...


@overload
def tree_format(
    self, *, return_as_string: Literal[True], schema: None | SchemaDict = None
) -> str: ...


def tree_format(
    self, *, return_as_string: bool = False, schema: None | SchemaDict = None
) -> str | None:
    """
    Render the expression as a tree.

    Parameters
    ----------
    return_as_string
        If True, return the tree as a string rather than printing to stdout.
    schema
        Optional schema, forwarded unchanged to the underlying
        `meta_tree_format` call.

    Examples
    --------
    >>> e = (pl.col("foo") * pl.col("bar")).sum().over(pl.col("ham")) / 2
    >>> e.meta.tree_format(return_as_string=True)  # doctest: +SKIP
    """
    tree = self._pyexpr.meta_tree_format(schema)
    if return_as_string:
        return tree
    print(tree)
    return None
def show_graph(
    self,
    *,
    show: bool = True,
    output_path: str | Path | None = None,
    raw_output: bool = False,
    figsize: tuple[float, float] = (16.0, 12.0),
    schema: None | SchemaDict = None,
) -> str | None:
    """
    Render the expression as a Graphviz graph.

    Graphviz has to be installed for the visualization to be rendered (if it
    is not already present, it can be downloaded here:
    `<https://graphviz.org/download>`_).

    Parameters
    ----------
    show
        Show the figure.
    output_path
        Write the figure to disk.
    raw_output
        Return dot syntax. This cannot be combined with `show` and/or `output_path`.
    figsize
        Passed to matplotlib if `show == True`.
    schema
        Optional schema, forwarded unchanged to the underlying
        `meta_show_graph` call that produces the dot source.

    Examples
    --------
    >>> e = (pl.col("foo") * pl.col("bar")).sum().over(pl.col("ham")) / 2
    >>> e.meta.show_graph()  # doctest: +SKIP
    """
    # Build the dot source first; display/saving is handled by the shared helper.
    dot_source = self._pyexpr.meta_show_graph(schema)
    return display_dot_graph(
        dot=dot_source,
        show=show,
        output_path=output_path,
        raw_output=raw_output,
        figsize=figsize,
    )