DriverTrac/venv/lib/python3.12/site-packages/polars/functions/whenthen.py

356 lines
14 KiB
Python

from __future__ import annotations
import contextlib
from typing import TYPE_CHECKING, Any
import polars._reexport as pl
from polars._utils.parse import parse_predicates_constraints_into_expression
with contextlib.suppress(ImportError): # Module not available when building docs
import polars._plr as plr
if TYPE_CHECKING:
from collections.abc import Iterable
from polars._typing import IntoExprColumn
def when(
    *predicates: IntoExprColumn | Iterable[IntoExprColumn] | bool,
    **constraints: Any,
) -> pl.When:
    """
    Start a `when-then-otherwise` expression.

    The chain always begins with `pl.when().then()`, and may be extended with one
    or more additional `.when().then()` pairs.

    An `.otherwise()` may optionally be appended at the end. When it is left out,
    `.otherwise(None)` is used as the default.

    Much like :func:`coalesce`, the value belonging to the first condition that
    evaluates to True is the one that gets picked.

    When every condition is False, the `otherwise` value is picked.

    Parameters
    ----------
    predicates
        Condition(s) that must hold for the subsequent statement to apply.
        One or more boolean expressions are accepted; they are implicitly
        combined with `&`.
    constraints
        Conditions given as `col_name = value` keyword arguments, which are treated
        as equality matches (for example `x = 123`). Just like `predicates`,
        multiple conditions are implicitly combined using `&`.

    Warnings
    --------
    Polars computes all expressions passed to `when-then-otherwise` in parallel and
    filters afterwards. Each expression must therefore be valid on its own,
    regardless of the conditions in the `when-then-otherwise` chain.

    Notes
    -----
    * String inputs such as `when("string")`, `then("string")` or
      `otherwise("string")` are parsed as column names. Use :func:`lit` to create
      string values.

    * The output name of the expression is taken from the first `then` statement.
      Neither `predicates` nor `constraints` influence it.

    Examples
    --------
    Add a column holding the value 1 where column "foo" > 2, and the value
    1 + column "bar" where it isn't.

    >>> df = pl.DataFrame({"foo": [1, 3, 4], "bar": [3, 4, 0]})
    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2).then(1).otherwise(1 + pl.col.bar).alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 4   │
    │ 3   ┆ 4   ┆ 1   │
    │ 4   ┆ 0   ┆ 1   │
    └─────┴─────┴─────┘

    Keep in mind that `when-then` always executes all expressions.

    The results are folded left to right, picking the `then` value from the first
    `when` condition that is True.

    If no `when` condition is True, the `otherwise` value is picked.

    >>> df.with_columns(
    ...     when = pl.col.foo > 2,
    ...     then = 1,
    ...     otherwise = 1 + pl.col.bar
    ... ).with_columns(
    ...     pl.when("when").then("then").otherwise("otherwise").alias("val")
    ... )
    shape: (3, 6)
    ┌─────┬─────┬───────┬──────┬───────────┬─────┐
    │ foo ┆ bar ┆ when  ┆ then ┆ otherwise ┆ val │
    │ --- ┆ --- ┆ ---   ┆ ---  ┆ ---       ┆ --- │
    │ i64 ┆ i64 ┆ bool  ┆ i32  ┆ i64       ┆ i64 │
    ╞═════╪═════╪═══════╪══════╪═══════════╪═════╡
    │ 1   ┆ 3   ┆ false ┆ 1    ┆ 4         ┆ 4   │
    │ 3   ┆ 4   ┆ true  ┆ 1    ┆ 5         ┆ 1   │
    │ 4   ┆ 0   ┆ true  ┆ 1    ┆ 1         ┆ 1   │
    └─────┴─────┴───────┴──────┴───────────┴─────┘

    In regular Polars usage, a single string is parsed as a column name.

    >>> df.with_columns(
    ...     when = pl.col.foo > 2,
    ...     then = "foo",
    ...     otherwise = "bar"
    ... )
    shape: (3, 5)
    ┌─────┬─────┬───────┬──────┬───────────┐
    │ foo ┆ bar ┆ when  ┆ then ┆ otherwise │
    │ --- ┆ --- ┆ ---   ┆ ---  ┆ ---       │
    │ i64 ┆ i64 ┆ bool  ┆ i64  ┆ i64       │
    ╞═════╪═════╪═══════╪══════╪═══════════╡
    │ 1   ┆ 3   ┆ false ┆ 1    ┆ 3         │
    │ 3   ┆ 4   ┆ true  ┆ 3    ┆ 4         │
    │ 4   ┆ 0   ┆ true  ┆ 4    ┆ 0         │
    └─────┴─────┴───────┴──────┴───────────┘

    For consistency, `when-then` behaves in the same way.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2).then("foo").otherwise("bar").alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 3   │
    │ 3   ┆ 4   ┆ 3   │
    │ 4   ┆ 0   ┆ 4   │
    └─────┴─────┴─────┘

    :func:`lit` can be used to create string values.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2)
    ...     .then(pl.lit("foo"))
    ...     .otherwise(pl.lit("bar"))
    ...     .alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ bar │
    │ 3   ┆ 4   ┆ foo │
    │ 4   ┆ 0   ┆ foo │
    └─────┴─────┴─────┘

    Multiple `when-then` statements can be chained.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2)
    ...     .then(1)
    ...     .when(pl.col.bar > 2)
    ...     .then(4)
    ...     .otherwise(-1)
    ...     .alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 4   │
    │ 3   ┆ 4   ┆ 1   │
    │ 4   ┆ 0   ┆ 1   │
    └─────┴─────┴─────┘

    For `foo=3` and `bar=4` both conditions are True, but the first value
    (i.e. 1) is the one that is picked.

    >>> df.with_columns(
    ...     when1 = pl.col.foo > 2,
    ...     then1 = 1,
    ...     when2 = pl.col.bar > 2,
    ...     then2 = 4,
    ...     otherwise = -1
    ... )
    shape: (3, 7)
    ┌─────┬─────┬───────┬───────┬───────┬───────┬───────────┐
    │ foo ┆ bar ┆ when1 ┆ then1 ┆ when2 ┆ then2 ┆ otherwise │
    │ --- ┆ --- ┆ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---       │
    │ i64 ┆ i64 ┆ bool  ┆ i32   ┆ bool  ┆ i32   ┆ i32       │
    ╞═════╪═════╪═══════╪═══════╪═══════╪═══════╪═══════════╡
    │ 1   ┆ 3   ┆ false ┆ 1     ┆ true  ┆ 4     ┆ -1        │
    │ 3   ┆ 4   ┆ true  ┆ 1     ┆ true  ┆ 4     ┆ -1        │
    │ 4   ┆ 0   ┆ true  ┆ 1     ┆ false ┆ 4     ┆ -1        │
    └─────┴─────┴───────┴───────┴───────┴───────┴───────────┘

    The `otherwise` statement is optional and defaults to `.otherwise(None)`
    if not given.

    This idiom is commonly used to null out values.

    >>> df.with_columns(pl.when(pl.col.foo == 3).then("bar"))
    shape: (3, 2)
    ┌─────┬──────┐
    │ foo ┆ bar  │
    │ --- ┆ ---  │
    │ i64 ┆ i64  │
    ╞═════╪══════╡
    │ 1   ┆ null │
    │ 3   ┆ 4    │
    │ 4   ┆ null │
    └─────┴──────┘

    `when` accepts keyword arguments as shorthand for equality conditions.

    >>> df.with_columns(pl.when(foo=3).then("bar"))
    shape: (3, 2)
    ┌─────┬──────┐
    │ foo ┆ bar  │
    │ --- ┆ ---  │
    │ i64 ┆ i64  │
    ╞═════╪══════╡
    │ 1   ┆ null │
    │ 3   ┆ 4    │
    │ 4   ┆ null │
    └─────┴──────┘

    Multiple predicates passed to `when` are combined with `&`.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2, pl.col.bar < 3)  # when((pred1) & (pred2))
    ...     .then(pl.lit("Yes"))
    ...     .otherwise(pl.lit("No"))
    ...     .alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ No  │
    │ 3   ┆ 4   ┆ No  │
    │ 4   ┆ 0   ┆ Yes │
    └─────┴─────┴─────┘

    It could also be thought of as an implicit :func:`all_horizontal` being present.

    >>> df.with_columns(
    ...     when = pl.all_horizontal(pl.col.foo > 2, pl.col.bar < 3)
    ... )
    shape: (3, 3)
    ┌─────┬─────┬───────┐
    │ foo ┆ bar ┆ when  │
    │ --- ┆ --- ┆ ---   │
    │ i64 ┆ i64 ┆ bool  │
    ╞═════╪═════╪═══════╡
    │ 1   ┆ 3   ┆ false │
    │ 3   ┆ 4   ┆ false │
    │ 4   ┆ 0   ┆ true  │
    └─────┴─────┴───────┘

    Structs can be used as a way to return multiple values.

    Here the "foo" and "bar" values are swapped when "foo" is greater than 2.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2)
    ...     .then(pl.struct(foo="bar", bar="foo"))
    ...     .otherwise(pl.struct("foo", "bar"))
    ...     .struct.unnest()
    ... )
    shape: (3, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 3   │
    │ 4   ┆ 3   │
    │ 0   ┆ 4   │
    └─────┴─────┘

    The struct fields are given the same name as the target columns, which are then
    unnested.

    >>> df.with_columns(
    ...     when = pl.col.foo > 2,
    ...     then = pl.struct(foo="bar", bar="foo"),
    ...     otherwise = pl.struct("foo", "bar")
    ... )
    shape: (3, 5)
    ┌─────┬─────┬───────┬───────────┬───────────┐
    │ foo ┆ bar ┆ when  ┆ then      ┆ otherwise │
    │ --- ┆ --- ┆ ---   ┆ ---       ┆ ---       │
    │ i64 ┆ i64 ┆ bool  ┆ struct[2] ┆ struct[2] │
    ╞═════╪═════╪═══════╪═══════════╪═══════════╡
    │ 1   ┆ 3   ┆ false ┆ {3,1}     ┆ {1,3}     │
    │ 3   ┆ 4   ┆ true  ┆ {4,3}     ┆ {3,4}     │
    │ 4   ┆ 0   ┆ true  ┆ {0,4}     ┆ {4,0}     │
    └─────┴─────┴───────┴───────────┴───────────┘

    The output name of a `when-then` expression comes from the first `then` branch.

    Here we try to set all columns to 0 if any column contains a value less than 2.

    >>> df.with_columns(  # doctest: +SKIP
    ...     pl.when(pl.any_horizontal(pl.all() < 2))
    ...     .then(0)
    ...     .otherwise(pl.all())
    ... )
    # ComputeError: the name 'literal' passed to `LazyFrame.with_columns` is duplicate

    :meth:`.name.keep` could be used to give preference to the column expression.

    >>> df.with_columns(
    ...     pl.when(pl.any_horizontal(pl.all() < 2))
    ...     .then(0)
    ...     .otherwise(pl.all())
    ...     .name.keep()
    ... )
    shape: (3, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 0   ┆ 0   │
    │ 3   ┆ 4   │
    │ 0   ┆ 0   │
    └─────┴─────┘

    The logic could also be changed to move the column expression inside `then`.

    >>> df.with_columns(
    ...     pl.when(pl.any_horizontal(pl.all() < 2).not_())
    ...     .then(pl.all())
    ...     .otherwise(0)
    ... )
    shape: (3, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 0   ┆ 0   │
    │ 3   ┆ 4   │
    │ 0   ┆ 0   │
    └─────┴─────┘
    """  # fmt: skip
    # Turn the positional predicates and keyword constraints into one condition
    # expression, hand it to the native `when` builder, and wrap the result in
    # the `When` object that exposes `.then()`.
    return pl.When(
        plr.when(
            parse_predicates_constraints_into_expression(*predicates, **constraints)
        )
    )