356 lines
14 KiB
Python
356 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
import contextlib
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
import polars._reexport as pl
|
|
from polars._utils.parse import parse_predicates_constraints_into_expression
|
|
|
|
with contextlib.suppress(ImportError): # Module not available when building docs
|
|
import polars._plr as plr
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Iterable
|
|
|
|
from polars._typing import IntoExprColumn
|
|
|
|
|
|
def when(
    *predicates: IntoExprColumn | Iterable[IntoExprColumn] | bool,
    **constraints: Any,
) -> pl.When:
    """
    Start a `when-then-otherwise` expression.

    Always initiated by a `pl.when().then()`, and optionally followed by chaining
    one or more `.when().then()` statements.

    An optional `.otherwise()` can be appended at the end. If not declared, a default
    of `.otherwise(None)` is used.

    Similar to :func:`coalesce`, the value from the first condition that
    evaluates to True will be picked.

    If all conditions are False, the `otherwise` value is picked.

    Parameters
    ----------
    predicates
        Condition(s) that must be met in order to apply the subsequent statement.
        Accepts one or more boolean expressions, which are implicitly combined with
        `&`.
    constraints
        Apply conditions as `col_name = value` keyword arguments that are treated as
        equality matches, such as `x = 123`. As with the predicates parameter, multiple
        conditions are implicitly combined using `&`.

    Returns
    -------
    When
        The opening stage of the chain, built from the combined condition.

    Warnings
    --------
    Polars computes all expressions passed to `when-then-otherwise` in parallel and
    filters afterwards. This means each expression must be valid on its own, regardless
    of the conditions in the `when-then-otherwise` chain.

    Notes
    -----
    * String inputs e.g. `when("string")`, `then("string")` or `otherwise("string")`
      are parsed as column names. :func:`lit` can be used to create string values.
    * The expression output name is taken from the first `then` statement. It is
      not affected by `predicates`, nor by `constraints`.

    Examples
    --------
    Below we add a column with the value 1, where column "foo" > 2 and the value
    1 + column "bar" where it isn't.

    >>> df = pl.DataFrame({"foo": [1, 3, 4], "bar": [3, 4, 0]})
    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2).then(1).otherwise(1 + pl.col.bar).alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 4   │
    │ 3   ┆ 4   ┆ 1   │
    │ 4   ┆ 0   ┆ 1   │
    └─────┴─────┴─────┘

    Note that `when-then` always executes all expressions.

    The results are folded left to right, picking the `then` value from the first `when`
    condition that is True.

    If no `when` condition is True the `otherwise` value is picked.

    >>> df.with_columns(
    ...     when = pl.col.foo > 2,
    ...     then = 1,
    ...     otherwise = 1 + pl.col.bar
    ... ).with_columns(
    ...     pl.when("when").then("then").otherwise("otherwise").alias("val")
    ... )
    shape: (3, 6)
    ┌─────┬─────┬───────┬──────┬───────────┬─────┐
    │ foo ┆ bar ┆ when  ┆ then ┆ otherwise ┆ val │
    │ --- ┆ --- ┆ ---   ┆ ---  ┆ ---       ┆ --- │
    │ i64 ┆ i64 ┆ bool  ┆ i32  ┆ i64       ┆ i64 │
    ╞═════╪═════╪═══════╪══════╪═══════════╪═════╡
    │ 1   ┆ 3   ┆ false ┆ 1    ┆ 4         ┆ 4   │
    │ 3   ┆ 4   ┆ true  ┆ 1    ┆ 5         ┆ 1   │
    │ 4   ┆ 0   ┆ true  ┆ 1    ┆ 1         ┆ 1   │
    └─────┴─────┴───────┴──────┴───────────┴─────┘

    Note that in regular Polars usage, a single string is parsed as a column name.

    >>> df.with_columns(
    ...     when = pl.col.foo > 2,
    ...     then = "foo",
    ...     otherwise = "bar"
    ... )
    shape: (3, 5)
    ┌─────┬─────┬───────┬──────┬───────────┐
    │ foo ┆ bar ┆ when  ┆ then ┆ otherwise │
    │ --- ┆ --- ┆ ---   ┆ ---  ┆ ---       │
    │ i64 ┆ i64 ┆ bool  ┆ i64  ┆ i64       │
    ╞═════╪═════╪═══════╪══════╪═══════════╡
    │ 1   ┆ 3   ┆ false ┆ 1    ┆ 3         │
    │ 3   ┆ 4   ┆ true  ┆ 3    ┆ 4         │
    │ 4   ┆ 0   ┆ true  ┆ 4    ┆ 0         │
    └─────┴─────┴───────┴──────┴───────────┘

    For consistency, `when-then` behaves in the same way.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2).then("foo").otherwise("bar").alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i64 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 3   │
    │ 3   ┆ 4   ┆ 3   │
    │ 4   ┆ 0   ┆ 4   │
    └─────┴─────┴─────┘

    :func:`lit` can be used to create string values.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2)
    ...     .then(pl.lit("foo"))
    ...     .otherwise(pl.lit("bar"))
    ...     .alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ bar │
    │ 3   ┆ 4   ┆ foo │
    │ 4   ┆ 0   ┆ foo │
    └─────┴─────┴─────┘

    Multiple `when-then` statements can be chained.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2)
    ...     .then(1)
    ...     .when(pl.col.bar > 2)
    ...     .then(4)
    ...     .otherwise(-1)
    ...     .alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ i32 │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ 4   │
    │ 3   ┆ 4   ┆ 1   │
    │ 4   ┆ 0   ┆ 1   │
    └─────┴─────┴─────┘

    In the case of `foo=3` and `bar=4`, both conditions are True but the first value
    (i.e. 1) is picked.

    >>> df.with_columns(
    ...     when1 = pl.col.foo > 2,
    ...     then1 = 1,
    ...     when2 = pl.col.bar > 2,
    ...     then2 = 4,
    ...     otherwise = -1
    ... )
    shape: (3, 7)
    ┌─────┬─────┬───────┬───────┬───────┬───────┬───────────┐
    │ foo ┆ bar ┆ when1 ┆ then1 ┆ when2 ┆ then2 ┆ otherwise │
    │ --- ┆ --- ┆ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---       │
    │ i64 ┆ i64 ┆ bool  ┆ i32   ┆ bool  ┆ i32   ┆ i32       │
    ╞═════╪═════╪═══════╪═══════╪═══════╪═══════╪═══════════╡
    │ 1   ┆ 3   ┆ false ┆ 1     ┆ true  ┆ 4     ┆ -1        │
    │ 3   ┆ 4   ┆ true  ┆ 1     ┆ true  ┆ 4     ┆ -1        │
    │ 4   ┆ 0   ┆ true  ┆ 1     ┆ false ┆ 4     ┆ -1        │
    └─────┴─────┴───────┴───────┴───────┴───────┴───────────┘

    The `otherwise` statement is optional and defaults to `.otherwise(None)`
    if not given.

    This idiom is commonly used to null out values.

    >>> df.with_columns(pl.when(pl.col.foo == 3).then("bar"))
    shape: (3, 2)
    ┌─────┬──────┐
    │ foo ┆ bar  │
    │ --- ┆ ---  │
    │ i64 ┆ i64  │
    ╞═════╪══════╡
    │ 1   ┆ null │
    │ 3   ┆ 4    │
    │ 4   ┆ null │
    └─────┴──────┘

    `when` accepts keyword arguments as shorthand for equality conditions.

    >>> df.with_columns(pl.when(foo=3).then("bar"))
    shape: (3, 2)
    ┌─────┬──────┐
    │ foo ┆ bar  │
    │ --- ┆ ---  │
    │ i64 ┆ i64  │
    ╞═════╪══════╡
    │ 1   ┆ null │
    │ 3   ┆ 4    │
    │ 4   ┆ null │
    └─────┴──────┘

    Multiple predicates passed to `when` are combined with `&`

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2, pl.col.bar < 3)  # when((pred1) & (pred2))
    ...     .then(pl.lit("Yes"))
    ...     .otherwise(pl.lit("No"))
    ...     .alias("val")
    ... )
    shape: (3, 3)
    ┌─────┬─────┬─────┐
    │ foo ┆ bar ┆ val │
    │ --- ┆ --- ┆ --- │
    │ i64 ┆ i64 ┆ str │
    ╞═════╪═════╪═════╡
    │ 1   ┆ 3   ┆ No  │
    │ 3   ┆ 4   ┆ No  │
    │ 4   ┆ 0   ┆ Yes │
    └─────┴─────┴─────┘

    It could also be thought of as an implicit :func:`all_horizontal` being present.

    >>> df.with_columns(
    ...     when = pl.all_horizontal(pl.col.foo > 2, pl.col.bar < 3)
    ... )
    shape: (3, 3)
    ┌─────┬─────┬───────┐
    │ foo ┆ bar ┆ when  │
    │ --- ┆ --- ┆ ---   │
    │ i64 ┆ i64 ┆ bool  │
    ╞═════╪═════╪═══════╡
    │ 1   ┆ 3   ┆ false │
    │ 3   ┆ 4   ┆ false │
    │ 4   ┆ 0   ┆ true  │
    └─────┴─────┴───────┘

    Structs can be used as a way to return multiple values.

    Here we swap the "foo" and "bar" values when "foo" is greater than 2.

    >>> df.with_columns(
    ...     pl.when(pl.col.foo > 2)
    ...     .then(pl.struct(foo="bar", bar="foo"))
    ...     .otherwise(pl.struct("foo", "bar"))
    ...     .struct.unnest()
    ... )
    shape: (3, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 3   │
    │ 4   ┆ 3   │
    │ 0   ┆ 4   │
    └─────┴─────┘

    The struct fields are given the same name as the target columns, which are then
    unnested.

    >>> df.with_columns(
    ...     when = pl.col.foo > 2,
    ...     then = pl.struct(foo="bar", bar="foo"),
    ...     otherwise = pl.struct("foo", "bar")
    ... )
    shape: (3, 5)
    ┌─────┬─────┬───────┬───────────┬───────────┐
    │ foo ┆ bar ┆ when  ┆ then      ┆ otherwise │
    │ --- ┆ --- ┆ ---   ┆ ---       ┆ ---       │
    │ i64 ┆ i64 ┆ bool  ┆ struct[2] ┆ struct[2] │
    ╞═════╪═════╪═══════╪═══════════╪═══════════╡
    │ 1   ┆ 3   ┆ false ┆ {3,1}     ┆ {1,3}     │
    │ 3   ┆ 4   ┆ true  ┆ {4,3}     ┆ {3,4}     │
    │ 4   ┆ 0   ┆ true  ┆ {0,4}     ┆ {4,0}     │
    └─────┴─────┴───────┴───────────┴───────────┘

    The output name of a `when-then` expression comes from the first `then` branch.

    Here we try to set all columns to 0 if any column contains a value less than 2.

    >>> df.with_columns(  # doctest: +SKIP
    ...     pl.when(pl.any_horizontal(pl.all() < 2))
    ...     .then(0)
    ...     .otherwise(pl.all())
    ... )
    # ComputeError: the name 'literal' passed to `LazyFrame.with_columns` is duplicate

    :meth:`.name.keep` could be used to give preference to the column expression.

    >>> df.with_columns(
    ...     pl.when(pl.any_horizontal(pl.all() < 2))
    ...     .then(0)
    ...     .otherwise(pl.all())
    ...     .name.keep()
    ... )
    shape: (3, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 0   ┆ 0   │
    │ 3   ┆ 4   │
    │ 0   ┆ 0   │
    └─────┴─────┘

    The logic could also be changed to move the column expression inside `then`.

    >>> df.with_columns(
    ...     pl.when(pl.any_horizontal(pl.all() < 2).not_())
    ...     .then(pl.all())
    ...     .otherwise(0)
    ... )
    shape: (3, 2)
    ┌─────┬─────┐
    │ foo ┆ bar │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 0   ┆ 0   │
    │ 3   ┆ 4   │
    │ 0   ┆ 0   │
    └─────┴─────┘
    """  # fmt: skip
    # Collapse the positional predicates and keyword equality constraints into
    # one expression before handing it to the native builder.
    combined_predicate = parse_predicates_constraints_into_expression(
        *predicates,
        **constraints,
    )
    native_when = plr.when(combined_predicate)
    return pl.When(native_when)
|