1149 lines
28 KiB
Python
1149 lines
28 KiB
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Sequence
|
|
from typing import TYPE_CHECKING, Any, Callable
|
|
|
|
from polars import functions as F
|
|
from polars._utils.unstable import unstable
|
|
from polars._utils.wrap import wrap_s
|
|
from polars.series.utils import expr_dispatch
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Collection
|
|
|
|
from polars import Expr, Series
|
|
from polars._plr import PySeries
|
|
from polars._typing import (
|
|
IntoExpr,
|
|
IntoExprColumn,
|
|
ListToStructWidthStrategy,
|
|
NullBehavior,
|
|
)
|
|
|
|
|
|
@expr_dispatch
|
|
class ListNameSpace:
|
|
"""Namespace for list related methods."""
|
|
|
|
_accessor = "list"
|
|
|
|
def __init__(self, series: Series) -> None:
|
|
self._s: PySeries = series._s
|
|
|
|
def all(self) -> Series:
|
|
"""
|
|
Evaluate whether all boolean values in a list are true.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`Boolean`.
|
|
|
|
Notes
|
|
-----
|
|
If there are no non-null elements in a row, the output is `True`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series(
|
|
... [[True, True], [False, True], [False, False], [None], [], None],
|
|
... dtype=pl.List(pl.Boolean),
|
|
... )
|
|
>>> s.list.all()
|
|
shape: (6,)
|
|
Series: '' [bool]
|
|
[
|
|
true
|
|
false
|
|
false
|
|
true
|
|
true
|
|
null
|
|
]
|
|
"""
|
|
|
|
def any(self) -> Series:
|
|
"""
|
|
Evaluate whether any boolean value in a list is true.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`Boolean`.
|
|
|
|
Notes
|
|
-----
|
|
If there are no non-null elements in a row, the output is `False`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series(
|
|
... [[True, True], [False, True], [False, False], [None], [], None],
|
|
... dtype=pl.List(pl.Boolean),
|
|
... )
|
|
>>> s.list.any()
|
|
shape: (6,)
|
|
Series: '' [bool]
|
|
[
|
|
true
|
|
true
|
|
false
|
|
false
|
|
false
|
|
null
|
|
]
|
|
"""
|
|
|
|
def len(self) -> Series:
|
|
"""
|
|
Return the number of elements in each list.
|
|
|
|
Null values count towards the total.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`UInt32`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series([[1, 2, None], [5]])
|
|
>>> s.list.len()
|
|
shape: (2,)
|
|
Series: '' [u32]
|
|
[
|
|
3
|
|
1
|
|
]
|
|
"""
|
|
|
|
def drop_nulls(self) -> Series:
|
|
"""
|
|
Drop all null values in the list.
|
|
|
|
The original order of the remaining elements is preserved.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[None, 1, None, 2], [None], [3, 4]])
|
|
>>> s.list.drop_nulls()
|
|
shape: (3,)
|
|
Series: 'values' [list[i64]]
|
|
[
|
|
[1, 2]
|
|
[]
|
|
[3, 4]
|
|
]
|
|
"""
|
|
|
|
def sample(
|
|
self,
|
|
n: int | IntoExprColumn | None = None,
|
|
*,
|
|
fraction: float | IntoExprColumn | None = None,
|
|
with_replacement: bool = False,
|
|
shuffle: bool = False,
|
|
seed: int | None = None,
|
|
) -> Series:
|
|
"""
|
|
Sample from this list.
|
|
|
|
Parameters
|
|
----------
|
|
n
|
|
Number of items to return. Cannot be used with `fraction`. Defaults to 1 if
|
|
`fraction` is None.
|
|
fraction
|
|
Fraction of items to return. Cannot be used with `n`.
|
|
with_replacement
|
|
Allow values to be sampled more than once.
|
|
shuffle
|
|
Shuffle the order of sampled data points.
|
|
seed
|
|
Seed for the random number generator. If set to None (default), a
|
|
random seed is generated for each sample operation.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[1, 2, 3], [4, 5]])
|
|
>>> s.list.sample(n=pl.Series("n", [2, 1]), seed=1)
|
|
shape: (2,)
|
|
Series: 'values' [list[i64]]
|
|
[
|
|
[2, 3]
|
|
[5]
|
|
]
|
|
"""
|
|
|
|
def sum(self) -> Series:
|
|
"""
|
|
Sum all the arrays in the list.
|
|
|
|
Notes
|
|
-----
|
|
If there are no non-null elements in a row, the output is `0`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[1], [2, 3]])
|
|
>>> s.list.sum()
|
|
shape: (2,)
|
|
Series: 'values' [i64]
|
|
[
|
|
1
|
|
5
|
|
]
|
|
"""
|
|
|
|
def max(self) -> Series:
|
|
"""
|
|
Compute the max value of the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[4, 1], [2, 3]])
|
|
>>> s.list.max()
|
|
shape: (2,)
|
|
Series: 'values' [i64]
|
|
[
|
|
4
|
|
3
|
|
]
|
|
"""
|
|
|
|
def min(self) -> Series:
|
|
"""
|
|
Compute the min value of the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[4, 1], [2, 3]])
|
|
>>> s.list.min()
|
|
shape: (2,)
|
|
Series: 'values' [i64]
|
|
[
|
|
1
|
|
2
|
|
]
|
|
"""
|
|
|
|
def mean(self) -> Series:
|
|
"""
|
|
Compute the mean value of the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[3, 1], [3, 3]])
|
|
>>> s.list.mean()
|
|
shape: (2,)
|
|
Series: 'values' [f64]
|
|
[
|
|
2.0
|
|
3.0
|
|
]
|
|
"""
|
|
|
|
def median(self) -> Series:
|
|
"""
|
|
Compute the median value of the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[-1, 0, 1], [1, 10]])
|
|
>>> s.list.median()
|
|
shape: (2,)
|
|
Series: 'values' [f64]
|
|
[
|
|
0.0
|
|
5.5
|
|
]
|
|
"""
|
|
|
|
def std(self, ddof: int = 1) -> Series:
|
|
"""
|
|
Compute the std value of the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[-1, 0, 1], [1, 10]])
|
|
>>> s.list.std()
|
|
shape: (2,)
|
|
Series: 'values' [f64]
|
|
[
|
|
1.0
|
|
6.363961
|
|
]
|
|
"""
|
|
|
|
def var(self, ddof: int = 1) -> Series:
|
|
"""
|
|
Compute the var value of the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("values", [[-1, 0, 1], [1, 10]])
|
|
>>> s.list.var()
|
|
shape: (2,)
|
|
Series: 'values' [f64]
|
|
[
|
|
1.0
|
|
40.5
|
|
]
|
|
"""
|
|
|
|
def sort(
|
|
self,
|
|
*,
|
|
descending: bool = False,
|
|
nulls_last: bool = False,
|
|
multithreaded: bool = True,
|
|
) -> Series:
|
|
"""
|
|
Sort the arrays in this column.
|
|
|
|
Parameters
|
|
----------
|
|
descending
|
|
Sort in descending order.
|
|
nulls_last
|
|
Place null values last.
|
|
multithreaded
|
|
Sort using multiple threads.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [9, 1, 2]])
|
|
>>> s.list.sort()
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[1, 2, 3]
|
|
[1, 2, 9]
|
|
]
|
|
>>> s.list.sort(descending=True)
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[3, 2, 1]
|
|
[9, 2, 1]
|
|
]
|
|
"""
|
|
|
|
def reverse(self) -> Series:
|
|
"""
|
|
Reverse the arrays in the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [9, 1, 2]])
|
|
>>> s.list.reverse()
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[1, 2, 3]
|
|
[2, 1, 9]
|
|
]
|
|
"""
|
|
|
|
def unique(self, *, maintain_order: bool = False) -> Series:
|
|
"""
|
|
Get the unique/distinct values in the list.
|
|
|
|
Parameters
|
|
----------
|
|
maintain_order
|
|
Maintain order of data. This requires more work.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 1, 2], [2, 3, 3]])
|
|
>>> s.list.unique()
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[1, 2]
|
|
[2, 3]
|
|
]
|
|
"""
|
|
|
|
def n_unique(self) -> Series:
|
|
"""
|
|
Count the number of unique values in every sub-lists.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 1, 2], [2, 3, 4]])
|
|
>>> s.list.n_unique()
|
|
shape: (2,)
|
|
Series: 'a' [u32]
|
|
[
|
|
2
|
|
3
|
|
]
|
|
"""
|
|
|
|
def concat(self, other: list[Series] | Series | list[Any]) -> Series:
|
|
"""
|
|
Concat the arrays in a Series dtype List in linear time.
|
|
|
|
Parameters
|
|
----------
|
|
other
|
|
Columns to concat into a List Series
|
|
|
|
Examples
|
|
--------
|
|
>>> s1 = pl.Series("a", [["a", "b"], ["c"]])
|
|
>>> s2 = pl.Series("b", [["c"], ["d", None]])
|
|
>>> s1.list.concat(s2)
|
|
shape: (2,)
|
|
Series: 'a' [list[str]]
|
|
[
|
|
["a", "b", "c"]
|
|
["c", "d", null]
|
|
]
|
|
"""
|
|
|
|
def get(
|
|
self,
|
|
index: int | Series | list[int],
|
|
*,
|
|
null_on_oob: bool = False,
|
|
) -> Series:
|
|
"""
|
|
Get the value by index in the sublists.
|
|
|
|
So index `0` would return the first item of every sublist
|
|
and index `-1` would return the last item of every sublist
|
|
if an index is out of bounds, it will return a `None`.
|
|
|
|
Parameters
|
|
----------
|
|
index
|
|
Index to return per sublist
|
|
null_on_oob
|
|
Behavior if an index is out of bounds:
|
|
|
|
* True -> set as null
|
|
* False -> raise an error
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [], [1, 2]])
|
|
>>> s.list.get(0, null_on_oob=True)
|
|
shape: (3,)
|
|
Series: 'a' [i64]
|
|
[
|
|
3
|
|
null
|
|
1
|
|
]
|
|
"""
|
|
|
|
def gather(
|
|
self,
|
|
indices: Series | list[int] | list[list[int]],
|
|
*,
|
|
null_on_oob: bool = False,
|
|
) -> Series:
|
|
"""
|
|
Take sublists by multiple indices.
|
|
|
|
The indices may be defined in a single column, or by sublists in another
|
|
column of dtype `List`.
|
|
|
|
Parameters
|
|
----------
|
|
indices
|
|
Indices to return per sublist
|
|
null_on_oob
|
|
Behavior if an index is out of bounds:
|
|
True -> set as null
|
|
False -> raise an error
|
|
Note that defaulting to raising an error is much cheaper
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [], [1, 2]])
|
|
>>> s.list.gather([0, 2], null_on_oob=True)
|
|
shape: (3,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[3, 1]
|
|
[null, null]
|
|
[1, null]
|
|
]
|
|
"""
|
|
|
|
def gather_every(
|
|
self, n: int | IntoExprColumn, offset: int | IntoExprColumn = 0
|
|
) -> Series:
|
|
"""
|
|
Take every n-th value start from offset in sublists.
|
|
|
|
Parameters
|
|
----------
|
|
n
|
|
Gather every n-th element.
|
|
offset
|
|
Starting index.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2, 3], [], [6, 7, 8, 9]])
|
|
>>> s.list.gather_every(2, offset=1)
|
|
shape: (3,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[2]
|
|
[]
|
|
[7, 9]
|
|
]
|
|
"""
|
|
|
|
def __getitem__(self, item: int) -> Series:
|
|
return self.get(item)
|
|
|
|
def join(self, separator: IntoExprColumn, *, ignore_nulls: bool = True) -> Series:
|
|
"""
|
|
Join all string items in a sublist and place a separator between them.
|
|
|
|
This errors if inner type of list `!= String`.
|
|
|
|
Parameters
|
|
----------
|
|
separator
|
|
string to separate the items with
|
|
ignore_nulls
|
|
Ignore null values (default).
|
|
|
|
If set to ``False``, null values will be propagated.
|
|
If the sub-list contains any null values, the output is ``None``.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`String`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series([["foo", "bar"], ["hello", "world"]])
|
|
>>> s.list.join(separator="-")
|
|
shape: (2,)
|
|
Series: '' [str]
|
|
[
|
|
"foo-bar"
|
|
"hello-world"
|
|
]
|
|
"""
|
|
|
|
def first(self) -> Series:
|
|
"""
|
|
Get the first value of the sublists.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [], [1, 2]])
|
|
>>> s.list.first()
|
|
shape: (3,)
|
|
Series: 'a' [i64]
|
|
[
|
|
3
|
|
null
|
|
1
|
|
]
|
|
"""
|
|
|
|
def last(self) -> Series:
|
|
"""
|
|
Get the last value of the sublists.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [], [1, 2]])
|
|
>>> s.list.last()
|
|
shape: (3,)
|
|
Series: 'a' [i64]
|
|
[
|
|
1
|
|
null
|
|
2
|
|
]
|
|
"""
|
|
|
|
@unstable()
|
|
def item(self) -> Series:
|
|
"""
|
|
Get the single value of the sublists.
|
|
|
|
This errors if the sublist length is not exactly one.
|
|
|
|
See Also
|
|
--------
|
|
:meth:`Series.list.get` : Get the value by index in the sublists.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1], [4], [6]])
|
|
>>> s.list.item()
|
|
shape: (3,)
|
|
Series: 'a' [i64]
|
|
[
|
|
1
|
|
4
|
|
6
|
|
]
|
|
>>> df = pl.Series("a", [[3, 2, 1], [1], [2]])
|
|
>>> df.list.item()
|
|
Traceback (most recent call last):
|
|
...
|
|
polars.exceptions.ComputeError: aggregation 'item' expected a single value, got 3 values
|
|
""" # noqa: W505
|
|
|
|
def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Series:
|
|
"""
|
|
Check if sublists contain the given item.
|
|
|
|
Parameters
|
|
----------
|
|
item
|
|
Item that will be checked for membership
|
|
nulls_equal : bool, default True
|
|
If True, treat null as a distinct value. Null values will not propagate.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`Boolean`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[3, 2, 1], [], [1, 2]])
|
|
>>> s.list.contains(1)
|
|
shape: (3,)
|
|
Series: 'a' [bool]
|
|
[
|
|
true
|
|
false
|
|
true
|
|
]
|
|
"""
|
|
|
|
def arg_min(self) -> Series:
|
|
"""
|
|
Retrieve the index of the minimal value in every sublist.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`UInt32` or :class:`UInt64`
|
|
(depending on compilation).
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2], [2, 1]])
|
|
>>> s.list.arg_min()
|
|
shape: (2,)
|
|
Series: 'a' [u32]
|
|
[
|
|
0
|
|
1
|
|
]
|
|
"""
|
|
|
|
def arg_max(self) -> Series:
|
|
"""
|
|
Retrieve the index of the maximum value in every sublist.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`UInt32` or :class:`UInt64`
|
|
(depending on compilation).
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2], [2, 1]])
|
|
>>> s.list.arg_max()
|
|
shape: (2,)
|
|
Series: 'a' [u32]
|
|
[
|
|
1
|
|
0
|
|
]
|
|
"""
|
|
|
|
def diff(self, n: int = 1, null_behavior: NullBehavior = "ignore") -> Series:
|
|
"""
|
|
Calculate the first discrete difference between shifted items of every sublist.
|
|
|
|
Parameters
|
|
----------
|
|
n
|
|
Number of slots to shift.
|
|
null_behavior : {'ignore', 'drop'}
|
|
How to handle null values.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
|
|
>>> s.list.diff()
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[null, 1, … 1]
|
|
[null, -8, -1]
|
|
]
|
|
|
|
>>> s.list.diff(n=2)
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[null, null, … 2]
|
|
[null, null, -9]
|
|
]
|
|
|
|
>>> s.list.diff(n=2, null_behavior="drop")
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[2, 2]
|
|
[-9]
|
|
]
|
|
"""
|
|
|
|
def shift(self, n: int | IntoExprColumn = 1) -> Series:
|
|
"""
|
|
Shift list values by the given number of indices.
|
|
|
|
Parameters
|
|
----------
|
|
n
|
|
Number of indices to shift forward. If a negative value is passed, values
|
|
are shifted in the opposite direction instead.
|
|
|
|
Notes
|
|
-----
|
|
This method is similar to the `LAG` operation in SQL when the value for `n`
|
|
is positive. With a negative value for `n`, it is similar to `LEAD`.
|
|
|
|
Examples
|
|
--------
|
|
By default, list values are shifted forward by one index.
|
|
|
|
>>> s = pl.Series([[1, 2, 3], [4, 5]])
|
|
>>> s.list.shift()
|
|
shape: (2,)
|
|
Series: '' [list[i64]]
|
|
[
|
|
[null, 1, 2]
|
|
[null, 4]
|
|
]
|
|
|
|
Pass a negative value to shift in the opposite direction instead.
|
|
|
|
>>> s.list.shift(-2)
|
|
shape: (2,)
|
|
Series: '' [list[i64]]
|
|
[
|
|
[3, null, null]
|
|
[null, null]
|
|
]
|
|
"""
|
|
|
|
def slice(self, offset: int | Expr, length: int | Expr | None = None) -> Series:
|
|
"""
|
|
Slice every sublist.
|
|
|
|
Parameters
|
|
----------
|
|
offset
|
|
Start index. Negative indexing is supported.
|
|
length
|
|
Length of the slice. If set to `None` (default), the slice is taken to the
|
|
end of the list.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
|
|
>>> s.list.slice(1, 2)
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[2, 3]
|
|
[2, 1]
|
|
]
|
|
"""
|
|
|
|
def head(self, n: int | Expr = 5) -> Series:
|
|
"""
|
|
Slice the first `n` values of every sublist.
|
|
|
|
Parameters
|
|
----------
|
|
n
|
|
Number of values to return for each sublist.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
|
|
>>> s.list.head(2)
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[1, 2]
|
|
[10, 2]
|
|
]
|
|
"""
|
|
|
|
def tail(self, n: int | Expr = 5) -> Series:
|
|
"""
|
|
Slice the last `n` values of every sublist.
|
|
|
|
Parameters
|
|
----------
|
|
n
|
|
Number of values to return for each sublist.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2, 3, 4], [10, 2, 1]])
|
|
>>> s.list.tail(2)
|
|
shape: (2,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[3, 4]
|
|
[2, 1]
|
|
]
|
|
"""
|
|
|
|
def explode(self) -> Series:
|
|
"""
|
|
Returns a column with a separate row for every list element.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series with the data type of the list elements.
|
|
|
|
See Also
|
|
--------
|
|
Series.reshape : Reshape this Series to a flat Series or a Series of Lists.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 2, 3], [4, 5, 6]])
|
|
>>> s.list.explode()
|
|
shape: (6,)
|
|
Series: 'a' [i64]
|
|
[
|
|
1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
]
|
|
"""
|
|
|
|
def count_matches(self, element: IntoExpr) -> Series:
|
|
"""
|
|
Count how often the value produced by `element` occurs.
|
|
|
|
Parameters
|
|
----------
|
|
element
|
|
An expression that produces a single value
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[0], [1], [1, 2, 3, 2], [1, 2, 1], [4, 4]])
|
|
>>> s.list.count_matches(1)
|
|
shape: (5,)
|
|
Series: 'a' [u32]
|
|
[
|
|
0
|
|
1
|
|
1
|
|
2
|
|
0
|
|
]
|
|
"""
|
|
|
|
def to_array(self, width: int) -> Series:
|
|
"""
|
|
Convert a List column into an Array column with the same inner data type.
|
|
|
|
Parameters
|
|
----------
|
|
width
|
|
Width of the resulting Array column.
|
|
|
|
Returns
|
|
-------
|
|
Series
|
|
Series of data type :class:`Array`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series([[1, 2], [3, 4]], dtype=pl.List(pl.Int8))
|
|
>>> s.list.to_array(2)
|
|
shape: (2,)
|
|
Series: '' [array[i8, 2]]
|
|
[
|
|
[1, 2]
|
|
[3, 4]
|
|
]
|
|
"""
|
|
|
|
def to_struct(
|
|
self,
|
|
n_field_strategy: ListToStructWidthStrategy = "first_non_null",
|
|
fields: Callable[[int], str] | Sequence[str] | None = None,
|
|
) -> Series:
|
|
"""
|
|
Convert the series of type `List` to a series of type `Struct`.
|
|
|
|
Parameters
|
|
----------
|
|
n_field_strategy : {'first_non_null', 'max_width'}
|
|
Strategy to determine the number of fields of the struct.
|
|
|
|
* "first_non_null": set number of fields equal to the length of the
|
|
first non zero-length sublist.
|
|
* "max_width": set number of fields as max length of all sublists.
|
|
fields
|
|
If the name and number of the desired fields is known in advance
|
|
a list of field names can be given, which will be assigned by index.
|
|
Otherwise, to dynamically assign field names, a custom function can be
|
|
used; if neither are set, fields will be `field_0, field_1 .. field_n`.
|
|
|
|
Examples
|
|
--------
|
|
Convert list to struct with default field name assignment:
|
|
|
|
>>> s1 = pl.Series("n", [[0, 1, 2], [0, 1]])
|
|
>>> s2 = s1.list.to_struct()
|
|
>>> s2
|
|
shape: (2,)
|
|
Series: 'n' [struct[3]]
|
|
[
|
|
{0,1,2}
|
|
{0,1,null}
|
|
]
|
|
>>> s2.struct.fields
|
|
['field_0', 'field_1', 'field_2']
|
|
|
|
Convert list to struct with field name assignment by function/index:
|
|
|
|
>>> s3 = s1.list.to_struct(fields=lambda idx: f"n{idx:02}")
|
|
>>> s3.struct.fields
|
|
['n00', 'n01', 'n02']
|
|
|
|
Convert list to struct with field name assignment by index from a list of names:
|
|
|
|
>>> s1.list.to_struct(fields=["one", "two", "three"]).struct.unnest()
|
|
shape: (2, 3)
|
|
┌─────┬─────┬───────┐
|
|
│ one ┆ two ┆ three │
|
|
│ --- ┆ --- ┆ --- │
|
|
│ i64 ┆ i64 ┆ i64 │
|
|
╞═════╪═════╪═══════╡
|
|
│ 0 ┆ 1 ┆ 2 │
|
|
│ 0 ┆ 1 ┆ null │
|
|
└─────┴─────┴───────┘
|
|
"""
|
|
if isinstance(fields, Sequence):
|
|
s = wrap_s(self._s)
|
|
return (
|
|
s.to_frame()
|
|
.select_seq(F.col(s.name).list.to_struct(fields=fields))
|
|
.to_series()
|
|
)
|
|
|
|
return wrap_s(self._s.list_to_struct(n_field_strategy, fields))
|
|
|
|
def eval(self, expr: Expr, *, parallel: bool = False) -> Series:
|
|
"""
|
|
Run any polars expression against the lists' elements.
|
|
|
|
Parameters
|
|
----------
|
|
expr
|
|
Expression to run. Note that you can select an element with `pl.first()`, or
|
|
`pl.col()`
|
|
parallel
|
|
Run all expression parallel. Don't activate this blindly.
|
|
Parallelism is worth it if there is enough work to do per thread.
|
|
|
|
This likely should not be use in the group by context, because we already
|
|
parallel execution per group
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, 4], [8, 5], [3, 2]])
|
|
>>> s.list.eval(pl.element().rank())
|
|
shape: (3,)
|
|
Series: 'a' [list[f64]]
|
|
[
|
|
[1.0, 2.0]
|
|
[2.0, 1.0]
|
|
[2.0, 1.0]
|
|
]
|
|
"""
|
|
|
|
def agg(self, expr: Expr) -> Series:
|
|
"""
|
|
|
|
Run any polars aggregation expression against the list' elements.
|
|
|
|
Parameters
|
|
----------
|
|
expr
|
|
Expression to run. Note that you can select an element with `pl.element()`.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pl.Series("a", [[1, None], [42, 13], [None, None]])
|
|
>>> s.list.agg(pl.element().null_count())
|
|
shape: (3,)
|
|
Series: 'a' [u32]
|
|
[
|
|
1
|
|
0
|
|
2
|
|
]
|
|
>>> s.list.agg(pl.element().drop_nulls())
|
|
shape: (3,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[1]
|
|
[42, 13]
|
|
[]
|
|
]
|
|
"""
|
|
|
|
def filter(self, predicate: Expr) -> Series:
|
|
"""
|
|
Filter elements in each list by a boolean expression, returning a new Series of lists.
|
|
|
|
Parameters
|
|
----------
|
|
predicate
|
|
A boolean expression evaluated on each list element.
|
|
Use `pl.element()` to refer to the current element.
|
|
|
|
Examples
|
|
--------
|
|
>>> import polars as pl
|
|
>>> s = pl.Series("a", [[1, 4], [8, 5], [3, 2]])
|
|
>>> s.list.filter(pl.element() % 2 == 0)
|
|
shape: (3,)
|
|
Series: 'a' [list[i64]]
|
|
[
|
|
[4]
|
|
[8]
|
|
[2]
|
|
]
|
|
""" # noqa: W505
|
|
|
|
def set_union(self, other: Series | Collection[Any]) -> Series:
|
|
"""
|
|
Compute the SET UNION between the elements in this list and the elements of `other`.
|
|
|
|
Parameters
|
|
----------
|
|
other
|
|
Right hand side of the set operation.
|
|
|
|
Examples
|
|
--------
|
|
>>> a = pl.Series([[1, 2, 3], [], [None, 3], [5, 6, 7]])
|
|
>>> b = pl.Series([[2, 3, 4], [3], [3, 4, None], [6, 8]])
|
|
>>> a.list.set_union(b) # doctest: +IGNORE_RESULT
|
|
shape: (4,)
|
|
Series: '' [list[i64]]
|
|
[
|
|
[1, 2, 3, 4]
|
|
[3]
|
|
[null, 3, 4]
|
|
[5, 6, 7, 8]
|
|
]
|
|
""" # noqa: W505
|
|
|
|
def set_difference(self, other: Series | Collection[Any]) -> Series:
|
|
"""
|
|
Compute the SET DIFFERENCE between the elements in this list and the elements of `other`.
|
|
|
|
Parameters
|
|
----------
|
|
other
|
|
Right hand side of the set operation.
|
|
|
|
See Also
|
|
--------
|
|
polars.Series.list.diff: Calculates the n-th discrete difference of every sublist.
|
|
|
|
Examples
|
|
--------
|
|
>>> a = pl.Series([[1, 2, 3], [], [None, 3], [5, 6, 7]])
|
|
>>> b = pl.Series([[2, 3, 4], [3], [3, 4, None], [6, 8]])
|
|
>>> a.list.set_difference(b)
|
|
shape: (4,)
|
|
Series: '' [list[i64]]
|
|
[
|
|
[1]
|
|
[]
|
|
[]
|
|
[5, 7]
|
|
]
|
|
""" # noqa: W505
|
|
|
|
def set_intersection(self, other: Series | Collection[Any]) -> Series:
|
|
"""
|
|
Compute the SET INTERSECTION between the elements in this list and the elements of `other`.
|
|
|
|
Parameters
|
|
----------
|
|
other
|
|
Right hand side of the set operation.
|
|
|
|
Examples
|
|
--------
|
|
>>> a = pl.Series([[1, 2, 3], [], [None, 3], [5, 6, 7]])
|
|
>>> b = pl.Series([[2, 3, 4], [3], [3, 4, None], [6, 8]])
|
|
>>> a.list.set_intersection(b)
|
|
shape: (4,)
|
|
Series: '' [list[i64]]
|
|
[
|
|
[2, 3]
|
|
[]
|
|
[null, 3]
|
|
[6]
|
|
]
|
|
""" # noqa: W505
|
|
|
|
def set_symmetric_difference(self, other: Series | Collection[Any]) -> Series:
|
|
"""
|
|
Compute the SET SYMMETRIC DIFFERENCE between the elements in this list and the elements of `other`.
|
|
|
|
Parameters
|
|
----------
|
|
other
|
|
Right hand side of the set operation.
|
|
|
|
Examples
|
|
--------
|
|
>>> a = pl.Series([[1, 2, 3], [], [None, 3], [5, 6, 7]])
|
|
>>> b = pl.Series([[2, 3, 4], [3], [3, 4, None], [6, 8]])
|
|
>>> a.list.set_symmetric_difference(b)
|
|
shape: (4,)
|
|
Series: '' [list[i64]]
|
|
[
|
|
[1, 4]
|
|
[3]
|
|
[4]
|
|
[5, 7, 8]
|
|
]
|
|
""" # noqa: W505
|