DriverTrac/venv/lib/python3.12/site-packages/polars/series/array.py

from __future__ import annotations

from typing import TYPE_CHECKING, Callable

from polars import functions as F
from polars._utils.wrap import wrap_s
from polars.series.utils import expr_dispatch

if TYPE_CHECKING:
    from collections.abc import Sequence

    from polars import Series
    from polars._plr import PySeries
    from polars._typing import IntoExpr, IntoExprColumn
    from polars.expr.expr import Expr


@expr_dispatch
class ArrayNameSpace:
    """Namespace for array related methods."""

    _accessor = "arr"

    def __init__(self, series: Series) -> None:
        self._s: PySeries = series._s

    def min(self) -> Series:
        """
        Compute the min values of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.min()
        shape: (2,)
        Series: 'a' [i64]
        [
            1
            3
        ]
        """

    def max(self) -> Series:
        """
        Compute the max values of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.max()
        shape: (2,)
        Series: 'a' [i64]
        [
            2
            4
        ]
        """

    def sum(self) -> Series:
        """
        Compute the sum values of the sub-arrays.

        Notes
        -----
        If there are no non-null elements in a row, the output is `0`.

        Examples
        --------
        >>> s = pl.Series([[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.sum()
        shape: (2,)
        Series: '' [i64]
        [
            3
            7
        ]
        """

    def std(self, ddof: int = 1) -> Series:
        """
        Compute the std of the values of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.std()
        shape: (2,)
        Series: 'a' [f64]
        [
            0.707107
            0.707107
        ]
        """

    def var(self, ddof: int = 1) -> Series:
        """
        Compute the var of the values of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.var()
        shape: (2,)
        Series: 'a' [f64]
        [
                0.5
                0.5
        ]
        """

    def median(self) -> Series:
        """
        Compute the median of the values of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.median()
        shape: (2,)
        Series: 'a' [f64]
        [
            1.5
            3.5
        ]
        """

    def unique(self, *, maintain_order: bool = False) -> Series:
        """
        Get the unique/distinct values in the array.

        Parameters
        ----------
        maintain_order
            Maintain order of data. This requires more work.

        Returns
        -------
        Series
            Series of data type :class:`List`.

        Examples
        --------
        >>> s = pl.Series([[1, 1, 2], [3, 4, 5]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.unique()
        shape: (2,)
        Series: '' [list[i64]]
        [
            [1, 2]
            [3, 4, 5]
        ]
        """

    def n_unique(self) -> Series:
        """
        Count the number of unique values in every sub-arrays.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 4]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.n_unique()
        shape: (2,)
        Series: 'a' [u32]
        [
            2
            1
        ]
        """

    def to_list(self) -> Series:
        """
        Convert an Array column into a List column with the same inner data type.

        Returns
        -------
        Series
            Series of data type :class:`List`.

        Examples
        --------
        >>> s = pl.Series([[1, 2], [3, 4]], dtype=pl.Array(pl.Int8, 2))
        >>> s.arr.to_list()
        shape: (2,)
        Series: '' [list[i8]]
        [
                [1, 2]
                [3, 4]
        ]
        """

    def any(self) -> Series:
        """
        Evaluate whether any boolean value is true for every subarray.

        Returns
        -------
        Series
            Series of data type :class:`Boolean`.

        Notes
        -----
        If there are no non-null elements in a row, the output is `False`.

        Examples
        --------
        >>> s = pl.Series(
        ...     [[True, True], [False, True], [False, False], [None, None], None],
        ...     dtype=pl.Array(pl.Boolean, 2),
        ... )
        >>> s.arr.any()
        shape: (5,)
        Series: '' [bool]
        [
            true
            true
            false
            false
            null
        ]
        """

    def len(self) -> Series:
        """
        Return the number of elements in each array.

        Returns
        -------
        Series
            Series of data type :class:`UInt32`.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(pl.Int64, 2))
        >>> s.arr.len()
        shape: (2,)
        Series: 'a' [u32]
        [
            2
            2
        ]
        """

    def slice(
        self,
        offset: int | Expr,
        length: int | Expr | None = None,
        *,
        as_array: bool = False,
    ) -> Series:
        """
        Slice the sub-arrays.

        Parameters
        ----------
        offset
            The starting index of the slice.
        length
            The length of the slice.
        as_array
            Return the result as a Series of data type :class:`.Array`.

        Returns
        -------
        Series
            Series of data type :class:`.List` or :class:`.Array` if `as_array=True`.

        Examples
        --------
        >>> s = pl.Series(
        ...     [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
        ...     dtype=pl.Array(pl.Int64, 6),
        ... )
        >>> s.arr.slice(1)
        shape: (2,)
        Series: '' [list[i64]]
        [
            [2, 3, … 6]
            [8, 9, … 12]
        ]
        >>> s.arr.slice(1, 3, as_array=True)
        shape: (2,)
        Series: '' [array[i64, 3]]
        [
            [2, 3, 4]
            [8, 9, 10]
        ]
        >>> s.arr.slice(-2)
        shape: (2,)
        Series: '' [list[i64]]
        [
            [5, 6]
            [11, 12]
        ]
        """

    def head(self, n: int | Expr = 5, *, as_array: bool = False) -> Series:
        """
        Get the first `n` elements of the sub-arrays.

        Parameters
        ----------
        n
            Number of values to return for each sublist.
        as_array
            Return result as a fixed-length `Array`, otherwise as a `List`.
            If true `n` must be a constant value.

        Examples
        --------
        >>> s = pl.Series(
        ...     [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
        ...     dtype=pl.Array(pl.Int64, 6),
        ... )
        >>> s.arr.head()
        shape: (2,)
        Series: '' [list[i64]]
        [
            [1, 2, … 5]
            [7, 8, … 11]
        ]
        >>> s.arr.head(3, as_array=True)
        shape: (2,)
        Series: '' [array[i64, 3]]
        [
            [1, 2, 3]
            [7, 8, 9]
        ]
        """

    def tail(self, n: int | Expr = 5, *, as_array: bool = False) -> Series:
        """
        Slice the last `n` values of every sublist.

        Parameters
        ----------
        n
            Number of values to return for each sublist.
        as_array
            Return result as a fixed-length `Array`, otherwise as a `List`.
            If true `n` must be a constant value.

        Examples
        --------
        >>> s = pl.Series(
        ...     [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]],
        ...     dtype=pl.Array(pl.Int64, 6),
        ... )
        >>> s.arr.tail()
        shape: (2,)
        Series: '' [list[i64]]
        [
            [2, 3, … 6]
            [8, 9, … 12]
        ]
        >>> s.arr.tail(3, as_array=True)
        shape: (2,)
        Series: '' [array[i64, 3]]
        [
            [4, 5, 6]
            [10, 11, 12]
        ]
        """

    def all(self) -> Series:
        """
        Evaluate whether all boolean values are true for every subarray.

        Returns
        -------
        Series
            Series of data type :class:`Boolean`.

        Notes
        -----
        If there are no non-null elements in a row, the output is `True`.

        Examples
        --------
        >>> s = pl.Series(
        ...     [[True, True], [False, True], [False, False], [None, None], None],
        ...     dtype=pl.Array(pl.Boolean, 2),
        ... )
        >>> s.arr.all()
        shape: (5,)
        Series: '' [bool]
        [
            true
            false
            false
            true
            null
        ]
        """

    def sort(
        self,
        *,
        descending: bool = False,
        nulls_last: bool = False,
        multithreaded: bool = True,
    ) -> Series:
        """
        Sort the arrays in this column.

        Parameters
        ----------
        descending
            Sort in descending order.
        nulls_last
            Place null values last.
        multithreaded
            Sort using multiple threads.

        Examples
        --------
        >>> s = pl.Series("a", [[3, 2, 1], [9, 1, 2]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.sort()
        shape: (2,)
        Series: 'a' [array[i64, 3]]
        [
            [1, 2, 3]
            [1, 2, 9]
        ]
        >>> s.arr.sort(descending=True)
        shape: (2,)
        Series: 'a' [array[i64, 3]]
        [
            [3, 2, 1]
            [9, 2, 1]
        ]

        """

    def reverse(self) -> Series:
        """
        Reverse the arrays in this column.

        Examples
        --------
        >>> s = pl.Series("a", [[3, 2, 1], [9, 1, 2]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.reverse()
        shape: (2,)
        Series: 'a' [array[i64, 3]]
        [
            [1, 2, 3]
            [2, 1, 9]
        ]

        """

    def arg_min(self) -> Series:
        """
        Retrieve the index of the minimal value in every sub-array.

        Returns
        -------
        Series
            Series of data type :class:`UInt32` or :class:`UInt64`
            (depending on compilation).

        Examples
        --------
        >>> s = pl.Series("a", [[3, 2, 1], [9, 1, 2]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.arg_min()
        shape: (2,)
        Series: 'a' [u32]
        [
            2
            1
        ]

        """

    def arg_max(self) -> Series:
        """
        Retrieve the index of the maximum value in every sub-array.

        Returns
        -------
        Series
            Series of data type :class:`UInt32` or :class:`UInt64`
            (depending on compilation).

        Examples
        --------
        >>> s = pl.Series("a", [[0, 9, 3], [9, 1, 2]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.arg_max()
        shape: (2,)
        Series: 'a' [u32]
        [
            1
            0
        ]

        """

    def get(self, index: int | IntoExprColumn, *, null_on_oob: bool = False) -> Series:
        """
        Get the value by index in the sub-arrays.

        So index `0` would return the first item of every sublist
        and index `-1` would return the last item of every sublist
        if an index is out of bounds, it will return a `None`.

        Parameters
        ----------
        index
            Index to return per sublist
        null_on_oob
            Behavior if an index is out of bounds:
            True -> set as null
            False -> raise an error

        Returns
        -------
        Series
            Series of innter data type.

        Examples
        --------
        >>> s = pl.Series(
        ...     "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=pl.Array(pl.Int32, 3)
        ... )
        >>> s.arr.get(pl.Series([1, -2, 0]), null_on_oob=True)
        shape: (3,)
        Series: 'a' [i32]
        [
            2
            5
            7
        ]

        """

    def first(self) -> Series:
        """
        Get the first value of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series(
        ...     "a", [[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=pl.Array(pl.Int32, 3)
        ... )
        >>> s.arr.first()
        shape: (3,)
        Series: 'a' [i32]
        [
            1
            4
            7
        ]

        """

    def last(self) -> Series:
        """
        Get the last value of the sub-arrays.

        Examples
        --------
        >>> s = pl.Series(
        ...     "a", [[1, 2, 3], [4, 5, 6], [7, 9, 8]], dtype=pl.Array(pl.Int32, 3)
        ... )
        >>> s.arr.last()
        shape: (3,)
        Series: 'a' [i32]
        [
            3
            6
            8
        ]

        """

    def join(self, separator: IntoExprColumn, *, ignore_nulls: bool = True) -> Series:
        """
        Join all string items in a sub-array and place a separator between them.

        This errors if inner type of array `!= String`.

        Parameters
        ----------
        separator
            string to separate the items with
        ignore_nulls
            Ignore null values (default).

            If set to ``False``, null values will be propagated.
            If the sub-list contains any null values, the output is ``None``.

        Returns
        -------
        Series
            Series of data type :class:`String`.

        Examples
        --------
        >>> s = pl.Series([["x", "y"], ["a", "b"]], dtype=pl.Array(pl.String, 2))
        >>> s.arr.join(separator="-")
        shape: (2,)
        Series: '' [str]
        [
            "x-y"
            "a-b"
        ]

        """

    def explode(self) -> Series:
        """
        Returns a column with a separate row for every array element.

        Returns
        -------
        Series
            Series with the data type of the array elements.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.explode()
        shape: (6,)
        Series: 'a' [i64]
        [
            1
            2
            3
            4
            5
            6
        ]
        """

    def contains(self, item: IntoExpr, *, nulls_equal: bool = True) -> Series:
        """
        Check if sub-arrays contain the given item.

        Parameters
        ----------
        item
            Item that will be checked for membership
        nulls_equal : bool, default True
            If True, treat null as a distinct value. Null values will not propagate.

        Returns
        -------
        Series
            Series of data type :class:`Boolean`.

        Examples
        --------
        >>> s = pl.Series(
        ...     "a", [[3, 2, 1], [1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int32, 3)
        ... )
        >>> s.arr.contains(1)
        shape: (3,)
        Series: 'a' [bool]
        [
            true
            true
            false
        ]

        """

    def count_matches(self, element: IntoExpr) -> Series:
        """
        Count how often the value produced by `element` occurs.

        Parameters
        ----------
        element
            An expression that produces a single value

        Examples
        --------
        >>> s = pl.Series("a", [[1, 2, 3], [2, 2, 2]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.count_matches(2)
        shape: (2,)
        Series: 'a' [u32]
        [
            1
            3
        ]

        """

    def to_struct(
        self,
        fields: Callable[[int], str] | Sequence[str] | None = None,
    ) -> Series:
        """
        Convert the series of type `Array` to a series of type `Struct`.

        Parameters
        ----------
        fields
            If the name and number of the desired fields is known in advance
            a list of field names can be given, which will be assigned by index.
            Otherwise, to dynamically assign field names, a custom function can be
            used; if neither are set, fields will be `field_0, field_1 .. field_n`.

        Examples
        --------
        Convert array to struct with default field name assignment:

        >>> s1 = pl.Series("n", [[0, 1, 2], [3, 4, 5]], dtype=pl.Array(pl.Int8, 3))
        >>> s2 = s1.arr.to_struct()
        >>> s2
        shape: (2,)
        Series: 'n' [struct[3]]
        [
            {0,1,2}
            {3,4,5}
        ]
        >>> s2.struct.fields
        ['field_0', 'field_1', 'field_2']

        Convert array to struct with field name assignment by function/index:

        >>> s3 = s1.arr.to_struct(fields=lambda idx: f"n{idx:02}")
        >>> s3.struct.fields
        ['n00', 'n01', 'n02']

        Convert array to struct with field name assignment by
        index from a list of names:

        >>> s1.arr.to_struct(fields=["one", "two", "three"]).struct.unnest()
        shape: (2, 3)
        ┌─────┬─────┬───────┐
        │ one ┆ two ┆ three │
        │ --- ┆ --- ┆ ---   │
        │ i8  ┆ i8  ┆ i8    │
        ╞═════╪═════╪═══════╡
        │ 0   ┆ 1   ┆ 2     │
        │ 3   ┆ 4   ┆ 5     │
        └─────┴─────┴───────┘
        """
        s = wrap_s(self._s)
        return s.to_frame().select(F.col(s.name).arr.to_struct(fields)).to_series()

    def shift(self, n: int | IntoExprColumn = 1) -> Series:
        """
        Shift array values by the given number of indices.

        Parameters
        ----------
        n
            Number of indices to shift forward. If a negative value is passed, values
            are shifted in the opposite direction instead.

        Notes
        -----
        This method is similar to the `LAG` operation in SQL when the value for `n`
        is positive. With a negative value for `n`, it is similar to `LEAD`.

        Examples
        --------
        By default, array values are shifted forward by one index.

        >>> s = pl.Series([[1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int64, 3))
        >>> s.arr.shift()
        shape: (2,)
        Series: '' [array[i64, 3]]
        [
            [null, 1, 2]
            [null, 4, 5]
        ]

        Pass a negative value to shift in the opposite direction instead.

        >>> s.arr.shift(-2)
        shape: (2,)
        Series: '' [array[i64, 3]]
        [
            [3, null, null]
            [6, null, null]
        ]
        """

    def eval(self, expr: Expr, *, as_list: bool = False) -> Series:
        """
        Run any polars expression against the arrays' elements.

        Parameters
        ----------
        expr
            Expression to run. Note that you can select an element with `pl.element()`
        as_list
            Collect the resulting data as a list. This allows for expressions which
            output a variable amount of data.

        Examples
        --------
        >>> s = pl.Series("a", [[1, 4], [8, 5], [3, 2]], pl.Array(pl.Int64, 2))
        >>> s.arr.eval(pl.element().rank())
        shape: (3,)
        Series: 'a' [array[f64, 2]]
        [
            [1.0, 2.0]
            [2.0, 1.0]
            [2.0, 1.0]
        ]
        """

    def agg(self, expr: Expr) -> Series:
        """
        Run any polars aggregation expression against the arrays' elements.

        Parameters
        ----------
        expr
            Expression to run. Note that you can select an element with `pl.element()`.

        Examples
        --------
        >>> s = pl.Series(
        ...     "a", [[1, None], [42, 13], [None, None]], pl.Array(pl.Int64, 2)
        ... )
        >>> s.arr.agg(pl.element().null_count())
        shape: (3,)
        Series: 'a' [u32]
        [
            1
            0
            2
        ]
        >>> s.arr.agg(pl.element().drop_nulls())
        shape: (3,)
        Series: 'a' [list[i64]]
        [
            [1]
            [42, 13]
            []
        ]
        """